In [1]:
import re
import json
import numpy as np

from pathlib import Path
from datasets import load_dataset



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
"""
A simple evaluator that:
1. Loads the benchmark data
2. Parses the prediction string
3. Evaluates success based on the `delta` parameter
"""

class QSpatialEvaluator:
    """Score a VLM's free-form distance answer against a benchmark ground truth.

    The response is expected to contain a ``scalar{<number>}`` box and a
    ``distance_unit{<unit>}`` box. When a box occurs several times, the last
    occurrence is used. Both the prediction and the ground truth are converted
    to centimetres, and the prediction succeeds when
    ``max(pred / gt, gt / pred) < delta``.
    """

    # Success threshold on the symmetric ratio between prediction and ground truth.
    delta = 2

    # Centimetres per unit, keyed by the normalised (stripped, lower-cased) unit name.
    _CM_PER_UNIT = {
        "m": 100, "meter": 100, "meters": 100, "metre": 100, "metres": 100,
        "cm": 1, "centimeter": 1, "centimeters": 1,
        "centimetre": 1, "centimetres": 1,
        "ft": 30.48, "foot": 30.48, "feet": 30.48,
        "in": 2.54, "inch": 2.54, "inches": 2.54,
        "mm": 0.1, "millimeter": 0.1, "millimeters": 0.1,
        "millimetre": 0.1, "millimetres": 0.1,
    }

    _SCALAR_BOX = re.compile(r'scalar\{([^}]*)\}')
    _UNIT_BOX = re.compile(r'distance_unit\{([^}]*)\}')
    # The leading-dot alternative comes first so ".5" is read as 0.5, not 5.
    _NUMBER = re.compile(r'\d*\.\d+|\d+\.?\d*')

    def __init__(self, benchmark_split):
        """Load the requested split of the benchmark.

        Args:
            benchmark_split: Either ``"QSpatial_plus"`` or ``"QSpatial_scannet"``.

        Raises:
            AssertionError: If ``benchmark_split`` is not one of the two names.
        """
        assert benchmark_split in ["QSpatial_plus", "QSpatial_scannet"], (
            f"Unknown benchmark split: {benchmark_split}"
        )
        self.dataset = load_dataset("andrewliao11/Q-Spatial-Bench", split=benchmark_split)

    def evaluate(self, data_ind, vlm_response):
        """Evaluate one VLM response against the ground truth at ``data_ind``.

        Args:
            data_ind: Index of the benchmark item in ``self.dataset``.
            vlm_response: Raw response text containing the ``scalar{...}`` and
                ``distance_unit{...}`` boxes.

        Returns:
            dict with ``ground_truth_value_in_cms`` (float),
            ``pred_value_in_cms`` (float; NaN when the scalar box holds no
            number) and ``success`` (bool). All values are builtin Python
            types, so the result can be passed to ``json.dumps``.

        Raises:
            IndexError: If the response has no ``scalar{...}`` box or no
                ``distance_unit{...}`` box.
        """
        #### Parse ground truth
        value = self.dataset["answer_value"][data_ind]
        unit = self.dataset["answer_unit"][data_ind]
        ground_truth_value_in_cms = float(value) * self._get_multiplier(unit)

        #### Parse prediction
        # Value: several numbers in the box (e.g. a range "1.5-2.5") are averaged.
        scalar_boxes = self._SCALAR_BOX.findall(vlm_response)
        if not scalar_boxes:
            raise IndexError("No scalar{...} box found in the VLM response")
        number_tokens = self._NUMBER.findall(scalar_boxes[-1])
        if number_tokens:
            parsed_scalar = float(np.mean([float(tok) for tok in number_tokens]))
        else:
            # An empty box cannot be scored; NaN makes the comparison below fail.
            parsed_scalar = float("nan")

        # Unit
        unit_boxes = self._UNIT_BOX.findall(vlm_response)
        if not unit_boxes:
            raise IndexError("No distance_unit{...} box found in the VLM response")
        parsed_unit = unit_boxes[-1]

        pred_value_in_cms = parsed_scalar * self._get_multiplier(parsed_unit)

        # Guard against zero / NaN so the ratio never divides by zero; such
        # predictions simply count as failures.
        if pred_value_in_cms > 0 and ground_truth_value_in_cms > 0:
            ratio = max(
                pred_value_in_cms / ground_truth_value_in_cms,
                ground_truth_value_in_cms / pred_value_in_cms,
            )
            success = bool(ratio < self.delta)
        else:
            success = False

        return dict(
            ground_truth_value_in_cms = ground_truth_value_in_cms,
            pred_value_in_cms = pred_value_in_cms,
            success = success
        )

    def _get_multiplier(self, unit):
        """Return the factor that converts a value in ``unit`` to centimetres.

        The unit is matched case-insensitively after stripping surrounding
        whitespace and a trailing period (so ``" Meters "`` and ``"ft."`` work).
        An unrecognised unit is reported on stdout and treated as centimetres
        (multiplier 1) instead of raising, so one odd response does not abort
        a whole evaluation run.
        """
        unit = unit.strip().rstrip(".").lower()
        multiplier = self._CM_PER_UNIT.get(unit)
        if multiplier is None:
            print(f"Unknown unit: {unit}")
            multiplier = 1

        return multiplier

In [3]:
# Build the evaluator for the "QSpatial_plus" split; the constructor loads that
# split of the benchmark through `load_dataset`.
evaluator = QSpatialEvaluator(benchmark_split="QSpatial_plus")

In [4]:
# Example VLM response from GPT-4o.
# NOTE: the line breaks inside this sample are stored as escaped text (a literal
# backslash followed by "n"), so `print` shows them verbatim instead of breaking
# the line. The evaluator only reads the last scalar{...} and distance_unit{...}
# boxes, so this does not affect the score.
vlm_response = "To determine the minimum distance between the two speckled pattern stool chairs in the image, let's follow these steps:\\n\\n1. **Identify the Stools**: Locate the two speckled pattern stools in the image. They are positioned in front of the couches.\\n\\n2. **Reference Points**: Choose reference points on each stool to measure the distance. The closest points on the stools would be the edges facing each other.\\n\\n3. **Estimate the Distance**: Visually estimate the distance between these two closest points. Given the perspective and the relative size of the stools, we can approximate the distance.\\n\\nConsidering the size of the stools and the space between them, the minimum distance between the two speckled pattern stool chairs is approximately:\\n\\n\\\\scalar{1} \\\\distance_unit{meter}\n"

# Show the raw response, then score it against the benchmark item at index 41.
print(vlm_response)
print("Evaluation:", evaluator.evaluate(data_ind=41, vlm_response=vlm_response))

To determine the minimum distance between the two speckled pattern stool chairs in the image, let's follow these steps:\n\n1. **Identify the Stools**: Locate the two speckled pattern stools in the image. They are positioned in front of the couches.\n\n2. **Reference Points**: Choose reference points on each stool to measure the distance. The closest points on the stools would be the edges facing each other.\n\n3. **Estimate the Distance**: Visually estimate the distance between these two closest points. Given the perspective and the relative size of the stools, we can approximate the distance.\n\nConsidering the size of the stools and the space between them, the minimum distance between the two speckled pattern stool chairs is approximately:\n\n\\scalar{1} \\distance_unit{meter}

Evaluation: {'ground_truth_value_in_cms': 96.0, 'pred_value_in_cms': 100.0, 'success': True}
