## Import dependencies

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from osg.utils.general_utils import load_data, get_spatial_referents
from osg.vlm_library import vlm_library

## Setup

In [None]:
# Folder handed to both the VLM wrapper and the data loader below.
tmp_fldr = "results/"

# Compression strategy options: position_direction / direction / position
compression_strategy = "position_direction"

# Data source options: robot / r3d
data_format = "robot"
data_path = "../data/sample_robot_data"  # sample robot data

# Alternative configuration: sample record3d data
# data_format = "r3d"
# data_path = "../data/sample_r3d_data/bdai_kitchen.r3d"

vlm_instance = vlm_library(
    vl_model="owl_vit",
    data_src=data_format,
    seg_model="mobile_sam",
    tmp_fldr=tmp_fldr,
)

# Load the environment point cloud and the observations graph from data_path.
env_pointcloud, observations_graph = load_data(
    data_path,
    data_format,
    tmp_fldr,
    pcd_downsample=False,
    compression_percentage=80,
    compression_technique=compression_strategy,
)

## Composible Referent Descriptors

In [None]:
## Composible Referent Descriptor (CRD)
    # CRDs are propositional expressions that represent specific referent instances by chaining comparators that encode descriptive spatial information. 
    # For more details see: https://arxiv.org/abs/2402.11498

## CRD Syntax
    # referent_1::isbetween(referent_2,referent_3)  :denotes that referent_1 is between referent_2 and referent_3.
    # referent_1::isabove(referent_2)               :denotes that referent_1 is above referent_2.
    # referent_1::isbelow(referent_2)               :denotes that referent_1 is below referent_2.
    # referent_1::isleftof(referent_2)              :denotes that referent_1 is left of referent_2.
    # referent_1::isrightof(referent_2)             :denotes that referent_1 is right of referent_2.
    # referent_1::isnextto(referent_2)              :denotes that referent_1 is close to referent_2.
    # referent_1::isinfrontof(referent_2)           :denotes that referent_1 is in front of referent_2.
    # referent_1::isbehind(referent_2)              :denotes that referent_1 is behind referent_2.

## Examples
    # Desired referent:   table behind the fridge
    # CRD representation: table::isbehind(fridge) 

    # Desired referent:    chair between the green laptop and the yellow box below the sofa
    # CRD representation:  chair::isbetween(green_laptop,yellow_box::isbelow(sofa))

    # Desired referent:    brown bag between the television and the kettle on the left of the green seat
    # CRD representation:  brown_bag::isbetween(television, kettle::isleftof(green_seat))

## Ground referents and filter instances via spatial constraints

In [None]:
# Enter comma-separated referent names or composible referent descriptors you wish to ground.
# Other example selections:
#   referents_to_ground = ["coffee_machine", "red_cup::isnextto(microwave)"]
#   referents_to_ground = ["microwave"]
referents_to_ground = [
    "robot",
    "whiteboard::isinfrontof(green_plush_toy)",
]

## Extract spatial information
referent_spatial_details = get_spatial_referents(referents_to_ground)
print("referent_spatial_details: ", referent_spatial_details, "\n")

In [None]:
## Spatial grounding
# Run spatial grounding over the observations graph for the requested referents,
# with visualization, segmentation and multiprocessing (3 workers) enabled.
relevant_element_details = vlm_instance.spatial_grounding(
    observations_graph,
    referent_spatial_details,
    visualize=True,
    use_segmentation=True,
    multiprocessing=True,
    workers=3,
)

In [None]:
# Report how many elements are left after spatial constraint filtering.
print("\nReferents after spatial constraint filtering:", len(relevant_element_details))

# Print the mask id of every relevant element.
print("Filtered elements \n", [entry["mask_id"] for entry in relevant_element_details])