In [1]:
from controlled_experiment.settings import FULLY_LABELED_RECORDINGS

import json
import shutil
import traceback
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import src.api.controllers.generate_embeddings as generate_embeddings
import torch
import torchvision.transforms.functional as F
from sqlalchemy.orm import Session
from src.config import GAZE_FOVEA_FOV, TOBII_FOV_X
from src.db import engine
from src.db.models import Recording, SimRoomClass
from src.api.controllers.gaze_segmentation import (
    get_gaze_points,
    match_frames_to_gaze,
    parse_gazedata_file,
    mask_was_viewed
)
from src.utils import cv2_video_fps, cv2_video_frame_count, cv2_video_resolution
from torchvision.ops import masks_to_boxes
from torchvision.transforms import InterpolationMode
from ultralytics import FastSAM
from tqdm import tqdm
import faiss
from typing import Any

2025-04-09 00:08:14.914218: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-09 00:08:12.505071: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1744150092.568406  163761 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744150092.585723  163761 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-04-09 00:08:12.736616: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [2]:
with open("experiment_metadata.json") as file:
    experiment_metadata = json.load(file)
    trial_recordings_metadata = experiment_metadata["trial_recordings_metadata"]
    trial_recording_uuids = list(trial_recordings_metadata.keys())
    labeling_same_background_uuid = experiment_metadata["labeling_same_background_uuid"]
    labeling_diff_background_uuid = experiment_metadata["labeling_diff_background_uuid"]

with Session(engine) as session:
    trial_recordings = (
        session.query(Recording).filter(Recording.uuid.in_(trial_recording_uuids)).all()
    )

In [3]:
dinov2 = generate_embeddings.load_model()
GAZE_SEGMENTATION_RESULTS_PATH = Path("data/gaze_segmentation_results")
SAME_BACKGROUND_VECTOR_INDEXES_PATH = Path("data/vector_indexes/same_background")
DIFF_BACKGROUND_VECTOR_INDEXES_PATH = Path("data/vector_indexes/diff_background")

In [4]:
def create_grounding_dataset(dataset_path: Path, gaze_segmentation_results: list[Any], index: faiss.IndexIDMap, k: int):
    """
    Create a grounding dataset CSV file with raw candidate distances for each object in each frame.

    For each gaze segmentation result:
      - Retrieves the frame index, regions of interest (rois), and associated object IDs.
      - Computes embeddings for each ROI and searches the provided index to get k candidate matches.
      - For each ROI, iterates over each candidate by pairing the candidate’s class ID with its raw distance.
      - Writes each candidate as an individual row in the output DataFrame with the following columns:
          "frame_idx", "object_id", "class_id", and "distance".
    
    Parameters:
        dataset_path (Path): The file path where the CSV dataset will be saved.
        gaze_segmentation_results (list[Any]): List containing gaze segmentation results; each element
                                                 must have "frame_idx", "rois", and "object_ids".
        index (faiss.IndexIDMap): A FAISS index (wrapped in an IndexIDMap) used to retrieve candidates.
        k (int): The number of nearest neighbors (candidates) to retrieve.
    
    Returns:
        None. The resulting DataFrame is saved to a CSV file at dataset_path.
    """
    result_rows = []
    for result in gaze_segmentation_results:
        frame_idx = result["frame_idx"]
        rois = result["rois"]
        object_ids = result["object_ids"]

        # Get embeddings (assuming one batch is returned)
        embeddings, _, _ = list(generate_embeddings.get_embeddings(dinov2, rois))[0]
        per_roi_distances, per_roi_class_ids = generate_embeddings.search_index(
            index, embeddings, k=k
        )

        for i, roi in enumerate(rois):  # iterate over each ROI in the frame
            object_id = object_ids[i]
            distances = per_roi_distances[i]
            class_ids = per_roi_class_ids[i]

            # Write each candidate (class id and corresponding raw distance) as a separate row.
            for cid, d in zip(class_ids, distances, strict=False):
                result_rows.append({
                    "frame_idx": frame_idx,
                    "object_id": object_id,
                    "class_id": cid,
                    "distance": d,
                })

    pd.DataFrame(result_rows).to_csv(dataset_path, index=False)


In [5]:
GROUNDING_DATASETS_PATH = Path("data/grounding_datasets")

for trial_recording in tqdm(trial_recordings, desc="Processing trial recordings"):
    if trial_recording.uuid not in FULLY_LABELED_RECORDINGS:
        continue

    # Load gaze segmentation results for this recording
    gaze_segmentation_results_path = GAZE_SEGMENTATION_RESULTS_PATH / trial_recording.uuid
    gaze_segmentation_results = list(gaze_segmentation_results_path.iterdir())
    gaze_segmentation_results.sort(key=lambda x: int(x.stem))
    gaze_segmentation_results = [
        np.load(result, allow_pickle=True) for result in gaze_segmentation_results
    ]

    # Create the grounding datasets directory for this recording
    grounding_datasets_path = GROUNDING_DATASETS_PATH / trial_recording.uuid
    if grounding_datasets_path.exists():
        shutil.rmtree(grounding_datasets_path)
    grounding_datasets_path.mkdir(parents=True, exist_ok=True)

    # Create grounding datasets for each index and for each k value
    vector_index_paths = list(SAME_BACKGROUND_VECTOR_INDEXES_PATH.iterdir())
    for vector_index_path in tqdm(vector_index_paths, desc="Processing vector indexes", leave=False):
        sample_count = int(vector_index_path.name.split("_")[0])
        index = faiss.read_index(str(vector_index_path))

        print(f"Creating grounding dataset for sample_count={sample_count}")
        grounding_dataset_path = grounding_datasets_path / f"grounding_dataset_{sample_count}_samples.csv"
        create_grounding_dataset(
            dataset_path=grounding_dataset_path,
            gaze_segmentation_results=gaze_segmentation_results,
            index=index,
            k=sample_count,
        )


Processing trial recordings:   0%|          | 0/14 [00:00<?, ?it/s]

Creating grounding dataset for sample_count=200




Creating grounding dataset for sample_count=300




Creating grounding dataset for sample_count=100




Creating grounding dataset for sample_count=600




Creating grounding dataset for sample_count=400




Creating grounding dataset for sample_count=500


Processing trial recordings:  14%|█▍        | 2/14 [02:15<13:30, 67.56s/it]

Creating grounding dataset for sample_count=200




Creating grounding dataset for sample_count=300




Creating grounding dataset for sample_count=100




Creating grounding dataset for sample_count=600




Creating grounding dataset for sample_count=400




Creating grounding dataset for sample_count=500


Processing trial recordings: 100%|██████████| 14/14 [05:14<00:00, 22.45s/it]
