In [1]:
import json
import os
import tempfile
from pathlib import Path

import cv2
import numpy as np
from sqlalchemy.orm import Session
from src.api.controllers.calibration_recording_controller import (
    AnnotatedClassResponse,
    get_annotated_classes,
    get_recording_path,
    get_gaze_data_path
)
from src.config import TOBII_GLASSES_FPS
from src.db import engine
from src.db.models import CalibrationRecording
from src.utils import (
    cv2_video_resolution,
    draw_annotation_on_frame,
    extract_frames_to_dir,
    cv2_video_fps,
    cv2_video_frame_count
)
from tqdm import tqdm
import torch
from src.api.controllers.gaze_segmentation import mask_was_viewed, parse_gazedata_file, get_gaze_points, match_frames_to_gaze, get_gaze_point_per_frame
from src.logic.glasses.domain import GazePoint
import pandas as pd
from src.config import VIEWED_RADIUS
import subprocess

In [2]:
# Parse the experiment description; the file handle is only needed for the
# JSON decoding itself, everything else works on the resulting dict.
with open("experiment_metadata.json") as file:
    experiment_metadata = json.load(file)

trial_recordings_metadata = experiment_metadata["trial_recordings_metadata"]
trial_recording_uuids = list(trial_recordings_metadata.keys())
labeling_same_background_uuid = experiment_metadata["labeling_same_background_uuid"]
labeling_diff_background_uuid = experiment_metadata["labeling_diff_background_uuid"]

# Fetch all calibration recordings and index the ones that belong to a trial
# by their recording UUID.
with Session(engine) as session:
    calibration_recordings = session.query(CalibrationRecording).all()

    trial_recordings = dict(
        (recording.recording_uuid, recording)
        for recording in calibration_recordings
        if recording.recording_uuid in trial_recording_uuids
    )

# Validate Labeling Data

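Here we validate the labeling data before building the ground truth. For every trial recording we check that the trial metadata lists exactly five objects, that exactly five classes were annotated, and that every object class named in the metadata has at least one annotation file, so that no labels are missing.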

In [3]:
# Check every trial: both the metadata and the annotations must describe five
# objects, and each object class named in the metadata needs annotation files.
for recording_uuid, trial_metadata in trial_recordings_metadata.items():
    calibration_recording = trial_recordings[recording_uuid]
    annotated_classes = get_annotated_classes(calibration_recording.id)

    # Index the annotation paths by class name for the per-object lookups below
    class_name_to_annotation_paths = dict(
        (anno_class.class_name, anno_class.annotation_paths)
        for anno_class in annotated_classes
    )
    trial_objects_metadata = trial_metadata["objects"]

    assert len(trial_objects_metadata) == 5, "Number of objects in trial is not 5"
    assert len(annotated_classes) == 5, "Number of annotated classes is not 5"

    # Each object entry is a 3-tuple whose first element is the class name;
    # the other two elements are not needed for validation.
    for class_name, _, _ in trial_objects_metadata:
        assert class_name in class_name_to_annotation_paths, (
            "Original object class name not in annotated classes"
        )

        annotation_paths = class_name_to_annotation_paths[class_name]
        assert len(annotation_paths) > 0, f"Annotation paths for {class_name} are empty"

# Create the ground truth dataset

In [4]:
# Inspect one annotation archive to see which arrays a labeling result stores.
# The path is relative to the notebook's working directory (like the other
# "data/..." paths in this notebook) instead of a machine-specific absolute
# path, so the cell also runs on other checkouts.
# NOTE(review): assumes the notebook is run from the controlled_experiment
# directory — confirm.
SAMPLE_ANNOTATION_PATH = Path("data/labeling_results/2/1/0.npz")

# The context manager closes the underlying .npz archive after reading.
with np.load(SAMPLE_ANNOTATION_PATH) as sample_annotation:
    print(sample_annotation.files)

['mask', 'box', 'roi', 'class_id', 'frame_idx']


In [5]:
def get_viewed_annotations_per_frame(
    annotated_classes: list[AnnotatedClassResponse],
    gaze_point_per_frame: dict[int, GazePoint],
    video_resolution: tuple[int, int],
) -> dict[int, list[Path]]:
    """Collect, per frame, the annotation files whose mask was looked at.

    The frame index of an annotation is taken from its file name stem. For
    every annotation whose frame has a gaze point, the stored mask is pasted
    into an otherwise empty full-frame mask at the position given by the
    stored box and handed to ``mask_was_viewed`` together with the gaze
    position.

    Args:
        annotated_classes: Annotated classes; each provides ``annotation_paths``
            pointing to ``.npz`` files with at least a ``mask`` and a ``box``
            array (``box`` unpacks to ``x1, y1, x2, y2``).
        gaze_point_per_frame: Gaze point per frame index. Annotations on frames
            without an entry (or with a ``None`` entry) are skipped.
        video_resolution: Shape of the full-frame mask that is allocated.
            NOTE(review): the mask is indexed as ``[y, x]``, so this presumably
            has to be ``(height, width)`` — confirm against
            ``cv2_video_resolution``.

    Returns:
        Mapping from frame index to the paths of the annotations for which
        ``mask_was_viewed`` returned a truthy value. Frames without any viewed
        annotation do not appear in the mapping.
    """
    annotations_per_frame: dict[int, list[Path]] = {}
    for anno_class in annotated_classes:
        for annotation_path in anno_class.annotation_paths:
            frame_idx = int(annotation_path.stem)

            # Look up the gaze point first: frames without gaze data can be
            # skipped without reading the annotation from disk at all.
            gaze_point = gaze_point_per_frame.get(frame_idx)
            if gaze_point is None:
                continue

            # The context manager closes the .npz archive once the arrays
            # have been read into memory.
            with np.load(annotation_path) as annotation_file:
                mask = annotation_file["mask"]
                x1, y1, x2, y2 = annotation_file["box"]

            # Put the mask in a tensor of the same size as the video frame
            mask_full = np.zeros(video_resolution, dtype=np.uint8)
            mask_full[y1:y2, x1:x2] = mask
            mask_full_torch = torch.from_numpy(mask_full)

            if mask_was_viewed(mask_full_torch, gaze_point.position):
                annotations_per_frame.setdefault(frame_idx, []).append(annotation_path)

    return annotations_per_frame
    

In [6]:
def draw_validation_video_frames(
    frames: list[Path],
    annotations_per_frame: dict[int, list[Path]],
    gaze_point_per_frame: dict[int, GazePoint],
    annotated_classes: list[AnnotatedClassResponse],
) -> None:
    """Draw viewed annotations and the gaze point onto frame images in place.

    Every frame image is read, annotated, and written back to the same path,
    so the files in ``frames`` are overwritten.

    Args:
        frames: Paths of the frame images; the file name stem of each path is
            parsed as the frame index.
        annotations_per_frame: Mapping from frame index to the ``.npz``
            annotation files to draw on that frame. Each file must provide
            ``class_id``, ``box`` and ``mask``.
        gaze_point_per_frame: Gaze point per frame index; a circle with radius
            ``VIEWED_RADIUS`` is drawn around it when present.
        annotated_classes: Annotated classes, used to look up the color and
            name for the ``class_id`` stored in each annotation file.

    Raises:
        RuntimeError: If a frame image cannot be read by OpenCV.
        KeyError: If an annotation refers to a ``class_id`` that is not part
            of ``annotated_classes``.
    """
    class_id_to_annotated_class = {
        anno_class.id: anno_class for anno_class in annotated_classes
    }

    # Iterate over frames and draw the annotations on them if they exist
    for frame in tqdm(frames, desc="Drawing annotations on frames"):
        frame_idx = int(frame.stem)
        frame_img = cv2.imread(str(frame))
        if frame_img is None:
            # cv2.imread signals failure by returning None; fail with a clear
            # message instead of an opaque OpenCV error further down.
            raise RuntimeError(f"Could not read frame image: {frame}")

        # A missing (or None) entry means there is nothing to draw on this frame
        for annotation_path in annotations_per_frame.get(frame_idx) or ():
            # The context manager closes the .npz archive after reading
            with np.load(annotation_path) as annotation_file:
                class_id = int(annotation_file["class_id"])
                x1, y1, x2, y2 = annotation_file["box"]
                mask = annotation_file["mask"]

            # Squeeze mask if it has an extra dimension
            if mask.ndim == 3 and mask.shape[0] == 1:
                mask = mask[0]
            if mask.dtype != bool:
                mask = mask.astype(bool)

            annotated_class = class_id_to_annotated_class[class_id]
            box = (x1, y1, x2, y2)

            # Annotate the frame using the reusable function
            frame_img = draw_annotation_on_frame(
                frame_img, mask, box, annotated_class.color, annotated_class.class_name
            )

        # Draw the gaze point on the frame
        gaze_point = gaze_point_per_frame.get(frame_idx)
        if gaze_point is not None:
            gaze_x, gaze_y = gaze_point.position
            cv2.circle(
                frame_img,
                (int(gaze_x), int(gaze_y)),
                radius=VIEWED_RADIUS,
                color=(0, 0, 255),
                thickness=2,
            )

        # Save the modified image back to its original location
        cv2.imwrite(str(frame), frame_img)

In [7]:
# Output directory for the validation videos. Create it when missing and
# remove videos left over from a previous run so stale results never mix
# with new ones.
LABELING_VALIDATION_VIDEOS_PATH = Path("data/labeling_validation_videos")
LABELING_VALIDATION_VIDEOS_PATH.mkdir(parents=True, exist_ok=True)
for file in LABELING_VALIDATION_VIDEOS_PATH.glob("*.mp4"):
    file.unlink()

# Set to False to only build the ground truth CSV and skip the (slow) videos
CREATE_VALIDATION_VIDEO = True

# Remove the old CSV up front so an aborted run cannot leave stale ground truth
GROUND_TRUTH_PATH = Path("data/ground_truth.csv")
GROUND_TRUTH_COLUMNS = ["recording_uuid", "frame_idx", "class_id", "mask_area"]
GROUND_TRUTH_PATH.unlink(missing_ok=True)

# Rows are collected in a plain list and turned into a DataFrame once at the
# end; growing a DataFrame with pd.concat per row is quadratic.
ground_truth_records: list[dict] = []

for recording_uuid, trial_metadata in tqdm(trial_recordings_metadata.items()):
    calibration_recording = trial_recordings[recording_uuid]
    annotated_classes = get_annotated_classes(calibration_recording.id)

    # Get statistics of the video
    trial_recording_path = get_recording_path(calibration_recording.id)
    trial_video_resolution = cv2_video_resolution(trial_recording_path)
    trial_video_fps = cv2_video_fps(trial_recording_path)
    trial_video_frame_count = cv2_video_frame_count(trial_recording_path)

    # Load and preprocess gaze points
    print(f"Loading gaze data for {recording_uuid}")
    gaze_data_path = get_gaze_data_path(calibration_recording.id)
    gaze_point_per_frame = get_gaze_point_per_frame(
        gaze_data_path=gaze_data_path,
        resolution=trial_video_resolution,
        frame_count=trial_video_frame_count,
        fps=trial_video_fps,
    )

    # Get all annotations that were viewed
    print(f"Getting viewed annotations for {recording_uuid}")
    annotations_per_frame = get_viewed_annotations_per_frame(
        annotated_classes=annotated_classes,
        gaze_point_per_frame=gaze_point_per_frame,
        video_resolution=trial_video_resolution,
    )

    # Build the ground truth records: one row per viewed annotation
    # TODO: Might be interesting to add blur metric per frame to the ground truth dataset
    print(f"Building ground truth for {recording_uuid}")
    for frame_idx, annotation_paths in annotations_per_frame.items():
        for annotation_path in annotation_paths:
            # The context manager closes the .npz archive after reading
            with np.load(annotation_path) as annotation_file:
                class_id = int(annotation_file["class_id"])
                mask_area = np.sum(annotation_file["mask"])

            ground_truth_records.append(
                {
                    "recording_uuid": recording_uuid,
                    "frame_idx": frame_idx,
                    "class_id": class_id,
                    "mask_area": mask_area,
                }
            )

    if CREATE_VALIDATION_VIDEO:
        # Extract frames from the video and save them to a temporary directory.
        # The context manager deletes the extracted frames as soon as the video
        # for this recording has been written.
        print(f"Extracting frames for {recording_uuid}")
        with tempfile.TemporaryDirectory() as tmp_frames_dir:
            tmp_frames_path = Path(tmp_frames_dir)
            extract_frames_to_dir(
                video_path=trial_recording_path,
                frames_path=tmp_frames_path,
                print_output=False,
            )
            frames = sorted(tmp_frames_path.glob("*.jpg"), key=lambda x: int(x.stem))

            print(f"Drawing annotations for {recording_uuid}")
            draw_validation_video_frames(
                frames=frames,
                annotations_per_frame=annotations_per_frame,
                gaze_point_per_frame=gaze_point_per_frame,
                annotated_classes=annotated_classes,
            )

            print(f"Creating video for {recording_uuid}")
            # Argument list instead of a shell string: no quoting issues and no
            # shell involved. The "*.jpg" pattern is expanded by ffmpeg itself
            # (-pattern_type glob), not by a shell.
            # NOTE(review): ffmpeg presumably orders glob matches by file name,
            # which only equals frame order if the frame names are zero-padded
            # — confirm against extract_frames_to_dir.
            # NOTE(review): the frame rate comes from TOBII_GLASSES_FPS rather
            # than the measured trial_video_fps — confirm this is intended.
            cmd = [
                "ffmpeg",
                "-hwaccel", "cuda",
                "-y",
                "-pattern_type", "glob",
                "-framerate", str(TOBII_GLASSES_FPS),
                "-i", f"{tmp_frames_path!s}/*.jpg",
                "-c:v", "libx264",
                "-pix_fmt", "yuv420p",
                f"{LABELING_VALIDATION_VIDEOS_PATH}/{recording_uuid}.mp4",
            ]
            try:
                result = subprocess.run(
                    cmd,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.PIPE,
                    text=True,
                    errors="replace",
                )
            except FileNotFoundError:
                # Video creation stays best-effort, but the failure is reported
                print(f"ffmpeg executable not found; no video created for {recording_uuid}")
            else:
                if result.returncode != 0:
                    # Report the failure instead of silently producing no video;
                    # only the tail of stderr is shown to keep the output short.
                    print(
                        f"ffmpeg failed for {recording_uuid} "
                        f"(exit code {result.returncode}):\n{result.stderr[-2000:]}"
                    )

ground_truth_df = pd.DataFrame(ground_truth_records, columns=GROUND_TRUTH_COLUMNS)
ground_truth_df.to_csv(
    GROUND_TRUTH_PATH,
    index=False,
)


  0%|          | 0/14 [00:00<?, ?it/s]

Loading gaze data for 67b71a70-da64-467a-9fb6-91bc29265fd1
Getting viewed annotations for 67b71a70-da64-467a-9fb6-91bc29265fd1
Building ground truth for 67b71a70-da64-467a-9fb6-91bc29265fd1
Extracting frames for 67b71a70-da64-467a-9fb6-91bc29265fd1
Drawing annotations for 67b71a70-da64-467a-9fb6-91bc29265fd1


Drawing annotations on frames: 100%|██████████| 2064/2064 [00:14<00:00, 144.20it/s]


Creating video for 67b71a70-da64-467a-9fb6-91bc29265fd1


  7%|▋         | 1/14 [01:02<13:30, 62.32s/it]

Loading gaze data for 32f02db7-adc0-4556-a2da-ed2ba60a58c9
Getting viewed annotations for 32f02db7-adc0-4556-a2da-ed2ba60a58c9
Building ground truth for 32f02db7-adc0-4556-a2da-ed2ba60a58c9
Extracting frames for 32f02db7-adc0-4556-a2da-ed2ba60a58c9
Drawing annotations for 32f02db7-adc0-4556-a2da-ed2ba60a58c9


Drawing annotations on frames: 100%|██████████| 1365/1365 [00:09<00:00, 142.38it/s]


Creating video for 32f02db7-adc0-4556-a2da-ed2ba60a58c9


 14%|█▍        | 2/14 [01:43<10:01, 50.12s/it]

Loading gaze data for b8eeecc0-06b1-47f7-acb5-89aab3c1724d
Getting viewed annotations for b8eeecc0-06b1-47f7-acb5-89aab3c1724d
Building ground truth for b8eeecc0-06b1-47f7-acb5-89aab3c1724d
Extracting frames for b8eeecc0-06b1-47f7-acb5-89aab3c1724d
Drawing annotations for b8eeecc0-06b1-47f7-acb5-89aab3c1724d


Drawing annotations on frames: 100%|██████████| 1557/1557 [00:11<00:00, 136.83it/s]


Creating video for b8eeecc0-06b1-47f7-acb5-89aab3c1724d


 21%|██▏       | 3/14 [02:29<08:49, 48.17s/it]

Loading gaze data for d50c5f3b-2822-4462-9880-5a8f0dd46bfb
Getting viewed annotations for d50c5f3b-2822-4462-9880-5a8f0dd46bfb
Building ground truth for d50c5f3b-2822-4462-9880-5a8f0dd46bfb
Extracting frames for d50c5f3b-2822-4462-9880-5a8f0dd46bfb
Drawing annotations for d50c5f3b-2822-4462-9880-5a8f0dd46bfb


Drawing annotations on frames: 100%|██████████| 1500/1500 [00:10<00:00, 138.80it/s]


Creating video for d50c5f3b-2822-4462-9880-5a8f0dd46bfb


 29%|██▊       | 4/14 [03:18<08:05, 48.51s/it]

Loading gaze data for 9fa3e3b8-ed94-4b06-ba49-e66e3997d710
Getting viewed annotations for 9fa3e3b8-ed94-4b06-ba49-e66e3997d710
Building ground truth for 9fa3e3b8-ed94-4b06-ba49-e66e3997d710
Extracting frames for 9fa3e3b8-ed94-4b06-ba49-e66e3997d710
Drawing annotations for 9fa3e3b8-ed94-4b06-ba49-e66e3997d710


Drawing annotations on frames: 100%|██████████| 1229/1229 [00:08<00:00, 141.18it/s]


Creating video for 9fa3e3b8-ed94-4b06-ba49-e66e3997d710


 36%|███▌      | 5/14 [03:54<06:34, 43.83s/it]

Loading gaze data for 98128cdc-ffeb-40cb-9528-573e25028e87
Getting viewed annotations for 98128cdc-ffeb-40cb-9528-573e25028e87
Building ground truth for 98128cdc-ffeb-40cb-9528-573e25028e87
Extracting frames for 98128cdc-ffeb-40cb-9528-573e25028e87
Drawing annotations for 98128cdc-ffeb-40cb-9528-573e25028e87


Drawing annotations on frames: 100%|██████████| 1543/1543 [00:08<00:00, 172.03it/s]


Creating video for 98128cdc-ffeb-40cb-9528-573e25028e87


 43%|████▎     | 6/14 [04:39<05:54, 44.35s/it]

Loading gaze data for 89b60530-e0e4-4f5d-9ee6-af85c8d99ff4
Getting viewed annotations for 89b60530-e0e4-4f5d-9ee6-af85c8d99ff4
Building ground truth for 89b60530-e0e4-4f5d-9ee6-af85c8d99ff4
Extracting frames for 89b60530-e0e4-4f5d-9ee6-af85c8d99ff4
Drawing annotations for 89b60530-e0e4-4f5d-9ee6-af85c8d99ff4


Drawing annotations on frames: 100%|██████████| 1270/1270 [00:08<00:00, 143.46it/s]


Creating video for 89b60530-e0e4-4f5d-9ee6-af85c8d99ff4


 50%|█████     | 7/14 [05:17<04:56, 42.29s/it]

Loading gaze data for 2fe01600-c057-40ee-8434-4e9e0688ca2d
Getting viewed annotations for 2fe01600-c057-40ee-8434-4e9e0688ca2d
Building ground truth for 2fe01600-c057-40ee-8434-4e9e0688ca2d
Extracting frames for 2fe01600-c057-40ee-8434-4e9e0688ca2d
Drawing annotations for 2fe01600-c057-40ee-8434-4e9e0688ca2d


Drawing annotations on frames: 100%|██████████| 2041/2041 [00:13<00:00, 154.41it/s]


Creating video for 2fe01600-c057-40ee-8434-4e9e0688ca2d


 57%|█████▋    | 8/14 [06:21<04:54, 49.13s/it]

Loading gaze data for 67823ccd-a1f0-4cde-b954-3b9e5fe160c1
Getting viewed annotations for 67823ccd-a1f0-4cde-b954-3b9e5fe160c1
Building ground truth for 67823ccd-a1f0-4cde-b954-3b9e5fe160c1
Extracting frames for 67823ccd-a1f0-4cde-b954-3b9e5fe160c1
Drawing annotations for 67823ccd-a1f0-4cde-b954-3b9e5fe160c1


Drawing annotations on frames: 100%|██████████| 1554/1554 [00:11<00:00, 135.63it/s]


Creating video for 67823ccd-a1f0-4cde-b954-3b9e5fe160c1


 64%|██████▍   | 9/14 [07:09<04:03, 48.72s/it]

Loading gaze data for b214c60b-7521-495b-a699-e223da0c77c1
Getting viewed annotations for b214c60b-7521-495b-a699-e223da0c77c1
Building ground truth for b214c60b-7521-495b-a699-e223da0c77c1
Extracting frames for b214c60b-7521-495b-a699-e223da0c77c1
Drawing annotations for b214c60b-7521-495b-a699-e223da0c77c1


Drawing annotations on frames: 100%|██████████| 1440/1440 [00:10<00:00, 139.65it/s]


Creating video for b214c60b-7521-495b-a699-e223da0c77c1


 71%|███████▏  | 10/14 [07:51<03:06, 46.72s/it]

Loading gaze data for b8f453aa-5a12-4cbb-a0ec-20eb503f8797
Getting viewed annotations for b8f453aa-5a12-4cbb-a0ec-20eb503f8797
Building ground truth for b8f453aa-5a12-4cbb-a0ec-20eb503f8797
Extracting frames for b8f453aa-5a12-4cbb-a0ec-20eb503f8797
Drawing annotations for b8f453aa-5a12-4cbb-a0ec-20eb503f8797


Drawing annotations on frames: 100%|██████████| 1364/1364 [00:09<00:00, 142.77it/s]


Creating video for b8f453aa-5a12-4cbb-a0ec-20eb503f8797


 79%|███████▊  | 11/14 [08:27<02:09, 43.30s/it]

Loading gaze data for 7ae61789-7a26-4c31-abef-4ab49a34abfd
Getting viewed annotations for 7ae61789-7a26-4c31-abef-4ab49a34abfd
Building ground truth for 7ae61789-7a26-4c31-abef-4ab49a34abfd
Extracting frames for 7ae61789-7a26-4c31-abef-4ab49a34abfd
Drawing annotations for 7ae61789-7a26-4c31-abef-4ab49a34abfd


Drawing annotations on frames: 100%|██████████| 1358/1358 [00:09<00:00, 143.97it/s]


Creating video for 7ae61789-7a26-4c31-abef-4ab49a34abfd


 86%|████████▌ | 12/14 [09:07<01:24, 42.36s/it]

Loading gaze data for 6f3e2ccf-51f6-4377-8b84-63a3c16928a8
Getting viewed annotations for 6f3e2ccf-51f6-4377-8b84-63a3c16928a8
Building ground truth for 6f3e2ccf-51f6-4377-8b84-63a3c16928a8
Extracting frames for 6f3e2ccf-51f6-4377-8b84-63a3c16928a8
Drawing annotations for 6f3e2ccf-51f6-4377-8b84-63a3c16928a8


Drawing annotations on frames: 100%|██████████| 1458/1458 [00:08<00:00, 167.91it/s]


Creating video for 6f3e2ccf-51f6-4377-8b84-63a3c16928a8


 93%|█████████▎| 13/14 [09:51<00:43, 43.05s/it]

Loading gaze data for 5235be94-da01-43b5-8827-92a51d32ce30
Getting viewed annotations for 5235be94-da01-43b5-8827-92a51d32ce30
Building ground truth for 5235be94-da01-43b5-8827-92a51d32ce30
Extracting frames for 5235be94-da01-43b5-8827-92a51d32ce30
Drawing annotations for 5235be94-da01-43b5-8827-92a51d32ce30


Drawing annotations on frames: 100%|██████████| 1368/1368 [00:10<00:00, 132.19it/s]


Creating video for 5235be94-da01-43b5-8827-92a51d32ce30


100%|██████████| 14/14 [10:31<00:00, 45.14s/it]
