In [None]:
import gzip
import json
import os
import shutil
from collections import Counter
from pathlib import Path

from src.utils import extract_frames_to_dir
from tqdm import tqdm
from settings import RECORDING_FRAMES_PATH

# Check that the expected number of recordings is present

In [4]:
# Define the base paths using absolute paths
RAW_EXPERIMENT_RECORDINGS_PATH = os.path.abspath("data/raw/experiment_recordings/")
RAW_LABELING_RECORDINGS_PATH = os.path.abspath("data/raw/labeling_recordings/")

# Number of recording folders each raw directory is expected to contain
EXPECTED_EXPERIMENT_RECORDINGS = 14
EXPECTED_LABELING_RECORDINGS = 2


def _list_subdirectories(base_path):
    """Return the full paths of all directories directly inside ``base_path``.

    Plain files are skipped. The result is sorted because ``os.listdir``
    returns entries in arbitrary order, which would otherwise make the
    processing order differ between runs and machines.
    """
    candidates = (os.path.join(base_path, name) for name in os.listdir(base_path))
    return sorted(path for path in candidates if os.path.isdir(path))


# List all subdirectories (full paths) in both raw recordings folders
experiment_dirs = _list_subdirectories(RAW_EXPERIMENT_RECORDINGS_PATH)
labeling_dirs = _list_subdirectories(RAW_LABELING_RECORDINGS_PATH)

# Assert the counts
assert len(experiment_dirs) == EXPECTED_EXPERIMENT_RECORDINGS, (
    f"Expected 14 recordings in experiment recordings folder, but found {len(experiment_dirs)}."
)
assert len(labeling_dirs) == EXPECTED_LABELING_RECORDINGS, (
    f"Expected 2 recordings in labeling recordings folder, but found {len(labeling_dirs)}."
)

print("Experiment recordings count:", len(experiment_dirs))
print("Labeling recordings count:", len(labeling_dirs))

Experiment recordings count: 14
Labeling recordings count: 2


# Preprocess the necessary recording files and save them to the processed directory

In [5]:
# Output directory for the preprocessed recordings. It is rebuilt from scratch
# on every run so that files from a previous run cannot linger.
PROCESSED_RECORDINGS_PATH = Path("data/recordings/")

stale_output_present = PROCESSED_RECORDINGS_PATH.exists()
if stale_output_present:
    shutil.rmtree(PROCESSED_RECORDINGS_PATH)

PROCESSED_RECORDINGS_PATH.mkdir(parents=True, exist_ok=True)

In [6]:
def preprocess_recording(
    recording_path, output_path, assert_in_metadata=True, known_recordings=None
):
    """Copy one raw recording into ``output_path``, named after its UUID.

    The recording's ``uuid`` is read from the JSON file ``recording.g3`` inside
    ``recording_path``. Two files are then written to ``output_path``:

    * ``<uuid>.mp4`` - a copy of ``scenevideo.mp4``
    * ``<uuid>.tsv`` - the decompressed contents of ``gazedata.gz``

    Args:
        recording_path: Directory of the raw recording (``str`` or ``Path``).
        output_path: Existing directory to write the results to (``str`` or
            ``Path``).
        assert_in_metadata: When true, fail before writing anything unless the
            recording's UUID is a known recording.
        known_recordings: Optional container (dict, set, list, ...) of known
            recording UUIDs. When omitted, the notebook-level
            ``recordings_metadata`` mapping is used, as before.

    Raises:
        AssertionError: If ``assert_in_metadata`` is true and the UUID is not
            among the known recordings.
        NameError: If ``assert_in_metadata`` is true, ``known_recordings`` was
            not given and no global ``recordings_metadata`` exists.
    """
    recording_path = Path(recording_path)
    output_path = Path(output_path)

    # read metadata (JSON is UTF-8; do not rely on the platform default encoding)
    with open(recording_path / "recording.g3", encoding="utf-8") as file:
        metadata = json.load(file)
    recording_id = metadata["uuid"]

    if assert_in_metadata:
        if known_recordings is None:
            # Backward-compatible fallback to the notebook global.
            try:
                known_recordings = recordings_metadata
            except NameError:
                raise NameError(
                    "name 'recordings_metadata' is not defined: pass "
                    "known_recordings=... or define recordings_metadata before "
                    "calling preprocess_recording with assert_in_metadata=True."
                ) from None

        # Explicit raise instead of `assert` so the check survives `python -O`.
        if recording_id not in known_recordings:
            raise AssertionError(f"Recording {recording_id} not found in metadata.")

    # copy the scene video to output_path/<recording_id>.mp4
    shutil.copy(
        recording_path / "scenevideo.mp4", output_path / f"{recording_id}.mp4"
    )

    # decompress the gaze data to output_path/<recording_id>.tsv; stream it in
    # fixed-size chunks rather than by "line", since the payload is read as bytes
    with gzip.open(recording_path / "gazedata.gz", "rb") as compressed:
        with open(output_path / f"{recording_id}.tsv", "wb") as decompressed:
            shutil.copyfileobj(compressed, decompressed)


# Experiment recordings must be present in the metadata; labeling recordings
# are processed without that check.
# NOTE(review): the checked path reads the global `recordings_metadata`, which
# is not defined in any visible cell - confirm it exists before running this.
for recording_dirs, must_be_in_metadata in (
    (experiment_dirs, True),
    (labeling_dirs, False),
):
    for recording in recording_dirs:
        preprocess_recording(
            Path(recording),
            PROCESSED_RECORDINGS_PATH,
            assert_in_metadata=must_be_in_metadata,
        )

# Extract Frames for all Recordings

In [None]:
# Every preprocessed scene video; the file stem is the recording id.
recordings = list(PROCESSED_RECORDINGS_PATH.glob("*.mp4"))
recording_ids = [video.stem for video in recordings]

# Rebuild the frames directory from scratch so no frames of an earlier run remain.
frames_dir_present = RECORDING_FRAMES_PATH.exists()
if frames_dir_present:
    shutil.rmtree(RECORDING_FRAMES_PATH)
RECORDING_FRAMES_PATH.mkdir(parents=True, exist_ok=True)

# One sub-directory of frames per recording, named after the recording id.
for recording_video_path in tqdm(recordings, desc="Extracting frames"):
    recording_id = recording_video_path.stem
    recording_frames_path = RECORDING_FRAMES_PATH / recording_id
    recording_frames_path.mkdir(parents=True, exist_ok=True)
    extract_frames_to_dir(recording_video_path, recording_frames_path)

Extracting frames:   0%|          | 0/16 [00:00<?, ?it/s]

Extracting frames: 100%|██████████| 16/16 [02:28<00:00,  9.31s/it]


# Build the SQLite database for the application

> # Note: This code is OUTDATED

In [None]:
# from datetime import datetime, timedelta

# from sqlalchemy.orm import Session
# from src.api.db import Base, engine
# from src.api.models.db import CalibrationRecording, Recording, SimRoom, SimRoomClass

In [None]:
# # Prepare an empty database
# Base.metadata.drop_all(bind=engine)
# Base.metadata.create_all(bind=engine)

# with Session(engine) as session:
#     # Create a SimRoom
#     simroom = SimRoom(name="Controlled Experiment Room")
#     session.add(simroom)
#     # Flush to get the ID without committing the transaction
#     session.flush()

#     # Create Classes:
#     for i, object_name in enumerate(object_counter.keys()):
#         simroom_class = SimRoomClass(
#             id=i + 1,
#             simroom_id=simroom.id,
#             class_name=object_name,
#         )
#         session.add(simroom_class)

#     # Create recordings
#     for recording_path in experiment_dirs + labeling_dirs:
#         with open(os.path.join(recording_path, "recording.g3")) as file:
#             metadata = json.load(file)
#             recording_id = metadata["uuid"]
#             visible_name = metadata["name"]
#             created = datetime.strptime(metadata["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
#             created = created.isoformat()
#             duration = str(timedelta(seconds=metadata["duration"]))
#             scene_video_url = "not important"
#             gaze_data_url = "not important"

#         with open(os.path.join(recording_path, "meta/participant")) as file:
#             meta = json.load(file)
#             participant = meta["name"]

#             # Manually override wrong participant name
#             if participant == "":
#                 participant = "Ilian Opname 12"
#             elif participant == "Ilian Opname 12":
#                 participant = "Ilian Opname 14"
#             elif participant == "Ilian Opname 10 b":
#                 participant = "Ilian Opname 10"

#         recording = Recording(
#             id=recording_id,
#             visible_name=visible_name,
#             created=created,
#             duration=duration,
#             scene_video_url=scene_video_url,
#             gaze_data_url=gaze_data_url,
#             participant=participant,
#         )
#         session.add(recording)
#         # Flush to get the ID without committing the transaction
#         session.flush()

#         calibration_recording = CalibrationRecording(
#             simroom_id=simroom.id, recording_id=recording_id
#         )
#         session.add(calibration_recording)

#     session.commit()