### 0. Getting the right folder and files & import statements ###

In [1]:
# import statements
#from ultralytics import YOLO #obj detection algorithm
import cv2 # videos
import pandas as pd
import numpy as np
import os #to get files automatically

In [11]:
# Get participant number input
participant_number = input("Enter participant number: ")

# Define the base path
base_path = "/Users/alina/Downloads/Thesis/raw/"

# Build the folder path for the participant
folder_path = os.path.join(base_path, f"Participant {participant_number}")

# Get recording number input
recording_number = input("Enter recording number: ")

# Build the recording folder path
recording_path = os.path.join(folder_path, f"P{participant_number}_{recording_number}")

# Read the CSV files with dynamic paths
world_timestamps_file = os.path.join(recording_path, "world_timestamps.csv")
detections_file = os.path.join(recording_path, f"detections_{participant_number}_{recording_number}.csv")

fixations_file = os.path.join(recording_path, "fixations.csv")
face_file = os.path.join(recording_path, "fixations_on_face.csv")
#saccades_file = os.path.join(recording_path, "saccades.csv")

# Load the data
world_timestamps_df = pd.read_csv(world_timestamps_file)
detections_df = pd.read_csv(detections_file)
fixations_df = pd.read_csv(fixations_file)
face_df = pd.read_csv(face_file)
#saccades_df = pd.read_csv(saccades_file)

print(f"Video frame timestamps data loaded from: {world_timestamps_file}")
print(f"Detections data loaded from: {detections_file}")
#print(f"Gaze data loaded from: {gaze_file}")
print(f"Fixations data loaded from: {fixations_file}")
print(f"Face data loaded from: {face_file}")


Video frame timestamps data loaded from: /Users/alina/Downloads/Thesis/raw/Participant 16/P16_2/world_timestamps.csv
Detections data loaded from: /Users/alina/Downloads/Thesis/raw/Participant 16/P16_2/detections_16_2.csv
Fixations data loaded from: /Users/alina/Downloads/Thesis/raw/Participant 16/P16_2/fixations.csv
Face data loaded from: /Users/alina/Downloads/Thesis/raw/Participant 16/P16_2/fixations_on_face.csv


## Run yolo (not on laptop) ##

In [None]:
### 1. Annotating video and making a detections file (Lab's computer) ###
# Load the YOLOv8 model
model = YOLO("yolov8n.pt")

# input video
video_path = "/Users/alina/Downloads/Thesis/Python notebooks/2.mp4"  # Replace with needed video file path
cap = cv2.VideoCapture(video_path)

# input video information
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))  # Total number of frames
fr = cap.get(cv2.CAP_PROP_FPS)  # Frame rate (frames per second)
video_duration = frame_count / fr  # Duration in seconds

print(f"Total frames: {frame_count}")
print(f"Frame rate: {fr} FPS")
print(f"Video duration: {video_duration:.2f} seconds")

# for output video (annotated video)
output_path = "/Users/alina/Downloads/Thesis/Python notebooks/2yolo.mp4"
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
out = cv2.VideoWriter(output_path, fourcc, fr, (int(cap.get(3)), int(cap.get(4)))) #making sure the output video has same attributes as the input


## detections df
detections = [] # a list to for detection results
frame_count = 0 # initialising frame count

while cap.isOpened(): # a loop getting frames until the video is finished
    ret, frame = cap.read() 
    if not ret:
        break

    frame_count += 1  # Track the frame number

    # Run YOLOv8 detection on the current frame
    results = model(frame,classes=0)

    # Save detections to the list
    for result in results[0].boxes.data:
        x_min, y_min, x_max, y_max, confidence, class_id = result
        class_label = model.names[int(class_id)]  # get class name from class ID
        detections.append([frame_count, x_min.item(), y_min.item(), x_max.item(), y_max.item(), confidence.item(), class_label])

    # Annotate frame with bounding boxes and save to output video
    annotated_frame = results[0].plot()
    out.write(annotated_frame)

    ##  to display the annotated frame during the process
    '''cv2.imshow("YOLO Detection", annotated_frame)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break'''

# making a df
columns = ["Frame", "X_min", "Y_min", "X_max", "Y_max", "Confidence", "Class"]
detections_df = pd.DataFrame(detections, columns=columns)
saving_path = os.path.join(recording_path, f"detections_{participant_number}_{recording_number}.csv")
detections_df.to_csv(saving_path, index=False)


cap.release() # closes the video file
out.release() # closes the video writer
cv2.destroyAllWindows() # closes any OpenCV windows

display(detections_df)

## Matching video frames and timestamps

In [12]:
def is_gaze_in_bbox(gaze_x, gaze_y, x_min, y_min, x_max, y_max):
    return x_min <= gaze_x <= x_max and y_min <= gaze_y <= y_max

'''def drop_person_if_other_present(group):
    if "person" in group["Class"].values and len(group["Class"].unique()) > 1:
        return group[group["Class"] != "person"]
    return group'''

def filter_objects_ny_area(group):
    group = group.copy()
    group["area"] = (group["X_max"] - group["X_min"]) * (group["Y_max"] - group["Y_min"])

    # Find the minimum area
    min_area = group["area"].min()
    
    # Get all objects with the minimum area
    smallest_objects = group[group["area"] == min_area]
    
    if len(smallest_objects) == 1:
        return smallest_objects
    else:
        person_objects = smallest_objects[smallest_objects["Class"] == "person"]
        if not person_objects.empty:
            # Return only one person object (the first one)
            return person_objects.iloc[[0]]

    # If no person among smallest, return just one of the smallest objects
    return smallest_objects.iloc[[0]]


In [13]:
## world_timestamps.csv file from pupil cloud represents timestamps for each video frame
# merge_asof is like a nearest match join for ordered time-series data

# renaming so all timestamp columns are named the same to merge the dfs
fixations_df["timestamp [ns]"] = fixations_df["start timestamp [ns]"]

## making a video_df which contains matching frames and ts
ts = world_timestamps_df['timestamp [ns]']

video_df = pd.DataFrame({
        "Frame": np.arange(len(world_timestamps_df)),
        "timestamp [ns]": ts,
    })

# Make sure everything is sorted by timestamp so merge_asof works properky
fixations_df = fixations_df.sort_values("start timestamp [ns]")
world_timestamps_df = world_timestamps_df.sort_values("timestamp [ns]")

# Merge-asof to assign the closest frame_id to each fixation
fixations_with_frame = pd.merge_asof(
    fixations_df,
    video_df,
    on="timestamp [ns]",
    direction="nearest",  # to match the closest timestamp from video_df, whether it's before or after the fixation timestamp
) 
# merging to assign fixations to frames
fixations_with_detections = pd.merge(
    fixations_with_frame,
    detections_df,
    on="Frame",
    how="left" # keep all the fixations, even if a frame has no detection => nan
)

# calculate in_bbox 
fixations_with_detections["in_bbox"] = fixations_with_detections.apply(
    lambda row: is_gaze_in_bbox(
        row["fixation x [px]"], row["fixation y [px]"],
        row["X_min"], row["Y_min"], row["X_max"], row["Y_max"]
    ),
    axis=1
)

# filter only fixations that match at least one bounding box
in_bbox_fixations = fixations_with_detections[fixations_with_detections["in_bbox"] == True]

#  group by fixation ID and drop "person" if there is another class in same fixation group
filtered_fixations = in_bbox_fixations.groupby(
    "Frame", group_keys=False # grouping on frame as i need fixation id column later
).apply(filter_objects_ny_area, include_groups=False)

fixations_on_persons = filtered_fixations.loc[filtered_fixations["Class"] == "person"]
fixations_on_persons.head()

Unnamed: 0,section id,recording id,fixation id,start timestamp [ns],end timestamp [ns],duration [ms],fixation x [px],fixation y [px],azimuth [deg],elevation [deg],timestamp [ns],X_min,Y_min,X_max,Y_max,Confidence,Class,in_bbox,area
26,9b5a0d1f-12c2-4988-9998-7b6b38f14f85,3580a3f9-c0d9-4d8f-9fc1-da7ef301db97,4,1746785095341716784,1746785095461850784,120,843.471,1025.316,1.534394,-26.81454,1746785095341716784,820.516479,69.223709,1600.0,1190.921387,0.517064,person,True,874344.9
28,9b5a0d1f-12c2-4988-9998-7b6b38f14f85,3580a3f9-c0d9-4d8f-9fc1-da7ef301db97,5,1746785095551969784,1746785095732091784,180,784.365,889.247,-2.451384,-18.10789,1746785095551969784,762.237244,22.584534,1600.0,1196.884033,0.734929,person,True,983784.4
32,9b5a0d1f-12c2-4988-9998-7b6b38f14f85,3580a3f9-c0d9-4d8f-9fc1-da7ef301db97,6,1746785095747103784,1746785095822216784,75,769.611,922.684,-3.458678,-20.23918,1746785095747103784,0.0,3.021393,934.661865,1198.393555,0.268208,person,True,1117269.0
37,9b5a0d1f-12c2-4988-9998-7b6b38f14f85,3580a3f9-c0d9-4d8f-9fc1-da7ef301db97,7,1746785095912216784,1746785096107465784,195,809.119,992.592,-0.832406,-24.721621,1746785095912216784,0.0,5.963669,1203.208618,1195.463623,0.269487,person,True,1431217.0
40,9b5a0d1f-12c2-4988-9998-7b6b38f14f85,3580a3f9-c0d9-4d8f-9fc1-da7ef301db97,8,1746785096152465784,1746785096407715784,255,801.852,1011.751,-1.337943,-25.946193,1746785096152465784,0.0,0.0,807.322388,1197.517822,0.351183,person,True,966782.9


In [14]:
filtered_fixations["Class"].unique()

array(['person', 'cell phone', 'baseball glove', 'bus', 'bicycle',
       'suitcase', 'horse', 'skateboard', 'remote', 'potted plant',
       'train', 'backpack', 'truck', 'handbag', 'car', 'parking meter',
       'umbrella', 'clock', 'bench', 'surfboard', 'airplane'],
      dtype=object)

In [15]:
#filtered_fixations[30:50]

In [16]:

fixations_on_persons = fixations_on_persons[fixations_on_persons["in_bbox"] == True]

categorized_fixations_all = []

for _, row in face_df.iterrows():
    timestamp = row['start timestamp [ns]']
    fixation_id = row['fixation id']
    fix_duration = (row['end timestamp [ns]'] - row['start timestamp [ns]'])/1000000 #converting to ms
    fixation_type = "background" #make default type

    # face fixations
    if row.get('fixation on face') == True:
        fixation_type = "face"

    # body fixations (only if not already labeled as face)
    elif not fixations_on_persons[fixations_on_persons['timestamp [ns]'] == timestamp].empty:
        fixation_type = "body"
        matching_row = fixations_on_persons[fixations_on_persons['timestamp [ns]'] == timestamp].iloc[0]
        fixation_id = matching_row['fixation id']
        fix_duration = matching_row['duration [ms]']

    categorized_fixations_all.append({
        'timestamp [ns]': timestamp,
        'type': fixation_type,
        'fixation id': fixation_id,
        'duration [ms]': fix_duration
    })

# a new df
all_categorized_fixations_df = pd.DataFrame(categorized_fixations_all)
saving_path = os.path.join(recording_path, f"fixations_on_everything_{participant_number}_{recording_number}.csv")
all_categorized_fixations_df.to_csv(saving_path, index=False)

In [17]:
#all_categorized_fixations_df[70:90]

In [18]:
'''## comparing with only person detections
categorized_fixations_file = os.path.join(recording_path, f"categorized_fixations_{participant_number}_{recording_number}.csv")
categorised_fixations_df = pd.read_csv(categorized_fixations_file)

n_face_fixations = len(categorised_fixations_df[categorised_fixations_df['type'] == 'face'])
n_body_fixations = len(categorised_fixations_df[categorised_fixations_df['type'] == 'body'])
n_background_fixations = len(categorised_fixations_df[categorised_fixations_df['type'] == 'background'])
print(f"Fixations - Face: {n_face_fixations}, Body: {n_body_fixations}, Background: {n_background_fixations}")

n_face = len(all_categorized_fixations_df[all_categorized_fixations_df['type'] == 'face'])
n_body = len(all_categorized_fixations_df[all_categorized_fixations_df['type'] == 'body'])
n_back = len(all_categorized_fixations_df[all_categorized_fixations_df['type'] == 'background'])

print(f"Fixations - Face: {n_face}, Body: {n_body}, Background: {n_back}")'''

'## comparing with only person detections\ncategorized_fixations_file = os.path.join(recording_path, f"categorized_fixations_{participant_number}_{recording_number}.csv")\ncategorised_fixations_df = pd.read_csv(categorized_fixations_file)\n\nn_face_fixations = len(categorised_fixations_df[categorised_fixations_df[\'type\'] == \'face\'])\nn_body_fixations = len(categorised_fixations_df[categorised_fixations_df[\'type\'] == \'body\'])\nn_background_fixations = len(categorised_fixations_df[categorised_fixations_df[\'type\'] == \'background\'])\nprint(f"Fixations - Face: {n_face_fixations}, Body: {n_body_fixations}, Background: {n_background_fixations}")\n\nn_face = len(all_categorized_fixations_df[all_categorized_fixations_df[\'type\'] == \'face\'])\nn_body = len(all_categorized_fixations_df[all_categorized_fixations_df[\'type\'] == \'body\'])\nn_back = len(all_categorized_fixations_df[all_categorized_fixations_df[\'type\'] == \'background\'])\n\nprint(f"Fixations - Face: {n_face}, Body: 

## For epochs ##

In [19]:
## For triggers ##
fixations_on_persons = fixations_with_detections[fixations_with_detections["in_bbox"] == True]

categorized_fixations_triggers = []

for _, row in face_df.iterrows():
    timestamp = row['start timestamp [ns]']
    fixation_type = "background"

    # face fixations
    if row.get('fixation on face') == True:
        fixation_type = "face"

    # body fixations (only if not already labeled as face)
    elif not fixations_on_persons[fixations_on_persons['timestamp [ns]'] == timestamp].empty:
        fixation_type = "body"
        matching_row = fixations_on_persons[fixations_on_persons['timestamp [ns]'] == timestamp].iloc[0]

    categorized_fixations_triggers.append({
        'timestamp [ns]': timestamp,
        'type': fixation_type,
    })

# a new df
categorized_fixations_triggers_df = pd.DataFrame(categorized_fixations_triggers)
saving_path = os.path.join(recording_path, f"for_epochs_{participant_number}_{recording_number}.csv")
categorized_fixations_triggers_df.to_csv(saving_path, index=False)
