In [1]:
# IMPORTS


import numpy as np
import pandas as pd
import av

### VIDEOFRAME INFO EXTRACTOR

In [None]:
def frametimes(video_path):
    """Decode a video and collect the start time of every frame.

    Parameters
    ----------
    video_path : str or path-like
        Location of the video file; passed straight to ``av.open``.

    Returns
    -------
    total_frames : int
        Number of frames that were actually decoded. This is returned instead
        of the count stored in the stream metadata so that it always matches
        the number of intervals described by ``frame_intervals``.
    frame_intervals : list of float
        Start time in seconds (PTS * time base) of each decoded frame, followed
        by one final entry holding the video duration in seconds. It therefore
        contains ``total_frames + 1`` values.

    Raises
    ------
    ValueError
        If neither the video stream nor the container reports a duration.
    """

    # Open the video file; the context manager closes it again even when
    # decoding fails part-way through.
    with av.open(video_path) as container:

        # Get the video stream (usually index 0 for the first video stream)
        video_stream = container.streams.video[0]

        # Retrieve video info
        reported_frames = video_stream.frames  # Frame count from the stream metadata
        duration_ts = video_stream.duration  # Total duration in time units (PTS); may be None
        time_base = video_stream.time_base  # Time base to convert PTS units to seconds

        if duration_ts is not None:
            duration_seconds = float(duration_ts * time_base)  # Duration of the video in seconds
        elif container.duration is not None:
            # Fall back to the container-level duration, which PyAV expresses
            # in av.time_base units rather than in the stream's time base.
            duration_seconds = float(container.duration) / av.time_base
        else:
            raise ValueError(f"Could not determine the duration of '{video_path}'.")

        # Print the results
        print(f"Total Frames: {reported_frames}")
        print(f"Duration (PTS units): {duration_ts}")
        print(f"Time Base: {time_base}")
        print(f"Duration (seconds): {duration_seconds} \n")

        # Storage for frame intervals
        frame_intervals = []

        # Decode video frames
        for i, frame in enumerate(container.decode(video=0)):
            # TODO: If the video stream is corrupted, it could cause infinitely repeated decoding attempts. Might need if check in case it's unstable during testing

            # Update frame info
            start_time = float(frame.pts * time_base)
            frame_intervals.append(start_time)

            if i < 5:
                print(f"Frame {i + 1}: starttime {start_time}, PTS {frame.pts}, DTS {frame.dts}")

    # Count what was really decoded: the metadata value can disagree with it,
    # and the caller needs exactly one label per interval.
    total_frames = len(frame_intervals)
    if total_frames != reported_frames:
        print(f"Decoded Frames: {total_frames} (metadata reported {reported_frames})")

    # The video duration closes the interval of the last frame
    frame_intervals.append(duration_seconds)

    return total_frames, frame_intervals

### GET IMU DATAFRAME

In [4]:
def getIMU(dataframe_path):
    """Read a CSV file into a DataFrame and print a preview of both ends.

    Parameters
    ----------
    dataframe_path : str, path-like or file-like
        Source handed to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The full table that was read.
    """
    frame = pd.read_csv(dataframe_path)

    # Each preview is printed under its own heading, head first and tail second
    previews = (
        ("Top rows:", frame.head()),
        ("\nBottom rows:", frame.tail()),
    )
    for heading, rows in previews:
        print(heading)
        print(rows)

    return frame

### ADD FRAME_INDEX TO DATAFRAME

In [5]:
def add_frame_index(df, total_frames, frame_intervals):
    """Attach to every row the 1-based index of the frame shown at its timestamp.

    Frame ``k`` covers the half-open interval
    ``[frame_intervals[k - 1], frame_intervals[k])``, so a sample taken exactly
    at a frame's start time belongs to that frame rather than to the previous
    one. A sample exactly on the final edge is still counted as part of the
    last frame. Timestamps outside the edges get a missing ``FRAME_INDEX``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a ``TIMESTAMP`` column. It is modified in place: a
        categorical ``FRAME_INDEX`` column is added (or overwritten).
    total_frames : int
        Number of frames; the labels used are ``1 .. total_frames``.
    frame_intervals : sequence of float
        Increasing bin edges, one more than ``total_frames``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, now containing ``FRAME_INDEX``.

    Raises
    ------
    ValueError
        If ``TIMESTAMP`` is missing or the number of edges does not equal
        ``total_frames + 1``.
    """

    # Make a list of all frame indices
    frame_labels = list(range(1, total_frames + 1))

    if "TIMESTAMP" not in df.columns:
        raise ValueError("The dataframe must contain a 'TIMESTAMP' column.")

    # Fail early with a message about frames instead of pandas' generic one
    if len(frame_intervals) != len(frame_labels) + 1:
        raise ValueError(
            f"Expected {len(frame_labels) + 1} interval edges for {len(frame_labels)} frames, "
            f"got {len(frame_intervals)}."
        )

    # Match FRAME_INDEX to TIMESTAMP. right=False makes the bins left-closed,
    # so a timestamp equal to a frame's start time falls into that frame.
    frame_index = pd.cut(df["TIMESTAMP"], bins=frame_intervals, labels=frame_labels, right=False)

    # Left-closed bins exclude the very last edge; keep such samples in the
    # last frame, as the right-closed binning used to do.
    if frame_labels:
        on_last_edge = df["TIMESTAMP"] == frame_intervals[-1]
        frame_index.loc[on_last_edge] = frame_labels[-1]

    df["FRAME_INDEX"] = frame_index

    return df

### CONVERT LABELED FRAMES TO DATAFRAME 

In [6]:
def dict_to_labeledframes(dict_list):
    """Build a DataFrame from ``dict_list`` by passing it to ``pandas.DataFrame``.

    With a list of dictionaries, each dictionary becomes one row and its keys
    become the column names.
    """
    return pd.DataFrame(dict_list)

### MAKE A DICTIONARY FOR LABELS

In [7]:
# Collect, per label, every frame index covered by that label's ranges
def sort_frametolabels(df_label):
    """Group labeled frame ranges by label.

    Every row of ``df_label`` contributes the inclusive range
    ``frame_start .. frame_end`` to the list stored under its ``label``.
    Ranges that share a label are concatenated in row order.

    Returns
    -------
    dict
        Maps each label to a list of frame indices.
    """
    frames_by_label = {}

    for _, entry in df_label.iterrows():
        # +1 because frame_end itself is part of the labeled range
        span = range(entry["frame_start"], entry["frame_end"] + 1)
        frames_by_label.setdefault(entry["label"], []).extend(span)

    return frames_by_label

### ADD LABEL TO DATAFRAME

In [8]:
# Look up which label a single frame index belongs to
def assign_label(frame_index, label_dict):
    """Return the first label whose frame collection contains ``frame_index``.

    Labels are checked in the iteration order of ``label_dict``. ``None`` is
    returned when no label's frames contain the index.
    """
    matches = (name for name, frames in label_dict.items() if frame_index in frames)
    return next(matches, None)


# Match LABEL to FRAME_INDEX
def match_labeltoframe(df, label_dict):
    """Add a ``LABEL`` column holding the label of each row's frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a ``FRAME_INDEX`` column. It is modified in place: a
        ``LABEL`` column is added (or overwritten).
    label_dict : dict
        Maps each label to an iterable of the frame indices it covers.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, now containing ``LABEL``. Frames that appear
        under no label get ``None``.

    Raises
    ------
    ValueError
        If ``df`` has no ``FRAME_INDEX`` column.
    """

    if "FRAME_INDEX" not in df.columns:
        raise ValueError("The dataframe must contain a 'FRAME_INDEX' column.")

    # Invert label -> frames into frame -> label once, so each lookup is a
    # single dictionary access instead of a scan over every label's frames.
    # setdefault keeps the first label (in dictionary order) for a frame listed
    # under several labels, the same precedence a label-by-label scan gives.
    frame_to_label = {}
    for label, frame_indices in label_dict.items():
        for frame in frame_indices:
            frame_to_label.setdefault(frame, label)

    df["LABEL"] = df["FRAME_INDEX"].apply(lambda frame_index: frame_to_label.get(frame_index))

    return df

### FUNCTION TO RUN EVERYTHING

In [9]:
def runner(video_path, dataframe, dataframe_labeled_frames):
    """Run the whole pipeline: frame times, frame indices, labels, clean-up.

    Steps, in order: ``frametimes`` on the video, ``add_frame_index`` on
    ``dataframe``, ``sort_frametolabels`` on ``dataframe_labeled_frames``,
    ``match_labeltoframe``, and finally removal of every row without a
    ``FRAME_INDEX``. That removal is done in place on the frame returned by
    ``match_labeltoframe``.

    Returns
    -------
    pandas.DataFrame
        The frame returned by ``match_labeltoframe`` after the rows without a
        ``FRAME_INDEX`` have been dropped.
    """
    frame_count, interval_edges = frametimes(video_path)

    indexed = add_frame_index(dataframe, frame_count, interval_edges)
    frames_by_label = sort_frametolabels(dataframe_labeled_frames)
    labeled = match_labeltoframe(indexed, frames_by_label)

    # Rows without a frame index lie outside the video duration
    # (before the first or after the last frame)
    labeled.dropna(subset=['FRAME_INDEX'], inplace=True)

    return labeled

In [None]:
# Input locations: the video file and the CSV read by getIMU
video_path = "data/GH010041.MP4"
dataframe_path = "data/IMU_GH010041.csv"

# Read the CSV (getIMU also prints a preview), then print a blank separator line
dataframe = getIMU(dataframe_path)
print()

# Path to labeled DataFrame
#dict_list = [{'label': 'opstappen', 'frame_start': 1, 'frame_end': 1550}, {'label': 'links', 'frame_start': 29225, 'frame_end': 29350}, {'label': 'rechtdoor', 'frame_start': 25416, 'frame_end': 25883}, {'label': 'links', 'frame_start': 25884, 'frame_end': 25937}, {'label': 'rechtdoor', 'frame_start': 28847, 'frame_end': 29224}, {'label': 'rechtdoor', 'frame_start': 28509, 'frame_end': 28728}, {'label': 'rechtdoor', 'frame_start': 29651, 'frame_end': 31357}, {'label': 'rechtdoor', 'frame_start': 1873, 'frame_end': 3765}, {'label': 'rechtdoor', 'frame_start': 7693, 'frame_end': 9644}, {'label': 'rechtdoor', 'frame_start': 5025, 'frame_end': 5993}, {'label': 'links', 'frame_start': 1767, 'frame_end': 1872}, {'label': 'rechtdoor', 'frame_start': 3898, 'frame_end': 4874}, {'label': 'rechts', 'frame_start': 26007, 'frame_end': 26052}, {'label': 'rechts', 'frame_start': 27293, 'frame_end': 27493}, {'label': 'rechts', 'frame_start': 17205, 'frame_end': 17352}, {'label': 'rechts', 'frame_start': 15396, 'frame_end': 15567}, {'label': 'rechts', 'frame_start': 9645, 'frame_end': 9735}, {'label': 'links', 'frame_start': 31358, 'frame_end': 31447}, {'label': 'rechts', 'frame_start': 10508, 'frame_end': 10667}]
#dataframe_labeled_frames = dict_to_labeledframes(dict_list)
#print(dataframe_labeled_frames)
#print("")

# Path to labeled DataFrame
# A forward slash is used, like the other paths in this notebook: the former
# backslash formed the invalid escape sequence "\G" and only worked on Windows.
labeled_dataframe_path = "data/GH010041_anonymized_labeled.csv"
dataframe_labeled_frames = getIMU(labeled_dataframe_path)
print(dataframe_labeled_frames)
print("")

# Add FRAME_INDEX and LABEL to the loaded rows and drop rows outside the video
dataframe = runner(video_path, dataframe, dataframe_labeled_frames)

In [None]:
# Show the first 5095 rows and then the last 1360 rows of the result
display(dataframe.head(5095), dataframe.tail(1360))

In [None]:
# Show the first 3940 rows of the result
display(dataframe.head(n=3940))

In [13]:
# Write the result to CSV without the DataFrame index column
path = 'data/labeled_GH010041.csv'
dataframe.to_csv(path_or_buf=path, index=False)