In [2]:
import cv2
import math
import numpy as np

In [20]:
def get_frames_with_labels(video_filepath, event_timestamp_millis, sequence_length, frame_step):
    """Build every strided frame-index window that contains the event frame.

    The event timestamp is converted to a frame index using the video's FPS.
    A window is ``sequence_length`` frame indexes spaced ``frame_step`` apart;
    only windows that hit the event frame exactly and stay inside the video
    are produced. Frames at or after the event are labelled 1, earlier frames 0.

    Args:
        video_filepath: Path handed to ``cv2.VideoCapture``.
        event_timestamp_millis: Time of the event, in milliseconds.
        sequence_length: Number of frame indexes per window (positive integer).
        frame_step: Distance in frames between consecutive indexes of a
            window (positive integer).

    Returns:
        ``(frame_indexes, labels)``: two arrays of shape
        ``(window_count, sequence_length)``; ``labels`` has dtype ``int8``.
        ``None`` (after printing the reason) when the video cannot be opened,
        its metadata is unusable, an argument is not positive, the event lies
        outside the video, or the requested window does not fit.
    """
    vid_cap = cv2.VideoCapture(video_filepath)
    try:
        if not vid_cap.isOpened():
            print("Error opening video")
            return None

        # Properties come back as floats; the frame count is used as an integer bound.
        vid_frame_count = int(vid_cap.get(cv2.CAP_PROP_FRAME_COUNT))
        vid_fps = vid_cap.get(cv2.CAP_PROP_FPS)
    finally:
        # Only metadata is needed, so always give the handle back right away.
        vid_cap.release()

    if frame_step < 1 or sequence_length < 1:
        print(f"Frame step and sequence length must be positive (got frame step {frame_step}, sequence length {sequence_length}).")
        return None

    if vid_frame_count < 1 or vid_fps <= 0:
        print(f"Video reports unusable metadata (frame count {vid_frame_count}, fps {vid_fps}).")
        return None

    event_frame_index = math.floor(event_timestamp_millis / 1000 * vid_fps) # todo: revisit this if there is issue with indexing too early

    print(f'Event is at frame {event_frame_index}.')

    if not 0 <= event_frame_index < vid_frame_count:
        print(f"Event frame {event_frame_index} is outside the video (valid frames are 0 to {vid_frame_count - 1}).")
        return None

    # Whole steps available on each side of the event frame. The "- 1" is
    # because the last valid index is frame count minus one.
    max_frame_steps_event_to_beginning = event_frame_index // frame_step
    max_frame_steps_event_to_end = (vid_frame_count - 1 - event_frame_index) // frame_step
    # Plus 1 to include the event frame itself.
    max_possible_sequence_length = max_frame_steps_event_to_end + max_frame_steps_event_to_beginning + 1

    if sequence_length > max_possible_sequence_length:
        print(f"Not possible for frame step {frame_step} and sequence length {sequence_length}. Maximum possible sequence length is {max_possible_sequence_length}")
        return None

    # Min and max start frame in which the event frame is still included at the exact point
    # considering the requested sequence length and frame step size.
    min_start_frame_idx = event_frame_index - frame_step * min(sequence_length - 1, max_frame_steps_event_to_beginning)
    max_start_frame_idx = event_frame_index - frame_step * max(0, sequence_length - 1 - max_frame_steps_event_to_end)

    # Both bounds differ from the event frame by a multiple of frame_step, so
    # stepping from min to max (inclusive) enumerates every valid start.
    window_starts = np.arange(min_start_frame_idx, max_start_frame_idx + 1, frame_step)
    offsets_in_window = np.arange(sequence_length) * frame_step

    # Broadcasting: one row per window start, one column per position in the window.
    frame_indexes = window_starts[:, np.newaxis] + offsets_in_window[np.newaxis, :]
    labels = (frame_indexes >= event_frame_index).astype(np.int8)

    return frame_indexes, labels

# Smoke test against a sample clip: the event sits at 1000 ms and the frame
# step is 13 for every case; only the requested sequence length varies.
video_filepath = 'data/running_1.mp4'

# Each case is (sequence_length, whether a result is expected). 774 is the
# length that is expected to be rejected for this clip.
for requested_length, expect_result in ((40, True), (774, False), (60, True)):
    sample_result = get_frames_with_labels(video_filepath, event_timestamp_millis=1000, sequence_length=requested_length, frame_step=13)

    if not expect_result:
        assert sample_result is None
        continue

    assert sample_result is not None
    print(f'Data shape: {sample_result[0].shape}')
    print(f'Label shape: {sample_result[1].shape}')
    print(sample_result)

Event is at frame 30.
Data shape: (3, 40)
Label shape: (3, 40)
(array([[  4,  17,  30,  43,  56,  69,  82,  95, 108, 121, 134, 147, 160,
        173, 186, 199, 212, 225, 238, 251, 264, 277, 290, 303, 316, 329,
        342, 355, 368, 381, 394, 407, 420, 433, 446, 459, 472, 485, 498,
        511],
       [ 17,  30,  43,  56,  69,  82,  95, 108, 121, 134, 147, 160, 173,
        186, 199, 212, 225, 238, 251, 264, 277, 290, 303, 316, 329, 342,
        355, 368, 381, 394, 407, 420, 433, 446, 459, 472, 485, 498, 511,
        524],
       [ 30,  43,  56,  69,  82,  95, 108, 121, 134, 147, 160, 173, 186,
        199, 212, 225, 238, 251, 264, 277, 290, 303, 316, 329, 342, 355,
        368, 381, 394, 407, 420, 433, 446, 459, 472, 485, 498, 511, 524,
        537]]), array([[0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1