This notebook extracts frames from raw drone footage in a systematic way, so that the annotated training data is rich and diverse enough to train a robust object detection model.

### Imports

In [3]:
import os
import sys
import cv2
import json
import numpy as np

sys.path.append("..")
from src.labelbox_processing import random_top_left, save_frame

### TODO: set the data paths for your machine

In [4]:
# Set these two paths for your machine.
videos_folder = "/data/huanga/Synchrony/videos"  # location of the raw training videos
frames_folder = "/data/huanga/Synchrony/frames"  # destination for the extracted training frames

# Create both folders up front if they do not exist yet.
for _folder in (videos_folder, frames_folder):
    os.makedirs(_folder, exist_ok=True)

### Get the videos and define extraction rules

In [None]:
# Load the video lists and select the one to extract frames from.
# NOTE(review): list_videos.json is presumably a mapping of split name to a
# list of video file names (relative to videos_folder) -- confirm.
json_file = "list_videos.json"
# JSON is UTF-8 by specification; pass the encoding explicitly so video names
# with non-ASCII characters are decoded the same way on every platform instead
# of depending on the locale's default encoding.
with open(json_file, encoding="utf-8") as f:
    video_splits = json.load(f)
video_type = "train"
video_names = video_splits[video_type]


min_frame = 0         # minimum frame index to extract
max_frame = None      # maximum frame index to extract
num_extract = 20      # number of frames to extract and save
save_triplet = True   # save triplet of frames around focal frame
triplet_spacing = 30  # number of frames between focal frame and triplet frames
crop_size = 1000      # either None for the whole image, or the side length in pixels of square crops
save_folder = os.path.join(frames_folder, video_type)   # folder to save frames

### Extract frames from videos

In [None]:
# Extract randomly sampled frames from each video. For every sampled focal
# frame ("_f") the frames `triplet_spacing` after ("_a") and before ("_b") it
# are saved as well when `save_triplet` is set.

# Make sure the output folder exists before anything is written to it.
os.makedirs(save_folder, exist_ok=True)

# When triplets are saved, the focal frame has to stay `triplet_spacing` frames
# away from both ends of the allowed range so its neighbours are valid frames.
margin = triplet_spacing if save_triplet else 0

for video_name in video_names:

    # get the properties of the video
    video_file = os.path.join(videos_folder, video_name)
    video_stem = os.path.splitext(video_name)[0]
    cap = cv2.VideoCapture(video_file)
    try:
        if not cap.isOpened():
            print(f"Could not open {video_file}; skipping.")
            continue

        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Compute the sampling bounds in per-video locals. The configured
        # min_frame / max_frame must not be modified here, otherwise the
        # bounds would drift with every video (and every re-run of the cell).
        last_frame = frame_count - 1  # last valid 0-based frame index
        if max_frame is not None:
            last_frame = min(last_frame, max_frame)
        low = min_frame + margin
        high = last_frame - margin + 1  # exclusive upper bound for randint

        if high <= low:
            print(
                f"{video_name}: no frame can be sampled between frames "
                f"{min_frame} and {last_frame} with a margin of {margin}; skipping."
            )
            continue

        # Randomly choose the specified number of frames to extract from the
        # given range (sampled with replacement, so repeats are possible).
        frame_nums_to_save = np.random.randint(low, high, num_extract)
        for frame_num in frame_nums_to_save:

            # handle file naming; the same base name is shared by the triplet
            frame_file = os.path.join(save_folder, f"{video_stem}_frame_{frame_num}")
            # Without cropping there is no crop origin.
            # NOTE(review): assumes save_frame ignores top_left when crop_size
            # is falsy -- confirm against src.labelbox_processing.
            top_left = None
            if crop_size:
                top_left = random_top_left([height, width], crop_size, gaussian=True)
                frame_file += f"_top_{top_left[0]}_left_{top_left[1]}"

            # 'f' marks the focal frame, 'a' the frame after it and 'b' the
            # frame before it; all three are named after the focal frame.
            frames_to_write = [("f", frame_num)]
            if save_triplet:
                frames_to_write.append(("a", frame_num + triplet_spacing))
                frames_to_write.append(("b", frame_num - triplet_spacing))

            for suffix, index in frames_to_write:
                save_frame(cap, index, f"{frame_file}_{suffix}.jpg", crop_size, top_left)
    finally:
        # Always free the capture, including when a video is skipped or
        # saving a frame raises.
        cap.release()