### After the videos used for annotation have been split into train, val, and test sets, this notebook extracts specific frames (or crops of frames) for annotation
#### The user specifies the number of frames, the size of the crops, and whether crops should be more likely to come from the center of the image

In [7]:
import os
import glob
import json
import cv2
import numpy as np


# Machine-specific paths are kept in 'local-paths.json', which lives in the
# directory two levels above the notebook's current working directory.
geladas_root = os.path.dirname(
    os.path.dirname(os.getcwd())
)
local_paths_file = os.path.join(geladas_root, 'local-paths.json')
# Echo the resolved location so a wrong working directory is easy to spot.
print(local_paths_file)
with open(local_paths_file, "r") as json_file:
    local_paths = json.loads(json_file.read())

import sys
# Extend the module search path with the folder named in local-paths.json so
# that the two project imports below can be resolved. This line has to run
# before those imports.
# NOTE(review): the key is spelled 'general_dection_path' (presumably
# "detection"); it must match the key used in local-paths.json, so it is
# left as-is here.
sys.path.append(local_paths['general_dection_path'])
# random_top_left: picks the top-left corner of a crop (used in the
# extraction loop below).
from augmentation.augmentation import random_top_left 
# save_frame: writes a given frame (optionally cropped) of an open video to disk.
from functions.video_processing import save_frame

/home/golden/overhead-video-worked-examples/geladas/local-paths.json


In [2]:
import json

# The split file lists, for each dataset split, the raw video files that
# belong to it (training, validation, and testing).
# Expected layout of the .json:
# {"train": ["file1.mp4", "file2.mp4"],
#  "val": ["file3.mp4", "file4.mp4"],
#  "test": ["file5.mp4", "file6.mp4"]
# }
json_file = "video_train_val_test_split.json"
with open(json_file) as split_file:
    # Dictionary mapping split name -> list of video file names
    video_splits = json.load(split_file)

In [7]:
# Path to main folder within which images will be saved.
# (The dictionary loaded from local-paths.json is named `local_paths`.)
annotations_folder = os.path.join(local_paths['annotations_folder'],
                                  "extracted_frames", "crops")
os.makedirs(annotations_folder, exist_ok=True)
# Path to folder where videos are saved
video_folder_path = local_paths['videos_folder']

# Which type of videos to extract from. This must be one of the keys used in
# the saved split .json file (e.g. "train", "val"/"validation", or "test").
video_type = "validation"
if video_type not in video_splits:
    # Fail early with a message that lists the valid choices instead of a
    # bare KeyError on the lookup below.
    raise KeyError(
        f"video_type {video_type!r} is not a key of the split file; "
        f"available keys: {sorted(video_splits)}"
    )

# list of video names to use
video_names = video_splits[video_type]

# Images for this split are saved in their own sub-folder
save_folder = os.path.join(annotations_folder, video_type)
# setting min and max frames can be used if part of video will go in the training set
# and part of the video will go in the validation set
min_frame = 0 # beginning of range frames could be extracted from
max_frame = None # end of range (exclusive) frames could be extracted from
                 # (set to None if range should extend through last frame)
num_extract = 3 # number of frames to extract and save
save_triplet = True # If True, for each frame also save a frame a little before
                    # and a little after the focal frame (can be helpful when
                    # movement helps find cryptic individuals)
triplet_spacing = 30 # How many frames in future and past to space outer
                     # triplet frames around focal frame (ignored if
                     # save_triplet is False)
crop_size = 1000 # Either None for whole image or size in pixels (square crops)

In [8]:
# Extract `num_extract` randomly chosen frames (or crops) from every video in
# `video_names` and save them in `save_folder`. If `save_triplet` is True, the
# frames `triplet_spacing` before and after each focal frame are saved as well.

# Make sure the output folder for this split exists before writing images
# (harmless if save_frame also creates it).
os.makedirs(save_folder, exist_ok=True)

# Focal frames need `triplet_spacing` frames of room on both sides when
# triplets are saved; otherwise no margin is required.
frame_margin = triplet_spacing if save_triplet else 0

for video_name in video_names:
    video_file = os.path.join(video_folder_path, video_name)
    video_stem = os.path.splitext(video_name)[0] # remove extension

    cap = cv2.VideoCapture(video_file)
    try:
        if not cap.isOpened():
            print(f"Could not open {video_file}; skipping.")
            continue
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # The frame range is computed per video in local variables. The
        # configured `min_frame` / `max_frame` are never modified, so the
        # margin does not accumulate from one video to the next and a
        # `max_frame` of None is resolved separately for every video.
        # Valid frame indexes are 0 .. frame_count - 1, so the frame count
        # itself is the exclusive upper bound.
        if max_frame is None:
            range_end = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        else:
            range_end = max_frame
        first_focal = min_frame + frame_margin
        last_focal = range_end - frame_margin # exclusive

        if first_focal >= last_focal:
            # np.random.randint raises on an empty range
            print(f"No usable frames in {video_file} for range "
                  f"[{first_focal}, {last_focal}); skipping.")
            continue

        # Randomly choose the specified number of frames to extract from
        # the given range (upper bound exclusive)
        frame_nums_to_save = np.random.randint(first_focal, last_focal,
                                               num_extract)
        for frame_num in frame_nums_to_save:
            # Shared file-name stem for the focal frame and its triplet frames
            frame_stem = os.path.join(save_folder,
                                      f"{video_stem}_frame_{frame_num}")
            # No crop offset when whole frames are saved.
            # NOTE(review): assumes save_frame accepts top_left=None when
            # crop_size is falsy -- confirm against its definition.
            top_left = None
            if crop_size:
                # Per the original note: with gaussian=True, crops are sampled
                # from a gaussian centered at the center of the frame with
                # 1 std equal to half the height/width of the frame
                top_left = random_top_left([height, width], crop_size,
                                           gaussian=True)
                # Add where the crop comes from to the file name so we can
                # find it in the original image later if we want to
                frame_stem += f"_top_{top_left[0]}_left_{top_left[1]}"
            # Naming convention: '_f' is the focal frame that will be
            # annotated, '_a' is the frame `triplet_spacing` after it and
            # '_b' is the frame `triplet_spacing` before it. All three use
            # the same crop location.
            save_frame(cap, frame_num, frame_stem + "_f.jpg",
                       crop_size, top_left)
            if save_triplet:
                save_frame(cap, frame_num + triplet_spacing,
                           frame_stem + "_a.jpg", crop_size, top_left)
                save_frame(cap, frame_num - triplet_spacing,
                           frame_stem + "_b.jpg", crop_size, top_left)
    finally:
        # Always free the video handle, even if saving a frame fails
        cap.release()