# Label video frames for training supervised ML models

A simple approach to loading a movie and letting the user label each frame with keystrokes assigned to particular behaviors.

In [1]:
import cv2
import numpy as np
from tqdm.notebook import tqdm
from tqdm import tnrange

In [2]:
#####################################################
####### Load Video Frames ##########################
#####################################################
def LoadVideoFrames(video_file,num_frames=None):
    '''
    Read frames from a video file and return them as grayscale images.

    Parameters
    ----------
    video_file : path of the video to open (passed to cv2.VideoCapture)
    num_frames : maximum number of frames to read. If None, the frame count
                 reported by the video (cv2.CAP_PROP_FRAME_COUNT) is used.

    Returns
    -------
    List of grayscale frames in the order they were read. Reading stops at
    the first failed read, so the list can be shorter than num_frames
    (and is empty if nothing could be read).
    '''
    #open the file that was passed in (not a global variable)
    video = cv2.VideoCapture(video_file)
    frames = []

    try:
        if num_frames is None:
            num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))

        for _ in tqdm(range(num_frames),desc='Loading video'):
            # Read video capture
            ret, frame = video.read()
            if not ret:
                #no frame was returned (e.g. end of video), so stop here
                #instead of passing an invalid frame to cvtColor
                break

            frames.append(cv2.cvtColor(frame,cv2.COLOR_BGR2GRAY))
    finally:
        #always free the capture, even if a frame could not be converted
        video.release()

    return frames

#####################################################
####### Play Video Frames ###########################
#####################################################
def PlayVideoFrames(frames):
    '''
    Step through frames in an OpenCV window using the keyboard.

    Controls
    --------
    `.` : advance one frame
    `,` : go back one frame
    `q` : quit
    Any other key is ignored. Stepping past either end of the video wraps
    around to the other end.

    Parameters
    ----------
    frames : sequence of images that cv2.imshow can display

    Returns
    -------
    None. All OpenCV windows are closed on exit.
    '''
    #len() is used instead of truthiness so that numpy arrays also work
    num_frames = len(frames)
    if num_frames == 0:
        #nothing to show
        return

    control_keys = (ord('q'),ord(','),ord('.'))
    frame_counter = 0

    try:
        while True:
            cv2.imshow('video',frames[frame_counter])

            #block until one of the control keys is pressed
            key = cv2.waitKey(0)
            while key not in control_keys:
                key = cv2.waitKey(0)

            if key == ord('q'):
                break

            #modulo keeps the index inside [0, num_frames) in both directions
            step = 1 if key == ord('.') else -1
            frame_counter = (frame_counter + step) % num_frames
    finally:
        cv2.destroyAllWindows()

#####################################################
####### Play & Label Video Frames ###################
#####################################################
def on_trackbar(val):
    '''
    Trackbar callback that deliberately does nothing.

    It ignores `val` and returns None.
    '''
    return None

def PlayAndLabelFrames(frames,label_dict = None):
    '''
    Show frames one at a time and let the user assign a label to each one.

    Controls
    --------
    label keys : any key of `label_dict` labels the frame currently shown
    `,`        : go back one frame
    `.`        : advance one frame
    `q`        : quit
    The 'frame' trackbar can also be dragged to jump to a frame. Stepping
    past either end of the video wraps around to the other end.

    Parameters
    ----------
    frames     : sequence of image arrays. NOTE: the label text is drawn
                 directly onto the labeled frames, so `frames` is modified.
    label_dict : dict mapping a single-character key to a label name.
                 Defaults to {'w':'walking','t':'turning','s':'standing'}.

    Returns
    -------
    Numpy array of strings with one entry per frame. Frames that were never
    labeled keep the initial value '0.0'.
    '''
    if label_dict is None:
        label_dict = {'w':'walking','t':'turning','s':'standing'}

    num_frames = len(frames)

    #create numpy array to store the labels. Initialize as strings of zeros
    labels = np.zeros(num_frames).astype('str')
    if num_frames == 0:
        #nothing to display or label
        return labels

    #map the key ords (what cv2.waitKey returns) to the label names
    label_key_dict = {ord(k): name for k, name in label_dict.items()}

    # create display window
    cv2.namedWindow('Video',cv2.WINDOW_NORMAL)
    cv2.resizeWindow('Video',800,800)
    #last valid index is num_frames-1, so that is the trackbar maximum
    cv2.createTrackbar('frame','Video',0,num_frames-1,on_trackbar)

    try:
        while True:
            #the trackbar is the single source of truth for the current frame
            frame_counter = cv2.getTrackbarPos('frame','Video')
            frame = frames[frame_counter]
            cv2.imshow('Video',frame)

            #wait for keypress
            key = cv2.waitKey(0)

            #check to see if the user pressed any of the label keys
            if key in label_key_dict:
                label = label_key_dict[key]

                #annotate the bottom-left corner of the frame with the label.
                #Offsets are relative to the frame height (for a 1024 px high
                #frame these are y=950..1024 for the box and y=1000 for text)
                height = frame.shape[0]
                #need a solid background so that the labels can be overwritten
                cv2.rectangle(frame,(0,height),(250,height-74),(0,0,0),-1)
                cv2.putText(frame,label,(0,height-24),cv2.FONT_HERSHEY_COMPLEX,1,(255,255,255),2,cv2.LINE_AA)

                #update the label array with current label
                labels[frame_counter] = label

            #otherwise check whether the user is controlling the playback
            elif key == ord(','): # if `<` then go back
                cv2.setTrackbarPos('frame','Video',(frame_counter - 1) % num_frames)

            elif key == ord('.'): # if `>` then advance
                cv2.setTrackbarPos('frame','Video',(frame_counter + 1) % num_frames)

            elif key == ord('q'): #if `q` then quit
                break
    finally:
        #close any opencv windows
        cv2.destroyAllWindows()

    #return labels
    return labels

In [3]:
# keystroke -> behavior label
label_dict = dict(w='walking', t='turning', s='standing')

In [3]:
#%%time
# Load at most `num_frames` frames from the video at `file`.
file = '/home/sneufeld/Desktop/752_openfield.avi'
num_frames = 1000
frames = LoadVideoFrames(video_file=file, num_frames=num_frames)

HBox(children=(IntProgress(value=0, description='Loading video', max=1000, style=ProgressStyle(description_wid…




In [4]:
# Open the labeling window; pass label_dict explicitly so that editing the
# key -> label mapping defined in the cell above actually takes effect.
labels = PlayAndLabelFrames(frames, label_dict)

In [5]:
# Display the per-frame labels; frames that were never labeled show '0.0'.
labels

array(['0.0', '0.0', '0.0', '0.0', '0.0', 'turning', 'turning', 'turning',
       'turning', 'turning', 'turning', 'turning', 'turning', 'turning',
       'turning', 'turning', 'turning', 'turning', '0.0', '0.0', '0.0',
       'turning', 'turning', 'turning', 'turning', 'turning', '0.0',
       '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0',
       '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0',
       '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0',
       '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0',
       '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0',
       '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0',
       '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0',
       '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0',
       '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0',
       '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0',
       '0.0