In [23]:
#Template Project to build off of
import cv2 as cv2
import numpy as np
import tensorflow as tf
from tensorflow import keras
import os

In [24]:
# Side length, in pixels, of the square frames used throughout the notebook:
# it is the default resize target in load_video and the input height/width
# of the feature extractor model.
IMG_SIZE = 256
# Class names that the label lookup vocabulary is built from.
LABELS = ["FIST", "HAND"]

In [18]:
def crop_center(frame):
    """Return the largest centered square region of ``frame``.

    The side of the square equals the smaller of the first two dimensions
    of ``frame``; any trailing dimensions (e.g. color channels) are kept.

    Args:
        frame: Array-like image exposing ``.shape`` and 2-D slicing, with
            rows on axis 0 and columns on axis 1.

    Returns:
        The slice ``frame[top:top+side, left:left+side]``.
    """
    height, width = frame.shape[:2]
    side = min(height, width)

    # Offsets are the difference of the two floor-divided halves, so that
    # odd sizes land on the same pixels on every call.
    top = height // 2 - side // 2
    left = width // 2 - side // 2

    rows = slice(top, top + side)
    cols = slice(left, left + side)
    return frame[rows, cols]

In [20]:
#From a video file (.mp4) extract every n-th frame and return them as a numpy array
#This will allow us to extract frames from a video and use them to train our model
#Every frame would be way too much data but we can test and hone in exactly how many 
#frames will be needed for a good model

def load_video(video_file, max_frames, resize=(IMG_SIZE, IMG_SIZE), n=1):
    """Read a video and return every n-th frame as one numpy array.

    Each kept frame is center-cropped to a square, resized to ``resize``
    and has its first three channels reordered as [2, 1, 0] (OpenCV decodes
    frames as BGR, so this presumably yields RGB).

    Args:
        video_file: Source handed straight to ``cv2.VideoCapture``.
        max_frames: Reading stops as soon as this many frames were kept.
        resize: ``(width, height)`` passed to ``cv2.resize``.
        n: Sampling step; frames whose index is a multiple of ``n`` are kept.

    Returns:
        ``np.array`` built from the list of kept frames (empty if no frame
        could be read).
    """
    capture = cv2.VideoCapture(video_file)
    kept = []

    try:
        frame_index = 0
        while True:
            ok, image = capture.read()
            if not ok:
                # End of stream, or the source could not be read.
                break

            if frame_index % n == 0:
                square = crop_center(image)
                resized = cv2.resize(square, resize)
                kept.append(resized[:, :, [2, 1, 0]])

                # The count only changes on append, so checking here is enough.
                if len(kept) == max_frames:
                    break

            frame_index += 1
    finally:
        # Always free the capture handle, even if processing raised.
        capture.release()

    return np.array(kept)

In [None]:
#A feature extractor will allow us to extract only the most important parts
#of each frame and discard the rest. This will allow us to train our model
#faster and more efficiently
def create_feature_extractor():
    """Build a Keras model that turns a frame into an InceptionV3 feature vector.

    The model takes a batch of ``(IMG_SIZE, IMG_SIZE, 3)`` images, applies the
    InceptionV3 ``preprocess_input`` function, and runs the result through an
    InceptionV3 backbone loaded with ImageNet weights, without its
    classification head and with global average pooling on the output.

    Returns:
        A ``keras.Model`` named ``"feature_extractor"``.
    """
    frame_shape = (IMG_SIZE, IMG_SIZE, 3)

    feature_extractor = keras.applications.InceptionV3(
        # The keyword is ``weights``; the previous ``wights`` spelling is not a
        # parameter of InceptionV3 and made this call fail.
        weights="imagenet",
        include_top=False,
        pooling="avg",
        input_shape=frame_shape,
    )
    pre_process_input = keras.applications.inception_v3.preprocess_input

    # Preprocessing is part of the model, so callers pass in raw frames.
    inputs = keras.Input(shape=frame_shape)
    preprocessed = pre_process_input(inputs)
    outputs = feature_extractor(preprocessed)
    return keras.Model(inputs, outputs, name="feature_extractor")

In [25]:
# Lookup layer for the class labels. The vocabulary is the sorted set of
# unique entries in LABELS (np.unique), and num_oov_indices=0 means no index
# is set aside for out-of-vocabulary strings.
label_processor = keras.layers.StringLookup(
    num_oov_indices=0, vocabulary=np.unique(LABELS)
)
# Print the vocabulary held by the layer as a sanity check.
print(label_processor.get_vocabulary())

['FIST', 'HAND']


In [14]:
# NEXT STEP: Using the helper functions above, write a function that loads all the
# video data we plan to use and splits it into training and validation sets
# along with their labels.
# Then we can build a simple model to train on that data, hopefully capable of
# predicting the correct label for a given video (Fist or Hand).