In [None]:
pip install keras_video_generators



In [None]:
import keras
from tensorflow.keras.applications import ResNet50V2
import zipfile
import keras_video
import os
import glob
import keras
import keras_video.utils

In [None]:
# Unpack the HMDB archive into /tmp. The context manager closes the archive
# even when extraction raises, so the file handle is never leaked.
with zipfile.ZipFile("/content/drive/MyDrive/Colab Notebooks/HMDB.zip", 'r') as zip_ref:
    zip_ref.extractall("/tmp")

#Define our class names based on the folder structure
classNames = ["sit", "throw", "walk"]

In [None]:
def convolutionalLayer():
    """Build the feature extractor that is applied to a single video frame.

    Returns:
        A `keras.Sequential` model taking one 224x224x3 frame and applying,
        in order: a 64-filter 4x4 convolution with tanh activation, batch
        normalization, global max pooling and 40% dropout.
    """
    frame_layers = [
        keras.layers.Conv2D(
            filters=64,
            kernel_size=(4, 4),
            activation="tanh",
            input_shape=(224, 224, 3),
        ),
        keras.layers.BatchNormalization(),
        keras.layers.GlobalMaxPool2D(),
        keras.layers.Dropout(0.4),
    ]

    return keras.Sequential(frame_layers)


def exampleRNN(num_classes=3, nb_frames=10):
    """Build and compile the recurrent video classifier.

    Args:
        num_classes: Number of units in the output layer, one per class.
            Defaults to 3.
        nb_frames: Number of frames per video clip fed to the model; must
            match the number of frames the data generator produces.
            Defaults to 10.

    Returns:
        A compiled `keras.models.Sequential` model that takes input of shape
        (nb_frames, 224, 224, 3) and outputs `num_classes` raw scores.
    """
    m = keras.models.Sequential()

    #The "TimeDistributed" layer here is what applies the convolutionalLayer we defined above to every frame.
    #Note the input shape starts with nb_frames, the number of frames we created for each video.
    m.add(keras.layers.TimeDistributed(convolutionalLayer(),
                                       input_shape=(nb_frames, 224, 224, 3)))

    #Here, we implement an LSTM, a recurrent layer that consumes the per-frame feature
    #vectors in sequence. It has 1024 units, so it emits a 1024-value summary of the clip,
    #and a dropout of 0.5 on its inputs to support regularization.
    #https://keras.io/api/layers/recurrent_layers/lstm/
    m.add(keras.layers.LSTM(units=1024, dropout=0.5))

    #NOTE(review): this Dense layer has no activation, so together with the output layer
    #it forms a purely linear head - confirm whether a non-linearity was intended.
    m.add(keras.layers.Dense(units=512))
    m.add(keras.layers.Dropout(0.2))

    #One output unit per class. No activation is applied, so the outputs are raw
    #scores, which is what the categorical hinge loss below is computed on.
    m.add(keras.layers.Dense(units=num_classes))
    m.compile(optimizer=keras.optimizers.SGD(learning_rate=.1),
              loss='categorical_hinge',
              metrics=['categorical_accuracy'])

    return m

#Data augmentation applied to the frames: random horizontal flips.
transforms = keras.preprocessing.image.ImageDataGenerator(horizontal_flip=True)

train = keras_video.VideoFrameGenerator(
    classes = classNames,
    glob_pattern = '/tmp/HMDB/{classname}/*.avi',
    nb_frames = 10, 
    batch_size = 24, 
    target_shape = (224,224),
    nb_channel = 3, 
    split_val = 0.25, 
    transformation = transforms,
    use_frame_cache = False #Setting this to True speeds things up, but it requires a lot of memory to work.
)

test = train.get_validation_generator()

#Note we need more time to train our RNN, as we have added a large number of parameters that need to be fit.
model = exampleRNN()

#Write the model to disk at the end of every epoch, so that interrupting the long
#training run (e.g. stopping the cell) does not lose everything trained so far.
checkpoint = keras.callbacks.ModelCheckpoint("./Q4.h5")
model.fit(train, epochs=200, validation_data=test, callbacks=[checkpoint])

model.save("./Q4.h5")

class sit, validation count: 35, train count: 107
class throw, validation count: 25, train count: 77
class walk, validation count: 137, train count: 411
Total data: 3 classes for 595 files for train
Total data: 3 classes for 197 files for validation
Epoch 1/200
Epoch 2/200
Epoch 3/200

KeyboardInterrupt: ignored