## Dependencies

In [None]:
import os
import shutil
import cv2
import glob
import random
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from collections import Counter
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam

## Data Preprocess & Split

In [10]:
# The 12 HMDB51 action classes used in this notebook (all others are skipped).
keep_hmdb51 = ["clap", "climb", "drink", "jump", "pour", "ride_bike", "ride_horse",
        "run", "shoot_bow", "smoke", "throw", "wave"]

# Tags used in the split annotation files: 1 = train, 2 = test.
# Any other tag is ignored.
TRAIN_TAG, TEST_TAG = 1, 2


def parse_split_annotations(annotation_paths, keep_classes, train_tag=TRAIN_TAG, test_tag=TEST_TAG):
    """Read split annotation files and collect train/test video names.

    Each annotation file is named ``<class>_test_split<k>.txt`` and holds one
    ``<video_filename> <tag>`` pair per line.

    Args:
        annotation_paths: iterable of paths to annotation files.
        keep_classes: collection of class names to keep; files belonging to
            any other class are skipped.
        train_tag: integer tag that marks a training video.
        test_tag: integer tag that marks a test video.

    Returns:
        Tuple ``(train_files, train_labels, test_files, test_labels)`` of
        lists; files and labels of a split are index-aligned.

    Raises:
        ValueError: if a non-blank line does not consist of exactly two
            whitespace-separated fields, or its tag is not an integer.
    """
    train_files, train_labels = [], []
    test_files, test_labels = [], []
    for filepath in annotation_paths:
        # basename (not split('/')) because glob returns OS-native separators.
        file_name = os.path.basename(filepath)
        # Drop the trailing "test" and "split<k>.txt" pieces to get the class.
        class_name = '_'.join(file_name.split('_')[:-2])
        if class_name not in keep_classes:
            continue  # skipping the classes that we won't use.
        with open(filepath) as fid:
            for line in fid:
                fields = line.split()
                if not fields:
                    continue  # tolerate blank / trailing empty lines
                video_filename, tag_string = fields
                tag = int(tag_string)
                if tag == train_tag:
                    train_files.append(video_filename)
                    train_labels.append(class_name)
                elif tag == test_tag:
                    test_files.append(video_filename)
                    test_labels.append(class_name)
    return train_files, train_labels, test_files, test_labels


split_pattern_name = "*test_split1.txt"
split_pattern_path = os.path.join('new_HMDB51/test_train_splits', split_pattern_name)
# glob order is filesystem-dependent; sort so the sample order is reproducible.
annotation_paths = sorted(glob.glob(split_pattern_path))
train_files, train_labels, test_files, test_labels = parse_split_annotations(
    annotation_paths, keep_hmdb51
)

## Frame Selection

### Start, End & Random

In [7]:
options = ['start', 'end', 'random']

# Number of frames kept clear of both clip ends when sampling a random frame.
RANDOM_MARGIN = 5
# Dedicated, seeded generator so the "random" frames are reproducible
# without touching the global `random` state.
FRAME_SEED = 42
frame_rng = random.Random(FRAME_SEED)


def choose_frame_index(option, num_frames, rng=random):
    """Return the index of the frame to extract for a selection strategy.

    Args:
        option: 'start' (frame at 1/10 of the clip), 'end' (frame at
            num_frames // 1.1) or 'random'.
        num_frames: total number of frames reported for the clip.
        rng: object providing ``randint``/``randrange`` (defaults to the
            ``random`` module).

    Returns:
        Integer frame index.

    Raises:
        ValueError: if ``option`` is not one of the supported strategies.
    """
    if option == 'start':
        return num_frames // 10
    if option == 'end':
        # Floor division by a float yields a float; make it an int index.
        return int(num_frames // 1.1)
    if option == 'random':
        low, high = RANDOM_MARGIN, num_frames - RANDOM_MARGIN
        if low <= high:
            return rng.randint(low, high)
        # Clip too short for the margins: randint(low, high) would raise,
        # so fall back to any valid frame.
        return rng.randrange(num_frames) if num_frames > 0 else 0
    raise ValueError(f'Unknown frame option: {option!r}')


# Loop over all categories/classes; each video is opened once and one frame
# per option is extracted from it.
for category in keep_hmdb51:
    # set the path to the directory containing the videos
    video_dir = f'new_HMDB51/video_data/{category}'

    # get a (sorted, hence reproducible) list of all .avi files in the directory
    video_files = sorted(f for f in os.listdir(video_dir) if f.endswith('.avi'))

    for video_file in video_files:
        video_path = os.path.join(video_dir, video_file)
        video = cv2.VideoCapture(video_path)
        try:
            num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
            if num_frames <= 0:
                # Unopenable or empty video: nothing to seek to.
                print(f'Unable to extract frame! ({video_path}: no frames reported)')
                continue

            for option in options:
                frame_num = choose_frame_index(option, num_frames, frame_rng)

                # set the frame number to read
                video.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
                ret, frame = video.read()
                if not ret:
                    print(f'Unable to extract frame! ({video_path}, option={option}, frame={frame_num})')
                    continue

                path = f'new_HMDB51/frames/{option}/{category}'
                os.makedirs(path, exist_ok=True)
                # set the path to save the image
                img_path = os.path.join(path, os.path.splitext(video_file)[0] + '.jpg')

                # save the image; imwrite reports failure via its return value
                if not cv2.imwrite(img_path, frame):
                    print(f'Unable to write frame! ({img_path})')
        finally:
            # Always free the capture handle, even if extraction raised.
            video.release()

## Data Load

### Start frame

In [5]:
# Load the 'start' frames for the train/test splits as normalised arrays,
# keeping images and labels index-aligned.
frames_dir = 'new_HMDB51/frames/start'
img_size = (112, 112)

# Dict to convert string labels to ints
str_to_int = {"clap":0, "climb":1, "drink":2, "jump":3, "pour":4, "ride_bike":5, "ride_horse":6,
              "run":7, "shoot_bow":8, "smoke":9, "throw":10, "wave":11}

# Index every extracted frame once (name without extension -> path) instead of
# re-walking the whole directory tree for each video.
frame_index = {}
for path, _, files in os.walk(frames_dir):
    for file_name in files:
        frame_index.setdefault(os.path.splitext(file_name)[0], os.path.join(path, file_name))

loaded = {}
for split_name, split_files, split_labels in (('train', train_files, train_labels),
                                              ('test', test_files, test_labels)):
    images, targets = [], []
    for file, label in zip(split_files, split_labels):
        frame_path = frame_index.get(os.path.splitext(file)[0])
        # cv2.imread returns None (no exception) when the file cannot be read.
        img = cv2.imread(frame_path) if frame_path is not None else None
        if img is None:
            # Drop the sample AND its label so x/y never go out of sync.
            print(f'Skipping {file}: no readable frame under {frames_dir}')
            continue
        images.append(cv2.resize(img, img_size))
        targets.append(str_to_int[label])
    # Scale pixel values from [0, 255] to [0, 1].
    loaded[split_name] = (np.asarray(images)/255., np.array(targets))

x_train, y_train = loaded['train']
x_test, y_test = loaded['test']

assert len(x_train) == len(y_train), "train images/labels out of sync"
assert len(x_test) == len(y_test), "test images/labels out of sync"

### End frame

In [10]:
# Load the 'end' frames for the train/test splits as normalised arrays,
# keeping images and labels index-aligned.
frames_dir = 'new_HMDB51/frames/end'
img_size = (112, 112)

# Dict to convert string labels to ints
str_to_int = {"clap":0, "climb":1, "drink":2, "jump":3, "pour":4, "ride_bike":5, "ride_horse":6,
              "run":7, "shoot_bow":8, "smoke":9, "throw":10, "wave":11}

# Index every extracted frame once (name without extension -> path) instead of
# re-walking the whole directory tree for each video.
frame_index = {}
for path, _, files in os.walk(frames_dir):
    for file_name in files:
        frame_index.setdefault(os.path.splitext(file_name)[0], os.path.join(path, file_name))

loaded = {}
for split_name, split_files, split_labels in (('train', train_files, train_labels),
                                              ('test', test_files, test_labels)):
    images, targets = [], []
    for file, label in zip(split_files, split_labels):
        frame_path = frame_index.get(os.path.splitext(file)[0])
        # cv2.imread returns None (no exception) when the file cannot be read.
        img = cv2.imread(frame_path) if frame_path is not None else None
        if img is None:
            # Drop the sample AND its label so x/y never go out of sync.
            print(f'Skipping {file}: no readable frame under {frames_dir}')
            continue
        images.append(cv2.resize(img, img_size))
        targets.append(str_to_int[label])
    # Scale pixel values from [0, 255] to [0, 1].
    loaded[split_name] = (np.asarray(images)/255., np.array(targets))

x_train, y_train = loaded['train']
x_test, y_test = loaded['test']

assert len(x_train) == len(y_train), "train images/labels out of sync"
assert len(x_test) == len(y_test), "test images/labels out of sync"

### Random frame

In [None]:
# Load the 'random' frames for the train/test splits as normalised arrays,
# keeping images and labels index-aligned.
frames_dir = 'new_HMDB51/frames/random'
img_size = (112, 112)

# Dict to convert string labels to ints
str_to_int = {"clap":0, "climb":1, "drink":2, "jump":3, "pour":4, "ride_bike":5, "ride_horse":6,
              "run":7, "shoot_bow":8, "smoke":9, "throw":10, "wave":11}

# Index every extracted frame once (name without extension -> path) instead of
# re-walking the whole directory tree for each video.
frame_index = {}
for path, _, files in os.walk(frames_dir):
    for file_name in files:
        frame_index.setdefault(os.path.splitext(file_name)[0], os.path.join(path, file_name))

loaded = {}
for split_name, split_files, split_labels in (('train', train_files, train_labels),
                                              ('test', test_files, test_labels)):
    images, targets = [], []
    for file, label in zip(split_files, split_labels):
        frame_path = frame_index.get(os.path.splitext(file)[0])
        # cv2.imread returns None (no exception) when the file cannot be read.
        img = cv2.imread(frame_path) if frame_path is not None else None
        if img is None:
            # Drop the sample AND its label so x/y never go out of sync.
            print(f'Skipping {file}: no readable frame under {frames_dir}')
            continue
        images.append(cv2.resize(img, img_size))
        targets.append(str_to_int[label])
    # Scale pixel values from [0, 255] to [0, 1].
    loaded[split_name] = (np.asarray(images)/255., np.array(targets))

x_train, y_train = loaded['train']
x_test, y_test = loaded['test']

assert len(x_train) == len(y_train), "train images/labels out of sync"
assert len(x_test) == len(y_test), "test images/labels out of sync"

## Model init

In [None]:
# Baseline from Assignment 4: three Conv2D/MaxPooling2D stages followed by a
# dense classifier head with 12 softmax outputs (one per kept class).
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(filters=32, kernel_size=3, padding='same', activation='relu', input_shape=(112, 112, 3)),
    tf.keras.layers.MaxPooling2D(pool_size=2),
    tf.keras.layers.Conv2D(filters=64, kernel_size=3, padding='same', activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=2),
    tf.keras.layers.Conv2D(filters=128, kernel_size=3, padding='same', activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=512, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(units=12, activation='softmax')
])

# Load weights from Stanford dataset
model.load_weights('weights/stanford.h5')

# Freeze the convolutional layers of the model. Selected by type rather than
# by position so the freeze survives edits to the layer list.
for layer in model.layers:
    if isinstance(layer, tf.keras.layers.Conv2D):
        layer.trainable = False

# Compile AFTER freezing: Keras documents that `trainable` changes made after
# compile() are only taken into account once compile() is called again.
model.compile(optimizer=Adam(learning_rate=0.0005), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

## Training & Testing

### Start frame

In [8]:
# Train `model` for 10 epochs in mini-batches of 32 on the arrays currently
# bound to x_train / y_train.
# NOTE(review): x_train, y_train and model are shared names across the three
# experiments; this cell presumably expects the 'Start frame' data-load cell
# and the 'Model init' cell to have been run last. Confirm before re-running.
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=10,
)

2023-04-15 20:32:27.177206: W tensorflow/tsl/framework/cpu_allocator_impl.cc:82] Allocation of 126443520 exceeds 10% of free system memory.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [9]:
# Evaluate the trained model on the held-out arrays in mini-batches of 32.
# The model was compiled with metrics=['accuracy'], so `result` holds the
# loss and the accuracy.
result = model.evaluate(
    x=x_test,
    y=y_test,
    batch_size=32,
)



### End frame

In [13]:
# Train `model` for 10 epochs in mini-batches of 32 on the arrays currently
# bound to x_train / y_train.
# NOTE(review): x_train, y_train and model are shared names across the three
# experiments; this cell presumably expects the 'End frame' data-load cell
# and the 'Model init' cell to have been run last. Confirm before re-running.
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [15]:
# Evaluate the trained model on the held-out arrays in mini-batches of 32.
# The model was compiled with metrics=['accuracy'], so `result` holds the
# loss and the accuracy.
result = model.evaluate(
    x=x_test,
    y=y_test,
    batch_size=32,
)



### Random frame

In [14]:
# Train `model` for 10 epochs in mini-batches of 32 on the arrays currently
# bound to x_train / y_train.
# NOTE(review): x_train, y_train and model are shared names across the three
# experiments; this cell presumably expects the 'Random frame' data-load cell
# and the 'Model init' cell to have been run last. Confirm before re-running.
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=10,
)

Epoch 1/10


2023-04-15 20:46:38.050349: W tensorflow/tsl/framework/cpu_allocator_impl.cc:82] Allocation of 126443520 exceeds 10% of free system memory.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [15]:
# Evaluate the trained model on the held-out arrays in mini-batches of 32.
# The model was compiled with metrics=['accuracy'], so `result` holds the
# loss and the accuracy.
result = model.evaluate(
    x=x_test,
    y=y_test,
    batch_size=32,
)

