In [None]:
import utils
import numpy as np
import os
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import copy
import tensorflow as tf
import matplotlib.pyplot as plt

# Data loading

In [None]:
# Interaction classes; a class's position in this list is used as its integer label below.
classes = ['hug', 'kiss', 'highfive', 'handshake']
# Directory listing of the handshake output folder (not referenced again in the visible cells).
csv_list = os.listdir('out/handshake')

In [None]:
# Files found per class directory, and the list of per-video frames to concatenate later.
class_videos = {}
total = []

# Map each class name to its integer label (its position in `classes`).
factorize_classes = {_class: key for key, _class in enumerate(classes)}

# Running identifier, unique per loaded CSV across all classes.
idx = 0

for _class in classes:
    class_dir = os.path.join('out', _class)
    if os.path.isdir(class_dir):
        # Sorted so that `_id` assignment does not depend on filesystem listing order.
        class_videos[_class] = sorted(os.listdir(class_dir))

for _class, files in class_videos.items():
    for file in files:
        # Read every file from its OWN class directory. Reading from a fixed
        # 'out/handshake/' path would skip files (or load handshake data under
        # another class label) for every class except handshake.
        path = os.path.join('out', _class, file)
        if not os.path.isfile(path) or file.split('.')[-1] != 'csv':
            continue

        csv = pd.read_csv(path)

        # int() accepts leading zeros directly; stripping them first would turn
        # an all-zero name such as '0000' into '' and raise ValueError.
        file_id = int(file.split('.')[0])
        video_number = pd.DataFrame({'_id': idx, 'video': [file_id] * csv.shape[0]})

        # Column order matters downstream: '_id', 'video', <csv columns>, 'result'.
        csv['result'] = factorize_classes[_class]
        total.append(pd.concat([video_number, csv], axis=1))
        idx += 1


In [None]:
# Stack the per-video frames into a single table with a fresh 0..N-1 row index.
result = pd.concat(total, ignore_index=True)

# Data preprocessing

In [None]:
# Names of all columns whose label ends with 'score'.
score_columns = [column for column in result.columns if column.endswith('score')]

In [None]:
# Replace every missing value in the table with 0.
result = result.fillna(0)

In [None]:
# In the columns from position 4 up to (but excluding) the last one, recode 0 as -1.
# Since missing values were filled with 0 just before, -1 now marks both
# originally-missing entries and entries that were genuinely 0.
result.iloc[:, 4:-1] = result.iloc[:, 4:-1].replace(0, -1)

In [None]:
# Display the table after filling and recoding.
result

In [None]:
# Fraction of cells in the same column slice (4:-1) that are exactly 0.
(result.iloc[:, 4:-1] == 0).sum().sum() / result.iloc[:, 4:-1].size

In [None]:
# -1 is the sentinel written into columns 4:-1 for missing (and zero) values.
na_mask = result.iloc[:, 4:-1] == -1
print('Proportion of NAs cells in dataset: ' + str(na_mask.sum().sum() / na_mask.size))
# A row is counted as an NA row when at least one of its cells is -1.
# (Counting the per-row sums with .count() counts every row and always gives 1.0.)
print('Proportion of NAs rows in dataset: ' + str(na_mask.any(axis=1).sum() / na_mask.shape[0]))

In [None]:
# Number of -1 entries in each score column, largest count first.
(result[score_columns] == -1).sum().sort_values(ascending=False)

In [None]:
# Columns for lower-body parts (ankles, hips, knees), which might contribute
# poorly to the prediction of interactions.
lower_body_parts = ('Ankle', 'Hip', 'Knee')
to_drop = [column for column in result.columns if any(part in column for part in lower_body_parts)]

In [None]:
# Remove the score columns from the table.
result = result.drop(columns=score_columns)
# Disabled option: additionally drop the lower-body columns collected in `to_drop`.
#result.drop(set(to_drop)-set(score_columns), axis=1, inplace=True)

In [None]:
# Show the column labels that remain after dropping the score columns.
result.columns

In [None]:
# Convert the table to a NumPy array; columns are addressed by position from here on.
result_array = np.array(result)

### Preparing data for feeding

In [None]:
# Row positions grouped by class name; the class label is read from the last column.
class_indices = {
    name: np.flatnonzero(result_array[:, -1] == label).tolist()
    for label, name in enumerate(classes)
}

In [None]:
# Row positions grouped by the value in column 0: one list per distinct value,
# ordered by ascending value.
video_id_indices = [
    np.flatnonzero(result_array[:, 0] == video_id).tolist()
    for video_id in sorted(set(result_array[:, 0]))
]

In [None]:
# NOTE(review): each np.delete shifts the remaining columns one position to the
# left, so these three calls remove the ORIGINAL columns 0, 2 and 4 (not 0, 1
# and 2). Original column 1 ('video') therefore stays in the feature matrix.
# Behaviour is kept as-is; confirm against the CSV schema that this is intended.
result_array = np.delete(result_array, 0, 1)
result_array = np.delete(result_array, 1, 1)
result_array = np.delete(result_array, 2, 1)

# The last remaining column holds the class label; split it off as the target.
targets = result_array[:, -1]
result_array = np.delete(result_array, -1, 1)
# Number of feature columns as an int. The model's input shape uses this as a
# single dimension, so the full (rows, columns) shape tuple would not work there.
n_features = result_array.shape[1]

In [None]:
# One entry per index group: the feature rows and the matching label rows.
x = []
y = []
for rows in video_id_indices:
    x.append(result_array[rows])
    y.append(targets[rows])

In [None]:
# Collapse each group's per-row labels into a single integer label (their maximum),
# leaving one label per entry of y.
y = [int(np.amax(labels)) for labels in y]

In [None]:
from operator import itemgetter
def split(X, y, test_size=0.1, num_classes=None, seed=None):
    """Shuffle the samples and split them into train and test parts with one-hot labels.

    Args:
        X: Sequence of samples; it is only indexed by position, so the samples
            may have different lengths.
        y: Sequence of non-negative integer class labels, same length as ``X``.
        test_size: Fraction of samples placed in the test part. The test count
            is ``int(len(X) * test_size)``, i.e. rounded down.
        num_classes: Width of the one-hot vectors. Defaults to ``max(y) + 1``;
            pass it explicitly when the highest class may be absent from ``y``.
        seed: Optional seed for a reproducible shuffle. ``None`` draws from
            NumPy's global random state.

    Returns:
        ``(train_X, test_X, train_y, test_y)``, each a tuple. Every label is a
        1-D one-hot float array of length ``num_classes``.

    Raises:
        AssertionError: If ``X`` and ``y`` differ in length.
        ValueError: If there are no samples, or a label lies outside
            ``[0, num_classes)``.
    """
    assert len(X) == len(y)
    if len(y) == 0:
        raise ValueError("split() needs at least one sample")

    y_arr = np.asarray(y, dtype=int)
    if num_classes is None:
        num_classes = int(y_arr.max()) + 1
    if y_arr.min() < 0 or y_arr.max() >= num_classes:
        raise ValueError("labels must lie in [0, num_classes)")

    onehot = np.zeros((y_arr.size, num_classes))
    onehot[np.arange(y_arr.size), y_arr] = 1

    if seed is None:
        order = np.random.permutation(len(X))
    else:
        order = np.random.default_rng(seed).permutation(len(X))
    split_at = int(len(order) * test_size)

    # Build the tuples explicitly: itemgetter(*order) returns a bare element
    # (not a 1-tuple) for a single index, which breaks the slicing below.
    X = tuple(X[i] for i in order)
    y = tuple(onehot[i] for i in order)

    train_X = X[split_at:]
    train_y = y[split_at:]

    test_X = X[:split_at]
    test_y = y[:split_at]

    return (train_X, test_X, train_y, test_y)

In [None]:
# Shuffle and split the per-video samples using split()'s default test_size (0.1).
train_X, test_X, train_y, test_y = split(x, y)

# Model

In [None]:
# Number of passes over the data; also the default number of passes a generator yields.
epochs = 100

def gen_batch(X, y, n_epochs=None):
    """Yield single-sample batches, passing over the data a fixed number of times.

    Args:
        X: Sequence of samples, indexed by position.
        y: Sequence of labels, same length as ``X``.
        n_epochs: Number of full passes to yield. ``None`` uses the
            module-level ``epochs`` value.

    Yields:
        ``(batch_X, batch_y)`` where ``batch_X`` is ``np.array([X[i]])`` and
        ``batch_y`` is ``np.atleast_1d([y[i]])``, i.e. each has a leading
        batch axis of size 1.

    Raises:
        AssertionError: On first iteration, if ``X`` and ``y`` differ in length.
    """
    # Compare the arguments themselves, not the notebook-level `x`.
    assert len(X) == len(y)

    if n_epochs is None:
        n_epochs = epochs

    for _ in range(n_epochs):
        for i in range(len(X)):
            yield np.array([X[i]]), np.atleast_1d([y[i]])

In [None]:
# Generators over the train and test parts; each yields one sample per step.
train_batch = gen_batch(train_X, train_y)
test_batch = gen_batch(test_X, test_y)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers

In [None]:
# Sequence classifier: LSTM over the time axis, averaged over time, then dense layers.
model = Sequential([
    # Variable-length sequences of n_features values per step; n_features must be an int.
    layers.Input(shape=[None, n_features], dtype=tf.float64),
    # The Input layer above already fixes the input shape, so the LSTM takes no
    # input_shape of its own (passing one as well is redundant and Keras warns about it).
    layers.LSTM(50, return_sequences=True),
    # Average the per-step LSTM outputs into one fixed-size vector per sequence.
    layers.GlobalAveragePooling1D(),
    layers.Dense(70, activation='relu'),
    layers.Dense(50, activation='relu', kernel_regularizer=tf.keras.regularizers.L1()),
    # One output probability per class.
    layers.Dense(len(classes), activation='softmax')
])
model.summary()

In [None]:
# SGD with default settings; categorical cross-entropy on probabilities
# (from_logits=False matches the softmax output layer); accuracy is tracked as a metric.
model.compile(optimizer=tf.keras.optimizers.SGD(), loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False), metrics=['accuracy'])

In [None]:
# The generators yield one sample per step and exactly `epochs` passes over their
# data, so the step counts are derived from the split sizes and the epoch count
# from `epochs` instead of repeating literal numbers that must be kept in sync.
h = model.fit(
    train_batch,
    verbose=1,
    epochs=epochs,
    validation_data=test_batch,
    steps_per_epoch=len(train_X),
    validation_steps=len(test_X),
)