In [107]:
import os
import re

import numpy as np 
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib as mpl
import matplotlib.pyplot as plt

from tensorflow import keras

# MLflow dashboard
import mlflow
mlflow.set_tracking_uri('http://35.228.45.76:5000')
os.environ['GOOGLE_APPLICATION_CREDENTIALS']='../../keys/mlflow-312506-8cfad529f4fd.json'

# Import data augmentation
import sys
module_path = os.path.abspath(os.path.join('../..'))
if module_path not in sys.path:
    sys.path.append(module_path)
from augmentation.methods import *

In [108]:
import warnings
warnings.simplefilter('ignore')

In [109]:
random_state = 47
np.random.seed(random_state)

In [110]:
physical_devices = tf.config.experimental.list_physical_devices( 'GPU' )
print( 'Num GPUs Available: ', len( physical_devices ) )
if len( physical_devices ) > 0:
    tf.config.experimental.set_memory_growth( physical_devices[0], True )

Num GPUs Available:  0


# 1. Data Preparation
## 1.1 Load the Posenet files 


In [225]:
normal_dataset_path = '../../datasets/posenet-uncut/'
ugly_dataset_path = '../../datasets/good_ugly_posenet/'
subjective_score = pd.read_csv('../../datasets/VideoScoring.csv')

all_data_X = []
all_data_y = []

In [299]:
def add_scores(file_name,ugly=False):
    path = normal_dataset_path
    prefix = ''
    
    if ugly:
        path = ugly_dataset_path
        prefix = 'U' 
        
    df = pd.read_csv(path+file_name)
    
    df = df[df.columns.drop(list(df.filter(regex='_eye_')))]
    df = df[df.columns.drop(list(df.filter(regex='_ear_')))]
    df = df.rename(columns={'nose_x': 'head_x', 'nose_y': 'head_y', 'nose_score': 'head_score'})
        
    df['ConfidenceScore'] = np.mean(df[list(df.filter(regex='_score'))].mean(axis=1) * df['score'])
    sub_score = subjective_score.loc[subjective_score['FileName'] == prefix + file_name.replace('.csv', '')]['AVG']
    df['GoodnessScore'] = float(sub_score)
    
    return df.drop(columns=['ConfidenceScore', 'GoodnessScore']), df[['ConfidenceScore', 'GoodnessScore']]

In [300]:
# Add scores to the original videos
for file in os.listdir(normal_dataset_path):
    if not file.find(".csv",0) == -1:
        X,y = add_scores(file,False)
        
        all_data_X.append(X)
        all_data_y.append(y)

# Add scores to augmented videos
for file in os.listdir(ugly_dataset_path):
    if not file.find(".csv",0) == -1:
        X,y = add_scores(file,True)
        
        all_data_X.append( X )
        all_data_y.append( y )

In [301]:
TRAIN_SPLIT = int(len(all_data_X)*0.9)
TRAIN_SPLIT

504

In [302]:
train_X = all_data_X[:TRAIN_SPLIT]
test_X = all_data_X[TRAIN_SPLIT:]
train_y = all_data_y[:TRAIN_SPLIT]
test_y = all_data_y[TRAIN_SPLIT:]

In [362]:
from sklearn.preprocessing import StandardScaler

inputScaler = StandardScaler()

for i in range(0,len(train_X)):
    inputScaler.fit(train_X[i])
for i in range(0,len(train_X)):
    train_X[i] = inputScaler.transform(train_X[i])
    
for i in range(0,len(test_X)):
    test_X[i] = inputScaler.transform(test_X[i])

outputScaler = StandardScaler()
for i in range(0,len(train_y)):
    outputScaler.fit(train_y[i])
for i in range(0,len(train_y)):
    train_y[i] = outputScaler.transform(train_y[i])
    
for i in range(0,len(test_y)):
    test_y[i] = outputScaler.transform(test_y[i])

In [327]:
print(train_X[c].shape[1])
print(train_y[c].shape)

40
(580, 2)


In [363]:
def multivariate_data(dataset, target, start_series, end_series, history_size,
                      target_size, step):

    data = []
    labels = []
    l = 0

    for c in range(start_series, end_series):
        temp = np.array(dataset[c][:,0])
        start_index = history_size
        end_index = len(temp) - target_size
        for i in range(start_index, end_index):
            indices = range(i-history_size, i, step)
            features = []

            for l in range(0,dataset[c].shape[1]):
                features.append(np.transpose(np.array(dataset[c][:,l])[indices]))

        data.append(np.transpose(np.array(features)))
        labels.append(target[c][i+target_size,:])
        
    return np.array(data), np.array(labels)


In [365]:
from sklearn.metrics import mean_squared_error, r2_score, explained_variance_score, mean_absolute_error

def eval_metrics(actual, pred):
    mse = mean_squared_error(actual, pred)
    msa = mean_absolute_error(actual, pred)
    r2 = r2_score(actual, pred)
    variance = explained_variance_score(actual, pred)
    return mse, msa, r2, variance

In [338]:
history_size=20
target_size=0

X_train, y_train = multivariate_data(train_X, train_y, 0, TRAIN_SPLIT, history_size, target_size, 1)

In [368]:
X_test, y_test = multivariate_data(test_X, test_y, 0, len(test_y), history_size, target_size, 1)

In [371]:
X_train.shape

(504, 20, 40)

In [390]:
BATCH_SIZE = 256
BUFFER_SIZE = 100000

train_set = tf.data.Dataset.from_tensor_slices((X_train, y_train))
train_set = train_set.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat()

test_set = tf.data.Dataset.from_tensor_slices((X_test, y_test))
test_set = test_set.batch(BATCH_SIZE).repeat()

In [391]:
train_set

<RepeatDataset shapes: ((None, 20, 40), (None, 2)), types: (tf.float64, tf.float64)>

In [393]:
UNIT = 32
OPTIMIZER='adam'
LOSS='mse'

model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(UNIT,return_sequences=True,input_shape=X_train.shape[-2:]),
    tf.keras.layers.LSTM(16, activation='relu'),
    tf.keras.layers.Dense(2)
])

model.compile(optimizer=OPTIMIZER, loss=LOSS)
model.summary()

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_13 (LSTM)               (None, 20, 32)            9344      
_________________________________________________________________
lstm_14 (LSTM)               (None, 16)                3136      
_________________________________________________________________
dense_9 (Dense)              (None, 2)                 34        
Total params: 12,514
Trainable params: 12,514
Non-trainable params: 0
_________________________________________________________________


In [394]:
EVALUATION_INTERVAL = 2000
EPOCHS = 10
model_name = 'goodness_LTSM'

with mlflow.start_run(run_name=model_name) as run:

    run_id = run.info.run_id
    
    history = model.fit(train_set, 
                  epochs=EPOCHS,
                  steps_per_epoch=EVALUATION_INTERVAL,
                  validation_data=test_set, 
                  validation_steps=50)
    
    # Log model, scaler, model parameters to MLflow
    mlflow.log_param("units", UNIT)
    mlflow.log_param("optimizer", OPTIMIZER)
    mlflow.log_param("loss", LOSS)
    mlflow.log_param("evaluation interval", EVALUATION_INTERVAL)
    mlflow.log_param("epochs", EPOCHS)
    mlflow.log_param("batch size", BATCH_SIZE)
    
    predictions = model.predict(X_test, verbose=1)
    # Invert transform on predictions
#     predictions = y_scaler.inverse_transform(predictions)
    (mse, msa, r2, variance) = eval_metrics(y_test, predictions)

    # Print metrics
    print("Optimizer={}:".format(OPTIMIZER))
    print('MSE: ', mse)
    print('MSA: ', msa)
    print('R-Squared: ', r2)
    print('Explained Variance Score: ', variance)
    
    mlflow.log_metric("mse", mse)
    mlflow.log_metric("msa", msa)
    mlflow.log_metric("r2", r2)
    mlflow.log_metric("variance", variance)

    mlflow.keras.log_model(model, model_name)
    mlflow.sklearn.log_model(inputScaler, 'InputScaler')
    mlflow.sklearn.log_model(outputScaler, 'OutScaler')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Optimizer=adam:
MSE:  6.321309048803643
MSA:  2.1001269974007486
R-Squared:  -546.0041864798935
Explained Variance Score:  -0.1675097321829513




INFO:tensorflow:Assets written to: /var/folders/m8/zs0d09l904s28pmh_g_t5q640000gn/T/tmpzefa0mdr/model/data/model/assets


INFO:tensorflow:Assets written to: /var/folders/m8/zs0d09l904s28pmh_g_t5q640000gn/T/tmpzefa0mdr/model/data/model/assets
