# Problem A (Kinect)

Section for configurations and imports.

In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt

import mlflow
mlflow.set_tracking_uri('http://35.228.45.76:5000')
os.environ['GOOGLE_APPLICATION_CREDENTIALS']='./mlflow-312506-8cfad529f4fd.json'

from tensorflow import keras

# Import data augmentation
import sys
module_path = os.path.abspath(os.path.join('../..'))
if module_path not in sys.path:
    sys.path.append(module_path)
from augmentation.methods import *

In [2]:
import warnings
warnings.simplefilter('ignore')

In [3]:
mpl.rcParams['figure.figsize'] = (12, 10)
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

In [4]:
random_state = 47
np.random.seed(random_state)

In [5]:
physical_devices = tf.config.experimental.list_physical_devices( 'GPU' )
print( 'Num GPUs Available: ', len( physical_devices ) )
if len( physical_devices ) > 0:
    tf.config.experimental.set_memory_growth( physical_devices[0], True )

Num GPUs Available:  1


# 1. Data Preparation

## 1.1 Load files and classify each sample into good or bad depending on the file name

W is the start for file names with bad exercises, and all others are classified as "good".

In [6]:
file_path = 'https://raw.githubusercontent.com/digitacs/4dv652-ml/main/datasets/all_good_bad_videos/kinect_good_vs_bad_not_preprocessed/'

good_videos = []
bad_videos = []

### Cut leading and trailing frames

Method for removing leading and trailing frames from each data sample

In [7]:
cut_start_model = mlflow.keras.load_model('gs://mlflow-atlas/mlflow_artifacts/0/de66919788d44ed8a4106c95de1aaa1a/artifacts/cut_start_kinect')
cut_stop_model = mlflow.keras.load_model('gs://mlflow-atlas/mlflow_artifacts/0/5bc55dc1d2534a259459bc711d10cac9/artifacts/cut_start_kinect')

def cut_leading_trailing(data):
    trimmed_data = data.copy()

    predictions = cut_start_model.predict(data)
    predictions = predictions.round().astype(int)
    n = 0
    for pred in predictions:
        # TODO: Add handling of start/stop predictions when models have better performance
        n = n + 1

    return trimmed_data

In [8]:
df = pd.read_csv(file_path + 'A1.csv')
df.drop(columns=['FrameNo'], inplace=True)
df['quality'] = 1 # Good
good_videos.append('A1.csv')

numbers = ['01', '02', '03', '04', '05', '06', '07', '08', '09']

for i in numbers:
  try:
    file_name = 'G{}.csv'.format(i)
    temp = pd.read_csv(file_path +  file_name)
    temp.drop(columns=['FrameNo'], inplace=True)
    temp = cut_leading_trailing(temp)
    temp['quality'] = 1 # Good
    good_videos.append(file_name)
    df = df.append(temp, ignore_index=True)

    file_name = 'W{}.csv'.format(i)
    temp = pd.read_csv(file_path +  file_name)
    temp.drop(columns=['FrameNo'], inplace=True)
    temp = cut_leading_trailing(temp)
    temp['quality'] = 0 # Bad
    bad_videos.append(file_name)
    df = df.append(temp, ignore_index=True)
  except IOError as e:
    print('Could not find file: ', file_name)

for i in range(20, 83):
  try:
    file_name = 'G{}.csv'.format(i)
    temp = pd.read_csv(file_path +  file_name)
    temp.drop(columns=['FrameNo'], inplace=True)
    temp = cut_leading_trailing(temp)
    temp['quality'] = 1 # Good
    good_videos.append(file_name)
    df = df.append(temp, ignore_index=True)
  except IOError as e:
    print('Could not find file: ', file_name)

for i in range(10, 44):
  try:
    file_name = 'W{}.csv'.format(i)
    temp = pd.read_csv(file_path +  file_name)
    temp.drop(columns=['FrameNo'], inplace=True)
    temp = cut_leading_trailing(temp)
    temp['quality'] = 0 # Bad
    bad_videos.append(file_name)
    df = df.append(temp, ignore_index=True)
  except IOError as e:
    print('Could not find file: ', file_name)

print(df.shape)

Could not find file:  G67.csv
Could not find file:  G70.csv
(26259, 40)


## 1.2 Check class imbalance

To see if we need to use any dataset imbalanced techniques. We do not consider that there's a need for that in this case.

In [9]:
print('Total videos: ', len(good_videos + bad_videos))
print('Good videos:', len(good_videos))
print('Bad videos:', len(bad_videos))

Total videos:  114
Good videos: 71
Bad videos: 43


In [10]:
df_gc = df.groupby(['quality']).size()
print(
    'Total: {}\n\nGood: {} ({:.2f}% of total)\nBad: {} ({:.2f}% of total)\n'
    .format(
      len(df), 
      df_gc[1],
      df_gc[1] / len(df),
      df_gc[0],
      df_gc[0] / len(df)
    )
  )

Total: 26259

Good: 17563 (0.67% of total)
Bad: 8696 (0.33% of total)



## 1.3 Data Augmentation

### Mirror X coordinate

In [11]:
df = mirror(df,'x', append=True)
print(df.shape)

(52518, 40)


### Stretch X coordinate by 50%

In [12]:
df_temp = augMultiplier(df.drop(columns=['quality']), multiplier=1.5)
df_temp['quality'] = df['quality']
df = df.append(df_temp, ignore_index=True)
print(df.shape)

(52518, 40)


### Stretch Y coordinate by 25%

In [13]:
df_temp = augMultiplier(df.drop(columns=['quality']), multiplier=0.25)
df_temp['quality'] = df['quality']
df = df.append(df_temp, ignore_index=True)
print(df.shape)

(52518, 40)


### Rotate around the Y axis by p/7

In [14]:
angle = 3.1415 / 7
df_temp = rotate2(df.drop(columns=['quality']), angle=angle,posenet=False)
print(df.shape)

### Rotate around the Y axis by -p/9

In [15]:
angle = 3.1415 / -9
df_temp = rotate2(df.drop(columns=['quality']), angle=angle,posenet=False)
print(df.shape)

### Save dataset

In [None]:
df.to_csv('../../datasets/Problem_A_agumented.csv')

## 1.4 Split into sets for training, validation, and testing + use and save scaler

In [16]:
X = df.drop(columns=['quality'])
y = df['quality']

In [17]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=random_state)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=random_state)

In [18]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)

X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

print('Training features shape:', X_train.shape)
print('Training labels shape:', y_train.shape, '\n')

print('Validation features shape:', X_val.shape)
print('Validation labels shape:', y_val.shape, '\n')

print('Test features shape:', X_test.shape)
print('Test labels shape:', y_test.shape, '\n')

Training features shape: (37812, 39)
Training labels shape: (37812,) 

Validation features shape: (9454, 39)
Validation labels shape: (9454,) 

Test features shape: (5252, 39)
Test labels shape: (5252,) 



# 2. Dense Model

In [19]:
# here

# 3. CNN

In [20]:
# here

# 4. Comparison: Dense Model vs CNN

In [21]:
# here