# Problem A (Kinect)

Section for configurations and imports.

In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt

import mlflow
mlflow.set_tracking_uri('http://35.228.45.76:5000')
os.environ['GOOGLE_APPLICATION_CREDENTIALS']='./mlflow-312506-8cfad529f4fd.json'

from tensorflow import keras

# Import data augmentation
import sys
module_path = os.path.abspath(os.path.join('../..'))
if module_path not in sys.path:
    sys.path.append(module_path)
from augmentation.methods import *

In [2]:
import warnings
warnings.simplefilter('ignore')

In [3]:
mpl.rcParams['figure.figsize'] = (12, 10)
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

In [4]:
random_state = 47
np.random.seed(random_state)

In [5]:
physical_devices = tf.config.experimental.list_physical_devices( 'GPU' )
print( 'Num GPUs Available: ', len( physical_devices ) )
if len( physical_devices ) > 0:
    tf.config.experimental.set_memory_growth( physical_devices[0], True )

Num GPUs Available:  1


## 1. Load Kinect files and classify each sample into good or bad depending on the file name

W is the start of file names containing a bad exercise, and all others file names are to be classified as good exercises.

In [6]:
file_path = 'https://raw.githubusercontent.com/digitacs/4dv652-ml/main/datasets/all_good_bad_videos/kinect_good_vs_bad_not_preprocessed/'

good_videos = []
bad_videos = []

### 1.1 Cut leading and trailing frames

When loading the files, we'll use this method for removing leading and trailing frames from each data sample-

In [7]:
cut_start_scaler = mlflow.sklearn.load_model('gs://mlflow-atlas/mlflow_artifacts/0/14b3d62fe0ac449d98a19e883e57133c/artifacts/InputScaler')
cut_start_model = mlflow.keras.load_model('gs://mlflow-atlas/mlflow_artifacts/0/14b3d62fe0ac449d98a19e883e57133c/artifacts/cut_start_kinect')

#cut_stop_scaler = mlflow.sklearn.load_model('gs://mlflow-atlas/mlflow_artifacts/0/14b3d62fe0ac449d98a19e883e57133c/artifacts/InputScaler')
#cut_stop_model = mlflow.keras.load_model('gs://mlflow-atlas/mlflow_artifacts/0/5bc55dc1d2534a259459bc711d10cac9/artifacts/cut_start_kinect')

In [8]:
def cut_leading_trailing(data):
    trimmed_data = data.copy()
    scaled_data = cut_start_scaler.transform(data, copy=True)

    predictions = cut_start_model.predict(scaled_data)
    predictions = predictions.round().astype(int)
    n = 0
    for pred in predictions:
        if pred == 1 and n < 100:
            trimmed_data.drop(index=n, inplace=True)
        n = n + 1

    return trimmed_data

In [9]:
df = pd.read_csv(file_path + 'A1.csv')
df.drop(columns=['FrameNo'], inplace=True)
df['quality'] = 1 # Good
good_videos.append('A1.csv')

numbers = ['01', '02', '03', '04', '05', '06', '07', '08', '09']

for i in numbers:
  try:
    file_name = 'G{}.csv'.format(i)
    temp = pd.read_csv(file_path +  file_name)
    temp.drop(columns=['FrameNo'], inplace=True)
    temp = cut_leading_trailing(temp)
    temp['quality'] = 1 # Good
    good_videos.append(file_name)
    df = df.append(temp, ignore_index=True)

    file_name = 'W{}.csv'.format(i)
    temp = pd.read_csv(file_path +  file_name)
    temp.drop(columns=['FrameNo'], inplace=True)
    temp = cut_leading_trailing(temp)
    temp['quality'] = 0 # Bad
    bad_videos.append(file_name)
    df = df.append(temp, ignore_index=True)
  except IOError as e:
    print('Could not find file: ', file_name)

for i in range(20, 83):
  try:
    file_name = 'G{}.csv'.format(i)
    temp = pd.read_csv(file_path +  file_name)
    temp.drop(columns=['FrameNo'], inplace=True)
    temp = cut_leading_trailing(temp)
    temp['quality'] = 1 # Good
    good_videos.append(file_name)
    df = df.append(temp, ignore_index=True)
  except IOError as e:
    print('Could not find file: ', file_name)

for i in range(10, 44):
  try:
    file_name = 'W{}.csv'.format(i)
    temp = pd.read_csv(file_path +  file_name)
    temp.drop(columns=['FrameNo'], inplace=True)
    temp = cut_leading_trailing(temp)
    temp['quality'] = 0 # Bad
    bad_videos.append(file_name)
    df = df.append(temp, ignore_index=True)
  except IOError as e:
    print('Could not find file: ', file_name)

print(df.shape)

Could not find file:  G67.csv
Could not find file:  G70.csv
(20780, 40)


## 2. Check class imbalance

To see if we need to use any dataset imbalanced techniques. We do not consider that there's a need for that in this case.

In [10]:
print('Total videos: ', len(good_videos + bad_videos))
print('Good videos:', len(good_videos))
print('Bad videos:', len(bad_videos))

Total videos:  114
Good videos: 71
Bad videos: 43


In [11]:
df_gc = df.groupby(['quality']).size()
print(
    'Total: {}\n\nGood: {} ({:.2f}% of total)\nBad: {} ({:.2f}% of total)\n'
    .format(
      len(df), 
      df_gc[1],
      df_gc[1] / len(df),
      df_gc[0],
      df_gc[0] / len(df)
    )
  )

Total: 20780

Good: 13828 (0.67% of total)
Bad: 6952 (0.33% of total)



## 3. Data Augmentation

### Mirror X coordinate

In [12]:
df = mirror(df,'x', append=True)
print(df.shape)

(41560, 40)


### Stretch by 50%

In [13]:
df_temp = augMultiplier(df.drop(columns=['quality']), multiplier=1.5)
df_temp['quality'] = df['quality']
df = df.append(df_temp, ignore_index=True)
print(df.shape)

(83120, 40)


### Compress by 25%

In [14]:
df_temp = augMultiplier(df.drop(columns=['quality']), multiplier=0.25)
df_temp['quality'] = df['quality']
df = df.append(df_temp, ignore_index=True)
print(df.shape)

(166240, 40)


### Rotate by p/7

In [15]:
angle = 3.1415 / 7
df_rotated = rotate(df.drop(columns=['quality']), angle=angle, posenet=False)
df_rotated['quality'] = df['quality'].append(df['quality'], ignore_index=True)
print(df_rotated.shape)

(332480, 40)


### Rotate by -p/9

In [16]:
angle = 3.1415 / -9
df_rotated_2 = rotate(df_rotated.drop(columns=['quality']), angle=angle, posenet=False)
df_rotated_2['quality'] = df_rotated['quality'].append(df_rotated['quality'], ignore_index=True)
print(df_rotated_2.shape)

(664960, 40)


## 4. Save as New Dataset

In [17]:
df_rotated_2.to_csv('../../datasets/all_good_bad_problemA_kinect/good_bad_kinect.csv', index=False)