# Problem B (PoseNet)

Section for configurations and imports.

In [2]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt

import mlflow
mlflow.set_tracking_uri('http://35.228.45.76:5000')
os.environ['GOOGLE_APPLICATION_CREDENTIALS']='../../keys/mlflow-312506-8cfad529f4fd.json'

from tensorflow import keras

# Import data augmentation
import sys
module_path = os.path.abspath(os.path.join('../..'))
if module_path not in sys.path:
    sys.path.append(module_path)
from augmentation.methods import *

INFO:tensorflow:Enabling eager execution
INFO:tensorflow:Enabling v2 tensorshape
INFO:tensorflow:Enabling resource variables
INFO:tensorflow:Enabling tensor equality
INFO:tensorflow:Enabling control flow v2


In [3]:
import warnings
warnings.simplefilter('ignore')

In [4]:
mpl.rcParams['figure.figsize'] = (12, 10)
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

In [5]:
random_state = 47
np.random.seed(random_state)

In [6]:
physical_devices = tf.config.experimental.list_physical_devices( 'GPU' )
print( 'Num GPUs Available: ', len( physical_devices ) )
if len( physical_devices ) > 0:
    tf.config.experimental.set_memory_growth( physical_devices[0], True )

Num GPUs Available:  0


# 1. Load PoseNet files and classify each sample into good or bad depending on the file name

These CSV files were created by our PoseNet model at [test.html](https://github.com/digitacs/4dv652-frontend/blob/main/test.html).<br />
W is the start of file names containing a bad exercise, and all others file names are to be classified as good exercises.

In [7]:
file_path = 'https://raw.githubusercontent.com/digitacs/4dv652-ml/main/datasets/all_good_bad_videos/posenet_good_vs_bad_not_preprocessed/'

### 1.1.1 Cut leading and trailing frames

When loading the files, we'll use this method for removing leading and trailing frames from each data sample-

In [8]:
cut_start_scaler = mlflow.sklearn.load_model('gs://mlflow-atlas/mlflow_artifacts/0/ca84e7c5b9e54551bd4708aa457bf730/artifacts/InputScaler')
cut_start_model = mlflow.keras.load_model('gs://mlflow-atlas/mlflow_artifacts/0/ca84e7c5b9e54551bd4708aa457bf730/artifacts/cut_start_posenet')

cut_stop_scaler = mlflow.sklearn.load_model('gs://mlflow-atlas/mlflow_artifacts/0/583700c9367d4a49ad54912df95cf3cb/artifacts/InputScaler')
cut_stop_model = mlflow.keras.load_model('gs://mlflow-atlas/mlflow_artifacts/0/583700c9367d4a49ad54912df95cf3cb/artifacts/cut_stop_posenet')

In [9]:
def cut_leading_trailing(data):
    trimmed_data = data.copy()
    scaled_data_start = cut_start_scaler.transform(data, copy=True)

    # Remove start frames
    predictions = cut_start_model.predict(scaled_data_start)
    predictions = predictions.round().astype(int)
    
    # Find start point based on density of true predictions
    surrounding_area = 5
    n = 0
    s = 0
    
    for pred in predictions:
        if pred == 1:
            s += 1
        else:
            if( s > surrounding_area  ):
                break
            else:
                s = 0
        n = n + 1
    try:
        if n < (len(data) / 2):
            trimmed_data = trimmed_data.loc[n:,:]
    except:
        print('Error trying to remove start frames')

    # Remove stop frames
    scaled_data_stop = cut_stop_scaler.transform(trimmed_data, copy=True)
    predictions = cut_stop_model.predict(scaled_data_stop)
    predictions = predictions.round().astype(int)
    predictions = np.flip(predictions)

    surrounding_area = 5
    s = 0
    n = len(data)

    for pred in predictions:
        if pred == 1:
            s += 1
        else:
            if( s > surrounding_area  ):
                break
            else:
                s = 0
        n -= 1
    try:
        if n > (len(data) / 2):
            trimmed_data = trimmed_data.loc[:n,:]
    except:
        print('Error trying to remove stop frames')

    return trimmed_data

In [10]:
df = pd.read_csv(file_path + 'A1.csv')
# Drop scores-, eye-, and ear-columns
df = df[df.columns.drop(list(df.filter(regex='_score')))]
df = df[df.columns.drop(list(df.filter(regex='_eye_')))]
df = df[df.columns.drop(list(df.filter(regex='_ear_')))]
# Rename nose to head in PoseNet data
df.rename(columns={'nose_x': 'head_x', 'nose_y': 'head_y'}, inplace=True)
df['quality'] = 1 # Good

numbers = ['01', '02', '03', '04', '05', '06', '07', '08', '09']

for i in numbers:
  try:
    file_name = 'G{}.csv'.format(i)
    temp = pd.read_csv(file_path +  file_name)
    # Drop scores-, eye-, and ear-columns
    temp = temp[temp.columns.drop(list(temp.filter(regex='_score')))]
    temp = temp[temp.columns.drop(list(temp.filter(regex='_eye_')))]
    temp = temp[temp.columns.drop(list(temp.filter(regex='_ear_')))]
    # Rename nose to head in PoseNet data
    temp.rename(columns={'nose_x': 'head_x', 'nose_y': 'head_y'}, inplace=True)
    temp = cut_leading_trailing(temp)
    temp['quality'] = 1 # Good
    df = df.append(temp, ignore_index=True)

    file_name = 'W{}.csv'.format(i)
    temp = pd.read_csv(file_path +  file_name)
    # Drop scores-, eye-, and ear-columns
    temp = temp[temp.columns.drop(list(temp.filter(regex='_score')))]
    temp = temp[temp.columns.drop(list(temp.filter(regex='_eye_')))]
    temp = temp[temp.columns.drop(list(temp.filter(regex='_ear_')))]
    # Rename nose to head in PoseNet data
    temp.rename(columns={'nose_x': 'head_x', 'nose_y': 'head_y'}, inplace=True)
    temp = cut_leading_trailing(temp)
    temp['quality'] = 0 # Bad
    df = df.append(temp, ignore_index=True)
  except IOError as e:
    print('Could not find file: ', file_name)

for i in range(20, 83):
  try:
    file_name = 'G{}.csv'.format(i)
    temp = pd.read_csv(file_path +  file_name)
    # Drop scores-, eye-, and ear-columns
    temp = temp[temp.columns.drop(list(temp.filter(regex='_score')))]
    temp = temp[temp.columns.drop(list(temp.filter(regex='_eye_')))]
    temp = temp[temp.columns.drop(list(temp.filter(regex='_ear_')))]
    # Rename nose to head in PoseNet data
    temp.rename(columns={'nose_x': 'head_x', 'nose_y': 'head_y'}, inplace=True)
    temp = cut_leading_trailing(temp)
    temp['quality'] = 1 # Good
    df = df.append(temp, ignore_index=True)
  except IOError as e:
    print('Could not find file: ', file_name)

for i in range(10, 44):
  try:
    file_name = 'W{}.csv'.format(i)
    temp = pd.read_csv(file_path +  file_name)
    # Drop scores-, eye-, and ear-columns
    temp = temp[temp.columns.drop(list(temp.filter(regex='_score')))]
    temp = temp[temp.columns.drop(list(temp.filter(regex='_eye_')))]
    temp = temp[temp.columns.drop(list(temp.filter(regex='_ear_')))]
    # Rename nose to head in PoseNet data
    temp.rename(columns={'nose_x': 'head_x', 'nose_y': 'head_y'}, inplace=True)
    temp = cut_leading_trailing(temp)
    temp['quality'] = 0 # Bad
    df = df.append(temp, ignore_index=True)
  except IOError as e:
    print('Could not find file: ', file_name)

print(df.shape)

Could not find file:  W05.csv
Could not find file:  G67.csv
Could not find file:  G70.csv
(70173, 27)


# 2. Data Augmentation

## Mirror X coordinate

In [11]:
df = mirror(df,'x', append=True)
print(df.shape)

(140346, 27)


## Stretch X coordinate by 50%

In [12]:
df_temp = augMultiplier(df.drop(columns=['quality']), multiplier=1.5)
df_temp['quality'] = df['quality']
df = df.append(df_temp, ignore_index=True)
print(df.shape)

(280692, 27)


## Compress by 25%

In [13]:
df_temp = augMultiplier(df.drop(columns=['quality']), multiplier=0.25)
df_temp['quality'] = df['quality']
df = df.append(df_temp, ignore_index=True)
print(df.shape)

(561384, 27)


## Rotate by p/7

In [14]:
samples = df.sample(2000)

angle = 3.1415 / 7
samples_rotated = rotate(samples.drop(columns=['quality']), angle=angle, posenet=True)
samples_rotated['quality'] = samples['quality'].append(samples['quality'], ignore_index=True)
df = df.append(samples_rotated, ignore_index=True)
print(df.shape)

(565384, 27)


## Rotate by -p/9

In [15]:
samples = df.sample(2000)

angle = 3.1415 / -9
samples_rotated = rotate(samples.drop(columns=['quality']), angle=angle, posenet=True)
samples_rotated['quality'] = samples['quality'].append(samples['quality'], ignore_index=True)
df = df.append(samples_rotated, ignore_index=True)
print(df.shape)

(569384, 27)


# 3. Save as New Dataset

In [27]:
slice_size = 4
cut_size = int(len(df) / slice_size)
q = 0

for i in range(slice_size):
    temp = df.loc[cut_size*i:cut_size*(i+1)-1,:]
    temp.to_csv('../../datasets/all_good_bad_problemB_posenet/good_bad_posenet_{}.csv'.format(i+1), index=False)
    