# Setup and Load Data

In [1]:
import os
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from pandas.plotting import scatter_matrix

SEED = 1234
# Seed every RNG the notebook relies on (Python, NumPy, PyTorch) so that a
# Restart & Run All reproduces the shuffle, the splits and the initial weights.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# `_dh` is IPython's directory history; its first entry is used as the notebook
# folder. Fall back to the working directory when `_dh` is absent (e.g. when
# this code is executed as a plain Python script instead of in a kernel).
current_folder = globals().get('_dh', [os.getcwd()])[0]
data_path = os.path.join(current_folder, 'data')
csv_path = os.path.join(data_path, 'training_set.csv')
df = pd.read_csv(csv_path, header=0) # load
# Shuffle all rows; with no random_state given, pandas draws from the global
# NumPy RNG seeded above.
df = df.sample(frac=1).reset_index(drop=True)
df.head()

Unnamed: 0,NOSE_X,NOSE_Y,NOSE_Z,LEFT_EYE_INNER_X,LEFT_EYE_INNER_Y,LEFT_EYE_INNER_Z,LEFT_EYE_X,LEFT_EYE_Y,LEFT_EYE_Z,LEFT_EYE_OUTER_X,...,RIGHT_HEEL_Z,LEFT_FOOT_INDEX_X,LEFT_FOOT_INDEX_Y,LEFT_FOOT_INDEX_Z,RIGHT_FOOT_INDEX_X,RIGHT_FOOT_INDEX_Y,RIGHT_FOOT_INDEX_Z,IS_LOW_LUNGE,IS_CRESCENT_POSE,INSTRUCTION
0,0.52023,0.635046,-0.372716,0.535121,0.587382,-0.318471,0.545458,0.587426,-0.318122,0.555094,...,-0.078819,0.559809,2.670057,-0.281307,0.436801,2.701503,-0.490008,0,1,Raise your hands
1,0.630045,0.545611,-0.543785,0.655914,0.483179,-0.517935,0.669563,0.48656,-0.5176,0.681861,...,0.366401,0.647351,2.974119,0.001294,0.505877,2.966749,-0.051269,0,1,your arms up towards the sky as you
2,0.553312,0.551331,-0.818429,0.573563,0.492558,-0.776206,0.585662,0.491515,-0.775922,0.597475,...,0.378935,0.564804,2.962438,-0.260139,0.426746,2.957223,-0.107479,1,0,Have a drink
3,0.49918,0.56168,-0.737946,0.51826,0.495814,-0.707827,0.53265,0.491957,-0.707701,0.546015,...,0.086125,0.555374,2.960517,-0.262226,0.411628,2.966175,-0.381353,1,0,Drop an elbow
4,0.578184,0.535424,-0.95063,0.597474,0.472747,-0.879421,0.611845,0.472694,-0.877219,0.625602,...,0.854012,0.602773,2.916028,0.15598,0.450033,2.93097,0.370374,1,0,Push Your Knee


In [2]:
# Build the feature matrix and the label vector.
# Every column except the last one is a feature; the target is INSTRUCTION.
X_columns = df.columns[:-1]
y = df['INSTRUCTION'].values
X = df[X_columns].values

In [3]:
# Sanity check: feature column names followed by the dimensions of X and y
print(X_columns)
print("X: ", X.shape)
print("y: ", y.shape)

Index(['NOSE_X', 'NOSE_Y', 'NOSE_Z', 'LEFT_EYE_INNER_X', 'LEFT_EYE_INNER_Y',
       'LEFT_EYE_INNER_Z', 'LEFT_EYE_X', 'LEFT_EYE_Y', 'LEFT_EYE_Z',
       'LEFT_EYE_OUTER_X',
       ...
       'RIGHT_HEEL_Y', 'RIGHT_HEEL_Z', 'LEFT_FOOT_INDEX_X',
       'LEFT_FOOT_INDEX_Y', 'LEFT_FOOT_INDEX_Z', 'RIGHT_FOOT_INDEX_X',
       'RIGHT_FOOT_INDEX_Y', 'RIGHT_FOOT_INDEX_Z', 'IS_LOW_LUNGE',
       'IS_CRESCENT_POSE'],
      dtype='object', length=101)
X:  (600, 101)
y:  (600,)


# Split Data

In [4]:
import collections
from sklearn.model_selection import train_test_split

# Fractions of the dataset assigned to each split.
TRAIN_SIZE = 0.70
VAL_SIZE = 0.15
TEST_SIZE = 0.15
# The three fractions have to cover the whole dataset.
assert abs(TRAIN_SIZE + VAL_SIZE + TEST_SIZE - 1.0) < 1e-9, "split fractions must sum to 1"

# Absolute sample counts derived from the fractions. TEST_COUNT is everything
# that is not training data, i.e. the validation and test samples together.
TRAIN_COUNT = int(len(y) * TRAIN_SIZE)
TEST_COUNT = len(y) - TRAIN_COUNT


In [5]:
def train_val_test_split(X, y, train_size, test_size, random_state=None):
    """Split a dataset into stratified train, validation and test subsets.

    The data is first divided into a training part and a held-out part; the
    held-out part is then cut in half to form the validation and test sets.
    Both steps are stratified on the labels.

    Args:
        X: Feature matrix with one row per sample.
        y: Labels aligned with the rows of X.
        train_size: Size of the training split, forwarded to sklearn's
            train_test_split.
        test_size: Size of the held-out part (validation and test combined),
            forwarded to sklearn's train_test_split.
        random_state: Optional seed forwarded to both splits. The default
            (None) keeps the earlier behaviour of relying on the global
            NumPy random state.

    Returns:
        Tuple (X_train, X_val, X_test, y_train, y_val, y_test).
    """
    # Step 1: training data vs. everything held out.
    X_train, X_, y_train, y_ = train_test_split(
        X, y, test_size=test_size, train_size=train_size,
        stratify=y, random_state=random_state)

    # Step 2: divide the held-out samples evenly into validation and test.
    X_val, X_test, y_val, y_test = train_test_split(
        X_, y_, test_size=0.5, train_size=0.5,
        stratify=y_, random_state=random_state)
    return X_train, X_val, X_test, y_train, y_val, y_test

In [6]:
# Split X / y into train, validation and test partitions using the absolute
# counts computed earlier, then report each partition's shape and one example.
X_train, X_val, X_test, y_train, y_val, y_test = train_val_test_split(
    X, y, train_size=TRAIN_COUNT, test_size=TEST_COUNT
)
print(f"X_train: {X_train.shape}, y_train: {y_train.shape}")
print(f"X_val: {X_val.shape}, y_val: {y_val.shape}")
print(f"X_test: {X_test.shape}, y_test: {y_test.shape}")
print(f"Sample point: {X_train[0]} → {y_train[0]}")

X_train: (420, 101), y_train: (420,)
X_val: (90, 101), y_val: (90,)
X_test: (90, 101), y_test: (90,)
Sample point: [ 0.51662898  0.59451157 -0.47648197  0.53160924  0.5477547  -0.3986699
  0.54344171  0.54846144 -0.39841127  0.55465221  0.55011392 -0.39807943
  0.49697709  0.55058587 -0.4067041   0.48455828  0.55340815 -0.40648952
  0.47271284  0.55578554 -0.40659609  0.56496513  0.58114612 -0.06756394
  0.45733339  0.5822652  -0.09955167  0.5383693   0.65116829 -0.36612824
  0.49537271  0.65059447 -0.37650141  0.656829    0.84885412  0.01141141
  0.37297991  0.85019201 -0.07071388  0.82548141  0.6482439  -0.30853003
  0.13249227  0.68535835 -0.56832254  0.79075795  0.3068994  -0.57139653
  0.2570056   0.33422554 -0.77595699  0.79013366  0.20868532 -0.6687066
  0.29478174  0.26130414 -0.89249784  0.76716137  0.20266846 -0.59680468
  0.31962317  0.26058161 -0.80426186  0.76131988  0.23850882 -0.56627983
  0.31440723  0.28715467 -0.76308078  0.61325294  1.63374567 -0.00724168
  0.4021478

In [7]:
from sklearn.preprocessing import LabelEncoder
# Output vectorizer: learn the label -> integer mapping from the training labels only
label_encoder = LabelEncoder().fit(y_train)
classes = list(label_encoder.classes_)
# NOTE(review): the recorded output lists both 'Push Your Knee' and
# 'Push your knee' as well as a '0' class — this looks like un-normalised
# labels in the CSV; confirm against the data source.
print(f"classes: {classes}")

classes: ['0', 'Arch your back', 'Drop an elbow', 'Have a drink', 'Push Your Knee', 'Push your knee', 'Put hands on knee', 'Raise your hands', 'Tighten tummy', 'Tighten your back', 'fingertips underneath your shoulders on', 'inhale lengthen through the sides of your waist and lift your back ribs as you exhale draw your front ribs down and', 'lower your head', 'your arms up towards the sky as you', 'your back leg knee place your hands on your front leg knee and press your torso']


# Label Encoding

In [8]:
# Replace the string labels of every split with their integer tokens,
# printing the first training label before and after the conversion.
# NOTE(review): the label arrays are overwritten in place, so this cell is
# presumably meant to run once per kernel session — confirm.
print(f"y_train[0]: {y_train[0]}")
y_train, y_val, y_test = (
    label_encoder.transform(labels) for labels in (y_train, y_val, y_test)
)
print(f"y_train[0]: {y_train[0]}")

y_train[0]: Push your knee
y_train[0]: 5


In [9]:
# Class weights: the inverse of each encoded class's frequency in the training labels
counts = np.bincount(y_train)
class_weights = {label: 1.0 / n_samples for label, n_samples in enumerate(counts)}
print(f"counts: {counts}\nweights: {class_weights}")

counts: [28 28 28 28 14 28 14 28 14 14 28 56 28 56 28]
weights: {0: 0.03571428571428571, 1: 0.03571428571428571, 2: 0.03571428571428571, 3: 0.03571428571428571, 4: 0.07142857142857142, 5: 0.03571428571428571, 6: 0.07142857142857142, 7: 0.03571428571428571, 8: 0.07142857142857142, 9: 0.07142857142857142, 10: 0.03571428571428571, 11: 0.017857142857142856, 12: 0.03571428571428571, 13: 0.017857142857142856, 14: 0.03571428571428571}


# Standardize Data

In [10]:
from sklearn.preprocessing import StandardScaler
# Learn per-feature mean / std from the training split only, so no statistics
# leak in from the validation or test data.
X_scaler = StandardScaler()
X_scaler.fit(X_train)
# Rescale all three feature matrices with the training statistics
# (labels are left untouched for classification).
X_train, X_val, X_test = (
    X_scaler.transform(features) for features in (X_train, X_val, X_test)
)
# Check on the first two feature columns of the test split: mean should be ~0, std ~1
print(f"X_test[0]: mean: {np.mean(X_test[:, 0], axis=0):.1f}, std: {np.std(X_test[:, 0], axis=0):.1f}")
print(f"X_test[1]: mean: {np.mean(X_test[:, 1], axis=0):.1f}, std: {np.std(X_test[:, 1], axis=0):.1f}")

X_test[0]: mean: 0.0, std: 1.0
X_test[1]: mean: 0.1, std: 1.1


# Model

In [14]:
from torch import nn
import torch.nn.functional as F
# Network dimensions, all derived from the prepared data:
#   INPUT_DIM   - one input per feature column (101 in the recorded run)
#   HIDDEN_DIM  - width of the widest hidden layer, 4x the input (404)
#   NUM_CLASSES - one output per encoded label class (15)
INPUT_DIM = X_train.shape[1]
HIDDEN_DIM = 4 * INPUT_DIM
NUM_CLASSES = len(classes)

print(INPUT_DIM, HIDDEN_DIM, NUM_CLASSES, sep="\n")

101
404
15


In [15]:
class MLP(nn.Module):
    """Fully connected classifier with five ReLU hidden layers.

    Layer widths grow from `input_dim` to `2 * input_dim` to `hidden_dim`,
    then shrink to a half, a quarter and an eighth of `hidden_dim` before the
    final projection to `num_classes` outputs.
    """

    def __init__(self, input_dim, hidden_dim, num_classes):
        """Create the six linear layers (fc1 .. fc6) in order."""
        super().__init__()
        doubled = input_dim * 2
        half = hidden_dim // 2
        quarter = hidden_dim // 4
        eighth = hidden_dim // 8
        # Expanding part of the network
        self.fc1 = nn.Linear(input_dim, doubled)
        self.fc2 = nn.Linear(doubled, hidden_dim)
        # Contracting part of the network
        self.fc3 = nn.Linear(hidden_dim, half)
        self.fc4 = nn.Linear(half, quarter)
        self.fc5 = nn.Linear(quarter, eighth)
        # Output layer: one score per class
        self.fc6 = nn.Linear(eighth, num_classes)

    def forward(self, x_in):
        """Run the input through fc1-fc5 with ReLU, then fc6 without activation."""
        z = x_in
        for hidden_layer in (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5):
            z = F.relu(hidden_layer(z))
        # The last layer's raw outputs are returned as-is.
        return self.fc6(z)

In [16]:
# Initialize model
model = MLP(input_dim=INPUT_DIM, hidden_dim=HIDDEN_DIM, num_classes=NUM_CLASSES)
# Print the module itself to list its layers. `model.named_parameters` without
# a call is only the bound method, so printing it wrapped the layer listing in
# "<bound method Module.named_parameters of ...>".
print(model)

<bound method Module.named_parameters of MLP(
  (fc1): Linear(in_features=101, out_features=202, bias=True)
  (fc2): Linear(in_features=202, out_features=404, bias=True)
  (fc3): Linear(in_features=404, out_features=202, bias=True)
  (fc4): Linear(in_features=202, out_features=101, bias=True)
  (fc5): Linear(in_features=101, out_features=50, bias=True)
  (fc6): Linear(in_features=50, out_features=15, bias=True)
)>
