In [1]:
# Imports
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.io import loadmat

In [2]:
def load_data_DB1(subject,exercise):
    data = loadmat(f'../Dataset/DB1/s{subject}/S{subject}_A1_E{exercise}.mat')
    return data

In [3]:
def get_start_indexes(data,movement,target):
    start_indexes=[]
    for idx,move in enumerate(data[target]):
        if move[0] == movement and data[target][idx-1][0] == 0:
            start_indexes.append(idx)
        if len(start_indexes) == 10:
            break
    return start_indexes

In [4]:
def get_repetition_indices(data, movement, rep_number):
    '''
    It returns the start and end indices of a certain repetition of a movement for particular subject and exercise that 
    is defined in the data.
    '''
    restimulus_idxs = get_start_indexes(data, movement, "restimulus")
    rep_start_idx = restimulus_idxs[rep_number - 1]
    rep_end_idx = rep_start_idx
    while data['restimulus'][rep_end_idx] == movement:
        rep_end_idx = rep_end_idx + 1
    return {
        "rep_start":rep_start_idx,
        "rep_end":rep_end_idx
    }

In [5]:
x = load_data_DB1(4, 2)
y = get_repetition_indices(x, 4, 2)
y, x['emg'][y['rep_start'] : y['rep_end']].shape

({'rep_start': 26416, 'rep_end': 26839}, (423, 10))

**Predicting joint angles for subject 12 for all exercises using `train_test_split`**

In [29]:
ex1 = load_data_DB1(subject=12, exercise=1)
ex2 = load_data_DB1(subject=12, exercise=2)
ex3 = load_data_DB1(subject=12, exercise=3)

emg = np.concatenate((ex1['emg'], ex2['emg'], ex3['emg']))

glove = np.concatenate((ex1['glove'], ex2['glove'], ex3['glove']))

In [32]:
emg.shape, glove.shape

((460382, 10), (460382, 22))

Predicting for one sensor

**Trying two models `RandomForestRegressor` and `LinearRegression` to check which has better performance on test set**

In [35]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

np.random.seed(42)

sensor = 5

X = emg
y = glove[:, sensor]

rfr  = RandomForestRegressor()
lin_reg = LinearRegression()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Fitting both models and comparing test scores
rfr.fit(X_train, y_train)

lin_reg.fit(X_train, y_train)

print(f"R^2 Score on test set for RandomForestRegressor model is {rfr.score(X_test, y_test)}")
print(f"R^2 Score on test set for LinearRegression model is {lin_reg.score(X_test, y_test)}")

R^2 Score on test set for RandomForestRegressor model is 0.8669857657372461
R^2 Score on test set for LinearRegression model is 0.24137446383096228


In [36]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

np.random.seed(42)

sensor = 19

X = emg
y = glove[:, sensor]

rfr  = RandomForestRegressor()
lin_reg = LinearRegression()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Fitting both models and comparing test scores
rfr.fit(X_train, y_train)

lin_reg.fit(X_train, y_train)

print(f"R^2 Score on test set for RandomForestRegressor model is {rfr.score(X_test, y_test)}")
print(f"R^2 Score on test set for LinearRegression model is {lin_reg.score(X_test, y_test)}")

R^2 Score on test set for RandomForestRegressor model is 0.7628787857847055
R^2 Score on test set for LinearRegression model is 0.14019796546001984


>**Conclusion: Random Forest Exceeds the performance of Linear Regression significantly, thus I will only consider RandomForestRegressor.**

**Getting all exercises for all subjects**

In [7]:
# Loop Over all subjects
emg_df = pd.DataFrame()
glove_df = pd.DataFrame()

for sub in np.arange(1,28):
    
    ex1 = load_data_DB1(subject=sub, exercise=1)
    ex2 = load_data_DB1(subject=sub, exercise=2)
    ex3 = load_data_DB1(subject=sub, exercise=3)

    
    emg_df = emg_df.append(pd.DataFrame(ex1['emg']), ignore_index=True)
    emg_df = emg_df.append(pd.DataFrame(ex2['emg']), ignore_index=True)
    emg_df = emg_df.append(pd.DataFrame(ex3['emg']), ignore_index=True)

    glove_df = glove_df.append(pd.DataFrame(ex1['glove']), ignore_index=True)
    glove_df = glove_df.append(pd.DataFrame(ex2['glove']), ignore_index=True)
    glove_df = glove_df.append(pd.DataFrame(ex3['glove']), ignore_index=True)
    
print(f"Shape of emg: {emg_df.shape}")
print(f"Shape of glove: {glove_df.shape}")

Shape of emg: (12553611, 10)
Shape of glove: (12553611, 22)


In [8]:
data = emg_df
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.0684,0.0024,0.0024,0.0024,0.0024,0.0098,0.0024,0.0488,0.0024,0.0342
1,0.0586,0.0024,0.0024,0.0024,0.0024,0.0049,0.0024,0.0415,0.0024,0.0293
2,0.0562,0.0024,0.0024,0.0024,0.0024,0.0049,0.0024,0.0391,0.0024,0.0244
3,0.0562,0.0024,0.0024,0.0024,0.0024,0.0049,0.0024,0.0342,0.0024,0.0171
4,0.0488,0.0024,0.0024,0.0024,0.0024,0.0024,0.0024,0.0366,0.0024,0.0146


In [11]:
# Add the glove data to data df
for i in np.arange(0,22):
    data[i+10] = glove_df[i]
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,22,23,24,25,26,27,28,29,30,31
0,0.0684,0.0024,0.0024,0.0024,0.0024,0.0098,0.0024,0.0488,0.0024,0.0342,...,88.0,69.0,148.0,114.0,116.0,83.0,140.0,131.0,135.0,115.0
1,0.0586,0.0024,0.0024,0.0024,0.0024,0.0049,0.0024,0.0415,0.0024,0.0293,...,88.0,69.0,148.0,114.0,116.0,83.0,140.0,131.0,135.0,115.0
2,0.0562,0.0024,0.0024,0.0024,0.0024,0.0049,0.0024,0.0391,0.0024,0.0244,...,88.0,69.0,148.0,114.0,116.0,83.0,140.0,131.0,135.0,115.0
3,0.0562,0.0024,0.0024,0.0024,0.0024,0.0049,0.0024,0.0342,0.0024,0.0171,...,88.0,69.0,148.0,114.0,116.0,83.0,140.0,131.0,135.0,115.0
4,0.0488,0.0024,0.0024,0.0024,0.0024,0.0024,0.0024,0.0366,0.0024,0.0146,...,88.0,69.0,148.0,114.0,116.0,83.0,140.0,131.0,135.0,115.0


In [None]:
# Shuffle dataframe
np.random.seed(42)
data = data.sample(frac=1)
data.head()

**Predicting joint angles for each sensor for all subjects and all exercises combined (all data) - without Averaging and without PCA**

In [9]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

rfr = RandomForestRegressor()

np.random.seed(42)

model_scores_training = []
model_scores_test = []

X = emg_df

for test_sensor in np.arange(0,22):

    # Split data into X & y
    y = glove_df[test_sensor]
    print(y.shape)
    # Split into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    print(X_train.shape)
    # Fit the model 
    rfr.fit(X_train, y_train)
    print("Fitted")
    training_score = round(rfr.score(X_train, y_train), 3)
    test_score = round(rfr.score(X_test, y_test), 3)

    model_scores_training.append(training_score)
    model_scores_test.append(test_score)

    print(f"Training Score for sensor {test_sensor + 1} for all subjects and all exercises is {training_score}")
    print(f"Test Score for sensor {test_sensor + 1} for all subjects and all exercises is {test_score}")
    print("------------------------------------------------------------------------------------------------------")
    
print(f"Average Training Score for all sensors is {np.mean(model_scores_training)}")
print(f"Average Test Score for all sensors is {np.mean(model_scores_test)}")

(12553611,)
(10042888, 10)


KeyboardInterrupt: 

**Predicting Joint Angles each sensor using all data and PCA of 0.95 (all exercises for all subjects) - without averaging:**

In [None]:
emg_df.shape , glove_df.shape

In [None]:
from sklearn.decomposition import PCA

np.random.seed(42)

pca = PCA(n_components=0.95)
emg_df_transformed = pca.fit_transform(emg_df)
print(emg_df_transformed.shape, glove_df.shape)

In [None]:
from sklearn.ensemble import RandomForestRegressor

rfr = RandomForestRegressor()

np.random.seed(42)

model_scores_training = []
model_scores_test = []

X = emg_df_transformed

for test_sensor in np.arange(0,22):


    # Split data into X & y
    y = glove_df[test_sensor]

    # Split into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Fit the model 
    rfr.fit(X_train, y_train)

    training_score = round(rfr.score(X_train, y_train), 3)
    test_score = round(rfr.score(X_test, y_test), 3)

    model_scores_training.append(training_score)
    model_scores_test.append(test_score)

    print(f"Training Score for sensor {test_sensor + 1} for all subjects and all exercises is {training_score}")
    print(f"Test Score for sensor {test_sensor + 1} for all subjects and all exercises is {test_score}")
    print("------------------------------------------------------------------------------------------------------")
    
print(f"Average Training Score for all sensors is {np.mean(model_scores_training)}")
print(f"Average Test Score for all sensors is {np.mean(model_scores_test)}")

**Predicting Joint Angles each sensor using all data (all exercises for all subjects) - WITH averaging:**

In [None]:
emg_df.shape , glove_df.shape