# Classifying all movements in exercise 1 for subject 4

### Basic imports and helper functions

In [72]:
# Imports
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.io import loadmat

In [73]:
def load_data_DB1(subject,exercise):
    data = loadmat(f'../Dataset/DB1/s{subject}/S{subject}_A1_E{exercise}.mat')
    return data

In [74]:
def get_start_indexes(data,movement,target):
    start_indexes=[]
    for idx,move in enumerate(data[target]):
        if move[0] == movement and data[target][idx-1][0] == 0:
            start_indexes.append(idx)
        if len(start_indexes) == 10:
            break
    return start_indexes

In [75]:
def get_movement_df(data,movement,target):
    start_indexes = get_start_indexes(data,movement,target)
    df= pd.DataFrame()
    step = 500 if target == 'stimulus' else 200
    for i in start_indexes:
        new_row = data['emg'][i:i+step].flatten()
        df = df.append(pd.Series(new_row),ignore_index=True)
    target_movement = np.full((10,1),movement,dtype=int)
    df['target_movement'] = target_movement
    return df

### Classification of all movements in exercise 1 for subject 4 based on the stimulus column concatenating 500 rows together using RandomForestClassifier

In [97]:
# Import data for subject 4 e11xercise 1
sub4_ex1 = load_data_DB1(subject=20,exercise=1)

In [98]:
from sklearn.ensemble import RandomForestClassifier

# Add all movements in exercise 1 to a list - Exercise 1 has 12 movements
movements_ex1 = []
for m in range(1,13):
    movements_ex1.append(get_movement_df(data=sub4_ex1,movement=m,target="stimulus"))

In [99]:
start_idxes = get_start_indexes(data=sub4_ex1, movement=1,target="stimulus")
start_idxes

[325, 1165, 2005, 2845, 3686, 4526, 5366, 6206, 7047, 7887]

In [100]:
sub4_ex1['emg'][321+499] , sub4_ex1['stimulus'][321+499]

(array([0.957 , 0.0269, 0.0024, 0.0024, 0.0024, 0.0562, 0.4858, 0.6543,
        0.0024, 0.1782]),
 array([1], dtype=uint8))

In [101]:
# Create a dataframe with all data in exercise 1
data = pd.concat(movements_ex1,ignore_index=True)
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,4991,4992,4993,4994,4995,4996,4997,4998,4999,target_movement
0,0.0952,0.0024,0.0024,0.0024,0.0024,0.0024,0.0122,0.0757,0.0024,0.0073,...,0.0098,0.0024,0.0024,0.0024,0.0366,0.4297,0.6152,0.0024,0.1367,1
1,0.4932,0.083,0.0024,0.0024,0.0024,0.0317,0.4248,0.4639,0.0024,0.0903,...,0.0049,0.0024,0.0024,0.0024,0.0073,0.4053,0.4443,0.0024,0.1172,1
2,0.4297,0.0146,0.0024,0.0024,0.0024,0.0122,0.4028,0.4321,0.0024,0.144,...,0.0024,0.0024,0.0024,0.0024,0.0024,0.2075,0.21,0.0024,0.0513,1
3,0.415,0.0024,0.0024,0.0024,0.0024,0.0073,0.4126,0.3369,0.0024,0.061,...,0.0073,0.0024,0.0024,0.0024,0.0073,0.3442,0.3857,0.0024,0.0562,1
4,0.5908,0.0024,0.0024,0.0024,0.0024,0.0024,0.2808,0.5298,0.0024,0.0586,...,0.0024,0.0024,0.0024,0.0024,0.0122,0.498,0.3003,0.0024,0.1514,1


In [102]:
# Creating X & y for the model
X = data.drop("target_movement", axis=1)
y = data["target_movement"]

In [103]:
# Trying RandomForest with 10 different random states

from sklearn.model_selection import train_test_split

# Try different random states 
random_states = [0,1,42,101,500,63,84,36,111,99]
training_accuracies = []
test_accuracies = []
for r in random_states:
    
    # Split the data 
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=r)
    # Fit RandomForestClassifierModel model 
    clf = RandomForestClassifier(n_estimators=100)
    clf.fit(X_train, y_train)
    training_accuracies.append(clf.score(X_train,y_train))
    test_accuracies.append(clf.score(X_test,y_test))

# Average Accuracies for training and test sets
print(f"Average Training Accuracy: {np.sum(training_accuracies)/len(training_accuracies) :.2f}")
print(f"Average Test Accuracy: {np.sum(test_accuracies)/len(test_accuracies):.2f}")

Average Training Accuracy: 1.00
Average Test Accuracy: 0.85


### Classification of all movements in exercise 1 for subject 4 based on the restimulus column concatenating 200 rows together using RandomForestClassifier

In [104]:
from sklearn.ensemble import RandomForestClassifier

# Add all movements in exercise 1 to a list based on restimulus column- Exercise 1 has 12 movements
movements_ex1 = []
for m in range(1,13):
    movements_ex1.append(get_movement_df(data=sub4_ex1,movement=m,target="restimulus"))



In [105]:
# Create a dataframe with all data in exercise 1
data = pd.concat(movements_ex1,ignore_index=True)
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1991,1992,1993,1994,1995,1996,1997,1998,1999,target_movement
0,0.2417,0.0415,0.0244,0.0073,0.0024,0.0073,0.1172,0.1416,0.0146,0.083,...,0.2246,0.0781,0.0171,0.0024,0.0293,0.5176,0.9595,0.1025,0.1807,1
1,0.7007,0.105,0.0024,0.0024,0.0049,0.0146,0.5273,0.5518,0.0024,0.1489,...,0.9131,0.3931,0.144,0.144,0.3784,3.042,1.1255,0.4004,0.7642,1
2,0.3955,0.0171,0.0024,0.0024,0.0049,0.0073,0.2979,0.3198,0.0024,0.1221,...,0.9351,0.3589,0.127,0.1074,0.3003,2.688,1.1768,0.2905,0.6543,1
3,0.4517,0.0854,0.0024,0.0024,0.0024,0.022,0.5518,0.3638,0.0073,0.0928,...,0.9302,0.3271,0.1416,0.1343,0.3711,2.8101,1.1938,0.2686,0.813,1
4,0.5396,0.0171,0.0049,0.0049,0.0024,0.0171,0.4614,0.5371,0.0024,0.1196,...,0.9717,0.3174,0.1196,0.1074,0.2588,2.1753,1.0107,0.2759,0.4663,1


In [106]:
# Creating X & y for the model
X = data.drop("target_movement", axis=1)
y = data["target_movement"]

In [107]:
# Trying single random state

# Split the data 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# Fit RandomForestClassifierModel model 
clf = RandomForestClassifier(n_estimators=100,max_features=10,random_state=66)
clf.fit(X_train, y_train)

print(f"Average Training Accuracy: {clf.score(X_train, y_train) :.3f}")
print(f"Average Test Accuracy: {clf.score(X_test, y_test) :.3f}")


Average Training Accuracy: 1.000
Average Test Accuracy: 0.875


In [108]:
# Trying RandomForest on 200 rows of restimulus 
from sklearn.model_selection import train_test_split

# Try different random states 
random_states = [0,1,42,101,500,63,84,36,111,99]
training_accuracies = []
test_accuracies = []
for r in random_states:
    
    # Split the data 
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=r)
    # Fit RandomForestClassifierModel model 
    clf = RandomForestClassifier(n_estimators=100,max_features=10,random_state=10)
    clf.fit(X_train, y_train)
    training_accuracies.append(clf.score(X_train,y_train))
    test_accuracies.append(clf.score(X_test,y_test))

# Average Accuracies for training and test sets
print(f"Average Training Accuracy: {np.sum(training_accuracies)/len(training_accuracies)}")
print(f"Average Test Accuracy: {np.sum(test_accuracies)/len(test_accuracies)}")

Average Training Accuracy: 1.0
Average Test Accuracy: 0.8708333333333333


### Classification of all movements in exercise 1 for subject 4 based on the restimulus column concatenating real movement rows together using RandomForestClassifier

#### Concatenate rows of emg signal based on the whole refined label of movement restimluls column (not just 200 rows)

In [109]:
# Iterate over the 12 movements in exercise 1
data = pd.DataFrame() # Creating a list to fit 10 repetitions of 12 movements
for m in range(1,13):
    # Get restimulus start indexes of all repetitions in each movement
    start_indices = get_start_indexes(data=sub4_ex1,movement=m, target="restimulus")
    
    # For each repetition, concatenate as long as the restimulus is the same capturing the whole movement
    for rep_idx in start_indices:
        last_rep_idx = rep_idx
        while sub4_ex1["restimulus"][last_rep_idx] == m:
            last_rep_idx +=1
        current_rep_list = sub4_ex1["emg"][rep_idx:last_rep_idx].flatten()
        current_rep_list = current_rep_list.reshape((1, len(current_rep_list)))
        current_rep_df = pd.DataFrame(current_rep_list)
        current_rep_df["target"] = m
        data = data.append(current_rep_df,ignore_index=True)


In [110]:
# Re-order the column names to have target column at the end
column_names = list(range(0,5680))
column_names.append("target")
data = data.reindex(columns=column_names)
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,5671,5672,5673,5674,5675,5676,5677,5678,5679,target
0,0.2417,0.0415,0.0244,0.0073,0.0024,0.0073,0.1172,0.1416,0.0146,0.083,...,,,,,,,,,,1
1,0.7007,0.105,0.0024,0.0024,0.0049,0.0146,0.5273,0.5518,0.0024,0.1489,...,,,,,,,,,,1
2,0.3955,0.0171,0.0024,0.0024,0.0049,0.0073,0.2979,0.3198,0.0024,0.1221,...,,,,,,,,,,1
3,0.4517,0.0854,0.0024,0.0024,0.0024,0.022,0.5518,0.3638,0.0073,0.0928,...,,,,,,,,,,1
4,0.5396,0.0171,0.0049,0.0049,0.0024,0.0171,0.4614,0.5371,0.0024,0.1196,...,,,,,,,,,,1


###### Fitting the all signals to the least signal length (capture the first parts of the signal only)

In [111]:
d = data.dropna(axis=1)
d

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1821,1822,1823,1824,1825,1826,1827,1828,1829,target
0,0.2417,0.0415,0.0244,0.0073,0.0024,0.0073,0.1172,0.1416,0.0146,0.0830,...,0.3101,0.0952,0.0464,0.0342,0.0781,0.7422,1.0034,0.1099,0.2490,1
1,0.7007,0.1050,0.0024,0.0024,0.0049,0.0146,0.5273,0.5518,0.0024,0.1489,...,0.9082,0.4834,0.2124,0.1953,0.4858,2.9370,1.3428,0.3931,0.8496,1
2,0.3955,0.0171,0.0024,0.0024,0.0049,0.0073,0.2979,0.3198,0.0024,0.1221,...,0.9375,0.2783,0.1196,0.1221,0.3442,2.6221,1.0889,0.3320,0.7056,1
3,0.4517,0.0854,0.0024,0.0024,0.0024,0.0220,0.5518,0.3638,0.0073,0.0928,...,1.0840,0.3857,0.1343,0.1367,0.3711,2.9883,1.1694,0.2417,0.6982,1
4,0.5396,0.0171,0.0049,0.0049,0.0024,0.0171,0.4614,0.5371,0.0024,0.1196,...,0.6836,0.3003,0.0879,0.0757,0.2295,1.9434,0.9277,0.2539,0.4492,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115,0.0024,0.0024,0.0024,0.0024,0.0024,0.0024,0.0635,0.0537,0.0024,0.0049,...,0.0049,0.0024,0.0024,0.0024,0.0220,0.1587,0.5273,0.0146,0.2344,12
116,0.0317,0.0024,0.0024,0.0024,0.0024,0.0049,0.1538,0.1074,0.0024,0.0073,...,0.0146,0.0049,0.0024,0.0024,0.0244,0.2051,0.7715,0.0732,0.2637,12
117,0.0024,0.0024,0.0024,0.0024,0.0049,0.0024,0.0854,0.0732,0.0024,0.0098,...,0.0049,0.0049,0.0024,0.0024,0.0269,0.1929,0.5786,0.1318,0.2319,12
118,0.0024,0.0024,0.0024,0.0024,0.0024,0.0024,0.1074,0.0781,0.0024,0.0122,...,0.0024,0.0024,0.0024,0.0024,0.0293,0.2612,0.5225,0.2051,0.2661,12


In [112]:
X = d.drop("target",axis=1)
y = d["target"]

In [113]:
from sklearn.model_selection import train_test_split

# Try different random states 
random_states = [0,1,42,101,500,63,84,36,111,99]
training_accuracies = []
test_accuracies = []
for r in random_states:
    
    # Split the data 
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=r)
    # Fit RandomForestClassifierModel model 
    clf = RandomForestClassifier(n_estimators=100)
    clf.fit(X_train, y_train)
    training_accuracies.append(clf.score(X_train,y_train))
    test_accuracies.append(clf.score(X_test,y_test))

# Average Accuracies for training and test sets
print(f"Average Training Accuracy: {np.sum(training_accuracies)/len(training_accuracies)}")
print(f"Average Test Accuracy: {np.sum(test_accuracies)/len(test_accuracies)}")

Average Training Accuracy: 1.0
Average Test Accuracy: 0.8583333333333332


###### Imputing the data by filling missing values using bfill and ffill

In [114]:
data_fill = data.fillna(method="bfill",axis=0).fillna(method="ffill",axis=0)
data_fill["target"].value_counts()

1     10
2     10
3     10
4     10
5     10
6     10
7     10
8     10
9     10
10    10
11    10
12    10
Name: target, dtype: int64

In [115]:
# Creating X & y for the model
X = data_fill.drop("target", axis=1)
y = data_fill["target"]

In [116]:
from sklearn.model_selection import train_test_split
from sklearn import svm
# Try different random states 
random_states = [0,1,42,101,500,63,84,36,111,99]
training_accuracies = []
test_accuracies = []
for r in random_states:
    # Split the data 
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=r)
    # Fit RandomForestClassifierModel model 
    clf = RandomForestClassifier(n_estimators=100)
    clf.fit(X_train, y_train)
    training_accuracies.append(clf.score(X_train,y_train))
    test_accuracies.append(clf.score(X_test,y_test))

# Average Accuracies for training and test sets
print(f"Average Training Accuracy: {np.sum(training_accuracies)/len(training_accuracies)}")
print(f"Average Test Accuracy: {np.sum(test_accuracies)/len(test_accuracies)}")

ValueError: Input contains NaN, infinity or a value too large for dtype('float32').