In [0]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
from pandas import read_csv
import multiprocessing
import os
import matplotlib.pyplot as plt
import seglearn
from sys import getsizeof
from scipy.stats import entropy, iqr, kurtosis
from scipy.stats.stats import pearsonr
from IPython.display import display
from ipywidgets import IntProgress
import time
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, KFold
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVC
from scipy.fftpack import fft
import random
import sys

In [0]:
# Root directory of the data set; every path used below is built by appending
# "train/..." or "test/..." to this string.
# NOTE(review): user-specific location — adjust before running elsewhere.
project_dir = "~/School/bbdc/"

## Load training labels

In [0]:
# The label file has no header row; read it and name the four columns.
labels_path = project_dir+"train/labels.train.csv"
labels = read_csv(labels_path, header=None)
labels.columns = ["Subject", "Start", "End", "Action"]
print(labels.shape)
labels.head()

In [0]:
# Unique recording names: the part of each Subject entry before its first ".".
filenames = sorted({subject.split(".")[0] for subject in labels.Subject})
# Recordings that make up the test set.
filenames_test = [
    "s06t01",
    "s06t02",
    "s06t03",
    "s06t04",
    "s06t05",
]

In [0]:
# Numeric action code -> label text, one table per arm.
inverted_label_mapping_left = {
    0: "la-object-pick",
    1: "la-object-carry",
    2: "la-object-place",
    3: "la-object-switch-hands",
    4: "la-object-orient",
    5: "la-nothing"
}
inverted_label_mapping_right = {
    0: "ra-object-pick",
    1: "ra-object-carry",
    2: "ra-object-place",
    3: "ra-object-switch-hands",
    4: "ra-object-orient",
    5: "ra-nothing"
}
# Label text -> numeric action code. The left- and right-arm variants of an
# action share one code, so this is the inverse of the two tables above.
label_mapping = {
    name: code
    for code in inverted_label_mapping_left
    for name in (inverted_label_mapping_left[code], inverted_label_mapping_right[code])
}
# Names of the EMG columns from which features are extracted.
emg_positions = [
    "fa-o-t-r", "fa-i-t-r", "fa-i-b-r", "fa-o-b-r",
    "fa-o-t-l", "fa-i-t-l", "fa-i-b-l", "fa-o-b-l",
]

## Match the EMG measurement to the corresponding labels

### -     Overwrites the old .emg.csv files with new ones containing the action labels for all timestamps 
### -     Removes `55,885` rows that don't have a corresponding action label



In [0]:
# Attach the left/right action label to every EMG row and overwrite the file.
#
# Each row is labelled by the interval [Start, End) that contains its own
# timestamp, so unlabelled gaps anywhere in a recording cannot shift the labels
# of the rows that follow. Rows without a label for both arms are dropped.
f = IntProgress(min=0, max=len(filenames)) # one tick per recording
display(f) # display the bar

for fname in filenames:
    fname_base = fname.split(".")[0]
    fname = fname_base + ".emg.csv"
    print(fname, fname.split(".")[0])
    emg_sample = read_csv(project_dir+"train/emg/"+fname)
    # Discard label columns written by an earlier run so re-running this cell
    # does not produce duplicated "la"/"ra" columns.
    emg_sample = emg_sample.drop(columns=["la", "ra"], errors="ignore")
    print("found",emg_sample.shape[0], "measurements")
    actions_for_file = labels[labels.Subject.isin([fname_base+".la",
                                                   fname_base+".ra"])]

    print("Found", actions_for_file.shape[0], "actions for file")

    actions_for_la_for_file = actions_for_file[actions_for_file.Subject == fname_base+".la"]
    actions_for_ra_for_file = actions_for_file[actions_for_file.Subject == fname_base+".ra"]
    print("l:", actions_for_la_for_file.shape[0],
          "r:", actions_for_ra_for_file.shape[0])

    # One label Series per arm, aligned with the rows of emg_sample.
    row_labels = {}
    for side, side_actions in (("la", actions_for_la_for_file),
                               ("ra", actions_for_ra_for_file)):
        labels_for_side = pd.Series(np.nan, index=emg_sample.index, dtype=object)
        for _, label in side_actions.iterrows():
            in_interval = (emg_sample.ts >= label.Start) & (emg_sample.ts < label.End)
            labels_for_side[in_interval] = label.Action
        row_labels[side] = labels_for_side

    n_left = int(row_labels["la"].notna().sum())
    n_right = int(row_labels["ra"].notna().sum())
    print("Total", "Left:", n_left, "Right:", n_right)
    if n_left != n_right:
        print("****** Left labels and Right labels not equal ********")
    print("No labels for", emg_sample.shape[0]-n_right, "samples")

    # Keep only the rows that carry a label for both arms.
    has_both_labels = row_labels["la"].notna() & row_labels["ra"].notna()
    new_file = emg_sample.assign(la=row_labels["la"], ra=row_labels["ra"])[has_both_labels]
    print(new_file.shape)
    # NOTE: this overwrites the original measurement file in place.
    new_file.to_csv(project_dir+"train/emg/"+fname, index = False)
    print("------ Saved new version ------")
    f.value += 1
            

## Match the MoCap measurement to the corresponding labels

### -     Overwrites the old .mocap.csv files with new ones containing the action labels for all timestamps 
### -     Removes `X` rows that don't have a corresponding action label



In [0]:
# Attach the left/right action label to every MoCap row and overwrite the file.
#
# Each row is labelled by the interval [Start, End) that contains its own
# timestamp, so unlabelled gaps anywhere in a recording cannot shift the labels
# of the rows that follow. Rows without a label for both arms are dropped.
f = IntProgress(min=0, max=len(filenames)) # one tick per recording
display(f) # display the bar

for fname in filenames:
    fname_base = fname.split(".")[0]
    fname = fname_base + ".mocap.csv"
    print(fname, fname.split(".")[0])
    mocap_sample = read_csv(project_dir+"train/mocap/"+fname)
    # Discard label columns written by an earlier run so re-running this cell
    # does not produce duplicated "la"/"ra" columns.
    mocap_sample = mocap_sample.drop(columns=["la", "ra"], errors="ignore")
    # Fill missing measurements by interpolating between neighbouring rows.
    mocap_sample = mocap_sample.interpolate()
    print("found",mocap_sample.shape[0], "measurements")
    actions_for_file = labels[labels.Subject.isin([fname_base+".la",
                                                   fname_base+".ra"])]

    print("Found", actions_for_file.shape[0], "actions for file")

    actions_for_la_for_file = actions_for_file[actions_for_file.Subject == fname_base+".la"]
    actions_for_ra_for_file = actions_for_file[actions_for_file.Subject == fname_base+".ra"]
    print("l:", actions_for_la_for_file.shape[0],
          "r:", actions_for_ra_for_file.shape[0])

    # One label Series per arm, aligned with the rows of mocap_sample.
    row_labels = {}
    for side, side_actions in (("la", actions_for_la_for_file),
                               ("ra", actions_for_ra_for_file)):
        labels_for_side = pd.Series(np.nan, index=mocap_sample.index, dtype=object)
        for _, label in side_actions.iterrows():
            in_interval = (mocap_sample.ts >= label.Start) & (mocap_sample.ts < label.End)
            labels_for_side[in_interval] = label.Action
        row_labels[side] = labels_for_side

    n_left = int(row_labels["la"].notna().sum())
    n_right = int(row_labels["ra"].notna().sum())
    print("Total", "Left:", n_left, "Right:", n_right)
    if n_left != n_right:
        print("****** Left labels and Right labels not equal ********")
    print("No labels for", mocap_sample.shape[0]-n_right, "samples")

    # Keep only the rows that carry a label for both arms.
    has_both_labels = row_labels["la"].notna() & row_labels["ra"].notna()
    new_file = mocap_sample.assign(la=row_labels["la"], ra=row_labels["ra"])[has_both_labels]
    print(new_file.shape)
    # NOTE: this overwrites the original measurement file in place.
    new_file.to_csv(project_dir+"train/mocap/"+fname, index = False)
    print("------ Saved new version ------")
    f.value += 1
            

## Load all EMG data

In [0]:
def load_all_emg_data(filenames,train=True):
    """Read the EMG file of every listed recording and stack them into one frame.

    Parameters
    ----------
    filenames : sequence of str
        Recording names without extension; ".emg.csv" is appended to each.
    train : bool, default True
        Read from the "train/" sub-folder when true, otherwise from "test/".

    Returns
    -------
    pandas.DataFrame
        All recordings concatenated in the given order with a fresh 0..n-1 index.
    """
    subdir = "train/" if train else "test/"
    n_files = len(filenames)
    emg_datasets = []
    for position, name in enumerate(filenames):
        # Coarse progress report, printed whenever the integer percentage is a
        # multiple of ten.
        percent = (position * 100) // n_files
        if percent % 10 == 0:
            print(str(percent)+"%")
        emg_datasets.append(pd.read_csv(project_dir+subdir+"emg/"+name+".emg.csv"))

    emg_df = pd.concat(emg_datasets, ignore_index=True)
    print(emg_df.shape)
    return emg_df


## Load all MoCap data

In [0]:
def load_all_mocap_data(filenames,train=True):
    """Read the MoCap file of every listed recording and stack them into one frame.

    Parameters
    ----------
    filenames : sequence of str
        Recording names without extension; ".mocap.csv" is appended to each.
    train : bool, default True
        Read from the "train/" sub-folder when true, otherwise from "test/".

    Returns
    -------
    pandas.DataFrame
        All recordings concatenated in the given order with a fresh 0..n-1 index.
    """
    subdir = "train/" if train else "test/"
    n_files = len(filenames)
    mocap_datasets = []
    for position, name in enumerate(filenames):
        # Coarse progress report, printed whenever the integer percentage is a
        # multiple of ten.
        percent = (position * 100) // n_files
        if percent % 10 == 0:
            print(str(percent)+"%")
        mocap_datasets.append(pd.read_csv(project_dir+subdir+"mocap/"+name+".mocap.csv"))

    mocap_df = pd.concat(mocap_datasets, ignore_index=True)
    print(mocap_df.shape)
    return mocap_df


## EMG measurements feature extraction

### Features to be extracted

In [0]:
def entropy1(labels, base=None):
    """Entropy of the distribution of distinct values in `labels`.

    The occurrences of each distinct value are counted and the counts are
    passed to ``scipy.stats.entropy``; `base` is forwarded as the logarithm base.
    """
    _, occurrences = np.unique(labels, return_counts=True)
    return entropy(occurrences, base=base)

def minimum(inputs):
    """Return the smallest value of `inputs`, as given by its ``min()`` method."""
    smallest = inputs.min()
    return smallest

def maximum(inputs):
    """Return the largest value of `inputs`, as given by its ``max()`` method."""
    largest = inputs.max()
    return largest

def min_max_diff(inputs):
    """Return the spread of `inputs`: its maximum minus its minimum."""
    highest = inputs.max()
    lowest = inputs.min()
    return highest - lowest

def variance(inputs):
    """Return the result of ``inputs.std()``.

    NOTE(review): despite its name this returns the standard deviation, not
    the variance. Left unchanged because the extracted feature values depend
    on it — confirm which one is wanted.
    """
    spread = inputs.std()
    return spread

def mAverage(inputs):
    """Return the arithmetic mean of `inputs`, as given by its ``mean()`` method."""
    average = inputs.mean()
    return average

def root_mean_square(inputs):
    """Return the square root of the mean of the squared values of `inputs`."""
    samples = np.array(inputs)
    mean_of_squares = np.mean(samples**2)
    return np.sqrt(mean_of_squares)

def inter_quartile_range(inputs):
    """Return the interquartile range of `inputs` along axis 0 (``scipy.stats.iqr``)."""
    quartile_spread = iqr(inputs, axis=0)
    return quartile_spread

def third_quartile(inputs):
    """Return the 75th percentile of `inputs` along axis 0."""
    upper_quartile = np.percentile(inputs, 75, axis=0)
    return upper_quartile

def kurt(inputs):
    """Return the kurtosis of `inputs`, using ``scipy.stats.kurtosis`` with its defaults."""
    peakedness = kurtosis(inputs)
    return peakedness

def mPearsonCorrelation(first, second):
    """Pearson correlation of two sequences, rescaled from [-1, 1] to [0, 1].

    Returns 0 when either sequence has fewer than two elements, and 0.5 when
    the coefficient is NaN.

    NOTE(review): the two fallbacks disagree. On the rescaled axis 0.5 means
    "no correlation" while 0 means "perfect anti-correlation" — confirm that
    0 is intended for too-short input.
    """
    too_short = len(first) < 2 or len(second) < 2
    if too_short:
        return 0
    coefficient = pearsonr(first, second)[0]
    return 0.5 if np.isnan(coefficient) else (coefficient + 1) / 2
# Per-channel statistics, in the order in which their values are appended to a
# feature vector. The column headers built further down list the same names in
# the same order.
features_to_extract = [
    entropy1,
    minimum,
    maximum,
    min_max_diff,
    variance,
    mAverage,
    root_mean_square,
    inter_quartile_range,
    third_quartile,
    kurt,
]

In [0]:
def extract_features(emg_measures_with_labels, side=None):
    """Build one feature vector from a block of EMG rows.

    The vector holds, in order: every statistic in `features_to_extract` for
    every channel in `emg_positions`, the rescaled Pearson correlation of each
    channel pair, and finally the number of rows in the block.

    Parameters
    ----------
    emg_measures_with_labels : pandas.DataFrame
        Rows to summarise; must contain the columns named in `emg_positions`
        and, when `side` is given, the column named by `side`.
    side : str, optional
        Label column to read. The label of the first row is translated through
        `label_mapping`.

    Returns
    -------
    (list, int or None)
        The feature vector and the numeric label (None when `side` is falsy).
    """
    label_mapped = None
    if side:
        first_label = emg_measures_with_labels[side].iloc[0]
        label_mapped = label_mapping[first_label]

    # Per-channel statistics, channel by channel.
    extracted_features = [
        feature(np.array(emg_measures_with_labels[position]))
        for position in emg_positions
        for feature in features_to_extract
    ]

    # One correlation per unordered channel pair.
    n_channels = len(emg_positions)
    for first in range(n_channels):
        for second in range(first+1, n_channels):
            extracted_features.append(
                mPearsonCorrelation(emg_measures_with_labels[emg_positions[first]],
                                    emg_measures_with_labels[emg_positions[second]]))

    extracted_features.append(emg_measures_with_labels.shape[0])
    return extracted_features, label_mapped

In [0]:
def learn_model(X,y,n_splits=4,hidden_layer_sizes=(40,40),model="nn"):
    """Train one classifier per K-fold split and return the best-scoring one.

    Parameters
    ----------
    X : numpy.ndarray
        Feature matrix, one row per sample.
    y : numpy.ndarray
        Class labels; a single-column 2-D array is flattened to 1-D.
    n_splits : int, default 4
        Number of folds. For the neural network each fold also uses a different
        regularisation strength, taken from ``np.linspace(0.00001, 1, n_splits)``.
    hidden_layer_sizes : tuple or int, default (40, 40)
        Passed to ``MLPClassifier`` when `model` is "nn".
    model : {"nn", "svm"}, default "nn"
        Which classifier to train.

    Returns
    -------
    The fitted classifier with the highest accuracy on its held-out fold.

    Raises
    ------
    ValueError
        If `model` is neither "nn" nor "svm".
    """
    if model not in ("nn", "svm"):
        raise ValueError("model must be 'nn' or 'svm', got %r" % (model,))

    # sklearn expects 1-D targets; a (n, 1) column vector triggers warnings.
    y = np.asarray(y)
    if y.ndim == 2 and y.shape[1] == 1:
        y = y.ravel()

    kf = KFold(n_splits=n_splits)
    train_accuracies = []
    test_accuracies = []
    classifiers = []
    alphas = np.linspace(0.00001, 1, n_splits)
    plot_label = ""
    for fold, (train_index, test_index) in enumerate(kf.split(X)):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        if model == "nn":
            clf = MLPClassifier(solver='lbfgs', alpha=alphas[fold],hidden_layer_sizes=hidden_layer_sizes, random_state=1,max_iter=1000)
            plot_label = "Neural Network " + str(hidden_layer_sizes)
        else:
            clf = SVC(gamma='auto')
            plot_label = "SVM"
        clf.fit(X_train, y_train)
        classifiers.append(clf)
        train_accuracies.append(accuracy_score(y_train, clf.predict(X_train)))
        test_accuracies.append(accuracy_score(y_test, clf.predict(X_test)))

    # Red: accuracy on the training folds; yellow: accuracy on the held-out fold.
    plt.plot(train_accuracies, "r")
    plt.plot(test_accuracies, "y")
    plt.title(plot_label)
    print(train_accuracies)
    print(test_accuracies)
    plt.show()
    # The scores are accuracies, so the best fold is the one with the highest
    # held-out value across all folds.
    return classifiers[int(np.argmax(test_accuracies))]

In [0]:
def extract_features_by_window(emg_measures_with_labels, train=True):
    """Build one feature vector (and majority labels) from a window of EMG rows.

    The vector holds every statistic in `features_to_extract` for every channel
    in `emg_positions`, followed by the rescaled Pearson correlation of each
    channel pair.

    Parameters
    ----------
    emg_measures_with_labels : pandas.DataFrame
        Rows of the window; must contain the columns named in `emg_positions`
        and, when `train` is true, the "la" and "ra" label columns.
    train : bool, default True
        When true, the most frequent "la" and "ra" label in the window is
        translated through `label_mapping`.

    Returns
    -------
    (list, int or None, int or None)
        Feature vector, left label code, right label code. The codes are None
        when `train` is false.
    """
    left_label_mapped, right_label_mapped = None, None
    if train:
        left_labels = list(emg_measures_with_labels["la"])
        right_labels = list(emg_measures_with_labels["ra"])
        # Majority vote; on a tie the label that appears first wins.
        left_label_mapped = label_mapping[max(left_labels,key=left_labels.count)]
        right_label_mapped = label_mapping[max(right_labels,key=right_labels.count)]

    # Per-channel statistics, channel by channel.
    extracted_features = [
        feature(np.array(emg_measures_with_labels[position]))
        for position in emg_positions
        for feature in features_to_extract
    ]

    # One correlation per unordered channel pair.
    n_channels = len(emg_positions)
    for first in range(n_channels):
        for second in range(first+1, n_channels):
            extracted_features.append(
                mPearsonCorrelation(emg_measures_with_labels[emg_positions[first]],
                                    emg_measures_with_labels[emg_positions[second]]))

    return extracted_features, left_label_mapped, right_label_mapped

In [0]:
# Load every training EMG recording into one frame and report its shallow
# size in units of 10^9 bytes.
emg_df = load_all_emg_data(filenames)
sys.getsizeof(emg_df) / 1e9

In [0]:
# Load every training MoCap recording into one frame and show its dimensions.
mocap_df = load_all_mocap_data(filenames)
mocap_df.shape

## Data Exploration

In [0]:
# Plot each of the eight EMG columns in its own panel and mark the rows where a
# new recording starts (timestamp 0) with a red vertical line.
plt.rcParams.update({'font.size': 24})  # NOTE: changes matplotlib's global settings
fig, axs = plt.subplots(4, 2)

# Size the figure once so that the area inside the subplot margins measures
# 20 x 8 inches.
margins = fig.subplotpars
figw = float(20)/(margins.right-margins.left)
figh = float(8)/(margins.top-margins.bottom)
fig.set_size_inches(figw, figh)

times = emg_df["ts"].values
new_reading = np.flatnonzero(times==0)  # row positions where ts restarts at 0

for i, ax in enumerate(axs.flat):
    # Column 0 is skipped; columns 1..8 are plotted, one per panel.
    values = emg_df.iloc[:,i+1]
    ax.set_title(emg_df.columns[i+1])
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

    for j in new_reading:
        ax.axvline(x=j,color='r')
    ax.plot(values)

# Without Window Sampling

## Extract features for left and right actions

In [0]:

# Rows at which the label differs from the previous row's label mark the start
# of a new action segment (row 0 always counts as a start).
# NOTE(review): emg_df holds all recordings back to back, so a segment whose
# label continues into the next recording is not split at the file boundary —
# confirm this is acceptable.
left_is_change = emg_df["la"].ne(emg_df["la"].shift())
right_is_change = emg_df["ra"].ne(emg_df["ra"].shift())
left_change_indexes = left_is_change[left_is_change == True].index.values
right_change_indexes = right_is_change[right_is_change == True].index.values
print("Found", len(left_change_indexes), "left action changes and", len(right_change_indexes), "right action changes")

left_action_features, right_action_features = [], []
left_action_labels, right_action_labels = [], []
left_action_timestamp, right_action_timestamp = [], []

# The final segment ends at the last row; that row itself is excluded from the
# slice but supplies the segment's end time.
last_row = emg_df.shape[0]-1

f = IntProgress(min=0, max=len(left_change_indexes)-1) # instantiate the bar
display(f) # display the bar

# One feature vector per run of identical left-arm labels.
for i, start_index in enumerate(left_change_indexes):
    f.value += 1
    end_index = left_change_indexes[i+1] if i+1 < len(left_change_indexes) else last_row
    left_action_timestamp.append([emg_df.iloc[start_index]["ts"], emg_df.iloc[end_index]["ts"]])
    extracted_features, label_mapped = extract_features(emg_df.iloc[start_index:end_index], "la")
    left_action_features.append(extracted_features)
    left_action_labels.append(label_mapped)

print("Finished extracting features for left actions going on to right")

f1 = IntProgress(min=0, max=len(right_change_indexes)-1) # instantiate the bar
display(f1) # display the bar

# One feature vector per run of identical right-arm labels.
for i, start_index in enumerate(right_change_indexes):
    f1.value += 1
    end_index = right_change_indexes[i+1] if i+1 < len(right_change_indexes) else last_row
    right_action_timestamp.append([emg_df.iloc[start_index]["ts"], emg_df.iloc[end_index]["ts"]])
    extracted_features, label_mapped = extract_features(emg_df.iloc[start_index:end_index], "ra")
    right_action_features.append(extracted_features)
    right_action_labels.append(label_mapped)



## Parse extracted features

In [0]:
# NOTE: this cell rebinds the feature/label/timestamp lists to DataFrames, so
# it can only be run once after the extraction cell.
print(len(left_action_features[0]))

# Column names in the order in which extract_features emits its values:
# ten statistics per channel, 28 pairwise correlations, then the row count.
feature_prefixes = ["entropy1-",
                    "minimum-",
                    "maximum-",
                    "min_max_diff-",
                    "variance-",
                    "mAverage-",
                    "root_mean_square-",
                    "inter_quartile_range-",
                    "third_quartile-",
                    "kurt-"]
header = [prefix+position for position in emg_positions for prefix in feature_prefixes]
header.extend("corr"+str(pair_number) for pair_number in range(1, 29))
header.append("size")

left_action_features = pd.DataFrame(left_action_features, columns=header)
right_action_features = pd.DataFrame(right_action_features, columns=header)
left_action_labels = pd.DataFrame(left_action_labels, columns=["labels"])
right_action_labels = pd.DataFrame(right_action_labels, columns=["labels"])
left_action_timestamp = pd.DataFrame(left_action_timestamp, columns=["start_time", "end_time"])
right_action_timestamp = pd.DataFrame(right_action_timestamp, columns=["start_time", "end_time"])

for frame in (left_action_features, right_action_features,
              left_action_labels, right_action_labels,
              left_action_timestamp, right_action_timestamp):
    display(frame)

In [0]:
# For the left-arm segments with label code 3, compare the min-max spread of
# the "fa-o-t-r" channel (blue dots) with that of "fa-o-t-l" (red dots).
is_code_three = left_action_labels["labels"] == 3
action_values = left_action_labels.index[is_code_three]
plt.plot(left_action_features.loc[action_values, "min_max_diff-fa-o-t-r"], "b.")
plt.plot(left_action_features.loc[action_values, "min_max_diff-fa-o-t-l"], "r.")


In [0]:
# Translate the numeric label codes back to their text form, one frame per arm.
true_labels_left = left_action_labels["labels"].map(inverted_label_mapping_left).to_frame()
true_labels_right = right_action_labels["labels"].map(inverted_label_mapping_right).to_frame()

### Offset negative values and normalize

In [0]:
# Shift every feature column that contains negative values so its minimum is 0,
# then divide each column by its maximum.
# `items()` is used because `iteritems()` was removed in pandas 2.0.
for features in (left_action_features, right_action_features):
    for columnName, column in features.items():
        if (column < 0).any():
            column = column + abs(column.min())
        peak = column.max()
        # A column whose maximum is 0 is left as it is instead of being turned
        # into NaN by a division by zero.
        features[columnName] = column / peak if peak != 0 else column


### Output files

In [0]:
# Write features, numeric labels, text labels and segment times, one file per arm.
# The right-arm file uses the right-arm text labels.
pd.concat([left_action_features,left_action_labels,true_labels_left,left_action_timestamp], axis=1).to_csv(project_dir+"train/left_extracted.csv",index=None)
pd.concat([right_action_features,right_action_labels,true_labels_right,right_action_timestamp], axis=1).to_csv(project_dir+"train/right_extracted.csv",index=None)

## Train models after PCA

In [0]:
# Reduce each arm's features to the principal components that explain 90% of
# the variance. Each arm gets its own PCA object so that both fitted
# projections remain available afterwards.
pca_left = PCA(.9)
pca_left_action_features = pca_left.fit_transform(left_action_features)
pca_right = PCA(.9)
pca_right_action_features = pca_right.fit_transform(right_action_features)
pca = pca_right  # the name `pca` still refers to the most recently fitted projection

# Train a neural network and an SVM per arm. The labels are flattened because
# sklearn expects 1-D targets.
X = pca_left_action_features
y = left_action_labels.values.ravel()
left_classifier_nn = learn_model(X,y,n_splits=8,hidden_layer_sizes=(20))
left_classifier_svm = learn_model(X,y,model="svm")

X = pca_right_action_features
y = right_action_labels.values.ravel()
right_classifier_nn = learn_model(X,y,n_splits=8,hidden_layer_sizes=(20))
right_classifier_svm = learn_model(X,y,model="svm")

# With Window Sampling

In [0]:


action_features_by_window = []
left_action_labels_by_window = []
right_action_labels_by_window = []
timestamp_by_window = []

N = emg_df.shape[0]
frequency = 600  # presumably rows per second — TODO confirm
window_size = int(0.5 * frequency) # 0.5 second
indexes = list(range(0,N,window_size))
f = IntProgress(min=0, max=len(indexes)-1) # instantiate the bar
display(f) # display the bar

# One feature vector per window.
# NOTE(review): the slice ends at end_index+1, so each window also contains the
# first row of the following window — confirm that the overlap is intended.
final_window = len(indexes)-1
for i, start_index in enumerate(indexes):
    f.value += 1
    end_index = N-1 if i == final_window else indexes[i+1]
    window = emg_df.iloc[start_index:end_index+1]
    timestamp_by_window.append([emg_df.iloc[start_index]["ts"], emg_df.iloc[end_index]["ts"]])
    extracted_features, left_label_mapped, right_label_mapped = extract_features_by_window(window)
    action_features_by_window.append(extracted_features)
    left_action_labels_by_window.append(left_label_mapped)
    right_action_labels_by_window.append(right_label_mapped)


print(len(action_features_by_window), len(action_features_by_window[0]))
print(len(timestamp_by_window), len(timestamp_by_window[0]))


In [0]:
# Debugging aid: show the container type of the windowed features. It is a
# plain list after the extraction cell; the next cell rebinds the name to a
# DataFrame.
print(type(action_features_by_window))

In [0]:
# NOTE: this cell rebinds the windowed lists to DataFrames, so it can only be
# run once after the extraction cell.
print(len(action_features_by_window), len(action_features_by_window[0]))
print(len(timestamp_by_window), len(timestamp_by_window[0]))

# Column names in the order in which extract_features_by_window emits its
# values: ten statistics per channel followed by 28 pairwise correlations.
feature_prefixes = ["entropy1-",
                    "minimum-",
                    "maximum-",
                    "min_max_diff-",
                    "variance-",
                    "mAverage-",
                    "root_mean_square-",
                    "inter_quartile_range-",
                    "third_quartile-",
                    "kurt-"]
header = [prefix+position for position in emg_positions for prefix in feature_prefixes]
header += ["corr"+str(x+1) for x in range(28)]
print(len(header))

action_features_by_window = pd.DataFrame(action_features_by_window, columns=header)
left_action_labels_by_window = pd.DataFrame(left_action_labels_by_window, columns=["labels"])
right_action_labels_by_window = pd.DataFrame(right_action_labels_by_window, columns=["labels"])
timestamp_by_window = pd.DataFrame(timestamp_by_window, columns=["start_time", "end_time"])

# Show the frames built in this cell, i.e. the per-window ones.
display(action_features_by_window)
display(left_action_labels_by_window)
display(right_action_labels_by_window)
display(timestamp_by_window)

In [0]:
# Translate the per-window numeric label codes back to their text form. The
# per-window labels are used so that the result has one row per window.
true_labels_left_by_window = pd.DataFrame(left_action_labels_by_window["labels"].map(inverted_label_mapping_left))
true_labels_right_by_window = pd.DataFrame(right_action_labels_by_window["labels"].map(inverted_label_mapping_right))

In [0]:
# Write the windowed features, numeric labels, text labels and window times side
# by side to one CSV file (no index column).
# NOTE(review): the four label frames all use the column name "labels", so the
# file contains four columns with that header — consider renaming them.
pd.concat([action_features_by_window, left_action_labels_by_window, right_action_labels_by_window, true_labels_left_by_window, true_labels_right_by_window, timestamp_by_window], axis=1).to_csv(project_dir+"train/emg_feature_extracted_by_window_300.csv",index=None)

In [0]:
# Shift every feature column that contains negative values so its minimum is 0,
# then divide each column by its maximum.
# `items()` is used because `iteritems()` was removed in pandas 2.0.
for columnName, column in action_features_by_window.items():
    if (column < 0).any():
        column = column + abs(column.min())
    peak = column.max()
    # A column whose maximum is 0 is left as it is instead of being turned into
    # NaN by a division by zero.
    action_features_by_window[columnName] = column / peak if peak != 0 else column

In [0]:
# Keep the principal components that explain 90% of the variance of the
# windowed features and report what the projection looks like.
pca = PCA(.9)
pca_action_features_by_window = pca.fit_transform(action_features_by_window)
for summary in (pca.explained_variance_ratio_,
                pca.singular_values_,
                pca_action_features_by_window.shape):
    print(summary)

# Both arms are predicted from the same projected windows; only the targets differ.
X = pca_action_features_by_window

y = left_action_labels_by_window.values
left_classifier_nn = learn_model(X, y, n_splits=8, hidden_layer_sizes=(20))
left_classifier_svm = learn_model(X, y, model="svm")

y = right_action_labels_by_window.values
right_classifier_nn = learn_model(X, y, n_splits=8, hidden_layer_sizes=(20))
right_classifier_svm = learn_model(X, y, model="svm")

# Test dataset

In [0]:
# Load the EMG recordings listed in filenames_test from the "test/" folder and
# stack them into one frame.
emg_df_test = load_all_emg_data(filenames=filenames_test, train=False)

## Automatically set the action of the first part of each recording to `la-nothing` and `ra-nothing`
This is because the measurements always start with `nothing` actions

In [0]:
# Row positions at which a new recording starts (its timestamp is 0).
zero_indexes = emg_df_test.index[emg_df_test["ts"]==0].to_list()
# Number of leading rows per recording that are labelled "nothing": a random
# factor between 4.5 and 8.5 times 600 (presumably seconds times rows per
# second — TODO confirm).
# NOTE(review): `random` is not seeded, so the result differs from run to run.
first_rows = [round(random.uniform(4.5,8.5)*600) for _ in zero_indexes]

n_rows = emg_df_test.shape[0]
# Each recording ends where the next one starts; the last ends with the frame.
recording_ends = zero_indexes[1:] + [n_rows]

# Rows before the first detected recording start (normally none) stay unlabelled.
leading_rows = zero_indexes[0] if zero_indexes else n_rows
la_column = [np.nan]*leading_rows
ra_column = [np.nan]*leading_rows
for start, end, wanted in zip(zero_indexes, recording_ends, first_rows):
    # Never label more rows than the recording has, so that the columns always
    # match the length of the frame.
    n_nothing = min(wanted, end-start)
    n_unlabelled = end-start-n_nothing
    la_column += ["la-nothing"]*n_nothing + [np.nan]*n_unlabelled
    ra_column += ["ra-nothing"]*n_nothing + [np.nan]*n_unlabelled

print(len(la_column),len(ra_column))
emg_df_test["la"] = la_column
emg_df_test["ra"] = ra_column


## Remove rows already assigned nothing actions

In [0]:
# Keep only the rows that were not pre-labelled as "nothing" and drop the
# (now empty) label columns.
emg_df_test_no_first_rows = emg_df_test[emg_df_test["la"].isnull()].drop(["la", "ra"],axis=1)
display(emg_df_test_no_first_rows.head())

# Cut the remaining rows into consecutive segments of random length: a random
# factor between 1.5 and 3.5 times 600 rows each.
# indexes_new holds the segment boundaries as row positions. It starts at 0 so
# that the first segment is covered as well, and ends at the number of rows.
n_rows = emg_df_test_no_first_rows.shape[0]
ind = 0
step = 0
indexes_new = [0]
while ind < n_rows:
    step = round(random.uniform(1.5,3.5)*600)
    ind = min(ind + step, n_rows)
    indexes_new.append(ind)


## Extract features for test measurements

In [0]:

extracted_features_test = []
test_time_stamps = []
f = IntProgress(min=0, max=len(indexes_new)-2) # instantiate the bar
display(f) # display the bar

# One feature vector per pair of neighbouring boundaries in indexes_new. The
# test rows carry no labels, so the returned label is always None.
for start_index, end_index in zip(indexes_new, indexes_new[1:]):
    f.value += 1
    segment = emg_df_test_no_first_rows.iloc[start_index:end_index]
    test_time_stamps.append([emg_df_test_no_first_rows.iloc[start_index]["ts"], emg_df_test_no_first_rows.iloc[end_index-1]["ts"]])
    extracted_features, label_mapped = extract_features(segment)
    extracted_features_test.append(extracted_features)

## Parse extracted features for test measurements

In [0]:
# NOTE: this cell rebinds the test feature/timestamp lists to DataFrames, so it
# can only be run once after the extraction cell.
print(len(extracted_features_test[0]))

# Column names in the order in which extract_features emits its values:
# ten statistics per channel, 28 pairwise correlations, then the row count.
feature_prefixes = ["entropy1-",
                    "minimum-",
                    "maximum-",
                    "min_max_diff-",
                    "variance-",
                    "mAverage-",
                    "root_mean_square-",
                    "inter_quartile_range-",
                    "third_quartile-",
                    "kurt-"]
header = [prefix+position for position in emg_positions for prefix in feature_prefixes]
header.extend("corr"+str(pair_number) for pair_number in range(1, 29))
header.append("size")
print(len(header))

extracted_features_test = pd.DataFrame(extracted_features_test, columns=header)
test_time_stamps = pd.DataFrame(test_time_stamps, columns=["start_time", "end_time"])

for frame in (extracted_features_test, test_time_stamps):
    display(frame)


## Normalize 

In [0]:
# Shift every feature column that contains negative values so its minimum is 0,
# then divide each column by its maximum.
# `items()` is used because `iteritems()` was removed in pandas 2.0.
# NOTE(review): the offsets and maxima come from the test data itself, not from
# the training data — confirm this is intended.
for columnName, column in extracted_features_test.items():
    if (column < 0).any():
        column = column + abs(column.min())
    peak = column.max()
    # A column whose maximum is 0 is left as it is instead of being turned into
    # NaN by a division by zero.
    extracted_features_test[columnName] = column / peak if peak != 0 else column

## Predict

In [0]:
# Project the test features onto 16 principal components and predict one action
# code per segment for each arm with the neural-network classifiers.
# NOTE(review): this PCA is fitted on the test features themselves instead of
# reusing a projection fitted on training data, so the components fed to the
# classifiers need not correspond to the ones they were trained on — confirm.
# NOTE(review): 16 must equal the number of inputs the classifiers were trained
# with; the training cells use PCA(.9), whose component count depends on the
# data — verify.
pca = PCA(n_components=16)
pca_extracted_features_test = pca.fit_transform(extracted_features_test)
left_prediction_test = left_classifier_nn.predict(pca_extracted_features_test)
right_prediction_test = right_classifier_nn.predict(pca_extracted_features_test)

In [0]:
# Translate the predicted action codes back to their text form, one frame per arm.
true_labels_left_test = pd.Series(left_prediction_test).map(inverted_label_mapping_left).to_frame()
true_labels_right_test = pd.Series(right_prediction_test).map(inverted_label_mapping_right).to_frame()

## Parse Predictions For Output

In [0]:
# Pair every segment's start/end time with its predicted label, per arm ...
la_output_test = pd.concat([test_time_stamps, true_labels_left_test],axis=1)
ra_output_test = pd.concat([test_time_stamps, true_labels_right_test],axis=1)
# ... and write the left-arm rows followed by the right-arm rows to one file.
stacked_output_test = pd.concat([la_output_test, ra_output_test],axis=0)
stacked_output_test.to_csv(project_dir+"test/labels.test.csv")

In [0]:
# Collapse runs of identical consecutive left-arm predictions into one entry
# each: the run keeps the start time of its first segment and takes the end
# time of its last segment.
final_list_pred = [left_prediction_test[0]]
final_list_pred_time = [list(test_time_stamps.iloc[0])]
before = 0

for i in range(1, len(left_prediction_test)):
    segment_times = list(test_time_stamps.iloc[i])
    if left_prediction_test[i] != left_prediction_test[before]:
        # A different action starts here: open a new run.
        final_list_pred.append(left_prediction_test[i])
        final_list_pred_time.append(segment_times)
    else:
        # Same action as the previous segment: extend the current run.
        final_list_pred_time[-1][1] = segment_times[1]
    before = i

In [0]:
# One row per merged run: the start and end time collected in
# final_list_pred_time by the preceding cell.
ff = pd.DataFrame(final_list_pred_time,columns=["start_time","end_time"])

In [0]:
# Attach the text label of each merged left-arm run.
ff["action"] = pd.Series(final_list_pred).map(inverted_label_mapping_left)

In [0]:
# Collapse runs of identical consecutive right-arm predictions into one entry
# each: the run keeps the start time of its first segment and takes the end
# time of its last segment.
final_list_pred = [right_prediction_test[0]]
final_list_pred_time = [list(test_time_stamps.iloc[0])]
before = 0

for i in range(1, len(right_prediction_test)):
    segment_times = list(test_time_stamps.iloc[i])
    if right_prediction_test[i] != right_prediction_test[before]:
        # A different action starts here: open a new run.
        final_list_pred.append(right_prediction_test[i])
        final_list_pred_time.append(segment_times)
    else:
        # Same action as the previous segment: extend the current run.
        final_list_pred_time[-1][1] = segment_times[1]
    before = i

In [0]:
# One row per merged run: the start and end time collected in
# final_list_pred_time by the preceding cell.
ff_right = pd.DataFrame(final_list_pred_time,columns=["start_time","end_time"])

In [0]:
# Attach the text label of each merged right-arm run.
ff_right["action"] = pd.Series(final_list_pred).map(inverted_label_mapping_right)

In [0]:
# Write the merged left-arm runs followed by the merged right-arm runs to one
# CSV file; the frame index is written as the first column.
# NOTE(review): the file name "labels_test.csv" differs from the
# "labels.test.csv" written earlier — confirm which one is the final output.
pd.concat([ff, ff_right],axis=0).to_csv(project_dir+"test/labels_test.csv")