In [1]:
import pandas as pd
import numpy as np
import os
import random
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

In [2]:
def parse_data(fileDirectory, X, Y, label):
    """Append the samples stored in one recording file to ``X`` and ``Y``.

    The first and the last line of the file are skipped. Every line in
    between has its first character and its last two characters removed
    (presumably an opening bracket and a closing bracket plus newline --
    TODO confirm against the recorder's output format); what remains is split
    on commas and each field is converted to ``float``.

    Args:
        fileDirectory: Path of the text file to read (it is passed straight
            to ``open``, so despite the name it must be a file).
        X: List that receives one list of floats per parsed line (mutated
            in place).
        Y: List that receives ``label`` once per parsed line (mutated in
            place).
        label: Class label stored for every sample of this file.

    Raises:
        OSError: If the file cannot be opened or read.
        ValueError: If a field is not a valid float. Rows parsed before the
            offending line stay in ``X``/``Y``; the offending row is added to
            neither list, so both lists keep the same length.
    """
    # The context manager closes the file even if reading fails.
    with open(fileDirectory, 'r') as source:
        data = source.readlines()

    # data[1:-1] drops the first and the last line of the file.
    for line in data[1:-1]:
        # Convert first, append afterwards: a conversion error must not leave
        # a label in Y without its matching feature row in X.
        features = [float(num) for num in line[1:-2].split(",")]
        X.append(features)
        Y.append(label)

In [4]:
# Containers filled by the loading loop below (train) and by the later
# train/test split (test).
X_train = []
Y_train = []
X_test = []
Y_test = []
print(len(X_train))

# Gesture label -> directory holding that gesture's recording files.
# The same nine sub-folders were previously read from '20201027/' instead of
# '20201111/'; swap the prefix to use that data set.
filePath = {1:'20201111/hair', 2:'20201111/rocket', 3:'20201111/zigzag', 4:'20201111/elbowLock',
            5:'20201111/pushBack',6:'20201111/scarecrow',7:'20201111/shoulder', 8:'20201111/window', 9:'20201111/logout'}

# To train on a subset only, iterate over e.g. (1, 2, 5) instead
# (hair = 1, rocket = 2, pushBack = 5).
for i in sorted(filePath):
    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order of X_train/Y_train (and therefore any index-based split)
    # reproducible across machines and runs.
    files = sorted(os.listdir(filePath[i]))
    for file in files:
        print("Reading " + file)
        fullPath = os.path.join(filePath[i], file)
        parse_data(fullPath, X_train, Y_train, i)
# at this point, X_train and Y_train are filled up
print(len(X_train))

0
Reading claire_hair_d1_102.txt
Reading hair1_RUSDI_50.txt
Reading hair2_RUSDI_50.txt
Reading hair50_1_JN.txt
Reading hair50_2_JN.txt
Reading nic_hair_100.txt
Reading claire_rocket_d1_32.txt
Reading claire_rocket_d2_32.txt
Reading claire_rocket_d3_22.txt
Reading claire_rocket_d4_22.txt
Reading nic_rocket_1_50.txt
Reading nic_rocket_2_50.txt
Reading rocket1_Rusdi_50.txt
Reading rocket2_Rusdi_50.txt
Reading rocket50_1_JN.txt
Reading rocket50_2_JN.txt
Reading claire_zigzag_d1_32.txt
Reading claire_zigzag_d2_32.txt
Reading claire_zigzag_d3_22.txt
Reading claire_zigzag_d4_22.txt
Reading nic_zigzag_100.txt
Reading zigzag1_Rusdi_50.txt
Reading zigzag_100_JN.txt
Reading claire_elbowlock_d1_72.txt
Reading claire_elbowlock_d2_32.txt
Reading elbowLock_50_JN_1.txt
Reading elbowLock_50_JN_2.txt
Reading elbow_RUSDI_50.txt
Reading nic_elbowlock_100.txt
Reading claire_pushback_d1_52.txt
Reading claire_pushback_d2_52.txt
Reading nic_pushback_100.txt
Reading pushback1_50_Rusdi.txt
Reading pushback2_50_

In [5]:
# Move `testSize` randomly chosen samples from the training lists into the
# test lists.
# NOTE: X_train/Y_train are mutated in place, so re-running this cell without
# re-running the loading cell moves a further `testSize` samples.
testSize = 300
splitSeed = 10  # fixed so a fresh run always holds out the same samples

# Fail fast: drawing more distinct indices than there are samples is
# impossible (the previous rejection-sampling loop would never terminate).
if testSize > len(X_train):
    raise ValueError(
        "testSize ({}) exceeds the number of available samples ({})".format(
            testSize, len(X_train)))

# Draw distinct indices without replacement from a private generator so the
# global `random` state is left untouched.
indexSet = set(random.Random(splitSeed).sample(range(len(X_train)), testSize))

# Pop from the highest index downwards so that removing one element never
# shifts the position of an element that still has to be removed.
indexList = sorted(indexSet, reverse=True)

for index in indexList:
    X_test.append(X_train.pop(index))
    Y_test.append(Y_train.pop(index))

print("size of X_train is {}".format(len(X_train)))
print("size of Y_train is {}".format(len(Y_train)))
print("size of X_test is {}".format(len(X_test)))
print("size of Y_test is {}".format(len(Y_test)))

size of X_train is 2928
size of Y_train is 2928
size of X_test is 300
size of Y_test is 300


In [6]:
# Scale the inputs: the scaler is fitted on the training split only and the
# same fitted transform is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_test)
)

In [7]:
# Hyper-parameters of the classifier.
seed = 10
layer1_size, layer2_size, layer3_size = 128, 256, 128
max_iteration_size = 10000
validation_fraction_size = 0.1
n_iter_no_change_size = 30

# Collected in one mapping so the full configuration can be read at a glance.
mlp_settings = dict(
    hidden_layer_sizes=(layer1_size, layer2_size, layer3_size),
    activation='logistic',
    solver='adam',
    max_iter=max_iteration_size,
    shuffle=True,
    random_state=seed,
    # Early stopping uses validation_fraction and n_iter_no_change below.
    early_stopping=True,
    validation_fraction=validation_fraction_size,
    n_iter_no_change=n_iter_no_change_size,
)
mlp = MLPClassifier(**mlp_settings)

# Kept as the last expression so the fitted estimator is shown as cell output.
mlp.fit(X_train_scaled, Y_train)

MLPClassifier(activation='logistic', alpha=0.0001, batch_size='auto',
       beta_1=0.9, beta_2=0.999, early_stopping=True, epsilon=1e-08,
       hidden_layer_sizes=(128, 256, 128), learning_rate='constant',
       learning_rate_init=0.001, max_iter=10000, momentum=0.9,
       n_iter_no_change=30, nesterovs_momentum=True, power_t=0.5,
       random_state=10, shuffle=True, solver='adam', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False)

In [8]:
# Predict the held-out samples, then print in order: accuracy, the labels
# present in the test set, the labels the model predicted, the raw
# predictions and the raw ground truth.
y_pred = mlp.predict(X_test_scaled)
report_items = (
    accuracy_score(Y_test, y_pred),
    set(Y_test),
    set(y_pred),
    y_pred,
    Y_test,
)
for item in report_items:
    print(item)

0.87
{1, 2, 3, 4, 5, 6, 7, 8, 9}
{1, 2, 3, 4, 5, 6, 7, 8, 9}
[9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 8 9 8 8 8 9 9 9 9 9 9 9 9 9 8 8 8
 8 8 4 8 8 8 8 8 8 8 2 2 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 6 8 7 7 7 7 7
 5 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 3 6 6 6 6 6 6 6 3 6 6 2
 6 6 6 6 6 6 6 6 1 2 6 6 6 6 6 6 6 6 5 5 5 5 5 5 5 2 2 5 5 5 5 5 5 5 2 2 5
 5 5 5 5 5 5 2 5 5 5 5 5 2 5 5 5 5 5 5 5 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 4 4 4 2 2 2 4 8 4 4 4 8 4 4 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 2 2 4 2 2 2 2 2 2 2 2 2 8 2 2 8 8 2 2 3 2 3 2 2 2 2 5 1 2 2 2 2 2 2
 2 2 6 2 1 1 1 1 1 1 1 1 1 1 5 1 1 5 1 1 1 1 1 1 1 1 1 6 1 1 1 1 6 1 1 1 6
 1 1 1 1]
[9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6

In [9]:
# Persist the trained classifier together with the scaler fitted on the same
# training data.
model_file = 'mlp20201111_withoutUmar.pkl'
scaler_file = 'scaler1111_withoutUmar.pkl'
joblib.dump(mlp, model_file, compress=3)
joblib.dump(scaler, scaler_file, compress=3)

['scaler1111_withoutUmar.pkl']