In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

In [3]:
def parse_data(fileDirectory, X, Y):
    wordDict = {"STANDING": 1, "LAYING": 2, "SITTING" : 3, "WALKING": 4, "WALKING_DOWNSTAIRS": 5, "WALKING_UPSTAIRS": 6}
    source = open(fileDirectory, 'r')
    data = source.readlines()
    source.close()
    for i in range(1, len(data)):
        raw = data[i].split(",")
        activity = raw[-1].strip().strip('"')
        translate = wordDict[activity] if activity in wordDict else 0 
        Y.append(translate)
        X.append([float(i) for i in raw[:40]])
        
X_train = []
Y_train = []
X_test = []
Y_test = []

parse_data('train.csv', X_train, Y_train)
parse_data('test.csv', X_test, Y_test)

scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)

In [4]:
seed = 10

mlp = MLPClassifier(hidden_layer_sizes=(40,40,40,),
activation='logistic',
max_iter=10000,
random_state=seed)


In [5]:
mlp.fit(X_train_scaled, Y_train)
y_pred = mlp.predict(X_test_scaled)
print(X_test_scaled[0])
print(y_pred[0])
#print(accuracy_score(Y_test,y_pred))
#print(confusion_matrix(Y_test,y_pred))
#print(classification_report(Y_test, y_pred, target_names = ["STANDING", "LAYING", "SITTING", "WALKING", "WALKING_DOWNSTAIRS", "WALKING_UPSTAIRS"]))

[-2.77708103e-01 -2.10631233e-01  2.19307573e+00 -7.87267479e-01
 -8.33211956e-01 -9.34437521e-02 -8.08091468e-01 -8.39013385e-01
 -1.04178055e-01 -8.24746074e-01 -9.17965536e-01  4.29192243e-01
  5.54091878e-01  7.36703014e-01  6.71474205e-01 -7.24347315e-01
 -7.49278443e-01 -7.48507370e-01 -3.66217690e-01 -8.42191783e-01
 -8.47946656e-01 -4.99493288e-01 -1.67077183e-01 -6.44500089e-01
  1.08212932e+00  2.07960919e+00 -2.46411147e+00  1.98833917e+00
 -6.21090318e-01  1.01385521e+00 -8.58244689e-01 -5.90046873e-02
  7.50840045e-04 -3.84020234e-01  1.16178968e-01 -8.41460913e-01
  5.87292878e-01  5.39608353e-01 -9.14661571e-01 -2.28725132e+00]
2


In [8]:
def parse_data_bin(fileDirectory, X, Y):
    source = open(fileDirectory, 'r')
    data = source.readlines()
    source.close()
    for i in range(1, len(data)):
        raw = data[i].split(",")
        Y.append(1 if raw[-1].strip().strip('"') == "STANDING" else 0)
        X.append(raw[:40])
        
X_train_bin = []
Y_train_bin = []
X_test_bin = []
Y_test_bin = []

parse_data_bin('train.csv', X_train_bin, Y_train_bin)
parse_data_bin('test.csv', X_test_bin, Y_test_bin)

scaler = StandardScaler()
scaler.fit(X_train_bin)
X_train_scaled_bin = scaler.transform(X_train_bin)
X_test_scaled_bin = scaler.fit_transform(X_test_bin)


In [9]:
mlp_bin = MLPClassifier(hidden_layer_sizes=(40,40,40,),
activation='logistic',
max_iter=10000,
random_state=seed)


In [10]:
mlp.fit(X_train_scaled_bin, Y_train_bin)
y_pred_bin = mlp.predict(X_test_scaled_bin)
print(accuracy_score(Y_test_bin,y_pred_bin))
print(confusion_matrix(Y_test_bin,y_pred_bin))
print(classification_report(Y_test_bin, y_pred_bin, target_names = ["STANDING", "OTHERS"]))

0.9043094672548354
[[2314  101]
 [ 181  351]]
              precision    recall  f1-score   support

    STANDING       0.93      0.96      0.94      2415
      OTHERS       0.78      0.66      0.71       532

   micro avg       0.90      0.90      0.90      2947
   macro avg       0.85      0.81      0.83      2947
weighted avg       0.90      0.90      0.90      2947



In [6]:
joblib.dump(mlp, 'mlp.pkl', compress = 3)

['mlp.pkl']