In [1]:
# Import block
import os
import pandas as pd
import numpy as np
import time
from math import sqrt
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.metrics import mean_squared_error, mean_absolute_error
from pandas.plotting import scatter_matrix
from importlib import reload
from sklearn.feature_selection import VarianceThreshold
from collections import Counter
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support

In [2]:
# Initialize pandas dataframe

cols_to_keep = [
    'tBodyAcc-mean()-X',
    'tBodyAcc-mean()-Y',
    'tBodyAcc-mean()-Z',
    'tBodyGyro-mean()-X',
    'tBodyGyro-mean()-Y',
    'tBodyGyro-mean()-Z',
    'Activity',
]

TRAIN_DATASET_PATH = 'https://raw.githubusercontent.com/bryanwhl/laser-tag/main/classifier/Dataset/train.csv?token=GHSAT0AAAAAABXUMFKT2WLN76AEEZN5O2XUZAOTDEQ'
TEST_DATASET_PATH = 'https://raw.githubusercontent.com/bryanwhl/laser-tag/main/classifier/Dataset/test.csv?token=GHSAT0AAAAAABXUMFKT7GDPTBI4TWO5SAIEZAOTDEA'

train_df_raw = pd.read_csv(TRAIN_DATASET_PATH, index_col=False)
test_df_raw = pd.read_csv(TEST_DATASET_PATH, index_col=False)

train_df_raw = train_df_raw[train_df_raw.Activity != 'SITTING']
train_df_raw = train_df_raw[train_df_raw.Activity != 'LAYING']

test_df_raw = test_df_raw[test_df_raw.Activity != 'SITTING']
test_df_raw = test_df_raw[test_df_raw.Activity != 'LAYING']

train_df_raw.reset_index(inplace=True)
test_df_raw.reset_index(inplace=True)

train_df = train_df_raw[cols_to_keep]
test_df = test_df_raw[cols_to_keep]

train_df

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyGyro-mean()-X,tBodyGyro-mean()-Y,tBodyGyro-mean()-Z,Activity
0,0.288585,-0.020294,-0.132905,-0.006101,-0.031365,0.107725,STANDING
1,0.278419,-0.016411,-0.123520,-0.016112,-0.083894,0.100584,STANDING
2,0.279653,-0.019467,-0.113462,-0.031698,-0.102335,0.096127,STANDING
3,0.279174,-0.026201,-0.123283,-0.043410,-0.091386,0.085538,STANDING
4,0.276629,-0.016570,-0.115362,-0.033960,-0.074708,0.077392,STANDING
...,...,...,...,...,...,...,...
4654,0.299665,-0.057193,-0.181233,-0.035024,-0.093011,0.124412,WALKING_UPSTAIRS
4655,0.273853,-0.007749,-0.147468,0.118696,-0.095746,0.033277,WALKING_UPSTAIRS
4656,0.273387,-0.017011,-0.045022,-0.213192,0.039321,0.197982,WALKING_UPSTAIRS
4657,0.289654,-0.018843,-0.158281,-0.406205,0.068797,0.177467,WALKING_UPSTAIRS


In [3]:
# Split dataframe into features and labels
from sklearn.preprocessing import LabelBinarizer

def split_df(df):
  features = df.drop('Activity', axis=1)
  labels = df['Activity']
  return features, labels

train_features_df, train_labels_df = split_df(train_df)
test_features_df, test_labels_df = split_df(train_df)

train_features_df

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyGyro-mean()-X,tBodyGyro-mean()-Y,tBodyGyro-mean()-Z
0,0.288585,-0.020294,-0.132905,-0.006101,-0.031365,0.107725
1,0.278419,-0.016411,-0.123520,-0.016112,-0.083894,0.100584
2,0.279653,-0.019467,-0.113462,-0.031698,-0.102335,0.096127
3,0.279174,-0.026201,-0.123283,-0.043410,-0.091386,0.085538
4,0.276629,-0.016570,-0.115362,-0.033960,-0.074708,0.077392
...,...,...,...,...,...,...
4654,0.299665,-0.057193,-0.181233,-0.035024,-0.093011,0.124412
4655,0.273853,-0.007749,-0.147468,0.118696,-0.095746,0.033277
4656,0.273387,-0.017011,-0.045022,-0.213192,0.039321,0.197982
4657,0.289654,-0.018843,-0.158281,-0.406205,0.068797,0.177467


In [4]:
# Implement sliding window

WINDOW_SIZE = 20
SLIDE_SIZE = 1

def process_df_with_sliding_window(df, window_size, slide_size):
    attributes = ["acc_x", "acc_y", "acc_z", "gyro_x", "gyro_y", "gyro_z"]
    columns_list = []
    # Build column list
    for idx in range(1, 1 + window_size):
        for attribute in attributes:
            columns_list.append(attribute + '_' + str(idx))
            
    df_out = pd.DataFrame(columns=columns_list)

    for row_idx in range(0, len(df) - window_size, slide_size):
        curr_window_data = []
        for row_iter_idx in range(window_size):
            curr_row_idx = row_idx + row_iter_idx
            curr_row_list = df.loc[curr_row_idx, :].values.flatten().tolist()
            curr_window_data += curr_row_list

        df_out.loc[len(df_out)] = curr_window_data

    return df_out

processed_train_df = process_df_with_sliding_window(train_features_df, WINDOW_SIZE, SLIDE_SIZE)
processed_test_df = process_df_with_sliding_window(test_features_df, WINDOW_SIZE, SLIDE_SIZE)

processed_train_df

Unnamed: 0,acc_x_1,acc_y_1,acc_z_1,gyro_x_1,gyro_y_1,gyro_z_1,acc_x_2,acc_y_2,acc_z_2,gyro_x_2,...,acc_z_19,gyro_x_19,gyro_y_19,gyro_z_19,acc_x_20,acc_y_20,acc_z_20,gyro_x_20,gyro_y_20,gyro_z_20
0,0.288585,-0.020294,-0.132905,-0.006101,-0.031365,0.107725,0.278419,-0.016411,-0.123520,-0.016112,...,-0.109188,-0.024642,-0.087791,0.107822,0.275568,-0.016980,-0.111429,-0.029462,-0.084377,0.100748
1,0.278419,-0.016411,-0.123520,-0.016112,-0.083894,0.100584,0.279653,-0.019467,-0.113462,-0.031698,...,-0.111429,-0.029462,-0.084377,0.100748,0.277562,-0.014318,-0.107877,-0.029815,-0.078706,0.093773
2,0.279653,-0.019467,-0.113462,-0.031698,-0.102335,0.096127,0.279174,-0.026201,-0.123283,-0.043410,...,-0.107877,-0.029815,-0.078706,0.093773,0.277152,-0.017983,-0.106601,-0.029179,-0.079554,0.094585
3,0.279174,-0.026201,-0.123283,-0.043410,-0.091386,0.085538,0.276629,-0.016570,-0.115362,-0.033960,...,-0.106601,-0.029179,-0.079554,0.094585,0.275676,-0.021264,-0.110801,-0.028154,-0.076119,0.088999
4,0.276629,-0.016570,-0.115362,-0.033960,-0.074708,0.077392,0.277199,-0.010098,-0.105137,-0.028776,...,-0.110801,-0.028154,-0.076119,0.088999,0.279200,-0.017714,-0.109161,-0.027221,-0.069195,0.079055
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4634,0.194867,0.005240,-0.148642,-0.053195,0.036804,0.112493,0.193572,0.012456,-0.173717,0.041092,...,-0.220567,0.422435,-0.159161,-0.107474,0.237966,-0.001088,-0.148326,0.184816,0.003176,0.067381
4635,0.193572,0.012456,-0.173717,0.041092,-0.008134,0.015860,0.354660,-0.010554,-0.196504,-0.285294,...,-0.148326,0.184816,0.003176,0.067381,0.299665,-0.057193,-0.181233,-0.035024,-0.093011,0.124412
4636,0.354660,-0.010554,-0.196504,-0.285294,0.025760,0.101464,0.389674,-0.015067,-0.166583,-0.359741,...,-0.181233,-0.035024,-0.093011,0.124412,0.273853,-0.007749,-0.147468,0.118696,-0.095746,0.033277
4637,0.389674,-0.015067,-0.166583,-0.359741,-0.090017,0.134230,0.306127,-0.025759,-0.103981,-0.110558,...,-0.147468,0.118696,-0.095746,0.033277,0.273387,-0.017011,-0.045022,-0.213192,0.039321,0.197982


In [5]:
def process_label_with_sliding_window(labels, window_size, slide_size):
    df_out = pd.DataFrame(columns=["classification"])
    for row_idx in range(0, len(labels) - window_size, slide_size):
        curr_window_labels = labels.iloc[[row_idx, row_idx + 10]]
        df_out.loc[len(df_out)] = curr_window_labels.mode()[0]

    return df_out

processed_train_labels = process_label_with_sliding_window(train_labels_df, WINDOW_SIZE, SLIDE_SIZE)
processed_test_labels = process_label_with_sliding_window(test_labels_df, WINDOW_SIZE, SLIDE_SIZE)


lb = LabelBinarizer()
processed_train_labels = lb.fit_transform(processed_train_labels)
processed_test_labels = lb.transform(processed_test_labels)

processed_test_labels

array([[1, 0, 0, 0],
       [1, 0, 0, 0],
       [1, 0, 0, 0],
       ...,
       [0, 0, 0, 1],
       [0, 0, 0, 1],
       [0, 0, 0, 1]])

In [6]:
# Normalize and pre-process data

from sklearn.preprocessing import normalize

def normalize_data(df):
    for col in df.columns:
        df[col] = df[col] / df[col].abs().max()
    
    return df
    
processed_train_df = normalize_data(processed_train_df)
processed_test_df = normalize_data(processed_test_df)

In [7]:
from sklearn.neural_network import MLPClassifier

mlp = MLPClassifier(hidden_layer_sizes = (120,60,4), max_iter=2000)

mlp.fit(processed_train_df, processed_train_labels)

mlp.score(processed_test_df, processed_test_labels)



0.9980599267083423

In [8]:
print(len(mlp.coefs_), len(mlp.coefs_[0]), len(mlp.coefs_[1][0]), len(mlp.coefs_[2][0]), len(mlp.coefs_[3][0]), len(mlp.coefs_[0][0]))
print("==================")
print(len(mlp.intercepts_), len(mlp.intercepts_[0]), len(mlp.intercepts_[1]), len(mlp.intercepts_[2]), len(mlp.intercepts_[3]))
mlp.intercepts_

4 120 60 4 4 120
4 120 60 4 4


[array([-0.09125151,  0.13895494,  0.05200441, -0.14325097, -0.14370696,
         0.14903955,  0.17754116, -0.17546544,  0.14974004,  0.132261  ,
        -0.06744133, -0.04295817, -0.18114541, -0.018153  ,  0.04252142,
        -0.03214331,  0.04324781, -0.05570098, -0.08378657, -0.0946748 ,
         0.0764524 , -0.04587466,  0.05741324,  0.08100831, -0.0557328 ,
        -0.04364701,  0.0251144 , -0.023986  , -0.12469433, -0.00318873,
        -0.07525506,  0.12550266, -0.02752787, -0.13720862,  0.16290319,
         0.03330492, -0.10221078, -0.00968407,  0.20386308, -0.05148764,
         0.18035317, -0.07001556, -0.14629973,  0.06076475, -0.05969232,
        -0.01398784,  0.07427498,  0.1271618 ,  0.19607745, -0.10721258,
         0.09422138,  0.10674731,  0.02242971,  0.00630731, -0.0841694 ,
         0.10656165,  0.07940413,  0.13423481, -0.10184904, -0.13630864,
         0.03236531, -0.09051396, -0.13906376, -0.064864  , -0.15166114,
        -0.11165623, -0.12793001,  0.01902092,  0.0

In [9]:
for idx in range(len(mlp.coefs_)):
    weights = mlp.coefs_[idx].tolist()
    cpp_file = [
        'double weights', str(idx), '[', str(len(weights)), '][', str(len(weights[0])), '] = {\n'
    ]
    for row in range(len(weights)):
        for col in range(len(weights[0])):
            weights[row][col] = str(round(weights[row][col], 4))
    for index, row_weights in enumerate(weights):
        cpp_file.append('{')
        output_str = ', '.join(row_weights)
        cpp_file.append(output_str)
        append_str = "},\n"
        cpp_file.append(append_str)
    cpp_file.append('};')

    cpp_program_string = ''.join(cpp_file)
    with open('/Users/bryanwong/Documents/classifier/weights' + str(idx) + '.cpp', 'w') as f:
        f.write(cpp_program_string)
        f.close()
#     h_file = [
#         '#ifndef WEIGHTS', str(idx), '_H\n', 
#         '#define WEIGHTS', str(idx), '_H\n',
#         '#include <vector>\n\n',
#         'using namespace std;\n\n',
#         'vector<vector<double> > generateWeights', str(idx), '();\n\n',
#         '#endif',
#     ]
#     h_program_string = ''.join(h_file)
#     with open('/Users/bryanwong/Documents/classifier/weights' + str(idx) + '.h', 'w') as f:
#         f.write(h_program_string)
#         f.close()

In [10]:
for idx in range(len(mlp.intercepts_)):
    biases = mlp.intercepts_[idx].tolist()
    cpp_file = [
        'double biases', str(idx), '[1][', str(len(biases)), '] = {'
    ]
    for col in range(len(biases)):
        biases[col] = str(round(biases[col], 4))
    output_str = ', '.join(biases)
    output_str = "{" + output_str + "}};\n"
    cpp_file.append(output_str)

    cpp_program_string = ''.join(cpp_file)
    with open('/Users/bryanwong/Documents/classifier/biases' + str(idx) + '.cpp', 'w') as f:
        f.write(cpp_program_string)
        f.close()

In [11]:
processed_test_df

Unnamed: 0,acc_x_1,acc_y_1,acc_z_1,gyro_x_1,gyro_y_1,gyro_z_1,acc_x_2,acc_y_2,acc_z_2,gyro_x_2,...,acc_z_19,gyro_x_19,gyro_y_19,gyro_z_19,acc_x_20,acc_y_20,acc_z_20,gyro_x_20,gyro_y_20,gyro_z_20
0,0.456975,-0.095382,-0.260780,-0.006675,-0.040719,0.114925,0.440878,-0.077129,-0.242365,-0.017627,...,-0.214243,-0.026960,-0.113974,0.115028,0.436364,-0.079804,-0.218641,-0.032232,-0.109542,0.107481
1,0.440878,-0.077129,-0.242365,-0.017627,-0.108915,0.107307,0.442832,-0.091495,-0.222629,-0.034679,...,-0.218641,-0.032232,-0.109542,0.107481,0.439521,-0.067296,-0.211671,-0.032619,-0.102180,0.100041
2,0.442832,-0.091495,-0.222629,-0.034679,-0.132856,0.102552,0.442074,-0.123142,-0.241899,-0.047492,...,-0.211671,-0.032619,-0.102180,0.100041,0.438872,-0.084521,-0.209168,-0.031923,-0.103281,0.100907
3,0.442074,-0.123142,-0.241899,-0.047492,-0.118642,0.091255,0.438043,-0.077877,-0.226357,-0.037154,...,-0.209168,-0.031923,-0.103281,0.100907,0.436535,-0.099941,-0.217409,-0.030801,-0.098821,0.094947
4,0.438043,-0.077877,-0.226357,-0.037154,-0.096989,0.082565,0.438946,-0.047460,-0.206295,-0.031481,...,-0.217409,-0.030801,-0.098821,0.094947,0.442115,-0.083257,-0.214191,-0.029781,-0.089832,0.084339
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4634,0.308572,0.024629,-0.291657,-0.058197,0.047781,0.120012,0.306522,0.058544,-0.340859,0.044956,...,-0.432786,0.462157,-0.206630,-0.114657,0.376821,-0.005113,-0.291038,0.202195,0.004123,0.071884
4635,0.306522,0.058544,-0.340859,0.044956,-0.010560,0.016921,0.561606,-0.049602,-0.385570,-0.312120,...,-0.291038,0.202195,0.004123,0.071884,0.474522,-0.268807,-0.355606,-0.038317,-0.120751,0.132727
4636,0.561606,-0.049602,-0.385570,-0.312120,0.033443,0.108246,0.617051,-0.070813,-0.326860,-0.393569,...,-0.355606,-0.038317,-0.120751,0.132727,0.433647,-0.036422,-0.289355,0.129857,-0.124302,0.035501
4637,0.617051,-0.070813,-0.326860,-0.393569,-0.116864,0.143201,0.484754,-0.121067,-0.204026,-0.120954,...,-0.289355,0.129857,-0.124302,0.035501,0.432911,-0.079949,-0.088340,-0.233239,0.051048,0.211214


In [27]:
input_param = processed_test_df.iloc[0].tolist()
for row in range(len(input_param)):
    input_param[row] = str(round(input_param[row], 2))
output_str = ', '.join(input_param)
output_str = "test_case = [" + output_str + "]"
output_str

'test_case = [0.46, -0.1, -0.26, -0.01, -0.04, 0.11, 0.44, -0.08, -0.24, -0.02, -0.11, 0.11, 0.44, -0.09, -0.22, -0.03, -0.13, 0.1, 0.44, -0.12, -0.24, -0.05, -0.12, 0.09, 0.44, -0.08, -0.23, -0.04, -0.1, 0.08, 0.44, -0.05, -0.21, -0.03, -0.09, 0.08, 0.44, -0.09, -0.22, -0.03, -0.11, 0.1, 0.44, -0.14, -0.25, -0.03, -0.1, 0.1, 0.44, -0.1, -0.24, -0.03, -0.09, 0.08, 0.44, -0.05, -0.21, -0.03, -0.09, 0.08, 0.44, -0.06, -0.2, -0.03, -0.09, 0.09, 0.44, -0.1, -0.21, -0.03, -0.1, 0.1, 0.44, -0.1, -0.22, -0.03, -0.1, 0.09, 0.44, -0.07, -0.21, -0.03, -0.09, 0.09, 0.47, 0.13, -0.12, -0.01, 0.2, -0.19, 0.44, -0.11, -0.24, -0.02, 0.04, -0.03, 0.44, -0.07, -0.23, -0.01, -0.06, 0.07, 0.44, -0.07, -0.21, -0.01, -0.1, 0.1, 0.44, -0.09, -0.21, -0.03, -0.11, 0.12, 0.44, -0.08, -0.22, -0.03, -0.11, 0.11]'

In [28]:
print(processed_test_labels[0])
mlp.predict_proba([processed_test_df.iloc[0]])

[1 0 0 0]




array([[9.99975601e-01, 7.42391907e-05, 6.54797354e-08, 1.49916856e-15]])

In [14]:
# cpp_file = [
#     '//#include "test',
#     str(idx),
#     '.h"\n',
#     '#include <vector>\n\n',
#     'using namespace std;\n\n',
#     'vector<vector<double> > generateTest',
#     str(idx),
#     '() {\n',
#     '  vector<vector<double> > test',
#     str(idx),
#     ';\n',
#     "  vector<double> rowTest;\n"
# ]
# weights = processed_test_df.iloc[idx].tolist()
# for row in range(len(weights)):
#     for col in range(len(weights[0])):
#         weights[row][col] = str(weights[row][col])
# for index, rowTest in enumerate(weights):
#     output_str = ', '.join(row_weights)
#     output_str = "  rowTest = {" + output_str + "};\n"
#     cpp_file.append(output_str)
#     append_str = "  weights" + str(idx) + ".push_back(rowTest);\n"
#     cpp_file.append(append_str)
# cpp_file.append('  return weights' + str(idx) + ';\n}')

# cpp_program_string = ''.join(cpp_file)
# with open('/Users/bryanwong/Documents/classifier/weights' + str(idx) + '.cpp', 'w') as f:
#     f.write(cpp_program_string)
#     f.close()
# h_file = [
#     '#ifndef WEIGHTS', str(idx), '_H\n', 
#     '#define WEIGHTS', str(idx), '_H\n',
#     '#include <vector>\n\n',
#     'using namespace std;\n\n',
#     'vector<vector<double> > generateWeights', str(idx), '();\n\n',
#     '#endif',
# ]
# h_program_string = ''.join(h_file)
# with open('/Users/bryanwong/Documents/classifier/weights' + str(idx) + '.h', 'w') as f:
#     f.write(h_program_string)
#     f.close()

In [15]:
# processed_test_labels[200]

In [16]:
# lb.classes_

In [17]:
# import matplotlib.pyplot as plt

# relevant_cols = [
#     'tBodyAcc-mean()-X',
#     'tBodyAcc-mean()-Y',
#     'tBodyAcc-mean()-Z',
#     'tBodyGyro-mean()-X',
#     'tBodyGyro-mean()-Y',
#     'tBodyGyro-mean()-Z',
#     'subject',
#     'Activity',
# ]

# cols_to_show = [
#     'tBodyAcc-mean()-X',
#     'tBodyAcc-mean()-Y',
#     'tBodyAcc-mean()-Z',
#     'tBodyGyro-mean()-X',
#     'tBodyGyro-mean()-Y',
#     'tBodyGyro-mean()-Z',
# ]
# subject_1_walking_dataframe = train_df_raw[(train_df_raw["subject"] == 1)]
# activities = subject_1_walking_dataframe["Activity"].unique()

# subject_1_walking_dataframe = subject_1_walking_dataframe[relevant_cols]
# # print(subject_1_walking_dataframe)
# # for i in range(len(subject_1_walking_dataframe)):
# #     subject_1_walking_dataframe[i][0] = i

# for activity in activities:
#     print("Plot for activity: ", activity)
#     for col in cols_to_show:
#         result_df = subject_1_walking_dataframe[(subject_1_walking_dataframe["subject"] == 1) & (subject_1_walking_dataframe["Activity"] == activity)]
#         result_df.reset_index(inplace=True)
#         plt.plot(result_df[col], label=col)
#     plt.legend()
#     plt.show()
#     print("====================================================")


In [18]:
# from sklearn.neural_network import MLPClassifier

# mlp = MLPClassifier(hidden_layer_sizes = (10,60,4), max_iter=2000)

# mlp.fit(processed_train_df, processed_train_labels)

# mlp.score(processed_test_df, processed_test_labels)




In [19]:
# print(len(mlp.coefs_), len(mlp.coefs_[0]), len(mlp.coefs_[0][0]), len(mlp.coefs_[1]), len(mlp.coefs_[1][0]), len(mlp.coefs_[2]), len(mlp.coefs_[2][0]), len(mlp.coefs_[3]), len(mlp.coefs_[3][0]))