In [1]:
# Import block
import os
import pandas as pd
import numpy as np
import time
from math import sqrt
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.metrics import mean_squared_error, mean_absolute_error
from pandas.plotting import scatter_matrix
from importlib import reload
from sklearn.feature_selection import VarianceThreshold
from collections import Counter
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support

In [2]:
grenade_data_path = '../expected_data_grenade.csv'
shield_data_path = '../expected_data_shield.csv'
reload_data_path = '../expected_data_reload.csv'
logout_data_path = '../expected_data_logout.csv'
noise_data_path = '../expected_data_noise.csv'


grenade_data_raw = pd.read_csv(grenade_data_path, index_col=False)
shield_data_raw = pd.read_csv(shield_data_path, index_col=False)
reload_data_raw = pd.read_csv(reload_data_path, index_col=False)
logout_data_raw = pd.read_csv(logout_data_path, index_col=False)
noise_data_raw = pd.read_csv(noise_data_path, index_col=False)

grenade_data_raw = grenade_data_raw.apply(pd.to_numeric)
grenade_data_raw.reset_index(inplace=True, drop=True)

shield_data_raw = shield_data_raw.apply(pd.to_numeric)
shield_data_raw.reset_index(inplace=True, drop=True)

reload_data_raw = reload_data_raw.apply(pd.to_numeric)
reload_data_raw.reset_index(inplace=True, drop=True)

logout_data_raw = logout_data_raw.apply(pd.to_numeric)
logout_data_raw.reset_index(inplace=True, drop=True)

noise_data_raw = noise_data_raw.drop(1491).drop(1453).apply(pd.to_numeric)
noise_data_raw.reset_index(inplace=True, drop=True)

In [3]:
WINDOW_SIZE = 20
SLIDE_SIZE = 1

def process_df_with_sliding_window(df, window_size, slide_size):
    attributes = ["acc_x", "acc_y", "acc_z", "gyro_x", "gyro_y", "gyro_z"]
    columns_list = []
    # Build column list
    for idx in range(1, 1 + window_size):
        for attribute in attributes:
            columns_list.append(attribute + '_' + str(idx))
            
    df_out = pd.DataFrame(columns=columns_list)

    for row_idx in range(0, len(df) - window_size, slide_size):
        curr_window_data = []
        nan_found = False
        for row_iter_idx in range(window_size):
            curr_row_idx = row_idx + row_iter_idx
            if df.loc[curr_row_idx, :].isnull().values.any():
                nan_found = True
                break
            curr_row_list = df.loc[curr_row_idx, :].values.flatten().tolist()
            curr_window_data += curr_row_list
        
        if nan_found:
            continue

        df_out.loc[len(df_out)] = curr_window_data


    return df_out

grenade_df_processed = process_df_with_sliding_window(grenade_data_raw, WINDOW_SIZE, SLIDE_SIZE)
shield_data_processed = process_df_with_sliding_window(shield_data_raw, WINDOW_SIZE, SLIDE_SIZE)
reload_data_processed = process_df_with_sliding_window(reload_data_raw, WINDOW_SIZE, SLIDE_SIZE)
logout_data_processed = process_df_with_sliding_window(logout_data_raw, WINDOW_SIZE, SLIDE_SIZE)
noise_data_processed = process_df_with_sliding_window(noise_data_raw, WINDOW_SIZE, SLIDE_SIZE)


In [4]:
from sklearn.preprocessing import normalize

combined_data_features = pd.concat([grenade_df_processed, shield_data_processed, reload_data_processed, logout_data_processed, noise_data_processed])
combined_data_features.reset_index(inplace=True, drop=True)
# combined_data_features = combined_data_features.apply(pd.to_numeric)

# for _, i in combined_data_features.iterrows():

#     for c in range(60):
#         if type(i[c]) != float and type(i[c]) != str:
#             print(i[c], type(i[c]))

In [5]:
# Build label
combined_data_labels = pd.DataFrame(columns=['Activity'])

for i in range(len(grenade_df_processed)):
    combined_data_labels.loc[len(combined_data_labels)] = 0
    
for i in range(len(shield_data_processed)):
    combined_data_labels.loc[len(combined_data_labels)] = 1
    
for i in range(len(reload_data_processed)):
    combined_data_labels.loc[len(combined_data_labels)] = 2
    
for i in range(len(logout_data_processed)):
    combined_data_labels.loc[len(combined_data_labels)] = 3
    
for i in range(len(noise_data_processed)):
    combined_data_labels.loc[len(combined_data_labels)] = 4
    
combined_data_labels

Unnamed: 0,Activity
0,0
1,0
2,0
3,0
4,0
...,...
5136,4
5137,4
5138,4
5139,4


In [6]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(combined_data_features, combined_data_labels, test_size=0.1, random_state=42)

X_train

Unnamed: 0,acc_x_1,acc_y_1,acc_z_1,gyro_x_1,gyro_y_1,gyro_z_1,acc_x_2,acc_y_2,acc_z_2,gyro_x_2,...,acc_z_19,gyro_x_19,gyro_y_19,gyro_z_19,acc_x_20,acc_y_20,acc_z_20,gyro_x_20,gyro_y_20,gyro_z_20
2596,74.64,-22.21,-142.04,419.0,245.0,316.0,74.67,-22.34,-142.17,488.0,...,-145.17,707.0,247.0,417.0,75.28,-22.94,-145.35,715.0,241.0,420.0
833,30.71,-38.37,-148.50,-480.0,-817.0,775.0,33.82,-37.43,-149.40,-539.0,...,-121.03,-106.0,-420.0,276.0,68.03,-53.86,-122.82,-28.0,-177.0,22.0
1888,-41.70,7.92,155.48,835.0,215.0,810.0,-45.48,4.69,153.62,60.0,...,47.30,-796.0,-778.0,450.0,-19.32,22.08,51.23,-18.0,-356.0,220.0
1231,-18.89,28.73,157.68,-763.0,-379.0,26.0,-20.84,30.67,146.76,-204.0,...,-8.77,-610.0,-786.0,326.0,23.42,-13.38,-15.83,-217.0,-927.0,366.0
414,36.16,-8.62,-75.97,608.0,905.0,903.0,39.42,-3.22,-90.34,921.0,...,-174.58,-803.0,-808.0,767.0,40.76,-31.78,-177.41,-464.0,-623.0,332.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4426,-6.00,-44.55,154.82,-819.0,-730.0,161.0,16.62,-42.82,-118.69,-653.0,...,150.62,-591.0,-532.0,839.0,-0.78,-48.39,151.20,-471.0,-512.0,959.0
466,70.13,-36.56,-124.26,-191.0,-142.0,131.0,72.00,-34.24,-127.56,-630.0,...,-124.71,-625.0,-285.0,583.0,33.42,-18.62,-136.85,-685.0,-538.0,349.0
3092,1.68,-12.39,-179.16,647.0,119.0,937.0,6.15,11.78,173.69,344.0,...,-114.18,-167.0,-662.0,129.0,52.67,-5.95,-102.66,-341.0,-182.0,194.0
3772,-11.06,27.58,60.21,-382.0,-960.0,250.0,-11.05,25.78,58.59,-947.0,...,-31.60,-103.0,854.0,853.0,66.94,-31.73,-42.13,-15.0,778.0,885.0


In [7]:
def normalize_data(df):
    for col in df.columns:
        print("Maximum obtained: ", df[col].abs().max())
        df[col] = df[col] / df[col].abs().max()
    
    return df

X_train = normalize_data(X_train)
X_test = normalize_data(X_test)

Maximum obtained:  179.25
Maximum obtained:  178.45
Maximum obtained:  179.97
Maximum obtained:  999.0
Maximum obtained:  999.0
Maximum obtained:  999.0
Maximum obtained:  179.25
Maximum obtained:  178.45
Maximum obtained:  179.97
Maximum obtained:  999.0
Maximum obtained:  999.0
Maximum obtained:  999.0
Maximum obtained:  179.25
Maximum obtained:  175.46
Maximum obtained:  179.99
Maximum obtained:  999.0
Maximum obtained:  999.0
Maximum obtained:  999.0
Maximum obtained:  179.25
Maximum obtained:  168.17
Maximum obtained:  179.99
Maximum obtained:  999.0
Maximum obtained:  999.0
Maximum obtained:  999.0
Maximum obtained:  161.97
Maximum obtained:  167.31
Maximum obtained:  179.99
Maximum obtained:  999.0
Maximum obtained:  999.0
Maximum obtained:  998.0
Maximum obtained:  147.98
Maximum obtained:  171.2
Maximum obtained:  179.99
Maximum obtained:  999.0
Maximum obtained:  999.0
Maximum obtained:  998.0
Maximum obtained:  137.78
Maximum obtained:  175.45
Maximum obtained:  179.99
Maxim

In [8]:
X_train.dtypes

acc_x_1      float64
acc_y_1      float64
acc_z_1      float64
gyro_x_1     float64
gyro_y_1     float64
              ...   
acc_y_20     float64
acc_z_20     float64
gyro_x_20    float64
gyro_y_20    float64
gyro_z_20    float64
Length: 120, dtype: object

In [9]:
from sklearn.neural_network import MLPClassifier

mlp = MLPClassifier(hidden_layer_sizes = (120), max_iter=200)

mlp.fit(X_train, y_train)

print(mlp.score(X_train, y_train))

mlp.score(X_test, y_test)


  y = column_or_1d(y, warn=True)


1.0




0.9339805825242719

In [10]:
for idx in range(len(mlp.coefs_)):
    weights = mlp.coefs_[idx].tolist()
    cpp_file = [
        'double weights', str(idx), '[', str(len(weights)), '][', str(len(weights[0])), '] = {\n'
    ]
    for row in range(len(weights)):
        for col in range(len(weights[0])):
            weights[row][col] = str(round(weights[row][col], 4))
    for index, row_weights in enumerate(weights):
        cpp_file.append('{')
        output_str = ', '.join(row_weights)
        cpp_file.append(output_str)
        append_str = "},\n"
        cpp_file.append(append_str)
    cpp_file.append('};')

    cpp_program_string = ''.join(cpp_file)
    with open('/Users/bryanwong/Documents/classifier/weights' + str(idx) + '.cpp', 'w') as f:
        f.write(cpp_program_string)
        f.close()
        
for idx in range(len(mlp.intercepts_)):
    biases = mlp.intercepts_[idx].tolist()
    cpp_file = [
        'double biases', str(idx), '[1][', str(len(biases)), '] = {'
    ]
    for col in range(len(biases)):
        biases[col] = str(round(biases[col], 4))
    output_str = ', '.join(biases)
    output_str = "{" + output_str + "}};\n"
    cpp_file.append(output_str)

    cpp_program_string = ''.join(cpp_file)
    with open('/Users/bryanwong/Documents/classifier/biases' + str(idx) + '.cpp', 'w') as f:
        f.write(cpp_program_string)
        f.close()