In [1]:
# Import block
import os
import pandas as pd
import numpy as np
import time
from math import sqrt
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.metrics import mean_squared_error, mean_absolute_error
from pandas.plotting import scatter_matrix
from importlib import reload
from sklearn.feature_selection import VarianceThreshold
from collections import Counter
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support

In [2]:
grenade_data_path = '../grenade.csv'
shield_data_path = '../shield.csv'
reload_data_path = '../reload.csv'
logout_data_path = '../logout.csv'
# noise_data_path = '../expected_data_noise.csv'


grenade_data_raw = pd.read_csv(grenade_data_path, index_col=False, header=None)
shield_data_raw = pd.read_csv(shield_data_path, index_col=False, header=None)
reload_data_raw = pd.read_csv(reload_data_path, index_col=False, header=None)
logout_data_raw = pd.read_csv(logout_data_path, index_col=False, header=None)
# noise_data_raw = pd.read_csv(noise_data_path, index_col=False)

grenade_data_raw = grenade_data_raw.apply(pd.to_numeric)
grenade_data_raw.reset_index(inplace=True, drop=True)

shield_data_raw = shield_data_raw.apply(pd.to_numeric)
shield_data_raw.reset_index(inplace=True, drop=True)

reload_data_raw = reload_data_raw.apply(pd.to_numeric)
reload_data_raw.reset_index(inplace=True, drop=True)

logout_data_raw = logout_data_raw.apply(pd.to_numeric)
logout_data_raw.reset_index(inplace=True, drop=True)

# noise_data_raw = noise_data_raw.drop(1491).drop(1453).apply(pd.to_numeric)
# noise_data_raw.reset_index(inplace=True, drop=True)
logout_data_raw

Unnamed: 0,0,1,2,3,4,5
0,19.00,-22.20,-63.10,-518.0,-169.0,-123.0
1,16.78,-16.51,-60.09,-425.0,-153.0,-72.0
2,14.86,-10.17,-57.48,-327.0,-146.0,-5.0
3,13.21,-3.30,-55.33,-210.0,151.0,73.0
4,11.78,-3.96,-53.70,-66.0,162.0,170.0
...,...,...,...,...,...,...
4055,56.52,-41.33,-152.88,223.0,57.0,156.0
4056,56.25,-38.36,-154.73,213.0,53.0,132.0
4057,55.60,-35.70,-157.11,32.0,86.0,131.0
4058,53.77,-30.89,-160.54,259.0,53.0,135.0


In [3]:
reload_data_raw

Unnamed: 0,0,1,2,3,4,5
0,43.24,9.50,3.51,663.0,263.0,803.0
1,42.60,11.33,0.24,615.0,245.0,840.0
2,-41.83,-13.21,2.99,558.0,220.0,875.0
3,-40.94,-15.14,6.17,490.0,194.0,908.0
4,-39.93,-17.04,9.29,408.0,171.0,938.0
...,...,...,...,...,...,...
6445,16.65,-10.07,-2.40,185.0,298.0,865.0
6446,11.29,-4.52,-5.41,-30.0,360.0,837.0
6447,3.98,-19.03,-8.57,-360.0,371.0,865.0
6448,-11.00,-26.62,9.55,-482.0,346.0,919.0


In [4]:
ACTION_DATA_SIZE = 30
WINDOW_SIZE = 20
SLIDE_SIZE = 4

def process_df_with_sliding_window(df, action_data_size, window_size, slide_size):
    attributes = ["acc_x", "acc_y", "acc_z", "gyro_x", "gyro_y", "gyro_z"]
    columns_list = []
    # Build column list
    for idx in range(1, 1 + window_size):
        for attribute in attributes:
            columns_list.append(attribute + '_' + str(idx))
            
    df_out = pd.DataFrame(columns=columns_list)
    
    for datapoint_idx in range(0, len(df), action_data_size):
        for row_idx in range(0, action_data_size - window_size, slide_size):
            curr_window_data = []
            # Bound check
            if datapoint_idx + row_idx + window_size - 1 >= len(df):
                break
            for row_iter_idx in range(window_size):
                curr_row_idx = datapoint_idx + row_idx + row_iter_idx
                print
                if curr_row_idx >= len(df):
                    break
                curr_row_list = df.loc[curr_row_idx, :].values.flatten().tolist()
                curr_window_data += curr_row_list


            df_out.loc[len(df_out)] = curr_window_data


    return df_out

grenade_df_processed = process_df_with_sliding_window(grenade_data_raw, ACTION_DATA_SIZE, WINDOW_SIZE, SLIDE_SIZE)
shield_data_processed = process_df_with_sliding_window(shield_data_raw, ACTION_DATA_SIZE, WINDOW_SIZE, SLIDE_SIZE)
reload_data_processed = process_df_with_sliding_window(reload_data_raw, ACTION_DATA_SIZE, WINDOW_SIZE, SLIDE_SIZE)
logout_data_processed = process_df_with_sliding_window(logout_data_raw, ACTION_DATA_SIZE, WINDOW_SIZE, SLIDE_SIZE)
# noise_data_processed = process_df_with_sliding_window(noise_data_raw, WINDOW_SIZE, SLIDE_SIZE)
reload_data_processed


Unnamed: 0,acc_x_1,acc_y_1,acc_z_1,gyro_x_1,gyro_y_1,gyro_z_1,acc_x_2,acc_y_2,acc_z_2,gyro_x_2,...,acc_z_19,gyro_x_19,gyro_y_19,gyro_z_19,acc_x_20,acc_y_20,acc_z_20,gyro_x_20,gyro_y_20,gyro_z_20
0,43.24,9.50,3.51,663.0,263.0,803.0,42.60,11.33,0.24,615.0,...,15.02,349.0,4.0,988.0,-18.20,31.97,15.83,-321.0,-3.0,978.0
1,-39.93,-17.04,9.29,408.0,171.0,938.0,-38.83,-18.86,12.31,314.0,...,1.75,651.0,207.0,925.0,3.14,-29.34,-3.38,496.0,265.0,925.0
2,-35.06,-23.42,20.57,13.0,135.0,994.0,-33.65,-24.58,23.04,-77.0,...,-10.91,127.0,257.0,811.0,11.22,-13.94,-10.25,325.0,96.0,847.0
3,-13.32,-19.18,4.43,505.0,814.0,885.0,-11.59,-20.26,7.90,421.0,...,16.71,24.0,218.0,888.0,-15.33,18.79,19.28,141.0,170.0,901.0
4,-6.40,-22.52,17.86,123.0,714.0,886.0,-4.68,-22.91,20.95,27.0,...,-10.27,778.0,302.0,851.0,8.12,-11.33,-4.52,540.0,411.0,840.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
640,46.80,3.68,6.12,73.0,36.0,78.0,44.49,2.53,6.87,29.0,...,47.22,143.0,961.0,129.0,9.90,-56.99,-19.76,447.0,840.0,288.0
641,12.91,-7.70,-48.23,992.0,242.0,847.0,15.39,-8.66,-52.35,874.0,...,-2.63,96.0,227.0,258.0,37.97,-37.92,-4.75,94.0,568.0,686.0
642,21.33,-10.20,-102.65,-695.0,319.0,897.0,19.69,-9.44,-98.78,-634.0,...,-12.89,-329.0,865.0,848.0,41.76,-22.30,-8.02,461.0,333.0,34.0
643,14.46,-5.57,-85.72,-389.0,388.0,858.0,12.64,-3.50,-80.96,-265.0,...,-4.88,69.0,235.0,979.0,23.69,-24.09,-2.89,82.0,216.0,953.0


In [5]:
from sklearn.preprocessing import normalize

# combined_data_features = pd.concat([grenade_df_processed, shield_data_processed, reload_data_processed, logout_data_processed, noise_data_processed])
combined_data_features = pd.concat([grenade_df_processed, shield_data_processed, reload_data_processed, logout_data_processed])
combined_data_features.reset_index(inplace=True, drop=True)
# combined_data_features = combined_data_features.apply(pd.to_numeric)

# for _, i in combined_data_features.iterrows():

#     for c in range(60):
#         if type(i[c]) != float and type(i[c]) != str:
#             print(i[c], type(i[c]))

In [6]:
# Build label
combined_data_labels = pd.DataFrame(columns=['Activity'])

for i in range(len(grenade_df_processed)):
    combined_data_labels.loc[len(combined_data_labels)] = 0
    
for i in range(len(shield_data_processed)):
    combined_data_labels.loc[len(combined_data_labels)] = 1
    
for i in range(len(reload_data_processed)):
    combined_data_labels.loc[len(combined_data_labels)] = 2
    
for i in range(len(logout_data_processed)):
    combined_data_labels.loc[len(combined_data_labels)] = 3
    
# for i in range(len(noise_data_processed)):
#     combined_data_labels.loc[len(combined_data_labels)] = 4
    
combined_data_labels

Unnamed: 0,Activity
0,0
1,0
2,0
3,0
4,0
...,...
1999,3
2000,3
2001,3
2002,3


In [7]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(combined_data_features, combined_data_labels, test_size=0.1, random_state=42)

X_train

Unnamed: 0,acc_x_1,acc_y_1,acc_z_1,gyro_x_1,gyro_y_1,gyro_z_1,acc_x_2,acc_y_2,acc_z_2,gyro_x_2,...,acc_z_19,gyro_x_19,gyro_y_19,gyro_z_19,acc_x_20,acc_y_20,acc_z_20,gyro_x_20,gyro_y_20,gyro_z_20
694,4.27,-39.89,-95.75,-154.0,943.0,325.0,7.79,-39.20,-98.75,-132.0,...,106.02,-310.0,908.0,578.0,1.55,-46.39,-108.15,-319.0,27.0,553.0
650,59.49,-39.68,-113.81,-17.0,-878.0,357.0,57.92,-36.43,-112.26,-31.0,...,-131.61,-152.0,-649.0,179.0,75.50,-47.56,-125.61,-197.0,-790.0,864.0
1110,-29.59,3.30,151.59,48.0,444.0,783.0,-29.57,3.75,151.51,87.0,...,86.82,194.0,467.0,877.0,-10.22,29.61,94.32,23.0,327.0,965.0
383,31.93,-30.44,-101.03,-380.0,-232.0,500.0,35.52,-32.54,-97.81,-636.0,...,-102.92,-380.0,-264.0,482.0,32.41,-27.47,-102.94,-432.0,-234.0,450.0
173,70.55,-68.52,-113.73,212.0,-305.0,-347.0,82.09,-66.62,-114.72,486.0,...,-130.77,-622.0,212.0,643.0,58.11,5.94,161.34,-664.0,780.0,680.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1130,-27.98,0.60,168.86,151.0,535.0,886.0,24.48,7.43,155.33,215.0,...,-129.88,-180.0,494.0,829.0,0.26,-1.77,-133.74,-64.0,451.0,818.0
1294,-35.72,17.09,8.33,636.0,402.0,783.0,-35.50,16.00,6.70,734.0,...,-22.74,299.0,569.0,7.0,7.14,-33.87,-24.94,29.0,650.0,963.0
860,36.12,-35.85,-43.50,-542.0,-237.0,932.0,37.03,-32.51,-43.80,-443.0,...,-10.11,-338.0,-196.0,787.0,37.15,-38.39,-11.79,-475.0,-234.0,748.0
1459,12.00,-8.40,-72.60,77.0,964.0,845.0,13.89,-8.63,-76.09,18.0,...,-38.62,257.0,459.0,121.0,33.34,-32.71,-38.34,1.0,416.0,50.0


In [8]:
def normalize_data(df):
    for col in df.columns:
        print("Maximum obtained: ", df[col].abs().max())
        df[col] = df[col] / df[col].abs().max()
    
    return df

X_train = normalize_data(X_train)
X_test = normalize_data(X_test)

Maximum obtained:  175.5
Maximum obtained:  169.64
Maximum obtained:  179.98
Maximum obtained:  999.0
Maximum obtained:  995.0
Maximum obtained:  997.0
Maximum obtained:  166.73
Maximum obtained:  176.48
Maximum obtained:  179.99
Maximum obtained:  998.0
Maximum obtained:  999.0
Maximum obtained:  998.0
Maximum obtained:  163.33
Maximum obtained:  173.47
Maximum obtained:  179.99
Maximum obtained:  999.0
Maximum obtained:  999.0
Maximum obtained:  999.0
Maximum obtained:  177.68
Maximum obtained:  167.47
Maximum obtained:  179.98
Maximum obtained:  999.0
Maximum obtained:  998.0
Maximum obtained:  994.0
Maximum obtained:  175.5
Maximum obtained:  174.38
Maximum obtained:  179.98
Maximum obtained:  999.0
Maximum obtained:  999.0
Maximum obtained:  997.0
Maximum obtained:  166.73
Maximum obtained:  176.48
Maximum obtained:  179.99
Maximum obtained:  998.0
Maximum obtained:  999.0
Maximum obtained:  998.0
Maximum obtained:  163.33
Maximum obtained:  173.47
Maximum obtained:  179.99
Maximu

In [9]:
X_train.dtypes

acc_x_1      float64
acc_y_1      float64
acc_z_1      float64
gyro_x_1     float64
gyro_y_1     float64
              ...   
acc_y_20     float64
acc_z_20     float64
gyro_x_20    float64
gyro_y_20    float64
gyro_z_20    float64
Length: 120, dtype: object

In [10]:
from sklearn.neural_network import MLPClassifier

mlp = MLPClassifier(hidden_layer_sizes = (120), max_iter=180)

mlp.fit(X_train, y_train)

print(mlp.score(X_train, y_train))

mlp.score(X_test, y_test)


  y = column_or_1d(y, warn=True)


1.0




0.9552238805970149

In [11]:
for idx in range(len(mlp.coefs_)):
    weights = mlp.coefs_[idx].tolist()
    cpp_file = [
        'double weights', str(idx), '[', str(len(weights)), '][', str(len(weights[0])), '] = {\n'
    ]
    for row in range(len(weights)):
        for col in range(len(weights[0])):
            weights[row][col] = str(round(weights[row][col], 4))
    for index, row_weights in enumerate(weights):
        cpp_file.append('{')
        output_str = ', '.join(row_weights)
        cpp_file.append(output_str)
        append_str = "},\n"
        cpp_file.append(append_str)
    cpp_file.append('};')

    cpp_program_string = ''.join(cpp_file)
    with open('/Users/bryanwong/Documents/classifier/weights' + str(idx) + '.cpp', 'w') as f:
        f.write(cpp_program_string)
        f.close()
        
for idx in range(len(mlp.intercepts_)):
    biases = mlp.intercepts_[idx].tolist()
    cpp_file = [
        'double biases', str(idx), '[1][', str(len(biases)), '] = {'
    ]
    for col in range(len(biases)):
        biases[col] = str(round(biases[col], 4))
    output_str = ', '.join(biases)
    output_str = "{" + output_str + "}};\n"
    cpp_file.append(output_str)

    cpp_program_string = ''.join(cpp_file)
    with open('/Users/bryanwong/Documents/classifier/biases' + str(idx) + '.cpp', 'w') as f:
        f.write(cpp_program_string)
        f.close()

In [12]:
input_param = X_train.iloc[1000].tolist()
for row in range(len(input_param)):
    input_param[row] = str(round(input_param[row], 2))
output_str = ', '.join(input_param)
output_str = "test_case = [" + output_str + "]"
print(y_train.iloc[1000])
output_str


Activity    1
Name: 826, dtype: int64


'test_case = [0.14, -0.19, -0.21, 0.07, 0.98, 0.89, 0.19, -0.18, -0.22, 0.06, 0.08, 0.84, 0.22, -0.19, -0.23, 0.06, 0.18, 0.78, 0.22, -0.19, -0.23, 0.05, 0.26, 0.74, 0.25, -0.18, -0.24, 0.05, 0.33, 0.72, 0.34, -0.13, -0.27, 0.06, 0.39, 0.7, 0.39, -0.04, -0.25, 0.28, 0.65, 0.66, 0.44, -0.1, -0.21, 0.78, 0.64, 1.0, 0.57, -0.82, -0.2, 0.08, 0.34, 0.17, 0.61, -0.73, -0.19, -0.61, -0.09, -0.34, 0.48, -0.27, -0.34, -0.02, -0.69, 0.52, 0.45, -0.25, -0.4, -0.83, -0.96, 0.13, 0.47, -0.22, -0.51, -0.58, -0.96, 0.84, 0.46, -0.21, -0.54, -0.46, -0.91, 0.77, 0.39, -0.2, -0.55, -0.48, -0.87, 0.75, 0.38, -0.18, -0.55, -0.4, -0.9, 0.75, -0.06, -0.29, 0.31, 0.11, 0.19, 0.05, -0.04, -0.27, 0.32, 0.05, 0.31, 0.07, -0.0, -0.28, 0.33, -0.01, 0.45, 0.09, 0.02, -0.29, -0.34, -0.06, 0.6, 0.1]'

In [13]:
mlp.out_activation_

'softmax'