In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from scipy import signal
from numpy.fft import fft

from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.metrics import classification_report

import warnings
warnings.filterwarnings('ignore')

plt.rcParams["figure.figsize"] = (30, 20)

## process_file() takes a string with a path to the .csv


def process_file(pathname):
    res = pd.read_csv(pathname, sep=',', header=0, index_col=None, names=[
                      'time', 'seconds', 'z', 'y', 'x'])
    return res


def butterworth(df):
    nyquist_frequency = 0.5 * 50

    low = 0.5 / nyquist_frequency
    high = 2.2 / nyquist_frequency

    b, a = signal.butter(3, Wn=[low, high], btype='bandpass')

    df['x'] = signal.filtfilt(b, a, df['x'])
    df['y'] = signal.filtfilt(b, a, df['y'])
    df['z'] = signal.filtfilt(b, a, df['z'])

    return df


In [9]:
dlp_upstairs_1 = butterworth(process_file('data/dstepup1.csv'))
dlp_upstairs_1 = dlp_upstairs_1[dlp_upstairs_1['seconds'] > 12]
dlp_upstairs_1 = dlp_upstairs_1[dlp_upstairs_1['seconds'] < 22]
dlp_upstairs_1['seconds'] = dlp_upstairs_1['seconds'] - 12

dlp_upstairs_2 = butterworth(process_file('data/dstepup2.csv'))
dlp_upstairs_2 = dlp_upstairs_2[dlp_upstairs_2['seconds'] > 13]
dlp_upstairs_2 = dlp_upstairs_2[dlp_upstairs_2['seconds'] < 23]
dlp_upstairs_2['seconds'] = dlp_upstairs_2['seconds'] - 13

dlp_downstairs_1 = butterworth(process_file('data/dstepdown1.csv'))
dlp_downstairs_1 = dlp_downstairs_1[dlp_downstairs_1['seconds'] > 12]
dlp_downstairs_1 = dlp_downstairs_1[dlp_downstairs_1['seconds'] < 21]
dlp_downstairs_1['seconds'] = dlp_downstairs_1['seconds'] - 12

dlp_downstairs_2 = butterworth(process_file('data/dstepdown2.csv'))
dlp_downstairs_2 = dlp_downstairs_2[dlp_downstairs_2['seconds'] > 15.5]
dlp_downstairs_2 = dlp_downstairs_2[dlp_downstairs_2['seconds'] < 23]
dlp_downstairs_2['seconds'] = dlp_downstairs_2['seconds'] - 15.5


mrp_upstairs_1 = butterworth(process_file(
    'processed_data/mrp_upstairs_16steps.csv'))
m_up24_1_bw = process_file('processed_data\m_up24_1_bw.csv')
m_up24_2_bw = process_file('processed_data\m_up24_2_bw.csv')
m_up24_3_bw = process_file('processed_data\m_up24_3_bw.csv')
m_up24_4_bw = process_file('processed_data\m_up24_4_bw.csv')
m_up24_5_bw = process_file('processed_data\m_up24_5_bw.csv')

mrp_downstairs_1 = butterworth(process_file(
    'processed_data/mrp_downstairs_16steps.csv'))
m_down24_1_bw = process_file('processed_data\m_down24_1_bw.csv')
m_down24_2_bw = process_file('processed_data\m_down24_2_bw.csv')
m_down24_3_bw = process_file('processed_data\m_down24_3_bw.csv')
m_down24_4_bw = process_file('processed_data\m_down24_4_bw.csv')
m_down24_5_bw = process_file('processed_data\m_down24_5_bw.csv')

dlp_upstairs_1['movement'] = 'upstairs'
dlp_upstairs_2['movement'] = 'upstairs'
dlp_downstairs_1['movement'] = 'downstairs'
dlp_downstairs_2['movement'] = 'downstairs'
mrp_upstairs_1['movement'] = 'upstairs'
m_up24_1_bw['movement'] = 'upstairs'
m_up24_2_bw['movement'] = 'upstairs'
m_up24_3_bw['movement'] = 'upstairs'
m_up24_4_bw['movement'] = 'upstairs'
m_up24_5_bw['movement'] = 'upstairs'
mrp_downstairs_1['movement'] = 'downstairs'
m_down24_1_bw['movement'] = 'downstairs'
m_down24_2_bw['movement'] = 'downstairs'
m_down24_3_bw['movement'] = 'downstairs'
m_down24_4_bw['movement'] = 'downstairs'
m_down24_5_bw['movement'] = 'downstairs'

dlp_upstairs_1_split = np.array_split(dlp_upstairs_1, 4)
dlp_upstairs_2_split = np.array_split(dlp_upstairs_2, 4)
dlp_downstairs_1_split = np.array_split(dlp_downstairs_1, 4)
dlp_downstairs_2_split = np.array_split(dlp_downstairs_2, 4)
mrp_upstairs_1_split = np.array_split(mrp_upstairs_1, 4)
m_up24_1_bw_split = np.array_split(m_up24_1_bw, 4)
m_up24_2_bw_split = np.array_split(m_up24_2_bw, 4)
m_up24_3_bw_split = np.array_split(m_up24_3_bw, 4)
m_up24_4_bw_split = np.array_split(m_up24_4_bw, 4)
m_up24_5_bw_split = np.array_split(m_up24_5_bw, 4)
mrp_downstairs_1_split = np.array_split(mrp_downstairs_1, 4)
m_down24_1_bw_split = np.array_split(m_down24_1_bw, 4)
m_down24_2_bw_split = np.array_split(m_down24_2_bw, 4)
m_down24_3_bw_split = np.array_split(m_down24_3_bw, 4)
m_down24_4_bw_split = np.array_split(m_down24_4_bw, 4)
m_down24_5_bw_split = np.array_split(m_down24_5_bw, 4)

tmove = []
tdata = []

for i in [dlp_upstairs_1_split, dlp_upstairs_2_split, dlp_downstairs_1_split, dlp_downstairs_2_split, mrp_upstairs_1_split, m_up24_1_bw_split, m_up24_2_bw_split, m_up24_3_bw_split, m_up24_4_bw_split, m_up24_5_bw_split, mrp_downstairs_1_split, m_down24_1_bw_split, m_down24_2_bw_split, m_down24_3_bw_split, m_down24_4_bw_split, m_down24_5_bw_split]:
    for j in range(len(i) - 1):
        temp = i[j]

        # if ((temp['movement'] == 'walking').all()):
            # tmove.append('walking')
        if ((temp['movement'] == 'upstairs').all()):
            tmove.append('upstairs')
        if ((temp['movement'] == 'downstairs').all()):
            tmove.append('downstairs')

        temp['combined'] = temp['y'] + temp['z']
        tdata.append(np.asarray(temp['combined']))

training_data = pd.DataFrame(tmove)
training_data['joined'] = pd.Series(tdata, index=training_data.index)
training_data

training_data.rename(columns={0: 'movement'}, inplace=True)
training_data = pd.concat([training_data['movement'], training_data.pop(
    'joined').apply(pd.Series)], axis=1).dropna(axis=1)
training_data


Unnamed: 0,movement,0,1,2,3,4,5,6,7,8,...,83,84,85,86,87,88,89,90,91,92
0,upstairs,0.72715,0.907812,1.063343,1.184847,1.265162,1.299434,1.285551,1.22441,1.119933,...,-1.926562,-2.25004,-2.485225,-2.623332,-2.659224,-2.592078,-2.426056,-2.170692,-1.840762,-1.455624
1,upstairs,2.172768,2.469652,2.711744,2.892723,3.006744,3.048795,3.015192,2.904058,2.715752,...,-0.937466,-0.704216,-0.469763,-0.246372,-0.044719,0.126482,0.260654,0.353373,0.402235,0.406637
2,upstairs,2.697359,2.540849,2.309704,2.010522,1.653138,1.250379,0.817469,0.371094,-0.07171,...,0.474775,0.468391,0.386349,0.235951,0.028764,-0.219991,-0.49227,-0.768212,-1.027306,-1.249738
3,upstairs,-0.112958,-0.115275,-0.107152,-0.088283,-0.059115,-0.020908,0.024248,0.073476,0.123207,...,-2.650547,-2.597883,-2.467856,-2.275166,-2.034605,-1.760044,-1.463929,-1.157172,-0.849276,-0.548484
4,upstairs,2.154293,2.437471,2.646866,2.77085,2.80135,2.734409,2.570524,2.314711,1.976256,...,-1.092118,-0.758492,-0.412204,-0.064439,0.273919,0.592601,0.88208,1.133591,1.339131,1.491543
5,upstairs,1.785666,1.387475,0.939459,0.465402,-0.011033,-0.467864,-0.886057,-1.250422,-1.550077,...,0.378344,0.54443,0.679421,0.776426,0.829747,0.8352,0.790419,0.695148,0.551517,0.364272
6,downstairs,-0.407569,-0.413991,-0.406516,-0.386,-0.354072,-0.312877,-0.264729,-0.211712,-0.155257,...,2.144396,2.784117,3.273688,3.586489,3.703457,3.614216,3.318052,2.824658,2.154528,1.338787
7,downstairs,4.676862,5.049725,5.186959,5.085165,4.752229,4.206405,3.475041,2.593128,1.601796,...,3.684423,3.87064,3.806122,3.493823,2.95039,2.205521,1.300671,0.287087,-0.776842,-1.828793
8,downstairs,5.498235,4.972943,4.214665,3.26719,2.183404,1.02223,-0.154509,-1.285203,-2.311529,...,0.456402,-0.587611,-1.617821,-2.573086,-3.395069,-4.032326,-4.444174,-4.603773,-4.499954,-4.137704
9,downstairs,1.720102,2.028969,2.304572,2.526577,2.674351,2.728312,2.671476,2.491045,2.179912,...,1.647808,2.610157,3.390227,3.94068,4.228946,4.238927,3.971701,3.445236,2.693199,1.763016


In [10]:
## ML portion starts here

X = training_data.drop(columns=['movement'])
y = training_data['movement']

X_train, X_valid, y_train, y_valid = train_test_split(X, y)

bayes_model = make_pipeline(
    GaussianNB()
)

knn_model = make_pipeline(
    StandardScaler(),
    KNeighborsClassifier(n_neighbors=8)
)

nn_model = make_pipeline(
    StandardScaler(),
    MLPClassifier(solver='lbfgs', hidden_layer_sizes=(
        16, 8, 4), activation='logistic', max_iter=100000)
)

dt_model = make_pipeline(
    StandardScaler(),
    DecisionTreeClassifier(max_depth=125)
)

rf_model = make_pipeline(
    RandomForestClassifier(n_estimators=1500, max_depth=3, min_samples_leaf=5)
)

en_model = make_pipeline(
    VotingClassifier([
        ('nb', GaussianNB()),
        ('knn', KNeighborsClassifier(8)),
        ('tree1', DecisionTreeClassifier(max_depth=20)),
        ('tree2', DecisionTreeClassifier(min_samples_leaf=4)),
        ('rf', RandomForestClassifier(
            n_estimators=250, max_depth=15, min_samples_leaf=2)),
        ('mlp', MLPClassifier(solver='lbfgs', hidden_layer_sizes=(16, 8, 6), activation='logistic', max_iter=100000))])

)

print('bayes')
bayes_model.fit(X_train, y_train)
print(bayes_model.score(X_train, y_train))
print(bayes_model.score(X_valid, y_valid))
y_predicted = bayes_model.predict(X_valid)
print(classification_report(y_valid, y_predicted))

print('')
print('knn')
knn_model.fit(X_train, y_train)
print(knn_model.score(X_train, y_train))
print(knn_model.score(X_valid, y_valid))
y_predicted = knn_model.predict(X_valid)
print(classification_report(y_valid, y_predicted))

print('')
print('neural net')
nn_model.fit(X_train, y_train)
print(nn_model.score(X_train, y_train))
print(nn_model.score(X_valid, y_valid))
y_predicted = nn_model.predict(X_valid)
print(classification_report(y_valid, y_predicted))

print('')
print('decision tree')
dt_model.fit(X_train, y_train)
print(dt_model.score(X_train, y_train))
print(dt_model.score(X_valid, y_valid))
y_predicted = dt_model.predict(X_valid)
print(classification_report(y_valid, y_predicted))

print('')
print('random forest')
rf_model.fit(X_train, y_train)
print(rf_model.score(X_train, y_train))
print(rf_model.score(X_valid, y_valid))
y_predicted = rf_model.predict(X_valid)
print(classification_report(y_valid, y_predicted))

print('')
print('ensemble')
en_model.fit(X_train, y_train)
print(en_model.score(X_train, y_train))
print(en_model.score(X_valid, y_valid))
y_predicted = en_model.predict(X_valid)
print(classification_report(y_valid, y_predicted))


bayes
0.8055555555555556
0.3333333333333333
              precision    recall  f1-score   support

  downstairs       0.38      0.50      0.43         6
    upstairs       0.25      0.17      0.20         6

    accuracy                           0.33        12
   macro avg       0.31      0.33      0.31        12
weighted avg       0.31      0.33      0.31        12


knn
0.8055555555555556
0.6666666666666666
              precision    recall  f1-score   support

  downstairs       0.75      0.50      0.60         6
    upstairs       0.62      0.83      0.71         6

    accuracy                           0.67        12
   macro avg       0.69      0.67      0.66        12
weighted avg       0.69      0.67      0.66        12


neural net
1.0
0.5
              precision    recall  f1-score   support

  downstairs       0.50      0.67      0.57         6
    upstairs       0.50      0.33      0.40         6

    accuracy                           0.50        12
   macro avg       0.