In [193]:

from numpy import loadtxt
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import numpy as np
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import LinearSVC, SVC
import eli5


In [194]:
def classifier(num=0, rfnest=1000, rfdep=10, SVMC=10):
    if num==0:
        return RandomForestClassifier(n_estimators=rfnest,
                                     criterion='gini',
                                     max_depth=rfdep,
                                     min_samples_split=2,
                                     min_samples_leaf=1,
                                     min_weight_fraction_leaf=0.0,
                                     max_features=None,
                                     max_leaf_nodes=None,
                                     bootstrap=True,
                                     oob_score=False,
                                     n_jobs=1,
                                     class_weight='balanced')
    elif num == 1:
        return LogisticRegression(C=10000, penalty='l2',fit_intercept=False)
    elif num == 2:
        return LinearSVC(C=SVMC,fit_intercept=False)
    elif num ==3:
        return RidgeClassifier(alpha=2, fit_intercept=False)
    elif num == 4:
        return SVC(C=10000, gamma=0.01, kernel='rbf',degree=3)
    
    
def diff_prediction(true, pred):
    for i, (t, p) in enumerate(zip(true, pred)):
        if t != p:
            print("Error instance %d, True=%d, Pred=%d" %(i,t,p))
        

In [195]:
from IPython.core.display import display, HTML
show_html = lambda html: display(HTML(html))
show_html_expl = lambda expl, **kwargs: show_html(eli5.format_as_html(expl, include_styles=False, **kwargs))
show_html(eli5.format_html_styles())

In [196]:
data = loadtxt('CVIrf.csv', delimiter=';', skiprows=1)
fnames = ['round','cadence','vel','sLength','sTime','sLengthCV','sTimeCV','rhfx','lhfx','rhfy','lhfy','rhfz','lhfz']
ncl = 6
X = data[:, 1:-ncl]
XNN = X.copy()
sc = MinMaxScaler()
X= sc.fit_transform(X)

n_splits = 10

#  3 strides 3 clusters

In [197]:

yv = data[:, -ncl:]
print(yv.shape)
nlb = 0
y = yv[:,nlb].reshape(yv.shape[0])


(153, 6)


## Random Forest

In [198]:

skf = StratifiedKFold(n_splits=n_splits)
skf.get_n_splits(X, y)

lacc = []
for train_index, test_index in skf.split(X, y):
    clf= classifier(num=0, rfnest=500, rfdep=8)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    clf.fit(X_train, y_train)
    prediction = clf.predict(X_test)
    acc = accuracy_score(y_test, prediction)
    lacc.append(acc)

print('ACC mean=', np.mean(lacc))

ACC mean= 0.7836309523809524


In [199]:
clf= classifier(num=0, rfnest=500, rfdep=8)

clf.fit(X, y)

y_hat = clf.predict(X)
print (classification_report(y_hat, y))
print (confusion_matrix(y_hat, y))
print('ACC=', accuracy_score(y, y_hat))

             precision    recall  f1-score   support

        1.0       1.00      1.00      1.00        50
        2.0       1.00      1.00      1.00        37
        3.0       1.00      1.00      1.00        66

avg / total       1.00      1.00      1.00       153

[[50  0  0]
 [ 0 37  0]
 [ 0  0 66]]
ACC= 1.0


In [200]:
expl = eli5.explain_weights(clf, feature_names=fnames)

show_html_expl(expl)

Weight,Feature
0.4509  ± 0.0946,rhfx
0.1402  ± 0.1824,rhfz
0.0939  ± 0.1931,lhfz
0.0631  ± 0.1086,sTimeCV
0.0573  ± 0.0922,lhfx
0.0334  ± 0.0784,sTime
0.0330  ± 0.0848,cadence
0.0282  ± 0.0657,rhfy
0.0279  ± 0.0934,sLength
0.0223  ± 0.0746,lhfy


## SVM

In [201]:
skf = StratifiedKFold(n_splits=n_splits)
skf.get_n_splits(X, y)

lacc = []
for train_index, test_index in skf.split(X, y):
    clf= classifier(num=2, SVMC=0.1)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    clf.fit(X_train, y_train)
    prediction = clf.predict(X_test)
    acc = accuracy_score(y_test, prediction)
    lacc.append(acc)

print('ACC mean=', np.mean(lacc))

ACC mean= 0.802797619047619


In [202]:
clf= classifier(num=2, SVMC=0.1)

clf.fit(X, y)

y_hat = clf.predict(X)
print (classification_report(y_hat, y))
print (confusion_matrix(y_hat, y))
print('ACC=', accuracy_score(y, y_hat))

             precision    recall  f1-score   support

        1.0       0.88      0.72      0.79        61
        2.0       0.43      0.73      0.54        22
        3.0       0.98      0.93      0.96        70

avg / total       0.86      0.82      0.83       153

[[44 17  0]
 [ 5 16  1]
 [ 1  4 65]]
ACC= 0.8169934640522876


In [203]:
diff_prediction(y, y_hat)

Error instance 8, True=2, Pred=1
Error instance 27, True=1, Pred=2
Error instance 30, True=2, Pred=3
Error instance 31, True=2, Pred=3
Error instance 40, True=1, Pred=2
Error instance 45, True=1, Pred=2
Error instance 51, True=3, Pred=2
Error instance 53, True=2, Pred=3
Error instance 54, True=2, Pred=1
Error instance 55, True=2, Pred=1
Error instance 56, True=2, Pred=1
Error instance 57, True=2, Pred=1
Error instance 70, True=1, Pred=2
Error instance 71, True=1, Pred=2
Error instance 72, True=1, Pred=3
Error instance 74, True=2, Pred=1
Error instance 75, True=2, Pred=1
Error instance 76, True=2, Pred=1
Error instance 77, True=2, Pred=1
Error instance 90, True=2, Pred=1
Error instance 91, True=2, Pred=1
Error instance 114, True=2, Pred=1
Error instance 115, True=2, Pred=1
Error instance 116, True=2, Pred=1
Error instance 117, True=2, Pred=1
Error instance 119, True=2, Pred=1
Error instance 123, True=2, Pred=3
Error instance 140, True=2, Pred=1


In [204]:
expl = eli5.explain_weights(clf, feature_names=fnames)

show_html_expl(expl)

Weight?,Feature,Unnamed: 2_level_0
Weight?,Feature,Unnamed: 2_level_1
Weight?,Feature,Unnamed: 2_level_2
+0.407,rhfz,
+0.299,vel,
+0.282,lhfz,
+0.242,sLength,
+0.029,rhfy,
+0.009,round,
-0.023,cadence,
-0.086,sTimeCV,
-0.101,lhfx,
-0.247,lhfy,

Weight?,Feature
0.407,rhfz
0.299,vel
0.282,lhfz
0.242,sLength
0.029,rhfy
0.009,round
-0.023,cadence
-0.086,sTimeCV
-0.101,lhfx
-0.247,lhfy

Weight?,Feature
0.586,lhfy
0.201,cadence
0.189,lhfx
0.126,sTimeCV
0.073,rhfy
0.071,sTime
0.061,round
0.024,vel
0.015,sLengthCV
-0.138,sLength

Weight?,Feature
2.181,rhfx
0.191,rhfz
0.103,sLengthCV
0.072,sTime
0.031,lhfz
-0.074,sTimeCV
-0.079,round
-0.102,sLength
-0.271,rhfy
-0.349,vel


## Decision Tree

In [205]:
clf= DecisionTreeClassifier()

clf.fit(XNN, y)

y_hat = clf.predict(XNN)
print (classification_report(y_hat, y))
print (confusion_matrix(y_hat, y))
print('ACC=', accuracy_score(y, y_hat))
expl = eli5.explain_weights(clf, feature_names=fnames, rotate=True, filled=True, leaves_parallel=True, proportion=True)

show_html_expl(expl)

             precision    recall  f1-score   support

        1.0       1.00      1.00      1.00        50
        2.0       1.00      1.00      1.00        37
        3.0       1.00      1.00      1.00        66

avg / total       1.00      1.00      1.00       153

[[50  0  0]
 [ 0 37  0]
 [ 0  0 66]]
ACC= 1.0


Weight,Feature
0.5179,rhfx
0.1222,rhfz
0.1063,sTimeCV
0.0967,cadence
0.0386,round
0.0363,vel
0.0247,lhfx
0.0232,lhfz
0.0207,lhfy
0.0134,sTime


# 5 strides 3 clusters

In [206]:
yv = data[:, -ncl:]
print(yv.shape)
nlb = 1
y = yv[:,nlb].reshape(yv.shape[0])


(153, 6)


## Random Forest

In [207]:
skf = StratifiedKFold(n_splits=n_splits)
skf.get_n_splits(X, y)

lacc = []
for train_index, test_index in skf.split(X, y):
    clf= classifier(num=0, rfnest=500, rfdep=10)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    clf.fit(X_train, y_train)
    prediction = clf.predict(X_test)
    acc = accuracy_score(y_test, prediction)
    lacc.append(acc)

print('ACC mean=', np.mean(lacc))

ACC mean= 0.731195054945055


In [208]:
clf= classifier(num=0, rfnest=500, rfdep=10)

clf.fit(X, y)

y_hat = clf.predict(X)
print (classification_report(y_hat, y))
print (confusion_matrix(y_hat, y))
print('ACC=', accuracy_score(y, y_hat))

             precision    recall  f1-score   support

        1.0       1.00      1.00      1.00        59
        2.0       1.00      1.00      1.00        56
        3.0       1.00      1.00      1.00        38

avg / total       1.00      1.00      1.00       153

[[59  0  0]
 [ 0 56  0]
 [ 0  0 38]]
ACC= 1.0


In [209]:
expl = eli5.explain_weights(clf, feature_names=fnames)

show_html_expl(expl)

Weight,Feature
0.3789  ± 0.1038,rhfx
0.1626  ± 0.2275,lhfz
0.1392  ± 0.2167,rhfz
0.0681  ± 0.1546,lhfy
0.0486  ± 0.0964,cadence
0.0349  ± 0.0898,vel
0.0313  ± 0.0986,sTimeCV
0.0309  ± 0.0777,lhfx
0.0295  ± 0.0762,sLength
0.0269  ± 0.0737,sLengthCV


## SVM

In [210]:
skf = StratifiedKFold(n_splits=n_splits)
skf.get_n_splits(X, y)

lacc = []
for train_index, test_index in skf.split(X, y):
    clf= classifier(num=2, SVMC=0.1)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    clf.fit(X_train, y_train)
    prediction = clf.predict(X_test)
    acc = accuracy_score(y_test, prediction)
    lacc.append(acc)

print('ACC mean=', np.mean(lacc))

ACC mean= 0.7959111721611721


In [211]:
clf= classifier(num=2, SVMC=0.1)

clf.fit(X, y)

y_hat = clf.predict(X)
print (classification_report(y_hat, y))
print (confusion_matrix(y_hat, y))
print('ACC=', accuracy_score(y, y_hat))

             precision    recall  f1-score   support

        1.0       0.98      0.87      0.92        67
        2.0       0.89      0.77      0.83        65
        3.0       0.37      0.67      0.47        21

avg / total       0.86      0.80      0.82       153

[[58  0  9]
 [ 0 50 15]
 [ 1  6 14]]
ACC= 0.7973856209150327


In [212]:
diff_prediction(y, y_hat)

Error instance 22, True=2, Pred=3
Error instance 23, True=2, Pred=3
Error instance 25, True=3, Pred=2
Error instance 28, True=3, Pred=1
Error instance 29, True=3, Pred=1
Error instance 30, True=3, Pred=1
Error instance 31, True=3, Pred=1
Error instance 37, True=1, Pred=3
Error instance 44, True=2, Pred=3
Error instance 45, True=2, Pred=3
Error instance 55, True=3, Pred=2
Error instance 56, True=3, Pred=2
Error instance 70, True=2, Pred=3
Error instance 71, True=2, Pred=3
Error instance 74, True=3, Pred=2
Error instance 75, True=3, Pred=2
Error instance 76, True=3, Pred=2
Error instance 77, True=3, Pred=2
Error instance 90, True=3, Pred=2
Error instance 91, True=3, Pred=2
Error instance 92, True=3, Pred=2
Error instance 114, True=3, Pred=2
Error instance 115, True=3, Pred=2
Error instance 117, True=3, Pred=2
Error instance 119, True=3, Pred=2
Error instance 123, True=3, Pred=1
Error instance 124, True=3, Pred=2
Error instance 149, True=3, Pred=1
Error instance 150, True=3, Pred=1
Error 

In [213]:
expl = eli5.explain_weights(clf, feature_names=fnames)

show_html_expl(expl)

Weight?,Feature,Unnamed: 2_level_0
Weight?,Feature,Unnamed: 2_level_1
Weight?,Feature,Unnamed: 2_level_2
+1.855,rhfx,
+0.386,rhfz,
+0.270,lhfz,
+0.121,sLengthCV,
+0.081,sTimeCV,
-0.010,round,
-0.037,sTime,
-0.213,sLength,
-0.396,rhfy,
-0.474,lhfy,

Weight?,Feature
1.855,rhfx
0.386,rhfz
0.27,lhfz
0.121,sLengthCV
0.081,sTimeCV
-0.01,round
-0.037,sTime
-0.213,sLength
-0.396,rhfy
-0.474,lhfy

Weight?,Feature
0.364,lhfz
0.315,vel
0.265,rhfz
0.253,sLength
0.172,cadence
0.112,rhfy
0.02,round
-0.046,sTimeCV
-0.138,lhfx
-0.181,sLengthCV

Weight?,Feature
0.549,lhfy
0.244,lhfx
0.185,vel
0.175,sTime
0.111,rhfy
0.055,cadence
-0.023,round
-0.037,sLength
-0.073,sTimeCV
-0.14,sLengthCV


## Decision tree

In [214]:
clf= DecisionTreeClassifier()

clf.fit(XNN, y)

y_hat = clf.predict(XNN)
print (classification_report(y_hat, y))
print (confusion_matrix(y_hat, y))
print('ACC=', accuracy_score(y, y_hat))
expl = eli5.explain_weights(clf, feature_names=fnames, rotate=True, filled=True, leaves_parallel=True, proportion=True)

show_html_expl(expl)

             precision    recall  f1-score   support

        1.0       1.00      1.00      1.00        59
        2.0       1.00      1.00      1.00        56
        3.0       1.00      1.00      1.00        38

avg / total       1.00      1.00      1.00       153

[[59  0  0]
 [ 0 56  0]
 [ 0  0 38]]
ACC= 1.0


Weight,Feature
0.4323,rhfx
0.2475,lhfz
0.0884,cadence
0.0613,lhfx
0.0489,rhfz
0.0433,lhfy
0.0399,round
0.0193,sLength
0.019,rhfy
0.0,sTimeCV


# 5 strides 5 clusters

In [215]:
yv = data[:, -ncl:]
print(yv.shape)
nlb = 2
y = yv[:,nlb].reshape(yv.shape[0])


(153, 6)


## Random Forest

In [216]:
skf = StratifiedKFold(n_splits=n_splits)
skf.get_n_splits(X, y)

lacc = []
for train_index, test_index in skf.split(X, y):
    clf= classifier(num=0, rfnest=1000, rfdep=8)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    clf.fit(X_train, y_train)
    prediction = clf.predict(X_test)
    acc = accuracy_score(y_test, prediction)
    lacc.append(acc)

print('ACC mean=', np.mean(lacc))

ACC mean= 0.786338612368024


In [217]:
clf= classifier(num=0, rfnest=1000, rfdep=8)

clf.fit(X, y)

y_hat = clf.predict(X)
print (classification_report(y_hat, y))
print (confusion_matrix(y_hat, y))
print('ACC=', accuracy_score(y, y_hat))

             precision    recall  f1-score   support

        1.0       1.00      1.00      1.00        30
        2.0       1.00      1.00      1.00        58
        3.0       1.00      1.00      1.00        18
        4.0       1.00      1.00      1.00        25
        5.0       1.00      1.00      1.00        22

avg / total       1.00      1.00      1.00       153

[[30  0  0  0  0]
 [ 0 58  0  0  0]
 [ 0  0 18  0  0]
 [ 0  0  0 25  0]
 [ 0  0  0  0 22]]
ACC= 1.0


In [218]:
expl = eli5.explain_weights(clf, feature_names=fnames)

show_html_expl(expl)

Weight,Feature
0.3800  ± 0.1116,rhfx
0.1441  ± 0.2185,rhfz
0.1102  ± 0.2026,lhfz
0.0872  ± 0.1581,sTimeCV
0.0653  ± 0.1167,sTime
0.0422  ± 0.0757,rhfy
0.0313  ± 0.0578,lhfx
0.0287  ± 0.0770,cadence
0.0287  ± 0.0673,sLengthCV
0.0259  ± 0.0595,vel


## SVM

In [219]:
skf = StratifiedKFold(n_splits=n_splits)
skf.get_n_splits(X, y)

lacc = []
for train_index, test_index in skf.split(X, y):
    clf= classifier(num=2, SVMC=10)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    clf.fit(X_train, y_train)
    prediction = clf.predict(X_test)
    acc = accuracy_score(y_test, prediction)
    lacc.append(acc)

print('ACC mean=', np.mean(lacc))

ACC mean= 0.7061576168929109


In [220]:
clf= classifier(num=2, SVMC=10)

clf.fit(X, y)

y_hat = clf.predict(X)
print (classification_report(y_hat, y))
print (confusion_matrix(y_hat, y))
print('ACC=', accuracy_score(y, y_hat))

             precision    recall  f1-score   support

        1.0       0.80      0.73      0.76        33
        2.0       0.97      0.92      0.94        61
        3.0       0.67      0.75      0.71        16
        4.0       0.52      0.65      0.58        20
        5.0       0.82      0.78      0.80        23

avg / total       0.82      0.80      0.81       153

[[24  0  0  8  1]
 [ 0 56  4  0  1]
 [ 2  2 12  0  0]
 [ 4  0  1 13  2]
 [ 0  0  1  4 18]]
ACC= 0.803921568627451


In [221]:
diff_prediction(y, y_hat)

Error instance 26, True=4, Pred=5
Error instance 29, True=3, Pred=2
Error instance 30, True=3, Pred=2
Error instance 32, True=3, Pred=2
Error instance 36, True=2, Pred=3
Error instance 37, True=2, Pred=3
Error instance 41, True=4, Pred=5
Error instance 42, True=4, Pred=5
Error instance 44, True=4, Pred=5
Error instance 70, True=1, Pred=3
Error instance 71, True=1, Pred=3
Error instance 72, True=1, Pred=4
Error instance 73, True=1, Pred=4
Error instance 74, True=3, Pred=5
Error instance 76, True=5, Pred=4
Error instance 77, True=3, Pred=4
Error instance 84, True=4, Pred=1
Error instance 85, True=4, Pred=1
Error instance 90, True=4, Pred=1
Error instance 91, True=4, Pred=1
Error instance 92, True=4, Pred=1
Error instance 93, True=4, Pred=1
Error instance 114, True=4, Pred=1
Error instance 115, True=5, Pred=4
Error instance 123, True=5, Pred=2
Error instance 137, True=1, Pred=4
Error instance 138, True=4, Pred=1
Error instance 139, True=1, Pred=4
Error instance 145, True=5, Pred=1
Error i

In [222]:
expl = eli5.explain_weights(clf, feature_names=fnames)

show_html_expl(expl)

Weight?,Feature,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0
Weight?,Feature,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Weight?,Feature,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Weight?,Feature,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3
Weight?,Feature,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4
+2.363,lhfz,,,
+0.746,vel,,,
+0.527,rhfz,,,
+0.512,sTimeCV,,,
+0.381,lhfy,,,
+0.280,sLength,,,
-0.137,lhfx,,,
-0.410,round,,,
-0.738,cadence,,,
-1.244,rhfy,,,

Weight?,Feature
2.363,lhfz
0.746,vel
0.527,rhfz
0.512,sTimeCV
0.381,lhfy
0.28,sLength
-0.137,lhfx
-0.41,round
-0.738,cadence
-1.244,rhfy

Weight?,Feature
5.533,rhfx
2.027,rhfz
1.806,sLength
1.477,sTimeCV
0.565,lhfz
0.46,round
0.069,sLengthCV
-0.29,sTime
-0.879,cadence
-1.498,lhfx

Weight?,Feature
2.927,vel
0.957,lhfx
0.796,sLength
0.62,cadence
0.241,rhfx
-0.218,sTime
-0.232,round
-0.39,lhfy
-0.819,rhfz
-0.989,rhfy

Weight?,Feature
2.215,sLength
0.891,cadence
0.604,sTime
0.522,lhfy
0.29,rhfy
0.278,sTimeCV
0.111,round
-0.296,sLengthCV
-0.323,rhfz
-0.397,lhfz

Weight?,Feature
5.195,lhfx
1.63,vel
0.213,sTimeCV
0.11,sTime
-0.32,round
-0.449,rhfy
-0.507,rhfz
-0.822,sLengthCV
-0.971,lhfz
-1.636,lhfy


## Decision tree

In [223]:
clf= DecisionTreeClassifier()

clf.fit(XNN, y)

y_hat = clf.predict(XNN)
print (classification_report(y_hat, y))
print (confusion_matrix(y_hat, y))
print('ACC=', accuracy_score(y, y_hat))
expl = eli5.explain_weights(clf, feature_names=fnames, rotate=True, filled=True, leaves_parallel=True, proportion=True)

show_html_expl(expl)

             precision    recall  f1-score   support

        1.0       1.00      1.00      1.00        30
        2.0       1.00      1.00      1.00        58
        3.0       1.00      1.00      1.00        18
        4.0       1.00      1.00      1.00        25
        5.0       1.00      1.00      1.00        22

avg / total       1.00      1.00      1.00       153

[[30  0  0  0  0]
 [ 0 58  0  0  0]
 [ 0  0 18  0  0]
 [ 0  0  0 25  0]
 [ 0  0  0  0 22]]
ACC= 1.0


Weight,Feature
0.464,rhfx
0.1877,rhfz
0.1139,lhfz
0.0451,sLengthCV
0.0429,vel
0.039,lhfx
0.0201,lhfy
0.0165,rhfy
0.0164,sTime
0.0163,round


# 6 strides 3 clusters

In [224]:
yv = data[:, -ncl:]
print(yv.shape)
nlb = 4
y = yv[:,nlb].reshape(yv.shape[0])


(153, 6)


## Random Forest

In [225]:
skf = StratifiedKFold(n_splits=n_splits)
skf.get_n_splits(X, y)

lacc = []
for train_index, test_index in skf.split(X, y):
    clf= classifier(num=0, rfnest=1000, rfdep=8)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    clf.fit(X_train, y_train)
    prediction = clf.predict(X_test)
    acc = accuracy_score(y_test, prediction)
    lacc.append(acc)

print('ACC mean=', np.mean(lacc))

ACC mean= 0.7940705128205129


In [226]:
clf= classifier(num=0, rfnest=1000, rfdep=8)

clf.fit(X, y)

y_hat = clf.predict(X)
print (classification_report(y_hat, y))
print (confusion_matrix(y_hat, y))
print('ACC=', accuracy_score(y, y_hat))

             precision    recall  f1-score   support

        1.0       1.00      1.00      1.00        49
        2.0       1.00      1.00      1.00        58
        3.0       1.00      1.00      1.00        46

avg / total       1.00      1.00      1.00       153

[[49  0  0]
 [ 0 58  0]
 [ 0  0 46]]
ACC= 1.0


In [227]:
expl = eli5.explain_weights(clf, feature_names=fnames)

show_html_expl(expl)

Weight,Feature
0.4063  ± 0.1009,rhfx
0.1855  ± 0.2307,rhfz
0.1414  ± 0.2341,lhfz
0.0710  ± 0.1041,sTime
0.0431  ± 0.0940,sTimeCV
0.0258  ± 0.0673,lhfx
0.0257  ± 0.0600,rhfy
0.0253  ± 0.0829,sLength
0.0252  ± 0.0796,vel
0.0221  ± 0.0668,lhfy


## SVM

In [228]:
skf = StratifiedKFold(n_splits=n_splits)
skf.get_n_splits(X, y)

lacc = []
for train_index, test_index in skf.split(X, y):
    clf= classifier(num=2, SVMC=0.1)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    clf.fit(X_train, y_train)
    prediction = clf.predict(X_test)
    acc = accuracy_score(y_test, prediction)
    lacc.append(acc)

print('ACC mean=', np.mean(lacc))

ACC mean= 0.753191391941392


In [229]:
clf= classifier(num=2, SVMC=0.1)

clf.fit(X, y)

y_hat = clf.predict(X)
print (classification_report(y_hat, y))
print (confusion_matrix(y_hat, y))
print('ACC=', accuracy_score(y, y_hat))

             precision    recall  f1-score   support

        1.0       0.47      0.74      0.58        31
        2.0       0.98      0.85      0.91        67
        3.0       0.85      0.71      0.77        55

avg / total       0.83      0.78      0.79       153

[[23  1  7]
 [10 57  0]
 [16  0 39]]
ACC= 0.7777777777777778


In [230]:
diff_prediction(y, y_hat)

Error instance 22, True=3, Pred=1
Error instance 27, True=3, Pred=1
Error instance 28, True=1, Pred=2
Error instance 29, True=1, Pred=2
Error instance 30, True=1, Pred=2
Error instance 31, True=1, Pred=2
Error instance 37, True=2, Pred=1
Error instance 39, True=1, Pred=2
Error instance 54, True=1, Pred=3
Error instance 55, True=1, Pred=3
Error instance 56, True=1, Pred=3
Error instance 57, True=1, Pred=3
Error instance 70, True=3, Pred=1
Error instance 71, True=3, Pred=1
Error instance 72, True=3, Pred=1
Error instance 73, True=3, Pred=1
Error instance 74, True=1, Pred=3
Error instance 75, True=1, Pred=3
Error instance 76, True=1, Pred=3
Error instance 77, True=1, Pred=3
Error instance 90, True=1, Pred=3
Error instance 91, True=1, Pred=3
Error instance 92, True=1, Pred=3
Error instance 110, True=3, Pred=1
Error instance 114, True=1, Pred=3
Error instance 115, True=1, Pred=3
Error instance 116, True=1, Pred=3
Error instance 117, True=1, Pred=3
Error instance 118, True=1, Pred=3
Error in

In [231]:
expl = eli5.explain_weights(clf, feature_names=fnames)

show_html_expl(expl)

Weight?,Feature,Unnamed: 2_level_0
Weight?,Feature,Unnamed: 2_level_1
Weight?,Feature,Unnamed: 2_level_2
+0.541,lhfy,
+0.391,lhfx,
+0.333,sTime,
+0.205,rhfy,
+0.147,round,
+0.144,vel,
+0.069,sLengthCV,
+0.054,cadence,
+0.032,sTimeCV,
+0.003,sLength,

Weight?,Feature
0.541,lhfy
0.391,lhfx
0.333,sTime
0.205,rhfy
0.147,round
0.144,vel
0.069,sLengthCV
0.054,cadence
0.032,sTimeCV
0.003,sLength

Weight?,Feature
1.862,rhfx
0.351,rhfz
0.267,lhfz
0.073,sTimeCV
0.011,sLengthCV
-0.022,round
-0.07,sTime
-0.151,sLength
-0.427,rhfy
-0.461,vel

Weight?,Feature
0.554,rhfz
0.359,lhfz
0.288,vel
0.155,sLength
0.13,cadence
0.049,rhfy
-0.138,round
-0.139,sTimeCV
-0.236,lhfy
-0.276,sLengthCV


## Decision tree

In [232]:
clf= DecisionTreeClassifier()

clf.fit(XNN, y)

y_hat = clf.predict(XNN)
print (classification_report(y_hat, y))
print (confusion_matrix(y_hat, y))
print('ACC=', accuracy_score(y, y_hat))
expl = eli5.explain_weights(clf, feature_names=fnames, rotate=True, filled=True, leaves_parallel=True, proportion=True)

show_html_expl(expl)

             precision    recall  f1-score   support

        1.0       1.00      1.00      1.00        49
        2.0       1.00      1.00      1.00        58
        3.0       1.00      1.00      1.00        46

avg / total       1.00      1.00      1.00       153

[[49  0  0]
 [ 0 58  0]
 [ 0  0 46]]
ACC= 1.0


Weight,Feature
0.4221,rhfx
0.1616,rhfz
0.1406,sTime
0.0938,lhfz
0.056,lhfx
0.0333,sTimeCV
0.0322,lhfy
0.0277,cadence
0.0179,vel
0.0148,rhfy


# 6 strides 4 clusters

In [233]:
yv = data[:, -ncl:]
print(yv.shape)
nlb = 5
y = yv[:,nlb].reshape(yv.shape[0])


(153, 6)


## Random Forest

In [234]:
skf = StratifiedKFold(n_splits=n_splits)
skf.get_n_splits(X, y)

lacc = []
for train_index, test_index in skf.split(X, y):
    clf= classifier(num=0, rfnest=1000, rfdep=8)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    clf.fit(X_train, y_train)
    prediction = clf.predict(X_test)
    acc = accuracy_score(y_test, prediction)
    lacc.append(acc)

print('ACC mean=', np.mean(lacc))

ACC mean= 0.7968872549019608


In [235]:
clf= classifier(num=0, rfnest=1000, rfdep=8)

clf.fit(X, y)

y_hat = clf.predict(X)
print (classification_report(y_hat, y))
print (confusion_matrix(y_hat, y))
print('ACC=', accuracy_score(y, y_hat))

             precision    recall  f1-score   support

        1.0       1.00      1.00      1.00        32
        2.0       1.00      1.00      1.00        41
        3.0       1.00      1.00      1.00        23
        4.0       1.00      1.00      1.00        57

avg / total       1.00      1.00      1.00       153

[[32  0  0  0]
 [ 0 41  0  0]
 [ 0  0 23  0]
 [ 0  0  0 57]]
ACC= 1.0


In [236]:
expl = eli5.explain_weights(clf, feature_names=fnames)

show_html_expl(expl)

Weight,Feature
0.3346  ± 0.1115,rhfx
0.2345  ± 0.2273,rhfz
0.0920  ± 0.1924,lhfz
0.0902  ± 0.1586,sTimeCV
0.0668  ± 0.1126,cadence
0.0343  ± 0.0840,sLengthCV
0.0319  ± 0.0754,rhfy
0.0314  ± 0.0694,lhfx
0.0257  ± 0.0648,sLength
0.0216  ± 0.0537,sTime


## SVM

In [237]:
skf = StratifiedKFold(n_splits=n_splits)
skf.get_n_splits(X, y)

lacc = []
for train_index, test_index in skf.split(X, y):
    clf= classifier(num=2, SVMC=50)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    clf.fit(X_train, y_train)
    prediction = clf.predict(X_test)
    acc = accuracy_score(y_test, prediction)
    lacc.append(acc)

print('ACC mean=', np.mean(lacc))

ACC mean= 0.7035142390289449


In [238]:
clf= classifier(num=2, SVMC=50)

clf.fit(X, y)

y_hat = clf.predict(X)
print (classification_report(y_hat, y))
print (confusion_matrix(y_hat, y))
print('ACC=', accuracy_score(y, y_hat))

             precision    recall  f1-score   support

        1.0       0.69      0.76      0.72        29
        2.0       0.85      0.74      0.80        47
        3.0       0.61      0.78      0.68        18
        4.0       0.95      0.92      0.93        59

avg / total       0.83      0.82      0.82       153

[[22  5  2  0]
 [ 9 35  3  0]
 [ 0  1 14  3]
 [ 1  0  4 54]]
ACC= 0.8169934640522876


In [239]:
diff_prediction(y, y_hat)

Error instance 2, True=1, Pred=4
Error instance 27, True=1, Pred=2
Error instance 30, True=3, Pred=4
Error instance 32, True=3, Pred=4
Error instance 36, True=4, Pred=3
Error instance 37, True=4, Pred=3
Error instance 38, True=4, Pred=3
Error instance 39, True=3, Pred=4
Error instance 48, True=2, Pred=3
Error instance 49, True=3, Pred=2
Error instance 70, True=1, Pred=2
Error instance 71, True=1, Pred=2
Error instance 72, True=1, Pred=2
Error instance 73, True=1, Pred=2
Error instance 74, True=3, Pred=1
Error instance 75, True=3, Pred=2
Error instance 76, True=3, Pred=1
Error instance 77, True=3, Pred=2
Error instance 84, True=2, Pred=1
Error instance 90, True=2, Pred=1
Error instance 91, True=2, Pred=1
Error instance 92, True=2, Pred=1
Error instance 93, True=2, Pred=1
Error instance 127, True=1, Pred=2
Error instance 130, True=1, Pred=2
Error instance 137, True=1, Pred=2
Error instance 139, True=1, Pred=2
Error instance 149, True=3, Pred=4


In [240]:
expl = eli5.explain_weights(clf, feature_names=fnames)

show_html_expl(expl)

Weight?,Feature,Unnamed: 2_level_0,Unnamed: 3_level_0
Weight?,Feature,Unnamed: 2_level_1,Unnamed: 3_level_1
Weight?,Feature,Unnamed: 2_level_2,Unnamed: 3_level_2
Weight?,Feature,Unnamed: 2_level_3,Unnamed: 3_level_3
+1.941,lhfz,,
+0.747,rhfz,,
+0.371,vel,,
+0.140,sTimeCV,,
+0.089,cadence,,
-0.023,sLength,,
-0.058,lhfx,,
-0.171,lhfy,,
-0.294,round,,
-1.141,sLengthCV,,

Weight?,Feature
1.941,lhfz
0.747,rhfz
0.371,vel
0.14,sTimeCV
0.089,cadence
-0.023,sLength
-0.058,lhfx
-0.171,lhfy
-0.294,round
-1.141,sLengthCV

Weight?,Feature
6.468,sLength
4.924,cadence
1.137,sTime
0.647,sLengthCV
0.116,lhfx
-0.069,round
-0.102,lhfz
-0.154,sTimeCV
-0.564,rhfy
-1.04,lhfy

Weight?,Feature
14.749,vel
4.53,sLengthCV
0.645,rhfx
0.36,lhfy
0.049,rhfz
-0.107,rhfy
-0.259,round
-0.347,lhfx
-0.478,sTime
-1.265,sTimeCV

Weight?,Feature
6.112,rhfx
5.919,sLength
2.402,rhfz
1.666,sTimeCV
0.688,lhfz
0.448,round
-0.008,sTime
-0.113,cadence
-1.308,lhfx
-1.846,lhfy


## Decision Tree

In [241]:
clf= DecisionTreeClassifier()

clf.fit(XNN, y)

y_hat = clf.predict(XNN)
print (classification_report(y_hat, y))
print (confusion_matrix(y_hat, y))
print('ACC=', accuracy_score(y, y_hat))
expl = eli5.explain_weights(clf, feature_names=fnames, rotate=True, filled=True, leaves_parallel=True, proportion=True)

show_html_expl(expl)

             precision    recall  f1-score   support

        1.0       1.00      1.00      1.00        32
        2.0       1.00      1.00      1.00        41
        3.0       1.00      1.00      1.00        23
        4.0       1.00      1.00      1.00        57

avg / total       1.00      1.00      1.00       153

[[32  0  0  0]
 [ 0 41  0  0]
 [ 0  0 23  0]
 [ 0  0  0 57]]
ACC= 1.0


Weight,Feature
0.4183,rhfx
0.163,rhfz
0.1491,lhfz
0.0803,sLength
0.0647,lhfx
0.0396,sTimeCV
0.0311,cadence
0.0241,sTime
0.0163,rhfy
0.0136,lhfy
