In [76]:
# description of this dataset http://groupware.les.inf.puc-rio.br/har#ixzz2PyRdbAfA
from sklearn import datasets
from sklearn import preprocessing as pp
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix
from sklearn.metrics import recall_score
import numpy
import csv
import pandas as pd
import time

# Wall-clock timestamp taken before any data is read.
initial = time.time()

# Load the full recording: semicolon-separated values, no header row,
# so pandas assigns integer column labels.
data = pd.read_csv("./adriel.csv", sep=";", header=None)


In [77]:
# Sanity check: (rows, columns) of the raw frame that was just loaded.
data.shape

(36030, 18)

In [78]:
# Scale the raw readings before windowing; X is what segment_signal consumes.
# NOTE(review): presumably pp.normalize scales each row (one sample across all
# columns) to unit norm by default rather than each column — confirm that
# per-row scaling is the intended preprocessing here.
X = pp.normalize(data)

In [79]:
def segment_signal(data, window_size):
    """Cut a 2-D signal into consecutive, non-overlapping windows.

    Parameters
    ----------
    data : array-like, shape (N, dim)
        One row per sample, one column per channel.
    window_size : int
        Number of consecutive rows that make up one segment.

    Returns
    -------
    numpy.ndarray, shape (N // window_size, window_size, dim), dtype float64
        Segment ``i`` holds rows ``i * window_size`` up to (but excluding)
        ``(i + 1) * window_size``. Trailing rows that do not fill a complete
        window are dropped. The result never aliases ``data``.

    Raises
    ------
    ZeroDivisionError
        If ``window_size`` is 0.
    """
    # Private float64 copy: accepts lists / DataFrames as well as ndarrays and
    # guarantees the returned segments do not share memory with the input.
    samples = numpy.array(data, dtype=float)
    N = samples.shape[0]
    dim = samples.shape[1]
    # Floor division keeps K an integer on Python 3 as well as Python 2
    # (plain "/" yields a float there, which numpy.empty/range reject).
    K = N // window_size
    # Dropping the incomplete tail and reshaping is equivalent to copying
    # each window in a loop, because rows are stored consecutively.
    return samples[:K * window_size].reshape(K, window_size, dim)

In [80]:
# Number of consecutive readings grouped into one segment.
window_size = 12

# NOTE(review): the previous inline note read "40 * 50ms = 2000ms", which does
# not match window_size = 12; the sampling period is not visible in this
# notebook — confirm the intended window duration.
segs = segment_signal(X, window_size) # one segment = window_size consecutive rows of X
segs.shape

(3002, 12, 18)

In [81]:
def extract_diff_2(raw):
    """Build first-difference features for every segment.

    For each segment, every reading row is subtracted from the row that
    follows it, and the resulting differences are flattened row by row.

    Parameters
    ----------
    raw : array-like, shape (N, I, J)
        N segments, each holding I consecutive reading rows of J values
        (for the segments built in this notebook: I = 12 rows, J = 18 values).

    Returns
    -------
    numpy.ndarray, shape (N, (I - 1) * J), dtype float64
        ``features[n, (i - 1) * J + j] == raw[n, i, j] - raw[n, i - 1, j]``
        for ``1 <= i < I``. Segments with fewer than two rows yield zero
        feature columns.
    """
    segments = numpy.asarray(raw, dtype=float)
    N = segments.shape[0]  # number of segments
    I = segments.shape[1]  # reading rows per segment (the window size)
    J = segments.shape[2]  # values per reading row (the channels)
    # Guard against I == 0, where (I - 1) * J would be a negative dimension.
    feature_num = max(I - 1, 0) * J
    # Row i minus row i-1 for all segments and channels in one vectorised step.
    diffs = segments[:, 1:, :] - segments[:, :-1, :]
    # C-order flattening gives the same (row, value) column order as nested loops.
    return diffs.reshape(N, feature_num)

In [82]:
# One feature row per segment: differences between consecutive reading rows.
features = extract_diff_2(segs)

In [83]:
# Expect (number of segments, (window_size - 1) * number of columns).
features.shape

(3002, 198)

In [84]:
# Encoder for the action names.
# NOTE(review): `le` is not used by any cell visible here; the numeric labels
# below are generated positionally (0, 1, 2, ...). Presumably LabelEncoder
# numbers classes in sorted order rather than in the order listed — verify
# before using it to translate predictions back to action names.
le = pp.LabelEncoder() 
le.fit(['wavehands', 'busdriver', 'frontback', 'sidestep',  # currently 8 actions
        'turnclap', 'squatturnclap', 'window', 'window360'])

LabelEncoder()

In [48]:
# Creating label
# NOTE(review): this cell's recorded output, (800,), does not match the frame
# loaded above (36030 rows / 12 = 3002 segments), and `y` is reassigned further
# down when the per-set label arrays are joined — this cell looks like a
# leftover from a single-recording run.
# Floor division keeps the count an integer on Python 3 as well as Python 2.
targetNum = data.shape[0] // window_size
per_class = targetNum // 8

# Start from NaN so any segment left over when targetNum is not a multiple
# of 8 is visibly unlabelled instead of holding uninitialised memory.
y = numpy.full(targetNum, numpy.nan)
# Equal-sized consecutive runs: class 0 first, then class 1, ... up to class 7.
y[:8 * per_class] = numpy.repeat(numpy.arange(8), per_class)

y.shape

(800,)

In [117]:
def generate_label(overall_size, class_size, segment_size):
    """Create per-segment integer class labels for one recording.

    The recording is assumed to consist of ``class_size`` equally long,
    consecutive class blocks; every complete segment inside a block receives
    that block's index as its label.

    Parameters
    ----------
    overall_size : int
        Number of raw rows in the recording.
    class_size : int
        Number of classes in the recording.
    segment_size : int
        Number of raw rows per segment.

    Returns
    -------
    numpy.ndarray, shape (overall_size // segment_size,), dtype float64
        ``(overall_size // class_size) // segment_size`` labels for class 0,
        then the same number for class 1, and so on. If those runs do not
        fill the array, the remaining entries are NaN.

    Raises
    ------
    ZeroDivisionError
        If ``class_size`` or ``segment_size`` is 0.
    """
    # Floor division keeps both counts integers on Python 3 as well as Python 2.
    n_labels = overall_size // segment_size
    per_class = (overall_size // class_size) // segment_size
    # NaN marks any unlabelled tail instead of leaving uninitialised memory.
    y = numpy.full(n_labels, numpy.nan)
    y[:class_size * per_class] = numpy.repeat(numpy.arange(class_size), per_class)
    return y

In [132]:
# Label arrays for the four training sets stored back to back in the data file.
# The row counts add up to the 36030 rows loaded above (2 * 9608 + 2 * 8407).
# print(...) with a single argument prints the same text on Python 2 and 3.
ts1 = generate_label(overall_size=9608, class_size=8, segment_size=12)  # training set 1: 8 classes
print(ts1.shape)
ts2 = generate_label(overall_size=9608, class_size=8, segment_size=12)  # training set 2: 8 classes
print(ts2.shape)
ts3 = generate_label(overall_size=8407, class_size=7, segment_size=12)  # training set 3: 7 classes
print(ts3.shape)
ts4 = generate_label(overall_size=8407, class_size=7, segment_size=12)  # training set 4: 7 classes
print(ts4.shape)

(800,)
(800,)
(700,)
(700,)


In [143]:
# Join the per-set label arrays in the same order as the sets appear in the data.
temp1 = numpy.concatenate((ts1, ts2))  # both 8-class sets
temp2 = numpy.concatenate((ts3, ts4))  # both 7-class sets
y = numpy.concatenate((temp1, temp2))
y.shape

(3000,)

In [144]:
# Pad the label vector with two extra entries so it has one label per segment
# (3000 generated labels vs. 3002 feature rows).
# Built from temp1/temp2 rather than from `y` itself so that re-running this
# cell always yields the same length instead of growing `y` by two each time.
# NOTE(review): the two padding labels are class 1, although the leftover
# segments come from the end of the data — confirm this label is intended.
add1 = numpy.array([1])
y = numpy.concatenate((temp1, temp2, add1, add1))
y.shape

(3002,)

In [145]:
# Sizes of the two hidden layers of the MLP.
layer_1_val = 15
layer_2_val = 10
################################################################
#### 10-fold cross-validation of the MLP (15 neurons in the first hidden layer)
# random_state pins the shuffled fold assignment, so restarting the kernel and
# re-running reproduces the same splits and therefore the same scores.
kfold = KFold(n_splits=10, shuffle=True, random_state=1)
for fold_index, (train, test) in enumerate(kfold.split(features)):
    # A fresh classifier per fold, trained only on that fold's training rows.
    clf = MLPClassifier(solver='lbfgs', alpha=1e-5,
                        hidden_layer_sizes=(layer_1_val, layer_2_val),
                        random_state=1).fit(features[train], y[train])
    predictions = clf.predict(features[test])
    accuracy = clf.score(features[test], y[test])
    # Rows are true classes, columns are predicted classes.
    cm = confusion_matrix(y[test], predictions)

    print('In the %i fold, the classification accuracy is %f' %(fold_index, accuracy))
    print('And the confusion matrix is: ')
    print(cm)
#############################################################
##### Choose by uncommenting on either one
#############################################################
# NOTE(review): SVC is not among the imports at the top of this notebook;
# add `from sklearn.svm import SVC` before enabling the block below.
# kfold = KFold(n_splits=10, shuffle=True, random_state=1)

# fold_index = 0
# for train, test in kfold.split(features):
#     svm = SVC(kernel = 'linear', C = 50).fit(features[train], y[train])
#     svm_predictions = svm.predict(features[test])
#     recall = recall_score(y[test], svm_predictions, average='macro') # 
#     accuracy = svm.score(features[test], y[test])
#     cm = confusion_matrix(y[test], svm_predictions)

#     print('In the %i fold, the classification accuracy is %f and the recall is %f' %(fold_index, accuracy, recall))
#     print('And the confusion matrix is: ')
#     print(cm)
#     fold_index += 1
################################################################

In the 0 fold, the classification accuracy is 0.744186
And the confusion matrix is: 
[[34  1  1  0  1  1  0  0]
 [ 3 33  2  0  1  0  0  0]
 [ 0  2 23  7 12  1  0  0]
 [ 0  0  4 31  0  1  0  0]
 [ 0  1  2  2 27  9  0  0]
 [ 1  1  0  1 16 17  0  0]
 [ 0  0  0  0  0  1 46  1]
 [ 0  0  0  0  0  0  5 13]]
In the 1 fold, the classification accuracy is 0.651163
And the confusion matrix is: 
[[30  2  0  0  0  0  0  0]
 [ 1 33  0  1  0  0  0  0]
 [ 0  1 20 19  3  9  0  0]
 [ 0  1  9 26  1  2  0  0]
 [ 0  0  3  2 14 17  0  0]
 [ 0  0  6  1 15 25  0  0]
 [ 1  0  0  1  1  0 32  3]
 [ 0  0  0  0  2  0  4 16]]
In the 2 fold, the classification accuracy is 0.723333
And the confusion matrix is: 
[[42  2  1  0  0  0  0  0]
 [ 1 42  0  0  0  0  0  0]
 [ 0  1 18  9  3  2  0  0]
 [ 0  0  9 32  1  3  0  0]
 [ 0  1  1  1 16 17  0  0]
 [ 0  0  6  0 15 20  0  1]
 [ 2  0  0  0  0  0 27  2]
 [ 1  0  0  0  0  0  4 20]]
In the 3 fold, the classification accuracy is 0.676667
And the confusion matrix is: 
[[39  2  

In [15]:
# Preview only the first rows; displaying the whole frame bloats the notebook.
data.head(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
0,6812.0,-3396.0,-7148.0,4962.0,4813.0,-20029.0,3972.0,5640.0,-8084.0,482.0,14328.0,15665.0,-16716.0,5220.0,-2980.0,-2333.0,2612.0,2444.0
1,5904.0,540.0,-3728.0,13788.0,1977.0,-21460.0,2832.0,2320.0,-4424.0,-7742.0,10958.0,19337.0,-16748.0,3696.0,-4216.0,-4708.0,2480.0,629.0
2,7760.0,7284.0,-3028.0,7359.0,-5367.0,-20499.0,4088.0,-4952.0,-684.0,-7002.0,7714.0,24900.0,-15184.0,2860.0,-4968.0,-3825.0,1419.0,-1042.0
3,11572.0,10316.0,-2072.0,7043.0,-4187.0,-18170.0,1328.0,-9364.0,-760.0,-9859.0,5732.0,29699.0,-13888.0,5144.0,-5600.0,889.0,1253.0,-428.0
4,13132.0,11772.0,-6128.0,6702.0,-14349.0,-12547.0,7088.0,-19652.0,2212.0,-4795.0,3598.0,26370.0,-16116.0,1652.0,-5192.0,6297.0,-2564.0,1486.0
5,22792.0,13648.0,-11584.0,8512.0,-15276.0,-6565.0,13456.0,-29732.0,-684.0,-10593.0,-3299.0,14846.0,-16224.0,3860.0,-3020.0,5528.0,-2782.0,-1001.0
6,29100.0,15404.0,-18512.0,6401.0,-9049.0,-1986.0,24084.0,-32768.0,-1812.0,-1193.0,-1216.0,1080.0,-15176.0,3148.0,-2904.0,-129.0,-156.0,-1419.0
7,26800.0,18108.0,-20504.0,2073.0,5540.0,6006.0,21632.0,-32268.0,-1872.0,1941.0,3034.0,-12448.0,-13524.0,3752.0,-2008.0,-5331.0,4002.0,-709.0
8,18360.0,15132.0,-13308.0,-1426.0,14496.0,15019.0,12640.0,-24396.0,-1480.0,3862.0,2422.0,-23269.0,-15408.0,2924.0,-5964.0,-5446.0,3921.0,-516.0
9,10008.0,11172.0,-9080.0,-3244.0,16027.0,18943.0,4332.0,-16140.0,-1212.0,1716.0,-1336.0,-28757.0,-14812.0,4216.0,-5872.0,1505.0,291.0,765.0
