In [1]:
import numpy as np
import pandas as pd
import scipy.io as sio
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

(33475, 25)
33475


In [None]:

def data_load(a):   # load data of one persons' 7 motion. a = '\chx_'
    ra = 'ra'
    rd = 'rd'
    sa = 'sa'
    sd = 'sd'
    sit = 'sit'
    stand = 'stand'
    walk = 'walk'

    data_ra = sio.loadmat(r'D:\code\jupyter_file\data'+a+ra)[ra]
    data_rd = sio.loadmat(r'D:\code\jupyter_file\data'+a+rd)[rd]
    data_sa = sio.loadmat(r'D:\code\jupyter_file\data'+a+sa)[sa]
    data_sd = sio.loadmat(r'D:\code\jupyter_file\data'+a+sd)[sd]
    data_sit = sio.loadmat(r'D:\code\jupyter_file\data'+a+sit)[sit]
    data_stand = sio.loadmat(r'D:\code\jupyter_file\data'+a+stand)[stand]
    data_walk = sio.loadmat(r'D:\code\jupyter_file\data'+a+walk)[walk]
    return data_ra, data_rd, data_sa, data_sd, data_sit, data_stand, data_walk

def filt_bad_data(data):
    # count bad data number in 20-points window
    index = []
    for i in range(0,len(data) - 20, 20):
        dataWin = data[i:i+20, 0:5]
        if sum(sum(dataWin < 250)) < 2 and sum(sum(dataWin > 11000)) < 2:
            index.append(i)

    # process index[], connect data windows
    index.append(99999)# add an inf to make final value right
    a = index[0]
    b = index[0] + 20
    goodDataWinIndex = []
    for i in range(0, len(index) - 1):
        if b == index[i + 1]:
            b = index[i + 1] + 20
        else:
            goodDataWinIndex.append([a, b])
            a = index[i + 1]
            b = index[i + 1] + 20

    # filter the data with window length<80
    row = np.size(goodDataWinIndex, 0)
    col = np.size(goodDataWinIndex, 1)
    finalIndex = []
    for i in range(row):
        if goodDataWinIndex[i][1] - goodDataWinIndex[i][0] >= 80:# locomotion lasting time over 1s
            finalIndex.append(goodDataWinIndex[i])
    return finalIndex

def get_feature(data):  # calculate features
    window_i = 20
    window_l = 90
    feature = np.zeros(((data.shape[1] - window_l)//window_i + 1, 25), dtype = float)
    for i in range(5):
        for j in range((data.shape[1] - window_l) // window_i):
            feature[j, 5 * i] = np.mean(data[i, window_i*j : window_i*j + window_l])
            feature[j, 5 * i + 1] = np.std(data[i, window_i*j : window_i*j + window_l])
            feature[j, 5 * i + 2] = np.var(data[i, window_i*j : window_i*j + window_l])
            feature[j, 5 * i + 3] = np.mean(abs(data[i, window_i*j : window_i*j + window_l] - np.mean(data[i, window_i*j : window_i*j + window_l])))
            feature[j, 5 * i + 4] = max(data[i, window_i*j : window_i*j + window_l]) - min(data[i, window_i*j : window_i*j + window_l])
    return feature

# subject = ['chx_', 'hoy_', 'kl_', 'mm_', 'wyx_', 'yhl_', 'yjl_', 'yyj_', 'zjs_', 'zkj_', 'zs_']
train_subject = ['\chx_', '\hoy_', '\kl_', '\mm_', '\wyx_', '\yhl_', '\yjl_', '\yyj_', '\zjs_', '\zkj_']
data_ra = []
data_rd = []
data_sa = []
data_sd = []
data_sit = []
data_stand = []
data_walk = []

for i in train_subject:
    data = data_load(i)
    data_ra.extend(data[0])
    data_rd.extend(data[1])
    data_sa.extend(data[2])
    data_sd.extend(data[3])
    data_sit.extend(data[4])
    data_stand.extend(data[5])
    data_walk.extend(data[6])

data_ra = np.array(data_ra)
data_rd = np.array(data_rd)
data_sa = np.array(data_sa)
data_sd = np.array(data_sd)
data_sit = np.array(data_sit)
data_stand = np.array(data_stand)
data_walk = np.array(data_walk) # raw data, all data of the same motion

#print(data_ra.shape)

win_l = 90
win_i = 20
id_shift = 0

ra_index = filt_bad_data(data_ra)
rd_index = filt_bad_data(data_rd)
sa_index = filt_bad_data(data_sa)
sd_index = filt_bad_data(data_sd)
walk_index = filt_bad_data(data_walk)
stand_index = filt_bad_data(data_stand)
sit_index = filt_bad_data(data_sit)

data_id = []
d1 = []
d2 = []
d3 = []
d4 = []
d5 = []
y = []
for a in ra_index:
    for b in range((a[1] - a[0] - win_l + win_i) // win_i):
        y.append(0)
        data_id += [id_shift] * win_l
        d1 += list(data_ra[a[0] + win_i * b : a[0] + win_i * b + win_l, 0])
        d2 += list(data_ra[a[0] + win_i * b : a[0] + win_i * b + win_l, 1])
        d3 += list(data_ra[a[0] + win_i * b : a[0] + win_i * b + win_l, 2])
        d4 += list(data_ra[a[0] + win_i * b : a[0] + win_i * b + win_l, 3])
        d5 += list(data_ra[a[0] + win_i * b : a[0] + win_i * b + win_l, 4])        
        id_shift += 1
filt_ra = np.array([d1, d2, d3, d4, d5])

#print(len(data_id), len(d1))
data_id = []
d1 = []
d2 = []
d3 = []
d4 = []
d5 = []
y = []
for a in rd_index:
    for b in range((a[1] - a[0] - win_l + win_i) // win_i):
        y.append(1)
        data_id += [id_shift] * win_l
        d1 += list(data_rd[a[0] + win_i * b : a[0] + win_i * b + win_l, 0])
        d2 += list(data_rd[a[0] + win_i * b : a[0] + win_i * b + win_l, 1])
        d3 += list(data_rd[a[0] + win_i * b : a[0] + win_i * b + win_l, 2])
        d4 += list(data_rd[a[0] + win_i * b : a[0] + win_i * b + win_l, 3])
        d5 += list(data_rd[a[0] + win_i * b : a[0] + win_i * b + win_l, 4])        
        id_shift += 1
filt_rd = np.array([d1, d2, d3, d4, d5])
        
#print(len(data_id), len(d1))
data_id = []
d1 = []
d2 = []
d3 = []
d4 = []
d5 = []
y = []
for a in sa_index:
    for b in range((a[1] - a[0] - win_l + win_i) // win_i):
        y.append(2)
        data_id += [id_shift] * win_l
        d1 += list(data_sa[a[0] + win_i * b : a[0] + win_i * b + win_l, 0])
        d2 += list(data_sa[a[0] + win_i * b : a[0] + win_i * b + win_l, 1])
        d3 += list(data_sa[a[0] + win_i * b : a[0] + win_i * b + win_l, 2])
        d4 += list(data_sa[a[0] + win_i * b : a[0] + win_i * b + win_l, 3])
        d5 += list(data_sa[a[0] + win_i * b : a[0] + win_i * b + win_l, 4])        
        id_shift += 1
#print(len(data_id), len(d1))
filt_sa = np.array([d1, d2, d3, d4, d5])

data_id = []
d1 = []
d2 = []
d3 = []
d4 = []
d5 = []
y = []    
for a in sd_index:
    for b in range((a[1] - a[0] - win_l + win_i) // win_i):
        y.append(3)
        data_id += [id_shift] * win_l
        d1 += list(data_sd[a[0] + win_i * b : a[0] + win_i * b + win_l, 0])
        d2 += list(data_sd[a[0] + win_i * b : a[0] + win_i * b + win_l, 1])
        d3 += list(data_sd[a[0] + win_i * b : a[0] + win_i * b + win_l, 2])
        d4 += list(data_sd[a[0] + win_i * b : a[0] + win_i * b + win_l, 3])
        d5 += list(data_sd[a[0] + win_i * b : a[0] + win_i * b + win_l, 4])        
        id_shift += 1
#print(len(data_id), len(d1))
filt_sd = np.array([d1, d2, d3, d4, d5])

data_id = []
d1 = []
d2 = []
d3 = []
d4 = []
d5 = []
y = []        
for a in walk_index[0:100]:
    for b in range((a[1] - a[0] - win_l + win_i) // win_i):
        y.append(4)
        data_id += [id_shift] * win_l
        d1 += list(data_walk[a[0] + win_i * b : a[0] + win_i * b + win_l, 0])
        d2 += list(data_walk[a[0] + win_i * b : a[0] + win_i * b + win_l, 1])
        d3 += list(data_walk[a[0] + win_i * b : a[0] + win_i * b + win_l, 2])
        d4 += list(data_walk[a[0] + win_i * b : a[0] + win_i * b + win_l, 3])
        d5 += list(data_walk[a[0] + win_i * b : a[0] + win_i * b + win_l, 4])        
        id_shift += 1
#print(len(data_id), len(d1))
#print(id_shift)
filt_walk = np.array([d1, d2, d3, d4, d5])

data_id = []
d1 = []
d2 = []
d3 = []
d4 = []
d5 = []
y = []        
for a in stand_index[0:1]:
    for b in range((a[1] - a[0] - win_l + win_i) // win_i):
        y.append(5)
        data_id += [id_shift] * win_l
        d1 += list(data_stand[a[0] + win_i * b : a[0] + win_i * b + win_l, 0])
        d2 += list(data_stand[a[0] + win_i * b : a[0] + win_i * b + win_l, 1])
        d3 += list(data_stand[a[0] + win_i * b : a[0] + win_i * b + win_l, 2])
        d4 += list(data_stand[a[0] + win_i * b : a[0] + win_i * b + win_l, 3])
        d5 += list(data_stand[a[0] + win_i * b : a[0] + win_i * b + win_l, 4])        
        id_shift += 1
#print(len(data_id), len(d1))
#print(id_shift)
filt_stand = np.array([d1, d2, d3, d4, d5])

data_id = []
d1 = []
d2 = []
d3 = []
d4 = []
d5 = []
y = []        
for a in sit_index[0:1]:
    for b in range((a[1] - a[0] - win_l + win_i) // win_i):
        y.append(6)
        data_id += [id_shift] * win_l
        d1 += list(data_sit[a[0] + win_i * b : a[0] + win_i * b + win_l, 0])
        d2 += list(data_sit[a[0] + win_i * b : a[0] + win_i * b + win_l, 1])
        d3 += list(data_sit[a[0] + win_i * b : a[0] + win_i * b + win_l, 2])
        d4 += list(data_sit[a[0] + win_i * b : a[0] + win_i * b + win_l, 3])
        d5 += list(data_sit[a[0] + win_i * b : a[0] + win_i * b + win_l, 4])        
        id_shift += 1
filt_sit = np.array([d1, d2, d3, d4, d5])
        

f_sit = get_feature(filt_sit)
f_stand = get_feature(filt_stand)
f_walk = get_feature(filt_walk)
f_stairA = get_feature(filt_sa)
f_stairD = get_feature(filt_sd)
f_rampA = get_feature(filt_ra)
f_rampD = get_feature(filt_rd)

f_data = []
label = []
#    data = []

#    l_sit = np.zeros((f_sit.shape[0], 1))
#    l_stand = np.zeros((f_stand.shape[0], 1))
#    l_walk = np.zeros((f_walk.shape[0], 1))
#    l_stairA = np.zeros((f_stairA.shape[0], 1))
#    l_stairD = np.zeros((f_stairD.shape[0], 1))
#    l_rampA = np.zeros((f_rampA.shape[0], 1))
#    l_rampD = np.zeros((f_rampD.shape[0], 1))

for i in range(f_sit.shape[0]):
    label.append(0)

for i in range(f_stand.shape[0]):
    label.append(1)

for i in range(f_walk.shape[0]):
    label.append(2)

for i in range(f_stairA.shape[0]):
    label.append(3)

for i in range(f_stairD.shape[0]):
    label.append(4)

for i in range(f_rampA.shape[0]):
    label.append(5)

for i in range(f_rampD.shape[0]):
    label.append(6)

f_data = np.concatenate((f_sit, f_stand, f_walk, f_stairA, f_stairD, f_rampA, f_rampD), axis = 0)
print(f_data.shape)
print(len(label))

In [11]:
print(filt_sit.shape)

(5, 207720)


In [7]:


#    label = np.concatenate((l_sit, l_stand, l_walk, l_stairA, l_stairD, l_rampA, l_rampD), axis = 0)
#    data = np.concatenate((f_data, label), axis = 1)

train_data_r, test_data_r, train_label, test_label = train_test_split(f_data, label, test_size = 0.2, random_state = 0)
# train_data = preprocessing.normalize(train_data_r, norm = 'max')
# test_data = preprocessing.normalize(test_data_r, norm = 'max')
scaler = preprocessing.StandardScaler().fit(train_data_r)
train_data = scaler.transform(train_data_r)
test_data = scaler.transform(test_data_r)
'''
c = svm.SVC(C = 52, kernel = 'rbf', gamma = 0.1, decision_function_shape = 'ovr')
c.fit(train_data, train_label)
print("train acc:", c.score(train_data, train_label))
print('test acc:', c.score(test_data, test_label))
test_pred = c.predict(test_data)
print(confusion_matrix(test_label, test_pred))
'''
parameters = {'gamma':[0.0001, 0.001, 0.01, 0.1, 1, 10], 'C':np.linspace(1, 100, 100)}
gs = GridSearchCV(svm.SVC(), parameters, refit = True, cv = 5, verbose = 1, n_jobs = -1)
gs.fit(train_data, train_label)
print('best parameter: ', gs.best_params_)
print('best efficience:', gs.best_score_)
print('report: ', classification_report(test_label, gs.predict(test_data_r)))
#    knn = KNeighborsClassifier()
#    knn.fit(train_data, train_label)
#    print(knn.score(train_data, train_label))
#    print(knn.score(test_data, test_label))

#    test_pred = knn.predict(test_data)
#    name = ['sit', 'stand', 'walk', 'stairA', 'stairD', 'rampA', 'rampD']  
#    print(confusion_matrix(test_label, test_pred))

Fitting 5 folds for each of 600 candidates, totalling 3000 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed:  2.1min
[Parallel(n_jobs=-1)]: Done 176 tasks      | elapsed: 10.2min
[Parallel(n_jobs=-1)]: Done 426 tasks      | elapsed: 22.4min
[Parallel(n_jobs=-1)]: Done 776 tasks      | elapsed: 38.9min
[Parallel(n_jobs=-1)]: Done 1226 tasks      | elapsed: 59.3min
[Parallel(n_jobs=-1)]: Done 1776 tasks      | elapsed: 83.4min
[Parallel(n_jobs=-1)]: Done 2426 tasks      | elapsed: 111.4min
[Parallel(n_jobs=-1)]: Done 3000 out of 3000 | elapsed: 135.6min finished


best parameter:  {'C': 80.0, 'gamma': 0.1}
best efficience: 0.9947722180731889
report:                precision    recall  f1-score   support

           0       0.00      0.00      0.00      2055
           1       0.00      0.00      0.00       968
           2       0.33      0.00      0.00       561
           3       0.00      0.00      0.00       757
           4       0.00      0.00      0.00       842
           5       0.15      1.00      0.25       971
           6       0.00      0.00      0.00       541

    accuracy                           0.15      6695
   macro avg       0.07      0.14      0.04      6695
weighted avg       0.05      0.15      0.04      6695



  'precision', 'predicted', average, warn_for)


In [9]:
from sklearn import svm
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
train_data_r, test_data_r, train_label, test_label = train_test_split(f_data, label, test_size = 0.2, random_state = 0)
# train_data = preprocessing.normalize(train_data_r, norm = 'max')
# test_data = preprocessing.normalize(test_data_r, norm = 'max')
scaler = preprocessing.StandardScaler().fit(train_data_r)
train_data = scaler.transform(train_data_r)
test_data = scaler.transform(test_data_r)

In [10]:
from sklearn.metrics import confusion_matrix
c = svm.SVC(C = 80, kernel = 'rbf', gamma = 0.1, decision_function_shape = 'ovr')# 
c.fit(train_data, train_label)
print("train acc:", c.score(train_data, train_label))
print('test acc:', c.score(test_data, test_label))
test_pred = c.predict(test_data)
print(confusion_matrix(test_label, test_pred))

train acc: 0.9995519044062733
test acc: 0.9955190440627334
[[2055    0    0    0    0    0    0]
 [   0  967    0    0    0    1    0]
 [   0    0  558    0    0    1    2]
 [   0    0    0  752    3    2    0]
 [   0    0    3    4  834    1    0]
 [   0    0    0    0    0  968    3]
 [   0    0    1    0    1    8  531]]
