In [1]:
import numpy as np
import pandas as pd
import scipy.io as sio
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [64]:
def get_feature(data, window_l, window_i):  # calculate features
    feature = np.zeros(((data.shape[1] - window_l)//window_i + 1, 25), dtype = float)
    for i in range(5):
        for j in range((data.shape[1] - window_l) // window_i):
            feature[j, 5 * i] = np.mean(data[i, window_i*j : window_i*j + window_l])
            feature[j, 5 * i + 1] = np.std(data[i, window_i*j : window_i*j + window_l])
            feature[j, 5 * i + 2] = np.var(data[i, window_i*j : window_i*j + window_l])
            feature[j, 5 * i + 3] = np.mean(abs(data[i, window_i*j : window_i*j + window_l] - np.mean(data[i, window_i*j : window_i*j + window_l])))
            feature[j, 5 * i + 4] = max(data[i, window_i*j : window_i*j + window_l]) - min(data[i, window_i*j : window_i*j + window_l])
    return feature

# train/test data spliter for time series data classification 
def timeseries_train_test_split(data, label, test_size):
    # time series data split
    # data: get_features. 2D list
    # label: 1,2,3
    # test_size: 0.2
    split_point = round(data.shape[0]*(1-test_size))
    train_data = data[:split_point, :]
    test_data = data[split_point:, :]
    train_label = []
    test_label = []
    for i in range(train_data.shape[0]):
        train_label.append(label)
    for i in range(test_data.shape[0]):
        test_label.append(label)
    return train_data, test_data, train_label, test_label

In [397]:
# load data (.mat)
data_sa = sio.loadmat(r'D:\code\data\3subjects_FSR-iFMG_dataset\zkj\100\up_stairs_100')['up_stairs_100']
data_sd = sio.loadmat(r'D:\code\data\3subjects_FSR-iFMG_dataset\zkj\100\down_stairs_100')['down_stairs_100']
data_walk = sio.loadmat(r'D:\code\data\3subjects_FSR-iFMG_dataset\zkj\100\walk_100')['walk_100']
# set window increase & window length
# 16FSR 8PT sample rate 83(17 25 33 42 50 58 66 75 83 91 100)
# iFMG sample rate 1200(240 360 480 600 720 840 960 1080 1200 1320 1440)
window_i = 240
window_l = 1440 # 200, 300, 400, 500, 600, 700, 800, 900, 1000

# 滑窗计算特征，获得数据集
f_walk = get_feature(data_walk.T, window_l, window_i)
f_stairA = get_feature(data_sa.T, window_l, window_i)
f_stairD = get_feature(data_sd.T, window_l, window_i)
# ======================================================================================================================================================
# 随机划分数据集时，获得总的数据集和label
'''
f_data = []
label = []
for i in range(f_walk.shape[0]):
    label.append(0)

for i in range(f_stairA.shape[0]):
    label.append(1)

for i in range(f_stairD.shape[0]):
    label.append(2)
    
f_data = np.concatenate((f_walk, f_stairA, f_stairD), axis = 0)

print("train set data shape: ", f_data.shape)
print("label length: ", len(label))
print(data_sa.shape[1])
# randomly devide data set
train_data_r, test_data_r, train_label, test_label = train_test_split(f_data, label, test_size = 0.3, random_state = None, stratify = label)
'''
# ======================================================================================================================================================
# 按照前80% 后20% 的比例划分训练集和测试集
walk_train, walk_test, walk_train_label, walk_test_label = timeseries_train_test_split(f_walk, 0, 0.2)
sa_train, sa_test, sa_train_label, sa_test_label = timeseries_train_test_split(f_stairA, 1, 0.2)
sd_train, sd_test, sd_train_label, sd_test_label = timeseries_train_test_split(f_stairD, 2, 0.2)

train_data_r = np.concatenate((walk_train, sa_train, sd_train), axis = 0)
test_data_r = np.concatenate((walk_test, sa_test, sd_test), axis = 0)
train_label = walk_train_label + sa_train_label + sd_train_label
test_label = walk_test_label + sa_test_label + sd_test_label
print("train set data shape: ", train_data_r.shape)
print("train label length: ", len(train_label))
print("test set data shape: ", test_data_r.shape)
print("test label length: ", len(test_label))

train set data shape:  (1502, 25)
train label length:  1502
test set data shape:  (376, 25)
test label length:  376


In [398]:
scaler = preprocessing.StandardScaler().fit(train_data_r)
train_data = scaler.transform(train_data_r)
test_data = scaler.transform(test_data_r)
'''
c = svm.SVC(C = 52, kernel = 'rbf', gamma = 0.1, decision_function_shape = 'ovr')
c.fit(train_data, train_label)
print("train acc:", c.score(train_data, train_label))
print('test acc:', c.score(test_data, test_label))
test_pred = c.predict(test_data)
print(confusion_matrix(test_label, test_pred))
'''
parameters = {'gamma':[0.0001, 0.001, 0.01, 0.1, 1, 10], 'C':np.linspace(1, 100, 100)}
gs = GridSearchCV(svm.SVC(), parameters, refit = True, cv = 5, verbose = 1, n_jobs = -1)
gs.fit(train_data, train_label)
print('best parameter: ', gs.best_params_)
print('best efficience:', gs.best_score_)
print('report: ', classification_report(test_label, gs.predict(test_data_r)))

Fitting 5 folds for each of 600 candidates, totalling 3000 fits
best parameter:  {'C': 47.0, 'gamma': 0.001}
best efficience: 0.9953333333333333
report:                precision    recall  f1-score   support

           0       0.00      0.00      0.00       182
           1       0.00      0.00      0.00        84
           2       0.29      1.00      0.45       110

    accuracy                           0.29       376
   macro avg       0.10      0.33      0.15       376
weighted avg       0.09      0.29      0.13       376



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [399]:
# change C & gamma before run this block!!!
c = svm.SVC(C = gs.best_params_['C'], kernel = 'rbf', gamma = gs.best_params_['gamma'], decision_function_shape = 'ovr')# 
c.fit(train_data, train_label)
print("train acc:", c.score(train_data, train_label))
print('test acc:', c.score(test_data, test_label))
test_pred = c.predict(test_data)
print(confusion_matrix(test_label, test_pred))

train acc: 1.0
test acc: 0.9946808510638298
[[181   0   1]
 [  0  83   1]
 [  0   0 110]]
