In [1]:
import pandas as pd
import numpy as np
from scipy.stats import kurtosis, skew
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from scipy.stats import mode
import matplotlib
import pickle

In [2]:
# Select matplotlib's "TkAgg" backend for any figures drawn later.
matplotlib.use("TkAgg")
# train_test_split is used by validate_data to hold out part of the data.
from sklearn.model_selection import train_test_split

In [3]:
# Window length: number of consecutive rows that make up one analysis window.
n = 100
# Step, in rows, between the start indices of consecutive windows.
STRIDE = 50

In [4]:
def getFeatures(data, fs):
    """Build the feature vector for one window of three-axis sensor data.

    Parameters
    ----------
    data : pandas.DataFrame
        One window of samples (rows). Its first three columns are read
        positionally through ``.iloc``.
    fs : float
        Factor multiplied with the index of the dominant FFT bin to obtain
        the reported peak frequency.

    Returns
    -------
    numpy.ndarray
        Concatenation, in this order, of: the column means, the six
        upper-triangular covariance entries (diagonal included), per-axis
        skewness, per-axis kurtosis, per-axis peak spectral magnitude and
        per-axis ``fs * peak_bin``.
    """
    axes = [data.iloc[:, k] for k in range(3)]

    means = np.mean(data, axis=0)

    # Upper triangle of the covariance matrix, walked row by row:
    # (0,0) (0,1) (0,2) (1,1) (1,2) (2,2).
    cov = np.cov(data.T)
    cov_terms = cov[[0, 0, 0, 1, 1, 2], [0, 1, 2, 1, 2, 2]]

    skewness = np.array([skew(axis) for axis in axes])
    kurt = np.array([kurtosis(axis) for axis in axes])

    peak_magnitude = np.zeros(3)
    peak_frequency = np.zeros(3)
    for k, axis in enumerate(axes):
        spectrum = abs(np.fft.fft(axis))
        # Keep only the first half of the magnitude spectrum.
        spectrum = spectrum[0:round(len(spectrum) / 2)]
        # Zero the DC bin so a constant offset cannot win the peak search.
        spectrum[0] = 0
        peak_bin = np.argmax(spectrum)
        peak_magnitude[k] = spectrum[peak_bin]
        peak_frequency[k] = fs * peak_bin

    return np.concatenate(
        (means, cov_terms, skewness, kurt, peak_magnitude, peak_frequency)
    )

In [5]:
def generate_features(df, stride=STRIDE):
    """Compute sliding-window features for one six-column sensor recording.

    Each window of ``n`` rows yields the ``getFeatures`` vector of columns
    0-2 followed by the ``getFeatures`` vector of columns 3-5.

    Parameters
    ----------
    df : pandas.DataFrame
        Recording with one sample per row; columns 0-5 are read positionally.
    stride : int, optional
        Step in rows between consecutive window starts.

    Returns
    -------
    pandas.DataFrame
        One row per window, indexed by the window's start row. When no
        window fits, an empty frame with the 42 feature columns is returned.

    Notes
    -----
    Window starts are ``range(0, len(df) - n, stride)``, so a window that
    would start exactly at ``len(df) - n`` is not produced. This matches
    ``get_detection_window`` so features and labels stay row-aligned.
    """
    fs = 1 / n
    starts = list(range(0, df.shape[0] - n, stride))
    if not starts:
        return pd.DataFrame(columns=[i for i in range(0, 42)])

    # Collect all rows first and build the frame once; assigning
    # df_out.loc[i] per window re-allocates the frame on every iteration.
    rows = [
        np.concatenate((
            getFeatures(df.iloc[start:start + n, 0:3], fs),
            getFeatures(df.iloc[start:start + n, 3:6], fs),
        ))
        for start in starts
    ]
    return pd.DataFrame(rows, index=starts)

In [6]:
# choose via MODE ## TBD ##
def get_detection_window(df, stride=STRIDE):
    """Reduce per-sample labels to one label per sliding window.

    The label of a window is the most frequent value of the first column
    inside that window (the smallest such value when several tie, since
    ``DataFrame.mode`` returns its modes sorted).

    Parameters
    ----------
    df : pandas.DataFrame
        Per-sample labels; only the first column is used.
    stride : int, optional
        Step in rows between consecutive window starts.

    Returns
    -------
    pandas.DataFrame
        Single column ``0`` with one label per window, indexed by the
        window's start row. Window starts are
        ``range(0, len(df) - n, stride)``, the same as in
        ``generate_features``.
    """
    starts = list(range(0, df.shape[0] - n, stride))
    # Build the column in one go instead of enlarging the frame per window.
    labels = [df.iloc[start:start + n, :].mode().iloc[0, 0] for start in starts]
    return pd.DataFrame({0: labels}, index=starts)

In [7]:
def extrapolate_predictions(predictions, size, stride=None):
    """Expand per-window predictions to a per-sample array of length ``size``.

    Every prediction is repeated ``stride`` times. If the expanded sequence
    is still shorter than ``size`` (the windows do not reach the end of the
    recording), the last prediction is repeated to fill the tail, so the
    result can be compared element-wise with ``size`` ground-truth samples.

    Parameters
    ----------
    predictions : iterable of numbers
        One prediction per window, in window order.
    size : int
        Number of samples in the original recording.
    stride : int, optional
        Repetitions per prediction. Defaults to the module-level ``STRIDE``.

    Returns
    -------
    numpy.ndarray
        Float array with ``size`` entries, or an empty array when there are
        no predictions.
    """
    if stride is None:
        stride = STRIDE
    expanded = np.repeat(np.fromiter(predictions, float), stride)
    missing = size - expanded.size
    if missing > 0 and expanded.size > 0:
        # Carry the last window's prediction over the uncovered tail.
        expanded = np.concatenate((expanded, np.full(missing, expanded[-1])))
    return expanded[0:size]

In [11]:
def read_test_data(session, stride=None):
    """Load the arm and wrist recordings of a test session as window features.

    Parameters
    ----------
    session : str
        Sub-directory of ``test_data`` holding ``armIMU.txt`` and
        ``wristIMU.txt`` (whitespace-separated, no header row).
    stride : int, optional
        Step in rows between consecutive windows. Defaults to the
        module-level ``STRIDE``, which is the spacing that
        ``extrapolate_predictions`` assumes when it expands window
        predictions back to per-sample predictions.

    Returns
    -------
    tuple
        ``(size, features)`` where ``size`` is the number of rows in the arm
        recording and ``features`` holds the arm feature columns followed by
        the wrist feature columns, one row per window.
    """
    if stride is None:
        stride = STRIDE
    # sep=r'\s+' is the documented replacement for the deprecated
    # delim_whitespace=True.
    df_data_arm = pd.read_csv('test_data/{}/armIMU.txt'.format(session), sep=r'\s+', header=None)
    print('debug_read_data_1')
    size = df_data_arm.shape[0]
    print('debug_read_data_2')
    arm_features = generate_features(df_data_arm, stride=stride)
    print('debug_read_data_3')
    df_data_wrist = pd.read_csv('test_data/{}/wristIMU.txt'.format(session), sep=r'\s+', header=None)
    print('debug_read_data_4')
    wrist_features = generate_features(df_data_wrist, stride=stride)
    print('debug_read_data_5')
    print('debug_read_data_6')
    return size, pd.concat([arm_features, wrist_features], axis=1, sort=False, ignore_index=True)

In [9]:
def validate_data(x_val, y_val):
    """Train an SVC on 80% of the data, save it, and report hold-out accuracy.

    Parameters
    ----------
    x_val : array-like
        Feature rows, one per window.
    y_val : array-like
        One label per feature row; a single-column frame is accepted.

    Side effects
    ------------
    Writes the fitted classifier to ``finalized_model.sav`` with pickle and
    prints the accuracy obtained on the 20% hold-out split.
    """
    # scikit-learn expects 1-D targets; flatten a single-column frame here
    # instead of relying on its implicit conversion (which emits a warning).
    y_flat = np.asarray(y_val).ravel()
    x_train, x_test, y_train, y_test = train_test_split(
        x_val, y_flat, test_size=0.2, random_state=42
    )
    clf = SVC(gamma='auto')
    clf.fit(x_train, y_train)

    # Save the model; the context manager guarantees the file is flushed
    # and closed before anything tries to read it back.
    filename = 'finalized_model.sav'
    with open(filename, 'wb') as model_file:
        pickle.dump(clf, model_file)

    predictions = clf.predict(x_test)
    score = accuracy_score(y_test, predictions)
    print('The Validation accuracy score of the model {}'.format(score))

In [None]:
# Alternative, larger session list (currently disabled):
# sessions = ['Session01', 'Session05', 'Session06', 'Session07', 'Session12', 'Session13']
sessions = ['Session01']

# Per-session frames are collected and concatenated once after the loop.
# DataFrame.append was removed in pandas 2.0, and growing a frame inside a
# loop copies all previously accumulated rows on every iteration.
feature_frames = []
label_frames = []

for session in sessions:
    # sep=r'\s+' is the documented replacement for the deprecated
    # delim_whitespace=True.
    df_data_arm = pd.read_csv('TrainingData/{}/armIMU.txt'.format(session), sep=r'\s+', header=None)
    arm_features = generate_features(df_data_arm)
    df_data_wrist = pd.read_csv('TrainingData/{}/wristIMU.txt'.format(session), sep=r'\s+', header=None)
    wrist_features = generate_features(df_data_wrist)
    # Arm columns first, then wrist columns, renumbered consecutively.
    features = pd.concat([arm_features, wrist_features], axis=1, sort=False, ignore_index=True)
    df_data_detection = pd.read_csv('TrainingData/{}/detection.txt'.format(session), header=None)
    feature_frames.append(features)
    label_frames.append(get_detection_window(df_data_detection))

x_train = pd.concat(feature_frames, ignore_index=True)
y_train = pd.concat(label_frames, ignore_index=True)

validate_data(x_train, y_train)


In [None]:
# Testing: score the saved model on each held-out session.
test_sessions = ['Session02', 'Session03', 'Session15', 'Session16']
filename = 'finalized_model.sav'
print("Load this model: {}".format(filename))
# NOTE: unpickling executes code embedded in the file; only load model files
# written by this notebook.
with open(filename, 'rb') as model_file:
    clf = pickle.load(model_file)
for session in test_sessions:
    print("Testing this session: {}".format(session))
    size, x_test = read_test_data(session)
    print("Debug 1")
    predictions = clf.predict(x_test)
    print("Debug 2")
    # extrapolate_predictions repeats every window prediction STRIDE times,
    # so the score is only meaningful when read_test_data windows the
    # recording with that same stride.
    predictions = extrapolate_predictions(predictions, size)
    print("Debug 3")
    # Ground truth for this session. y_test was previously never defined at
    # this scope, which raised NameError.
    # NOTE(review): assumes test_data/<session>/detection.txt exists with one
    # label per sample, mirroring the TrainingData layout - confirm.
    y_test = pd.read_csv('test_data/{}/detection.txt'.format(session), header=None).iloc[:, 0]
    # Score only the samples that actually received a prediction.
    y_test = y_test.iloc[:len(predictions)]
    score = accuracy_score(y_test, predictions)
    print("Debug 4")
    print('The accuracy score of the model {}'.format(score))

Load this model: finalized_model.sav
Testing this session: Session02
debug_read_data_1
debug_read_data_2
