In [71]:
import pandas as pd
import numpy as np
import math
from collections import Counter
import os
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.externals import joblib
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from scipy.stats import linregress
import datetime

In [2]:
sensor_data = pd.DataFrame.from_csv("../../data/database/sensor_data.csv")
sensor_data = sensor_data[~((sensor_data.TagName == 'Start1') | (sensor_data.TagName == 'Start2'))]
Counter(sensor_data.TagName)

Counter({'Tag0': 352461,
         'Tag1': 272234,
         'Tag2': 377420,
         'Tag3': 398370,
         'Tag4': 428642,
         'Tag5': 435086,
         'Tag6': 358288,
         'Tag7': 315266,
         'Tag8': 431390,
         'Tag9': 381880})

In [3]:
def min_max_normalization(feature, mini, maxi):
    if (maxi == mini):
        # all the values is same:
        return np.array([1] * len(feature))
    if type(feature) == list:
        feature = np.array(feature)
    feature[feature < mini] = mini
    feature[feature > maxi] = maxi

    feature = ((feature - mini) / (maxi - mini))
    return feature

if os.path.exists("../../Results/percentiles.txt") is False:
    
    percentile_df = pd.DataFrame.from_csv("../../Results/percentiles.txt")

    normalized_sensor_data = sensor_data.copy()
    for i in range(len(percentile_df)):
        cur = percentile_df.iloc[i].values
        sensor = cur[0]
        val = cur[1]
        p97 = cur[2]
        p03 = cur[3]
        normalized_sensor_data.loc[(sensor_data.SENSORTYPE == sensor),val] = min_max_normalization(sensor_data[(sensor_data.SENSORTYPE == sensor)][val].values, p03, p97)

    normalized_sensor_data.to_csv("../../data/database/normalized_sensor_data.csv")

In [4]:
sensor_data_acc = sensor_data[(sensor_data.SENSORTYPE == 1)]
# tag_id_groupby = sensor_data_acc_tag12.groupby(['TagName', 'tester_id'])
tag_id_groupby_acc = sensor_data_acc.groupby(['TagName', 'tester_id'])
tag_id_dict = tag_id_groupby_acc.groups
y = list(tag_id_dict.keys())
y = [i[0] for i in y]
y = np.array(y)

sensor_data_magnetic = sensor_data[(sensor_data.SENSORTYPE == 2)]
tag_id_groupby_magnetic = sensor_data_magnetic.groupby(['TagName', 'tester_id'])

sensor_data_orientation = sensor_data[(sensor_data.SENSORTYPE == 3)]
tag_id_groupby_orientation = sensor_data_orientation.groupby(['TagName', 'tester_id'])

sensor_data_gyro = sensor_data[(sensor_data.SENSORTYPE == 4)]
tag_id_groupby_gyro = sensor_data_gyro.groupby(['TagName', 'tester_id'])

sensor_data_gravity = sensor_data[(sensor_data.SENSORTYPE == 9)]
tag_id_groupby_gravity = sensor_data_gravity.groupby(['TagName', 'tester_id'])

sensor_data_quaternion = sensor_data[(sensor_data.SENSORTYPE == 11)]
tag_id_groupby_quaternion = sensor_data_quaternion.groupby(['TagName', 'tester_id'])

sensor_data_tilt = sensor_data[(sensor_data.SENSORTYPE == 26)]
tag_id_groupby_tilt = sensor_data_tilt.groupby(['TagName', 'tester_id'])

In [69]:
def get_slope(x, y, size):
    ratio = float(len(x)) / float(size)
    res1 = []
    res2 = []
    for i in range(size):
        res2.append(list(y[math.floor(i*ratio):math.ceil((i+1.0)*ratio)]))
        res1.append(list(x[math.floor(i*ratio):math.ceil((i+1.0)*ratio)]))
    slopes = []
    for i in range(len(res1)):
        if (len(set(list(res1[i]))) == 1):
            slopes.append(0)
        else:
            slopes.append(linregress(res1[i], res2[i]).slope)
    return np.array(slopes)

In [7]:
def shrink_array(array,size):
    
    ratio = float(len(array)) / float(size)
    res = []
    for i in range(size):
        res.append(np.mean(array[math.floor(i*ratio):math.ceil((i+1.0)*ratio)], axis = 0))
    return np.array(res)

In [85]:
# X = []

# for key in list(tag_id_dict.keys()):
#     gesture_feature = gesture_features(tag_id_groupby.get_group(key)[['VALUES1', 'VALUES2', 'VALUES3']].values).reshape(-1)
#     if np.all(~np.isnan(gesture_feature)):
#         X.append(gesture_feature)
#     else:
#         print(key)
# X = np.array(X)

X = []

for key in list(tag_id_dict.keys()):
#     t = pd.to_datetime(tag_id_groupby_acc.get_group(key)['TIMESTAMP']).values.astype(datetime.datetime)/100000
#     acc_1_slope = get_slope(tag_id_groupby_acc.get_group(key)['VALUES1'].values, t , 200)
#     acc_2_slope = get_slope(tag_id_groupby_acc.get_group(key)['VALUES2'].values, t , 200)
#     acc_3_slope = get_slope(tag_id_groupby_acc.get_group(key)['VALUES3'].values, t , 200)
    
#     acc_12_slope_dif = acc_1_slope - acc_2_slope
#     acc_23_slope_dif = acc_2_slope - acc_3_slope
#     acc_13_slope_dif = acc_1_slope - acc_3_slope
    
    acc_12_slope = get_slope(tag_id_groupby_acc.get_group(key)['VALUES1'].values, tag_id_groupby_acc.get_group(key)['VALUES2'].values , 200)
    acc_23_slope = get_slope(tag_id_groupby_acc.get_group(key)['VALUES2'].values, tag_id_groupby_acc.get_group(key)['VALUES3'].values , 200)
    acc_13_slope = get_slope(tag_id_groupby_acc.get_group(key)['VALUES3'].values, tag_id_groupby_acc.get_group(key)['VALUES3'].values , 200)
    acc_feature = shrink_array(tag_id_groupby_acc.get_group(key)[['VALUES1','VALUES2', 'VALUES3']].values, 200).reshape(-1)
    
    
#     gyro_feature = shrink_array(tag_id_groupby_gyro.get_group(key)[['VALUES1','VALUES2', 'VALUES3']].values, 200).reshape(-1)
#     orientation_feature = shrink_array(tag_id_groupby_orientation.get_group(key)[['VALUES1', 'VALUES2', 'VALUES3']].values, 200).reshape(-1)

#     quaternion_feature = shrink_array(tag_id_groupby_quaternion.get_group(key)[['VALUES1','VALUES2', 'VALUES3']].values, 200).reshape(-1)
#     magnetic_feature = shrink_array(tag_id_groupby_magnetic.get_group(key)[['VALUES1','VALUES2', 'VALUES3']].values, 200).reshape(-1)

#     if key in tag_id_groupby_tilt.groups:
#         tilt_feature = np.array([1])
#     else:
#         tilt_feature = np.array([0])

#     X.append(acc_feature)

    X.append(np.concatenate((acc_feature, acc_12_slope, acc_23_slope, acc_13_slope)))
X = np.array(X)

In [86]:
X.shape

(1070, 1200)

In [87]:
# idx_list = list(range(len(X)))
# np.random.shuffle(idx_list)
# train_idx = idx_list[:math.floor(len(X) * (1- test_percent))]
# test_idx = idx_list[math.floor(len(X) * (1- test_percent)):]

# # y[y == 'Tag2'] = 2
# # y[y == 'Tag1'] = 1

# train_x = X[train_idx]
# train_y = y[train_idx]
# test_x = X[test_idx]
# test_y = y[test_idx]

clf4 = SVC(kernel='poly', C=1, degree=3, verbose = True)
# clf4.fit(train_x, train_y) 
# joblib.dump(clf4, '../../Results/baseline SVC 0.80 raw data acc with gyro 200 chunk.pkl') 
# print(classification_report(test_y, clf4.predict(test_x)))
res = cross_val_score(clf4, X, y, cv=5)
print(res)
print(np.mean(res))

[LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][ 0.78181818  0.85        0.68571429  0.84761905  0.68571429]
0.770173160173
