In [1]:
import pandas as pd
import numpy as np
import math
from collections import Counter
import os
import matplotlib.pyplot as plt
from scipy.stats import linregress
from sklearn.svm import SVC
from sklearn.externals import joblib
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler

In [2]:
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis



In [3]:
sensor_data = pd.DataFrame.from_csv("../../data/database/sensor_data.csv")
sensor_data = sensor_data[~((sensor_data.TagName == 'Start1') | (sensor_data.TagName == 'Start2'))]
Counter(sensor_data.TagName)


Counter({'Tag0': 352461,
         'Tag1': 272234,
         'Tag2': 377420,
         'Tag3': 398370,
         'Tag4': 428642,
         'Tag5': 435086,
         'Tag6': 358288,
         'Tag7': 315266,
         'Tag8': 431390,
         'Tag9': 381880})

In [4]:
linear_acc_feature = pd.DataFrame.from_csv("../../data/linear_accuracy_features_df.csv")

In [12]:
linear_acc_feature.head()

Unnamed: 0,unix_timestamp,TagName,tester_id,linear_distance_x,linear_distance_y,linear_distance_z,linear_velocity_x,linear_velocity_y,linear_velocity_z
0,1515398000.0,Tag0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1515398000.0,Tag0,0.0,4e-06,-4.668691e-08,-3e-06,0.00176,-1.9e-05,-0.001068
2,1515398000.0,Tag0,0.0,1.7e-05,4.726809e-08,-1.1e-05,0.003328,0.000103,-0.002288
3,1515398000.0,Tag0,0.0,4.3e-05,2.830433e-07,-2.9e-05,0.005259,-9.3e-05,-0.003878
4,1515398000.0,Tag0,0.0,7.3e-05,-1.04174e-06,-5.2e-05,0.006686,-0.000423,-0.005201


In [8]:
def min_max_normalization(feature, mini = None, maxi = None):
    if ((maxi is None) or (mini is None)):
        maxi = np.max(feature)
        mini = np.min(feature)
#         print(maxi)
#         print(mini)
    else:
        if (maxi == mini):
            # all the values is same:
            return np.array([1] * feature.size).reshape(feature.shape)
        if type(feature) == list:
            feature = np.array(feature)
        feature[feature < mini] = mini
        feature[feature > maxi] = maxi

    feature = ((feature - mini) / (maxi - mini))
        
    
    return feature


if os.path.exists("../../data/database/normalized_sensor_data.csv") is False:
    load_original_data()
    percentile_df = pd.DataFrame.from_csv("../../Results/percentiles_sensortype.txt")

    normalized_sensor_data = pd.DataFrame(columns=sensor_data.columns,index=sensor_data.index)
    normalized_sensor_data.loc[:,'SENSORTYPE'] = sensor_data['SENSORTYPE'].values
    normalized_sensor_data.loc[:,'TagName'] = sensor_data['TagName'].values
    normalized_sensor_data.loc[:,'tester_id'] = sensor_data['tester_id'].values
    normalized_sensor_data.loc[:,'TIMESTAMP'] = sensor_data['TIMESTAMP'].values
#     for i in range(len(percentile_df)):
#         cur = percentile_df.iloc[i].values
#         sensor = cur[0]
#         val = ['VALUES1','VALUES2', 'VALUES3']
#         p97 = cur[1]
#         p03 = cur[2]
#         normalized_sensor_data.loc[(sensor_data.SENSORTYPE == sensor), val] = min_max_normalization(sensor_data[(sensor_data.SENSORTYPE == sensor)][val].values, p03, p97)
#         print("processing: " + str(cur))
    for sensor in (percentile_df[' SENSORTYPE'].values):
        if sensor == 26:
            continue
        val = ['VALUES1','VALUES2', 'VALUES3']
        print("processing sensortype: " + str(sensor))

        data = sensor_data[sensor_data.SENSORTYPE == sensor][val].values.reshape(-1,1)
        scaler = StandardScaler()
        scaler.fit(data)
        normalized_sensor_data.loc[(sensor_data.SENSORTYPE == sensor), val] = scaler.transform(data).reshape(int(data.size / 3),3)

#     normalized_sensor_data.to_csv("../../data/database/normalized_sensor_data.csv")
#     del sensor_data
else:
    normalized_sensor_data = pd.DataFrame.from_csv("../../data/database/normalized_sensor_data.csv")

In [9]:
sensor_data_option = "Normalize"
if sensor_data_option == "Normalize":
    # tag_id_groupby = sensor_data_acc_tag12.groupby(['TagName', 'tester_id'])
    tag_id_groupby_acc = normalized_sensor_data[(normalized_sensor_data.SENSORTYPE == 1)].groupby(['TagName', 'tester_id'])

    tag_id_groupby_magnetic = normalized_sensor_data[(normalized_sensor_data.SENSORTYPE == 2)].groupby(['TagName', 'tester_id'])

    tag_id_groupby_orientation = normalized_sensor_data[(normalized_sensor_data.SENSORTYPE == 3)].groupby(['TagName', 'tester_id'])

    tag_id_groupby_gyro = normalized_sensor_data[(normalized_sensor_data.SENSORTYPE == 4)].groupby(['TagName', 'tester_id'])

    tag_id_groupby_gravity = normalized_sensor_data[(normalized_sensor_data.SENSORTYPE == 9)].groupby(['TagName', 'tester_id'])
    
    tag_id_linear_acc = normalized_sensor_data[(normalized_sensor_data.SENSORTYPE == 10)].groupby(['TagName', 'tester_id'])

    tag_id_groupby_quaternion = normalized_sensor_data[(normalized_sensor_data.SENSORTYPE == 11)].groupby(['TagName', 'tester_id'])

    tag_id_groupby_tilt = normalized_sensor_data[(normalized_sensor_data.SENSORTYPE == 26)].groupby(['TagName', 'tester_id'])
else:

    # tag_id_groupby = sensor_data_acc_tag12.groupby(['TagName', 'tester_id'])
    tag_id_groupby_acc = sensor_data[(sensor_data.SENSORTYPE == 1)].groupby(['TagName', 'tester_id'])

    tag_id_groupby_magnetic = sensor_data[(sensor_data.SENSORTYPE == 2)].groupby(['TagName', 'tester_id'])

    tag_id_groupby_orientation = sensor_data[(sensor_data.SENSORTYPE == 3)].groupby(['TagName', 'tester_id'])

    tag_id_groupby_gyro = sensor_data[(sensor_data.SENSORTYPE == 4)].groupby(['TagName', 'tester_id'])

    tag_id_groupby_gravity = sensor_data[(sensor_data.SENSORTYPE == 9)].groupby(['TagName', 'tester_id'])

    tag_id_linear_acc = sensor_data[(sensor_data.SENSORTYPE == 10)].groupby(['TagName', 'tester_id'])
    
    tag_id_groupby_quaternion = sensor_data[(sensor_data.SENSORTYPE == 11)].groupby(['TagName', 'tester_id'])

    tag_id_groupby_tilt = sensor_data[(sensor_data.SENSORTYPE == 26)].groupby(['TagName', 'tester_id'])

In [5]:
N_frame_no = 1 

test_percent = 0.1 # 10% samples are used for testing

In [6]:
# def shrink_array(array,size):
    
#     ratio = float(len(array)) / float(size)
#     res = []
#     for i in range(size):
#         res.append(np.mean(array[math.floor(i*ratio):math.ceil((i+1.0)*ratio)], axis = 0))
#     return np.array(res)

In [7]:
# overlap
def shrink_array(array,size):
    
    ratio = float(len(array)) / float(size+1)
    res = []
    for i in range(size):
        res.append(np.mean(array[math.floor(i*ratio):math.ceil((i+2.0)*ratio)], axis = 0))
    return np.array(res)

In [8]:
def gesture_features(accs):
#     for i in range(accs.shape[0]): # x, y, z
#         accs[i] = min_max_normalization(accs[i], np.min(accs[i]), np.max(accs[i]))
        
    if N_frame_no > 1:
        Ls = math.floor(len(accs)/ (N_frame_no + 1))
        segments = None
        for i in range(N_frame_no + 1):
            if segments is None:
                segments = np.array([accs[i*Ls:(i+1)*Ls]])
            else:
                segments = np.append(segments, np.array([accs[i*Ls:(i+1)*Ls]]), axis=0)

        frames = None
        for i in range(N_frame_no):
            cur_frame = segments[i:i+2]
            cur_frame = cur_frame.reshape((cur_frame.shape[0]*cur_frame.shape[1],cur_frame.shape[2]))
            if frames is None:
                frames = np.array([cur_frame])
            else:
                frames = np.append(frames, np.array([cur_frame]), axis = 0)
        return np.array([frame_features(f) for f in frames]).reshape(-1)
    else:
        return frame_features(accs).reshape(-1)

In [9]:
def frame_features(cur_frame):
    
    mean_cur_frame = np.mean(cur_frame ,axis= 0)
    
    energy_cur_frame=[]
    for T in range(cur_frame.shape[1]): #x,y,z
        T_sum = 0
        for i in cur_frame[:,T]:
            T_sum += math.pow(abs(i),2)
        energy_cur_frame.append(T_sum / len(cur_frame))
    energy_cur_frame = np.array(energy_cur_frame)
    
    entropy_cur_frame = []
    for T in range(cur_frame.shape[1]): #x,y,z
        T_sum = 0
        entropy_sum = 0
        for i in cur_frame[:,T]:
            T_sum += abs(i)
            
        for m in cur_frame[:,T]:
            p_m_T = abs(m)/T_sum
            entropy_sum += p_m_T*math.log(1/p_m_T)

        entropy_cur_frame.append(entropy_sum)
    entropy_cur_frame = np.array(entropy_cur_frame)
    
    std_cur_frame = np.std(cur_frame, axis=0)
    
    ## TODO     ## TODO    ## TODO    ## TODO    ## TODO    ## TODO    ## TODO    ## TODO
    ## TODO     ## TODO    ## TODO    ## TODO    ## TODO    ## TODO    ## TODO    ## TODO
    ## TODO     ## TODO    ## TODO    ## TODO    ## TODO    ## TODO    ## TODO    ## TODO
    # correlation
    
    return np.concatenate((mean_cur_frame,energy_cur_frame,std_cur_frame))

In [23]:
# # X = []

# # for key in list(tag_id_dict.keys()):
# #     gesture_feature = gesture_features(tag_id_groupby.get_group(key)[['VALUES1', 'VALUES2', 'VALUES3']].values).reshape(-1)
# #     if np.all(~np.isnan(gesture_feature)):
# #         X.append(gesture_feature)
# #     else:
# #         print(key)
# # X = np.array(X)

# X = []

# for key in list(tag_id_dict.keys()):

#     acc_feature = shrink_array(tag_id_groupby_acc.get_group(key)[['VALUES1','VALUES2', 'VALUES3']].values, 30)
# #     acc_feature = min_max_normalization(acc_feature)

    
# #     gyro_feature = shrink_array(tag_id_groupby_gyro.get_group(key)[['VALUES1','VALUES2', 'VALUES3']].values, 30)
# #     orientation_feature = shrink_array(tag_id_groupby_orientation.get_group(key)[['VALUES1', 'VALUES2', 'VALUES3']].values, 30)
# #     quaternion_feature = shrink_array(tag_id_groupby_quaternion.get_group(key)[['VALUES1','VALUES2', 'VALUES3']].values, 30)
# #     magnetic_feature = shrink_array(tag_id_groupby_magnetic.get_group(key)[['VALUES1','VALUES2', 'VALUES3']].values, 30)
# #     if key in tag_id_groupby_tilt.groups:
# #         tilt_feature = np.array([1])
# #     else:
# #         tilt_feature = np.array([0])
        
#     X.append(acc_feature)
# #     X.append(np.concatenate((acc_feature, tilt_feature)))
    
# X = np.array(X)
# # pca = PCA(n_components=1)
# # pca.fit(X.reshape(-1,3))
# # X = pca.transform(X.reshape(-1,3))
# # X = X.reshape(int(X.shape[0]/30), 30)


X = []
y = []
for key in list(tag_id_linear_acc.groups.keys()):
    linear_acc_feature = shrink_array(tag_id_linear_acc.get_group(key)[['VALUES1', 'VALUES2', 'VALUES3']].values, 30)
#     acc_feature = shrink_array(tag_id_groupby_acc.get_group(key)[['VALUES1', 'VALUES2', 'VALUES3']].values, 30)
    X.append(linear_acc_feature)
#     X.append(np.concatenate((acc_feature, linear_acc_feature), axis = 1))
    y.append(key[0])
X = np.array(X)
y = np.array(y)

In [17]:
X = []
y = []
linear_acc_feature_groups = linear_acc_feature.groupby(['TagName','tester_id'])
for key in list(linear_acc_feature_groups.groups.keys()):
    linear_acc_feature = shrink_array(linear_acc_feature_groups.get_group(key)[['linear_distance_x',
                                                                                'linear_distance_y', 
                                                                                'linear_distance_z', 
                                                                                'linear_velocity_x',
                                                                                'linear_velocity_y', 
                                                                                'linear_velocity_z']].values, 30)
#     acc_feature = shrink_array(tag_id_groupby_acc.get_group(key)[['VALUES1', 'VALUES2', 'VALUES3']].values, 30)
    X.append(linear_acc_feature)
#     X.append(np.concatenate((acc_feature, linear_acc_feature), axis = 1))
    y.append(key[0])
X = np.array(X)
y = np.array(y)

In [18]:
X.shape

(1040, 30, 6)

In [19]:
idx_list = list(range(len(X)))
np.random.shuffle(idx_list)
train_idx = idx_list[:math.floor(len(X) * (1- test_percent))]
test_idx = idx_list[math.floor(len(X) * (1- test_percent)):]

# y[y == 'Tag2'] = 2
# y[y == 'Tag1'] = 1

train_x = X[train_idx]
train_y = y[train_idx]
test_x = X[test_idx]
test_y = y[test_idx]

In [20]:
# lda = LinearDiscriminantAnalysis()
# lda.fit(train_x.reshape(train_x.shape[0],90),train_y)
# train_x = lda.transform(train_x.reshape(train_x.shape[0],90))
# test_x = lda.transform(test_x.reshape(test_x.shape[0],90))

y.shape

(1040,)

In [21]:
clf4 = SVC(kernel='poly', C=1, degree=3, verbose = True)
# clf4.fit(train_x, train_y) 
# joblib.dump(clf4, '../../Results/baseline SVC 0.80 raw data acc with gyro 200 chunk.pkl') 
# print(classification_report(test_y, clf4.predict(test_x)))
if len(X.shape) > 2:
    res = cross_val_score(clf4, X.reshape(list(X.shape)[0],-1), y, cv = 5)
else:
    res = cross_val_score(clf4, X, y, cv=5)
print(res)
print(np.mean(res))

[LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][ 0.58571429  0.6         0.4952381   0.6         0.445     ]
0.54519047619


In [None]:
clf = RandomForestClassifier()
clf.fit(train_x, train_y)
print(classification_report(test_y, clf.predict(test_x)))


In [28]:
clf4.fit(train_x.reshape(len(train_x),-1), train_y)


[LibSVM]

'             precision    recall  f1-score   support\n\n       Tag0       0.25      0.42      0.31        12\n       Tag1       0.50      1.00      0.67        11\n       Tag2       0.56      0.83      0.67         6\n       Tag3       0.60      0.75      0.67        12\n       Tag4       0.60      0.60      0.60         5\n       Tag5       0.80      0.44      0.57         9\n       Tag6       0.25      0.06      0.10        16\n       Tag7       0.17      0.20      0.18         5\n       Tag8       0.78      0.41      0.54        17\n       Tag9       0.67      0.55      0.60        11\n\navg / total       0.53      0.50      0.48       104\n'

In [30]:
print(classification_report(test_y, clf4.predict(test_x.reshape(len(test_x),-1))))
print(confusion_matrix(test_y, clf4.predict(test_x.reshape(len(test_x),-1))))

             precision    recall  f1-score   support

       Tag0       0.25      0.42      0.31        12
       Tag1       0.50      1.00      0.67        11
       Tag2       0.56      0.83      0.67         6
       Tag3       0.60      0.75      0.67        12
       Tag4       0.60      0.60      0.60         5
       Tag5       0.80      0.44      0.57         9
       Tag6       0.25      0.06      0.10        16
       Tag7       0.17      0.20      0.18         5
       Tag8       0.78      0.41      0.54        17
       Tag9       0.67      0.55      0.60        11

avg / total       0.53      0.50      0.48       104

[[ 5  2  0  3  0  0  0  1  1  0]
 [ 0 11  0  0  0  0  0  0  0  0]
 [ 0  1  5  0  0  0  0  0  0  0]
 [ 0  2  0  9  1  0  0  0  0  0]
 [ 1  1  0  0  3  0  0  0  0  0]
 [ 0  0  2  0  0  4  1  1  1  0]
 [12  2  0  0  0  0  1  1  0  0]
 [ 0  2  0  1  0  0  0  1  0  1]
 [ 2  1  1  1  0  1  2  0  7  2]
 [ 0  0  1  1  1  0  0  2  0  6]]
