In [1]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
from sklearn.preprocessing import MinMaxScaler
from scipy.stats import linregress
from sklearn.ensemble import RandomForestClassifier

from keras.layers import Input, Dense, Conv1D, MaxPooling1D, Flatten, concatenate, Conv2D
from keras.models import Model
from keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint
from keras.models import model_from_json

Using TensorFlow backend.


In [2]:
def load_5models_from_disk():
    """Reload the five Keras models saved in the current directory.

    For each index ``i`` in ``0..4`` the architecture is read from
    ``./model<i>.json`` and the weights from ``temp<i>.hdf5``. Each model is
    then compiled with the rmsprop optimizer and categorical cross-entropy
    loss.

    Returns:
        list: the five compiled models, in index order.

    Raises:
        IOError / OSError: if one of the JSON or weight files is missing.
    """
    models = []
    for i in range(5):
        # A context manager closes the file even when read() or the JSON
        # parsing raises, which the manual open()/close() pair did not.
        with open("./model" + str(i) +".json", 'r') as json_file:
            loaded_model_json = json_file.read()
        loaded_model = model_from_json(loaded_model_json)
        # load weights into new model
        loaded_model.load_weights("temp" + str(i) +".hdf5")
        print("Loaded model from disk")

        # compile so the loaded model can be evaluated on test data
        loaded_model.compile(optimizer='rmsprop',
                          loss='categorical_crossentropy',
                          metrics=['accuracy'])
        models.append(loaded_model)
    return models

In [3]:
def shrink_array(array,size):
    """Down-sample ``array`` along its first axis to ``size`` averaged rows.

    The window length is ``len(array) / (size + 1)``. Window ``k`` spans the
    indices ``floor(k * step)`` up to (excluding) ``ceil((k + 1) * step)``,
    and is reduced with ``np.mean(..., axis=0)``.

    NOTE(review): because the divisor is ``size + 1`` rather than ``size``,
    the windows stop short of the end of ``array`` (the tail is not averaged
    into any row). Confirm this is intended.

    Args:
        array: sequence/ndarray indexed along axis 0.
        size: number of output rows.

    Returns:
        np.ndarray with ``size`` entries along the first axis.
    """
    step = float(len(array)) / float(size + 1)
    pooled = [
        np.mean(array[math.floor(idx * step):math.ceil((idx + 1.0) * step)], axis=0)
        for idx in range(size)
    ]
    return np.array(pooled)

In [4]:
# DataFrame.from_csv was deprecated in pandas 0.21 and removed in 1.0.
# read_csv with index_col=0 and parse_dates=True reproduces its defaults
# (first column becomes the index, index parsed as dates where possible).
train_data = pd.read_csv("../../data/database/train_data.csv", index_col=0, parse_dates=True)
test_data = pd.read_csv("../../data/database/test_data.csv", index_col=0, parse_dates=True)
gesture_features = pd.read_csv("../../data/gesture_feature_df.csv", index_col=0, parse_dates=True)
train_data.columns

Index(['unix_timestamp', 'TagName', 'tester_id', 'v_1', 'v_2', 'v_3', 'd_1',
       'd_2', 'd_3', 'global_acc1', 'global_acc2', 'global_acc3',
       'v_12_square', 'acc_12_square', 'd_12_square'],
      dtype='object')

In [5]:
# Columns of the raw frames that get_feature_label feeds to shrink_array.
feature_cols = [
    'global_acc3',
    'acc_12_square',
]

In [6]:
def get_feature_label(data):
    """Build per-recording features, labels and gesture features from ``data``.

    ``data`` is grouped by ``(TagName, tester_id)``. For every group the
    ``feature_cols`` columns are reduced to 50 rows with ``shrink_array``, the
    group's ``TagName`` becomes the label, and the first matching ``d_change``
    value is looked up in the module-level ``gesture_features`` frame.

    Args:
        data: DataFrame with ``TagName``, ``tester_id`` and the
            ``feature_cols`` columns.

    Returns:
        tuple of three np.ndarray: (features, labels, d_change values), one
        entry per ``(TagName, tester_id)`` group.

    Raises:
        IndexError: if ``gesture_features`` has no row for a group.
    """
    grouped = data.groupby(['TagName','tester_id'])
    sequences, labels, d_changes = [], [], []
    for key in grouped.groups.keys():
        tag_name, tester = key[0], key[1]
        recording = grouped.get_group(key)
        sequences.append(shrink_array(recording[feature_cols].values, 50))
        labels.append(tag_name)
        # Select the gesture-feature row(s) belonging to this recording.
        same_recording = (gesture_features.TagName == tag_name) & (gesture_features.tester_id == tester)
        d_changes.append(gesture_features.loc[same_recording, 'd_change'].values[0])
    return np.array(sequences),np.array(labels),np.array(d_changes)

In [12]:
def SVC_training(X_train, y_train,x_test, y_test, f_test):
    """Fit SVCs over several kernels and C values and report the best per kernel.

    For each kernel in ('poly', 'rbf', 'linear') the function tries
    C = 0.5, 1.0, ..., 10.0, keeps the predictions with the highest accuracy
    on ``x_test`` and prints a classification report and confusion matrix.
    Predictions equal to 'Tag0' or 'Tag6' are then replaced by the output of
    the module-level ``rf_clf`` on the corresponding ``f_test`` entries, and
    both reports are printed again.

    NOTE(review): C is selected on the test set, so the printed scores are
    optimistic estimates.

    Args:
        X_train, x_test: feature arrays; anything with more than two
            dimensions is flattened to (n_samples, -1).
        y_train, y_test: string tag labels.
        f_test: per-sample gesture feature(s) used by ``rf_clf``.

    Returns:
        None. Results are printed.
    """
    if len(X_train.shape) > 2:
        X_train = X_train.reshape(X_train.shape[0],-1)
        x_test = x_test.reshape(x_test.shape[0],-1)

    f_test = np.asarray(f_test)

    for k in ['poly','rbf','linear']:
        max_score = 0
        max_i = 0
        max_res = None
        for step in range(1,21):
            c_value = step/2
            clf4 = SVC(kernel=k, C=c_value, degree=3, verbose = True)

            clf4.fit(X_train, y_train)
            # joblib.dump(clf4, '../../Results/baseline SVC 0.80 raw data acc with gyro 200 chunk.pkl')
            res = clf4.predict(x_test)
            score = accuracy_score(y_test, res)
            # Always record the first result so max_res is never left empty
            # when every candidate scores 0.
            if max_res is None or score>max_score:
                max_score = score
                max_i = c_value
                max_res = res
        print(k)
        print("max score: " + str(max_score) + " C = " + str(max_i))

        print(classification_report(y_test, max_res))
        print(confusion_matrix(y_test, max_res))

        # Re-classify the Tag0/Tag6 predictions with the random forest.
        # scikit-learn estimators require a 2-D (n_samples, n_features) input,
        # so the selected features are reshaped and predicted in one batch.
        ambiguous = (max_res == 'Tag0') | (max_res == 'Tag6')
        if ambiguous.any():
            rf_inputs = f_test[ambiguous].reshape(int(ambiguous.sum()), -1)
            max_res[ambiguous] = rf_clf.predict(rf_inputs)

        print(classification_report(y_test, max_res))
        print(confusion_matrix(y_test, max_res))
            
        


# Number of gesture classes ('Tag0' .. 'Tag9').
_NUM_TAGS = 10


def _one_hot_tags(y_train, y_test):
    """One-hot encode 'Tag<k>' labels; returns (train_matrix, test_matrix)."""
    tag_to_index = {'Tag' + str(i): i for i in range(_NUM_TAGS)}
    y_all = np.concatenate([y_train, y_test])
    y_index = np.array([tag_to_index[tag] for tag in y_all])
    # Fixing num_classes keeps the one-hot width equal to the softmax width
    # even if some tag is absent from the data.
    y_categorical = to_categorical(y_index, num_classes=_NUM_TAGS)
    return y_categorical[:len(y_train)], y_categorical[len(y_train):]


def _build_dense_model(input_shape):
    """Fully connected 64-128-64-32 network with a softmax output."""
    inputs = Input(shape=input_shape)
    hidden = Dense(64, activation='relu')(inputs)
    hidden = Dense(128, activation='relu')(hidden)
    hidden = Dense(64, activation='relu')(hidden)
    hidden = Dense(32, activation='relu')(hidden)
    predictions = Dense(_NUM_TAGS, activation='softmax')(hidden)
    return Model(inputs=inputs, outputs=predictions)


def _build_conv1d_model(input_shape):
    """Conv1D(30, 10) -> max-pool -> Dense 128 -> Dense 32 -> softmax network."""
    inputs = Input(shape=input_shape)
    conv = Conv1D(filters=30,kernel_size=10)(inputs)
    pooled = MaxPooling1D(pool_size=2, strides=None, padding='valid')(conv)
    flat = Flatten()(pooled)
    hidden = Dense(128, activation='relu')(flat)
    hidden = Dense(32, activation='relu')(hidden)
    predictions = Dense(_NUM_TAGS, activation='softmax')(hidden)
    return Model(inputs=inputs, outputs=predictions)


def _train_and_report(build_model, X_train, y_train, x_test, y_test, f_test):
    """Train five models from ``build_model``, reload them and print reports."""
    y_train_cate, y_test_cate = _one_hot_tags(y_train, y_test)

    # Five models are trained because load_5models_from_disk reads exactly
    # the files model0..4.json / temp0..4.hdf5.
    for i in range(5):
        model = build_model(X_train.shape[1:])
        mcp = ModelCheckpoint("./temp" + str(i) + ".hdf5", monitor='val_acc', verbose=0, save_best_only=True, save_weights_only=False, mode='auto')
        # 'acc' (not 'accuracy') so the logged metric is named 'val_acc' on
        # every Keras version; since Keras 2.3 metrics are reported under the
        # exact name given here, and the checkpoint monitors 'val_acc'.
        model.compile(optimizer='rmsprop',
                      loss='categorical_crossentropy',
                      metrics=['acc'])
        # NOTE(review): the test split doubles as validation data, so the
        # checkpointed "best" weights are selected on the test set.
        model.fit(X_train, y_train_cate, batch_size=32, epochs=40, verbose = 0,
                  validation_data=(x_test, y_test_cate), callbacks = [mcp])

        with open("./model" + str(i) +".json", "w") as json_file:
            json_file.write(model.to_json())

    models = load_5models_from_disk()

    # Pool the predictions of all reloaded models into one report, repeating
    # the ground truth and gesture features once per model.
    true_index = np.argmax(y_test_cate, 1)
    tests = np.tile(true_index, len(models))
    predicts = np.concatenate([np.argmax(model.predict(x_test), 1) for model in models])
    f = np.concatenate([np.asarray(f_test)] * len(models))

    print(classification_report(tests, predicts))
    print(confusion_matrix(tests, predicts))

    # Re-decide every prediction of class 0 or 6 with the random forest.
    # scikit-learn requires a 2-D (n_samples, n_features) input, so the
    # selected features are reshaped and predicted in one batch.
    ambiguous = (predicts == 0) | (predicts == 6)
    if ambiguous.any():
        rf_labels = rf_clf.predict(f[ambiguous].reshape(int(ambiguous.sum()), -1))
        predicts[ambiguous] = np.where(rf_labels == 'Tag0', 0, 6)

    print(classification_report(tests, predicts))
    print(confusion_matrix(tests, predicts))


def DL_training(X_train, y_train,x_test, y_test, f_test):
    """Train and evaluate five dense networks on flattened features.

    The inputs are flattened to (n_samples, -1). Five networks are trained
    for 40 epochs, the best-``val_acc`` checkpoint of each is reloaded with
    ``load_5models_from_disk``, and a classification report and confusion
    matrix over the pooled predictions are printed. Predictions of class 0 or
    6 are then re-decided by the module-level ``rf_clf`` using ``f_test``,
    and both reports are printed again.

    Args:
        X_train, x_test: feature arrays.
        y_train, y_test: arrays of 'Tag0'..'Tag9' labels.
        f_test: per-sample gesture feature(s) used by ``rf_clf``.

    Returns:
        None. Results are printed; model files are written to the current
        directory.
    """
    X_train = X_train.reshape(X_train.shape[0],-1)
    x_test = x_test.reshape(x_test.shape[0],-1)
    _train_and_report(_build_dense_model, X_train, y_train, x_test, y_test, f_test)


def CONV1d_training(X_train, y_train,x_test, y_test, f_test):
    """Train and evaluate five 1-D convolutional networks.

    Same procedure and output as ``DL_training`` but the inputs keep their
    original shape and the network starts with a Conv1D/MaxPooling1D stage.

    Args:
        X_train, x_test: feature arrays, passed to Conv1D unflattened.
        y_train, y_test: arrays of 'Tag0'..'Tag9' labels.
        f_test: per-sample gesture feature(s) used by ``rf_clf``.

    Returns:
        None. Results are printed; model files are written to the current
        directory.
    """
    _train_and_report(_build_conv1d_model, X_train, y_train, x_test, y_test, f_test)

In [8]:
# import matplotlib.pyplot as plt
# def plot_gesture(gesture):
#     for i in range(gesture.shape[1]):
#         plt.plot(gesture[:,i])
#     plt.show()

In [9]:
# Build the sequence features, tag labels and gesture features for each split.
X_train, y_train, f_train = get_feature_label(train_data)
x_test, y_test, f_test = get_feature_label(test_data)
# Show the shape of each training array.
for train_array in (X_train, y_train, f_train):
    print(train_array.shape)

(830, 50, 2)
(830,)
(830,)


In [10]:
# Random forest trained only on the Tag0 / Tag6 samples; the training
# functions use it to re-decide predictions that fall in those two classes.
# A fixed random_state makes the fitted forest reproducible across runs.
rf_clf = RandomForestClassifier(random_state=0)
is_tag_0_or_6 = (y_train=='Tag0') | (y_train=='Tag6')
y_train_06 = y_train[is_tag_0_or_6]
f_train_06 = f_train[is_tag_0_or_6]
# reshape(n, -1) turns a 1-D feature vector into a single-column matrix and
# leaves an already 2-D matrix unchanged, as scikit-learn expects.
rf_clf.fit(f_train_06.reshape(len(f_train_06), -1), y_train_06)

In [13]:
# Try SVC kernels and C values; prints reports before and after the Tag0/Tag6 random-forest step.
SVC_training(X_train, y_train, x_test, y_test, f_test)

[LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM]poly
max score: 0.804761904762 C = 0.5
             precision    recall  f1-score   support

       Tag0       0.71      0.81      0.76        21
       Tag1       0.94      0.81      0.87        21
       Tag2       0.90      0.86      0.88        21
       Tag3       0.80      0.95      0.87        21
       Tag4       0.89      0.81      0.85        21
       Tag5       0.64      0.76      0.70        21
       Tag6       0.78      0.67      0.72        21
       Tag7       0.83      0.90      0.86        21
       Tag8       0.89      0.76      0.82        21
       Tag9       0.75      0.71      0.73        21

avg / total       0.81      0.80      0.81       210

[[17  0  0  0  0  1  2  0  0  1]
 [ 1 17  1  0  0  1  0  0  0  1]
 [ 0  1 18  1  0  1  0  0  0  0]
 [ 0  0  0 20  0  0  0  0  0  1]
 [ 1  0  0  1 17  0  0  0  0  

In [14]:
# Train five dense networks on flattened features; prints reports pooled over the five reloaded models.
DL_training(X_train, y_train, x_test, y_test, f_test)

Loaded model from disk
Loaded model from disk
Loaded model from disk
Loaded model from disk
Loaded model from disk
             precision    recall  f1-score   support

          0       0.74      0.82      0.77       105
          1       0.94      0.96      0.95       105
          2       0.92      0.93      0.93       105
          3       0.85      0.97      0.91       105
          4       0.96      0.95      0.96       105
          5       0.90      0.76      0.82       105
          6       0.77      0.74      0.76       105
          7       0.90      0.90      0.90       105
          8       0.94      0.87      0.90       105
          9       0.91      0.90      0.90       105

avg / total       0.88      0.88      0.88      1050

[[ 86   0   0   0   0   0  17   0   0   2]
 [  0 101   0   0   0   0   0   0   0   4]
 [  0   0  98   4   0   1   1   0   1   0]
 [  0   0   3 102   0   0   0   0   0   0]
 [  0   0   0   0 100   0   0   5   0   0]
 [  3   1   2   9   1  80   1  

In [15]:
# Train five Conv1D networks on the unflattened features; prints reports pooled over the five reloaded models.
CONV1d_training(X_train, y_train, x_test, y_test, f_test)

Loaded model from disk
Loaded model from disk
Loaded model from disk
Loaded model from disk
Loaded model from disk
             precision    recall  f1-score   support

          0       0.78      0.83      0.81       105
          1       0.99      0.94      0.97       105
          2       0.92      0.95      0.93       105
          3       0.90      0.99      0.95       105
          4       0.95      0.95      0.95       105
          5       0.88      0.82      0.85       105
          6       0.80      0.80      0.80       105
          7       0.99      0.96      0.98       105
          8       0.95      0.85      0.89       105
          9       0.88      0.93      0.91       105

avg / total       0.90      0.90      0.90      1050

[[ 87   0   0   0   0   0  17   0   0   1]
 [  0  99   0   0   0   0   0   0   0   6]
 [  0   0 100   5   0   0   0   0   0   0]
 [  0   0   1 104   0   0   0   0   0   0]
 [  0   0   0   0 100   0   0   0   0   5]
 [  3   0   3   6   1  86   1  

In [17]:
# Group by the scalar column name rather than a one-element list: with a
# list grouper, recent pandas expects get_group to receive a 1-tuple and
# deprecates the scalar key "Tag<k>" used by the statistics loop.
g = gesture_features.groupby('TagName')

In [19]:
# Print the mean and standard deviation of d_change for every tag.
for tag_index in range(10):
    tag_name = "Tag" + str(tag_index)
    print(tag_name)
    d_change_values = g.get_group(tag_name).d_change.values
    print(np.mean(d_change_values))
    print(np.std(d_change_values))

Tag0
0.0251633133074
0.172669880427
Tag1
-0.533930148824
0.185776339633
Tag2
-0.492384350953
0.263275811534
Tag3
-0.634171660652
0.361112519218
Tag4
-0.594274538652
0.257722142262
Tag5
-0.185724596056
0.354015164845
Tag6
-0.229694763636
0.300580473984
Tag7
-0.560041225895
0.216076053837
Tag8
-0.0230429142299
0.233432037176
Tag9
-0.572249550629
0.258339263312
