In [1]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
from sklearn.preprocessing import MinMaxScaler
from scipy.stats import linregress
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.externals import joblib
from sklearn.naive_bayes import GaussianNB

from keras.layers import Input, Dense, Conv1D, MaxPooling1D, Flatten, concatenate, Conv2D
from keras.models import Model
from keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint
from keras.models import model_from_json
from sklearn.naive_bayes import GaussianNB

Using TensorFlow backend.


In [2]:
def load_5models_from_disk():
    models = []
    for i in range(5):
        json_file = open("./model" + str(i) +".json", 'r')
        loaded_model_json = json_file.read()
        json_file.close()
        loaded_model = model_from_json(loaded_model_json)
        # load weights into new model
        loaded_model.load_weights("temp" + str(i) +".hdf5")
#         print("Loaded model from disk")

        # evaluate loaded model on test data
        loaded_model.compile(optimizer='rmsprop',
                          loss='categorical_crossentropy',
                          metrics=['accuracy'])
        models.append(loaded_model)
    return models

In [3]:
def shrink_array(array,size):
    
    ratio = float(len(array)) / float(size+1)
    res = []
    for i in range(size):
        res.append(np.mean(array[math.floor(i*ratio):math.ceil((i+2.0)*ratio)], axis = 0))
    return np.array(res)

In [4]:
data = pd.DataFrame.from_csv("../../data/global_acc_features_df.csv")
f_df = pd.DataFrame.from_csv("../../data/gesture_feature_df.csv")

In [5]:
data.columns

Index(['unix_timestamp', 'TagName', 'tester_id', 'v_1', 'v_2', 'v_3', 'd_1',
       'd_2', 'd_3', 'global_acc1', 'global_acc2', 'global_acc3',
       'v_12_square', 'acc_12_square', 'd_12_square'],
      dtype='object')

In [6]:
feature_cols = ['global_acc3','acc_12_square']

In [7]:
# def get_feature_label(data, N):
#     groups = data.groupby(['TagName','tester_id'])
#     keys = groups.groups.keys()
#     y = []
#     X = []
#     for k in keys:
#         frame_feature = shrink_array(groups.get_group(k)[feature_cols].values, N)
#         X.append(frame_feature)
#         y.append(k[0])
#     return np.array(X),np.array(y)

# def get_feature_label_f(data, N):
#     groups = data.groupby(['TagName','tester_id'])
#     keys = groups.groups.keys()
#     y = []
#     X = []
#     f = []
#     for k in keys:
#         frame_feature = shrink_array(groups.get_group(k)[feature_cols].values, N)
#         X.append(frame_feature)
#         y.append(k[0])
#         f.append(f_df[(f_df.TagName == k[0]) & (f_df.tester_id == k[1])].d_change.values[0])
#     return np.array(X),np.array(y),np.array(f)

def get_feature_label(data_groups, keys, N):
    y = []
    X = []
    f = []
    for k in keys:
        frame_feature = shrink_array(data_groups.get_group(k)[feature_cols].values, N)
        X.append(frame_feature)
        y.append(k[0])
        f.append(f_df[(f_df.TagName == k[0]) & (f_df.tester_id == k[1])].d_change.values[0])
    return np.array(X),np.array(y),np.array(f).reshape(-1,1)

In [130]:
def SVC_training(X_train, y_train,x_test, y_test, f_test):
    if len(X_train.shape) > 2:
        X_train = X_train.reshape(list(X_train.shape)[0],-1)
        x_test = x_test.reshape(list(x_test.shape)[0],-1)
    main_score = []
    overal_score = []
    for k in ['poly','rbf','linear']:
        clf4 = SVC(kernel=k, C=1.5, degree=3, verbose = False)
        clf4.fit(X_train, y_train) 
        res = clf4.predict(x_test)
        score = accuracy_score(y_test, res)
        print(k + " score: " + str(score))
        main_score.append(score)
        
        if (print_conf):
            print(classification_report(y_test, res))
            print(confusion_matrix(y_test, res))
        
        if ensemble_06 is True:
            for i in range(len(res)):
                if (res[i] =='Tag0') or (res[i] == "Tag6"):
                    binary_res = rf_clf.predict_proba([f_test[i]])[0]
                    main_res = clf4.predict_proba(x_test[i])[0]

                    binary_res_0 = binary_res[np.where(rf_clf.classes_ == 'Tag0')[0][0]]
                    binary_res_6 = binary_res[np.where(rf_clf.classes_ == 'Tag6')[0][0]]
                    main_res_0 = main_res[np.where(rf_clf.classes_ == 'Tag0')[0][0]]
                    main_res_6 = main_res[np.where(rf_clf.classes_ == 'Tag6')[0][0]]

                    r = 1.0/(main_res_0+main_res_6)
                    main_res_0 = r*main_res_0
                    main_res_6 = r*main_res_6
                    
                    if binary_res_0 + main_res_0 > binary_res_6+main_res_6:
                        res[i] = 'Tag0'
                    else:
                        res[i] = 'Tag6'
            print("-----ensembled---------")
            score = accuracy_score(y_test, res)
            print(score)
            if (print_conf):
                print(classification_report(y_test, max_res))
                print(confusion_matrix(y_test, max_res))
        overal_score.append(score)
    return main_score, overal_score
            
        
def RF_training(X_train, y_train,x_test, y_test, f_test):
    if len(X_train.shape) > 2:
        X_train = X_train.reshape(list(X_train.shape)[0],-1)
        x_test = x_test.reshape(list(x_test.shape)[0],-1)

        clf4 = RandomForestClassifier(n_estimators=30)

        clf4.fit(X_train, y_train) 
        # joblib.dump(clf4, '../../Results/baseline SVC 0.80 raw data acc with gyro 200 chunk.pkl') 
        res = clf4.predict(x_test)
        score = accuracy_score(y_test, res)
        if (print_conf):
            print(classification_report(y_test, res))
            print(confusion_matrix(y_test, res))
        print("RF score: " + str(score))
        
        if ensemble_06 is True:
            for i in range(len(res)):
                if (res[i] =='Tag0') or (res[i] == "Tag6"):
                    binary_res = rf_clf.predict_proba([f_test[i]])[0]
                    main_res = clf4.predict_proba(x_test[i])[0]

                    binary_res_0 = binary_res[np.where(rf_clf.classes_ == 'Tag0')[0][0]]
                    binary_res_6 = binary_res[np.where(rf_clf.classes_ == 'Tag6')[0][0]]
                    main_res_0 = main_res[np.where(rf_clf.classes_ == 'Tag0')[0][0]]
                    main_res_6 = main_res[np.where(rf_clf.classes_ == 'Tag6')[0][0]]

                    r = 1.0/(main_res_0+main_res_6)
                    main_res_0 = r*main_res_0
                    main_res_6 = r*main_res_6
                    
                    if binary_res_0 + main_res_0 > binary_res_6+main_res_6:
                        res[i] = 'Tag0'
                    else:
                        res[i] = 'Tag6'

            print("-----ensembled---------")
            print((accuracy_score(y_test, res)))
            if (print_conf):
                print(classification_report(y_test, res))
                print(confusion_matrix(y_test, res))
        return score, (accuracy_score(y_test, res))


def DL_training(X_train, y_train,x_test, y_test, f_test):

    y = np.concatenate([y_train,y_test])
    tag_list = []
    for i in range(10):
        tag_list.append(['Tag'+str(i),i])
    for i in tag_list:
        tag_str = i[0]
        tag_int = i[1]
        y[y==tag_str] = tag_int
    y_categorical = to_categorical(y)
    
    y_train_cate = y_categorical[:len(y_train)]
    y_test_cate = y_categorical[len(y_train):]

    X_train = X_train.reshape(list(X_train.shape)[0],-1)
    x_test = x_test.reshape(list(x_test.shape)[0],-1)
    
    main_scores = []
    overall_scores= []
    for i in range(5):
        # This returns a tensor
        inputs = Input(shape=(X_train.shape[1:]))

        # a layer instance is callable on a tensor, and returns a tensor
        layer1 = Dense(64, activation='relu')(inputs)
        layer2 = Dense(128, activation='relu')(layer1)
        layer3 = Dense(64, activation='relu')(layer2)
        layer4 = Dense(32, activation='relu')(layer3)
        predictions = Dense(10, activation='softmax')(layer4)

        mcp = ModelCheckpoint("./temp" + str(i) + ".hdf5", monitor='val_acc', verbose=0, save_best_only=True, save_weights_only=False, mode='auto', period=1)
        model = Model(inputs=inputs, outputs=predictions)
#         print(model.summary())
        model.compile(optimizer='rmsprop',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        model_his = model.fit(X_train, y_train_cate, batch_size=32, epochs=40, verbose = 0,
                              validation_data=(x_test, y_test_cate), callbacks = [mcp]
                             )  # starts training

        model.load_weights("./temp" + str(i) +".hdf5")

        res = model.predict(x_test)
        predict = np.argmax(res, 1).tolist()
        score = accuracy_score(np.argmax(y_test_cate,1), predict)
        print("DL score:" +str(score))
        main_scores.append(score)
        
        if(print_conf):
            print(classification_report(np.argmax(y_test_cate,1), np.argmax(res, 1)))
            print(confusion_matrix(np.argmax(y_test_cate,1), np.argmax(res, 1)))
    
        if ensemble_06 is True:
            for i in range(len(res)):
                if (predict[i] ==0) or (predict[i] == 6):
                    binary_res = rf_clf.predict_proba([f_test[i]])[0]
                    main_res = res[i]

                    binary_res_0 = binary_res[np.where(rf_clf.classes_ == 'Tag0')[0][0]]
                    binary_res_6 = binary_res[np.where(rf_clf.classes_ == 'Tag6')[0][0]]
                    main_res_0 = main_res[0]
                    main_res_6 = main_res[6]

                    r = 1.0/(main_res_0+main_res_6)
                    main_res_0 = r*main_res_0
                    main_res_6 = r*main_res_6
                    if binary_res_0 + main_res_0 > binary_res_6+main_res_6:
                        predict[i] = 0
                    else:
                        predict[i] = 6
                    
            print("-----ensembled---------")
            score = accuracy_score(np.argmax(y_test_cate,1), predict)
            print(score)
            overall_scores.append(score)
            if(print_conf):
                print(np.argmax(y_test_cate,1), predict)
            
    return main_scores, overall_scores
    
def CONV1d_training(X_train, y_train,x_test, y_test, f_test):
    y = np.concatenate([y_train,y_test])
    tag_list = []
    for i in range(10):
        tag_list.append(['Tag'+str(i),i])
    for i in tag_list:
        tag_str = i[0]
        tag_int = i[1]
        y[y==tag_str] = tag_int
    y_categorical = to_categorical(y)
    
    y_train_cate = y_categorical[:len(y_train)]
    y_test_cate = y_categorical[len(y_train):]

    main_scores=[]
    overall_scores=[]
    
    for i in range(5):
        input_val1 = Input(shape=X_train.shape[1:])

        con1 = Conv1D(filters=30,kernel_size=5)(input_val1)
        max_pooling_1d_1 = MaxPooling1D(pool_size=2, strides=None, padding='valid')(con1)
        flat_1 = Flatten()(max_pooling_1d_1)
        layer2 = Dense(128, activation='relu')(flat_1)
        layer4 = Dense(32, activation='relu')(layer2)
        predictions = Dense(y_categorical.shape[-1], activation='softmax')(layer4)

        model = Model(inputs = input_val1, outputs=predictions)
#         print(model.summary())
        mcp = ModelCheckpoint("./temp" + str(i) + ".hdf5", monitor='val_acc', verbose=0, save_best_only=True, save_weights_only=False, mode='auto', period=1)
    
        model.compile(optimizer='rmsprop',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
    
        model_his = model.fit(X_train, y_train_cate, batch_size=32, epochs=40, verbose = 0,
                              validation_data=(x_test, y_test_cate), callbacks = [mcp]
                             )  # starts training

        model.load_weights("./temp" + str(i) +".hdf5")

        res = model.predict(x_test)
        predict = np.argmax(res, 1).tolist()
        score = accuracy_score(np.argmax(y_test_cate,1), predict)
        print("CONV score:" +str(score))
        main_scores.append(score)
        
        if(print_conf):
            print(classification_report(np.argmax(y_test_cate,1), np.argmax(res, 1)))
            print(confusion_matrix(np.argmax(y_test_cate,1), np.argmax(res, 1)))
    
        if ensemble_06 is True:
            for i in range(len(res)):
                if (predict[i] ==0) or (predict[i] == 6):
                    binary_res = rf_clf.predict_proba([f_test[i]])[0]
                    main_res = res[i]

                    binary_res_0 = binary_res[np.where(rf_clf.classes_ == 'Tag0')[0][0]]
                    binary_res_6 = binary_res[np.where(rf_clf.classes_ == 'Tag6')[0][0]]
                    main_res_0 = main_res[0]
                    main_res_6 = main_res[6]

                    r = 1.0/(main_res_0+main_res_6)
                    main_res_0 = r*main_res_0
                    main_res_6 = r*main_res_6
                    if binary_res_0 + main_res_0 > binary_res_6+main_res_6:
                        predict[i] = 0
                    else:
                        predict[i] = 6
                    
            print("-----ensembled---------")
            score = accuracy_score(np.argmax(y_test_cate,1), predict)
            print(score)
            overall_scores.append(score)
            if(print_conf):
                print(np.argmax(y_test_cate,1), predict)
                
    return main_scores,overall_scores

In [113]:
a = np.array([[1,2],[1,0]])
b=np.array([[3,4],[3,0]])
np.concatenate([a,b])

array([[1, 2],
       [1, 0],
       [3, 4],
       [3, 0]])

In [35]:
all_groups = data.groupby(['TagName','tester_id'])
all_keys = list(all_groups.groups.keys())
np.random.shuffle(all_keys)



In [122]:
ensemble_06 = True
print_conf = False
if ensemble_06 is True:
    rf_clf = RandomForestClassifier(n_estimators=30,criterion='gini',max_depth=10)
    rf_clf.fit(f_train[(y_train=='Tag0') | (y_train=='Tag6')], y_train[(y_train=='Tag0') | (y_train=='Tag6')])

In [127]:

rf_clf.predict_proba([[0.0]])[0]

array([ 1.,  0.])

In [100]:
res = []
for i in range(0,300):
    res.append(rf_clf.predict([[i/100]]))

## Determine N

In [None]:
N=14
X_train, y_train, f_train = get_feature_label(all_groups, all_keys[:-int(len(all_keys)/5)],N)
X_test, y_test, f_test= get_feature_label(all_groups, all_keys[-int(len(all_keys)/5):],N)

In [76]:
svc_kernals = ['poly','rbf','linear']

In [131]:
log=""
log+="RF," + str(RF_training(X_train, y_train, X_test, y_test, f_test)) + "\n"
svc_main_res, svc_overall_res = SVC_training(X_train, y_train, X_test, y_test, f_test)
for i in range(3):
    log+=svc_kernals[i]+","+str(svc_main_res[i])+","+str(svc_overall_res[i])+"\n"
log+="DL," + str(DL_training(X_train, y_train, X_test, y_test, f_test)) + "\n"
log+="CON1V" + str(CONV1d_training(X_train, y_train, X_test, y_test, f_test)) + "\n"

RF score: 0.682692307692




-----ensembled---------
0.701923076923
poly score: 0.802884615385




AttributeError: predict_proba is not available when  probability=False

In [124]:
np.where(rf_clf.classes_ == 'Tag6')[0][0]

1

## Determine network structure

In [None]:
N = 14
X_train, y_train = get_feature_label(train_data, N)
x_test, y_test, f_test = get_feature_label_f(test_data, N)


In [None]:
X_train.shape

In [None]:
dl = []
conv = []
for i in range(5):
    dl.append(DL_training(X_train, y_train, x_test, y_test, f_test))
    conv.append(CONV1d_training(X_train, y_train, x_test, y_test, f_test))
    
print(np.mean(dl))
print(np.mean(conv))

In [None]:
dl = []
conv = []
for i in range(5):
    dl.append(DL_training(X_train, y_train, x_test, y_test, f_test))
#     conv.append(CONV1d_training(X_train, y_train, x_test, y_test, f_test))
    
print(np.mean(dl))
print(np.mean(conv))

## Determine C for SVM

In [49]:
def SVC_training_c(X_train, y_train,x_test, y_test, f_test,c):
    if len(X_train.shape) > 2:
        X_train = X_train.reshape(list(X_train.shape)[0],-1)
        x_test = x_test.reshape(list(x_test.shape)[0],-1)
    overal_max = []
    for k in ['poly','rbf','linear']:
        max_score = 0
        max_res = []

        clf4 = SVC(kernel=k, C=c, degree=3, verbose = False)
        clf4.fit(X_train, y_train) 
        # joblib.dump(clf4, '../../Results/baseline SVC 0.80 raw data acc with gyro 200 chunk.pkl') 
        res = clf4.predict(x_test)
        score = accuracy_score(y_test, res)
        if score>max_score:
            max_score = score
            max_res = res
        print(k)
        print("max score: " + str(max_score) + " C = " + str(c))
        
#         print(classification_report(y_test, max_res))
#         print(confusion_matrix(y_test, max_res))
        if ensemble_06 is True:
            for i in range(len(max_res)):
                if (max_res[i] =='Tag0') or (max_res[i] == "Tag6"):
                    max_res[i] = rf_clf.predict([f_test[i]])[0]
#             print("-----ensembled---------")
#             print(classification_report(y_test, max_res))
#             print(confusion_matrix(y_test, max_res))
        overal_max.append(max_score)
    return overal_max
            

In [64]:
res = []

for i in [0.4,1.4,1.45,1.5,1.55,1.6]:
    res.append(SVC_training_c(X_train, y_train, X_test, y_test, f_test,i))


poly
max score: 0.783653846154 C = 0.4
rbf
max score: 0.673076923077 C = 0.4
linear
max score: 0.701923076923 C = 0.4
poly
max score: 0.802884615385 C = 1.4
rbf
max score: 0.783653846154 C = 1.4
linear
max score: 0.706730769231 C = 1.4
poly
max score: 0.802884615385 C = 1.45
rbf
max score: 0.778846153846 C = 1.45
linear
max score: 0.706730769231 C = 1.45
poly
max score: 0.802884615385 C = 1.5
rbf
max score: 0.783653846154 C = 1.5
linear
max score: 0.711538461538 C = 1.5
poly
max score: 0.802884615385 C = 1.55
rbf
max score: 0.783653846154 C = 1.55
linear
max score: 0.701923076923 C = 1.55
poly
max score: 0.798076923077 C = 1.6
rbf
max score: 0.783653846154 C = 1.6
linear
max score: 0.701923076923 C = 1.6
