In [29]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost.sklearn import XGBRegressor
from xgboost.sklearn import XGBClassifier
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import OneHotEncoder
import numpy
import xlrd

In [30]:
raw_df = pd.read_csv("data.csv", header = None)

In [31]:
raw_df.rename(columns = {0:'Ae', 1:'Ap', 2:'Rs', 3:'Fz', 4:'label'}, inplace = True)

In [32]:
raw_matrix = raw_df.as_matrix()
raw_feature = raw_matrix[:, 0:-2]
raw_label = raw_matrix[:, -1]
raw_feature_mean = raw_feature.mean(axis = 0)
raw_feature_std = raw_feature.std(axis = 0)
raw_feature_normalized = (raw_feature - raw_feature_mean)/raw_feature_std
np.random.shuffle(raw_feature_normalized)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=7)

In [33]:
raw_feature_normalized

array([[ 1.12115264, -0.60885777,  0.36514837],
       [ 0.52320456, -0.71474607, -0.73029674],
       [ 1.12115264, -0.92652269,  1.64316767],
       ...,
       [-0.07474351, -0.18530454, -1.2780193 ],
       [ 0.52320456, -0.39708115,  0.36514837],
       [ 0.52320456, -0.71474607, -1.46059349]])

In [34]:
lr = LogisticRegression(solver='newton-cg', multi_class='multinomial', C=4, tol=1e-6, max_iter=20)

In [35]:
count_CV = 0
test_acc_record = []
test_pre_record = []

for train_index,test_index in kfold.split(raw_feature_normalized, raw_label):
    lr.fit(raw_feature_normalized[train_index], raw_label[train_index])
    pred_smile_label = lr.predict(raw_feature_normalized[test_index])
    real_label = raw_label[test_index]
    
    test_count_num = 0
    real_label_index = 0
    acc_label_num = 0
    pre_label_num = 0
    
    for label in pred_smile_label:
        if label == real_label[real_label_index]:
            pre_label_num += 1
        elif (label - real_label[real_label_index])*(label - real_label[real_label_index]) == 1:
            acc_label_num += 1
        real_label_index += 1
        test_count_num += 1
    
    print('#### In Cross Validation %d: ####'% count_CV)
    count_CV += 1
    print('NumofIns Precisely Classified : ',pre_label_num,'\t',
          'NumofIns Accurately Classified : ',acc_label_num,'\t',
          'NumofIns : ',test_count_num,'\t',
          'Pre_Accuracy : ',pre_label_num/test_count_num,'\t',
          'Accuracy : ',(pre_label_num + acc_label_num)/test_count_num)
    
    test_pre_record.append(pre_label_num/test_count_num)
    test_acc_record.append((pre_label_num + acc_label_num)/test_count_num)

print('mean of NumofIns precisely classified',numpy.mean(test_pre_record))
print('mean of NumofIns accurately classified',numpy.mean(test_acc_record))

#### In Cross Validation 0: ####
NumofIns Precisely Classified :  19 	 NumofIns Accurately Classified :  3 	 NumofIns :  40 	 Pre_Accuracy :  0.475 	 Accuracy :  0.55
#### In Cross Validation 1: ####
NumofIns Precisely Classified :  20 	 NumofIns Accurately Classified :  3 	 NumofIns :  40 	 Pre_Accuracy :  0.5 	 Accuracy :  0.575
#### In Cross Validation 2: ####
NumofIns Precisely Classified :  20 	 NumofIns Accurately Classified :  2 	 NumofIns :  38 	 Pre_Accuracy :  0.5263157894736842 	 Accuracy :  0.5789473684210527
#### In Cross Validation 3: ####
NumofIns Precisely Classified :  20 	 NumofIns Accurately Classified :  2 	 NumofIns :  38 	 Pre_Accuracy :  0.5263157894736842 	 Accuracy :  0.5789473684210527
#### In Cross Validation 4: ####
NumofIns Precisely Classified :  19 	 NumofIns Accurately Classified :  2 	 NumofIns :  38 	 Pre_Accuracy :  0.5 	 Accuracy :  0.5526315789473685
#### In Cross Validation 5: ####
NumofIns Precisely Classified :  20 	 NumofIns Accurately Classifie

In [36]:
SVM = SVC(kernel='rbf',decision_function_shape='ovo',C=20,shrinking =False,tol =1e-6)

In [37]:
count_CV = 0
test_acc_record = []
test_pre_record = []

for train_index,test_index in kfold.split(raw_feature_normalized, raw_label):
    SVM.fit(raw_feature_normalized[train_index], raw_label[train_index])
    pred_smile_label = SVM.predict(raw_feature_normalized[test_index])
    real_label = raw_label[test_index]
    
    test_count_num = 0
    real_label_index = 0
    acc_label_num = 0
    pre_label_num = 0
    
    for label in pred_smile_label:
        if label == real_label[real_label_index]:
            pre_label_num += 1
        elif (label - real_label[real_label_index])*(label - real_label[real_label_index]) == 25:
            acc_label_num += 1
        real_label_index += 1
        test_count_num += 1
    
    print('#### In Cross Validation %d: ####'% count_CV)
    count_CV += 1
    print('NumofIns Precisely Classified : ',pre_label_num,'\t',
          'NumofIns Accurately Classified : ',acc_label_num,'\t',
          'NumofIns : ',test_count_num,'\t',
          'Pre_Accuracy : ',pre_label_num/test_count_num,'\t',
          'Accuracy : ',(pre_label_num + acc_label_num)/test_count_num)
    
    test_pre_record.append(pre_label_num/test_count_num)
    test_acc_record.append((pre_label_num + acc_label_num)/test_count_num)

print('mean of NumofIns precisely classified',numpy.mean(test_pre_record))
print('mean of NumofIns accurately classified',numpy.mean(test_acc_record))

#### In Cross Validation 0: ####
NumofIns Precisely Classified :  18 	 NumofIns Accurately Classified :  0 	 NumofIns :  40 	 Pre_Accuracy :  0.45 	 Accuracy :  0.45
#### In Cross Validation 1: ####
NumofIns Precisely Classified :  19 	 NumofIns Accurately Classified :  0 	 NumofIns :  40 	 Pre_Accuracy :  0.475 	 Accuracy :  0.475
#### In Cross Validation 2: ####
NumofIns Precisely Classified :  19 	 NumofIns Accurately Classified :  0 	 NumofIns :  38 	 Pre_Accuracy :  0.5 	 Accuracy :  0.5
#### In Cross Validation 3: ####
NumofIns Precisely Classified :  19 	 NumofIns Accurately Classified :  0 	 NumofIns :  38 	 Pre_Accuracy :  0.5 	 Accuracy :  0.5
#### In Cross Validation 4: ####
NumofIns Precisely Classified :  17 	 NumofIns Accurately Classified :  0 	 NumofIns :  38 	 Pre_Accuracy :  0.4473684210526316 	 Accuracy :  0.4473684210526316
#### In Cross Validation 5: ####
NumofIns Precisely Classified :  19 	 NumofIns Accurately Classified :  0 	 NumofIns :  38 	 Pre_Accuracy :  0.

In [38]:
XGB_R = XGBRegressor(#booster = 'gblinear',
                   #objective='multi:softmax',
    #num_class=7,#必须要考虑到0的情况。这个数据集里面没有零
    n_estimators=200,
    max_depth=4,
    min_child_weight = 5,
    scale_pos_weight = 5,
    num_boost_round =5,
    max_delta_step=1000,
    alpha =2,
    eta=1
    #colsample_bytree=0.9
    #gamma=5,
    #process_type='update'
)

In [39]:
count_CV = 0
test_acc_record = []
test_pre_record = []

for train_index,test_index in kfold.split(raw_feature_normalized, raw_label):
    XGB_R.fit(raw_feature_normalized[train_index], raw_label[train_index])
    pred_smile_label = XGB_R.predict(raw_feature_normalized[test_index])
    real_label = raw_label[test_index]
    
    test_count_num = 0
    real_label_index = 0
    acc_label_num = 0
    pre_label_num = 0
    
    for label in pred_smile_label:
        if (label - real_label[real_label_index])*(label - real_label[real_label_index]) <= 0.5:
            pre_label_num += 1
        elif (label - real_label[real_label_index])*(label - real_label[real_label_index]) <= 1:
            acc_label_num += 1
        real_label_index += 1
        test_count_num += 1
    
    print('#### In Cross Validation %d: ####'% count_CV)
    count_CV += 1
    print('NumofIns Precisely Classified : ',pre_label_num,'\t',
          'NumofIns Accurately Classified : ',acc_label_num,'\t',
          'NumofIns : ',test_count_num,'\t',
          'Pre_Accuracy : ',pre_label_num/test_count_num,'\t',
          'Accuracy : ',(pre_label_num + acc_label_num)/test_count_num)
    
    test_pre_record.append(pre_label_num/test_count_num)
    test_acc_record.append((pre_label_num + acc_label_num)/test_count_num)

print('mean of NumofIns precisely classified',numpy.mean(test_pre_record))
print('mean of NumofIns accurately classified',numpy.mean(test_acc_record))

#### In Cross Validation 0: ####
NumofIns Precisely Classified :  9 	 NumofIns Accurately Classified :  4 	 NumofIns :  40 	 Pre_Accuracy :  0.225 	 Accuracy :  0.325
#### In Cross Validation 1: ####
NumofIns Precisely Classified :  13 	 NumofIns Accurately Classified :  4 	 NumofIns :  40 	 Pre_Accuracy :  0.325 	 Accuracy :  0.425
#### In Cross Validation 2: ####
NumofIns Precisely Classified :  17 	 NumofIns Accurately Classified :  2 	 NumofIns :  38 	 Pre_Accuracy :  0.4473684210526316 	 Accuracy :  0.5
#### In Cross Validation 3: ####
NumofIns Precisely Classified :  11 	 NumofIns Accurately Classified :  4 	 NumofIns :  38 	 Pre_Accuracy :  0.2894736842105263 	 Accuracy :  0.39473684210526316
#### In Cross Validation 4: ####
NumofIns Precisely Classified :  7 	 NumofIns Accurately Classified :  3 	 NumofIns :  38 	 Pre_Accuracy :  0.18421052631578946 	 Accuracy :  0.2631578947368421
#### In Cross Validation 5: ####
NumofIns Precisely Classified :  12 	 NumofIns Accurately Classi

In [41]:
XGB_C = XGBClassifier(
    #booster = 'gblinear',
    #objective='multi:softmax',
    #num_class=7,#必须要考虑到0的情况。这个数据集里面没有零
    n_estimators=200,
    max_depth=4,
    min_child_weight = 5,
    scale_pos_weight = 5,
    num_boost_round =5,
    max_delta_step=1000,
    alpha =2,
    eta=1
    #colsample_bytree=0.9
    #gamma=5,
    #process_type='update'
)

In [42]:
count_CV = 0
test_acc_record = []
test_pre_record = []

for train_index,test_index in kfold.split(raw_feature_normalized, raw_label):
    XGB_C.fit(raw_feature_normalized[train_index], raw_label[train_index])
    pred_smile_label = XGB_C.predict(raw_feature_normalized[test_index])
    real_label = raw_label[test_index]
    
    test_count_num = 0
    real_label_index = 0
    acc_label_num = 0
    pre_label_num = 0
    
    for label in pred_smile_label:
        if label == real_label[real_label_index]:
            pre_label_num += 1
        elif (label - real_label[real_label_index])*(label - real_label[real_label_index]) == 25:
            acc_label_num += 1
        real_label_index += 1
        test_count_num += 1
    
    print('#### In Cross Validation %d: ####'% count_CV)
    count_CV += 1
    print('NumofIns Precisely Classified : ',pre_label_num,'\t',
          'NumofIns Accurately Classified : ',acc_label_num,'\t',
          'NumofIns : ',test_count_num,'\t',
          'Pre_Accuracy : ',pre_label_num/test_count_num,'\t',
          'Accuracy : ',(pre_label_num + acc_label_num)/test_count_num)
    
    test_pre_record.append(pre_label_num/test_count_num)
    test_acc_record.append((pre_label_num + acc_label_num)/test_count_num)

print('mean of NumofIns precisely classified',numpy.mean(test_pre_record))
print('mean of NumofIns accurately classified',numpy.mean(test_acc_record))

  if diff:
  if diff:


#### In Cross Validation 0: ####
NumofIns Precisely Classified :  16 	 NumofIns Accurately Classified :  0 	 NumofIns :  40 	 Pre_Accuracy :  0.4 	 Accuracy :  0.4
#### In Cross Validation 1: ####
NumofIns Precisely Classified :  13 	 NumofIns Accurately Classified :  0 	 NumofIns :  40 	 Pre_Accuracy :  0.325 	 Accuracy :  0.325


  if diff:
  if diff:


#### In Cross Validation 2: ####
NumofIns Precisely Classified :  18 	 NumofIns Accurately Classified :  0 	 NumofIns :  38 	 Pre_Accuracy :  0.47368421052631576 	 Accuracy :  0.47368421052631576
#### In Cross Validation 3: ####
NumofIns Precisely Classified :  18 	 NumofIns Accurately Classified :  0 	 NumofIns :  38 	 Pre_Accuracy :  0.47368421052631576 	 Accuracy :  0.47368421052631576


  if diff:
  if diff:


#### In Cross Validation 4: ####
NumofIns Precisely Classified :  17 	 NumofIns Accurately Classified :  0 	 NumofIns :  38 	 Pre_Accuracy :  0.4473684210526316 	 Accuracy :  0.4473684210526316
#### In Cross Validation 5: ####
NumofIns Precisely Classified :  16 	 NumofIns Accurately Classified :  0 	 NumofIns :  38 	 Pre_Accuracy :  0.42105263157894735 	 Accuracy :  0.42105263157894735


  if diff:
  if diff:


#### In Cross Validation 6: ####
NumofIns Precisely Classified :  16 	 NumofIns Accurately Classified :  0 	 NumofIns :  37 	 Pre_Accuracy :  0.43243243243243246 	 Accuracy :  0.43243243243243246
#### In Cross Validation 7: ####
NumofIns Precisely Classified :  15 	 NumofIns Accurately Classified :  0 	 NumofIns :  37 	 Pre_Accuracy :  0.40540540540540543 	 Accuracy :  0.40540540540540543
#### In Cross Validation 8: ####
NumofIns Precisely Classified :  14 	 NumofIns Accurately Classified :  0 	 NumofIns :  37 	 Pre_Accuracy :  0.3783783783783784 	 Accuracy :  0.3783783783783784
#### In Cross Validation 9: ####
NumofIns Precisely Classified :  16 	 NumofIns Accurately Classified :  0 	 NumofIns :  37 	 Pre_Accuracy :  0.43243243243243246 	 Accuracy :  0.43243243243243246
mean of NumofIns precisely classified 0.4189438122332859
mean of NumofIns accurately classified 0.4189438122332859


  if diff:
  if diff:


In [43]:
GBR = GradientBoostingRegressor(n_estimators=100,learning_rate= 0.01, loss= 'ls',max_depth=3 )

In [45]:
count_CV = 0
test_acc_record = []
test_pre_record = []

for train_index,test_index in kfold.split(raw_feature_normalized, raw_label):
    GBR.fit(raw_feature_normalized[train_index], raw_label[train_index])
    pred_smile_label = GBR.predict(raw_feature_normalized[test_index])
    real_label = raw_label[test_index]

    test_count_num = 0
    real_label_index = 0
    acc_label_num = 0
    pre_label_num = 0
    
    for label in pred_smile_label:
        if (label - real_label[real_label_index])*(label - real_label[real_label_index]) <= 0.5:
            pre_label_num += 1
        elif (label - real_label[real_label_index])*(label - real_label[real_label_index]) <= 1:
            acc_label_num += 1
        real_label_index += 1
        test_count_num += 1
    
    print('#### In Cross Validation %d: ####'% count_CV)
    count_CV += 1
    print('NumofIns Precisely Classified : ',pre_label_num,'\t',
          'NumofIns Accurately Classified : ',acc_label_num,'\t',
          'NumofIns : ',test_count_num,'\t',
          'Pre_Accuracy : ',pre_label_num/test_count_num,'\t',
          'Accuracy : ',(pre_label_num + acc_label_num)/test_count_num)
    
    test_pre_record.append(pre_label_num/test_count_num)
    test_acc_record.append((pre_label_num + acc_label_num)/test_count_num)

print('mean of NumofIns precisely classified',numpy.mean(test_pre_record))
print('mean of NumofIns accurately classified',numpy.mean(test_acc_record))

#### In Cross Validation 0: ####
NumofIns Precisely Classified :  5 	 NumofIns Accurately Classified :  1 	 NumofIns :  40 	 Pre_Accuracy :  0.125 	 Accuracy :  0.15
#### In Cross Validation 1: ####
NumofIns Precisely Classified :  3 	 NumofIns Accurately Classified :  4 	 NumofIns :  40 	 Pre_Accuracy :  0.075 	 Accuracy :  0.175
#### In Cross Validation 2: ####
NumofIns Precisely Classified :  4 	 NumofIns Accurately Classified :  2 	 NumofIns :  38 	 Pre_Accuracy :  0.10526315789473684 	 Accuracy :  0.15789473684210525
#### In Cross Validation 3: ####
NumofIns Precisely Classified :  4 	 NumofIns Accurately Classified :  1 	 NumofIns :  38 	 Pre_Accuracy :  0.10526315789473684 	 Accuracy :  0.13157894736842105
#### In Cross Validation 4: ####
NumofIns Precisely Classified :  3 	 NumofIns Accurately Classified :  2 	 NumofIns :  38 	 Pre_Accuracy :  0.07894736842105263 	 Accuracy :  0.13157894736842105
#### In Cross Validation 5: ####
NumofIns Precisely Classified :  5 	 NumofIns Acc