In [1]:
import xlrd
import numpy as np
import pandas as pd
from xgboost.sklearn import XGBRegressor
from xgboost.sklearn import XGBClassifier
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

In [2]:
def re_cate(label):
    if label >37.2 and label < 37.6:
        return 1
    else:
        return 0

In [3]:
raw_df = pd.read_csv("./dataset/moment_table.csv", header = None)
raw_df.rename(columns = {0:'moment', 1:'flatness', 2:'distance'}, inplace = True)
raw_label = raw_df['distance'].as_matrix()
raw_df['distance'] = raw_df.apply(lambda x : re_cate(x.distance), axis = 1)
cls_label = raw_df['distance'].as_matrix()
raw_df.drop(columns = ['distance'], axis = 1, inplace = True)

  This is separate from the ipykernel package so we can avoid doing imports until
  """


In [4]:
raw_label

array([37.08, 37.1 , 37.12, 37.14, 37.16, 37.18, 37.2 , 37.22, 37.24,
       37.26, 37.28, 37.3 , 37.32, 37.34, 37.36, 37.38, 37.4 , 37.42,
       37.44, 37.46, 37.48, 37.5 , 37.52, 37.54, 37.56, 37.58, 37.6 ,
       37.62, 37.64, 37.08, 37.1 , 37.12, 37.14, 37.16, 37.18, 37.2 ,
       37.22, 37.24, 37.26, 37.28, 37.3 , 37.32, 37.34, 37.36, 37.38,
       37.4 , 37.42, 37.44, 37.46, 37.48, 37.5 , 37.52, 37.54, 37.56,
       37.58, 37.6 , 37.62, 37.64, 37.08, 37.1 , 37.12, 37.14, 37.16,
       37.18, 37.2 , 37.22, 37.24, 37.26, 37.28, 37.3 , 37.32, 37.34,
       37.36, 37.38, 37.4 , 37.42, 37.44, 37.46, 37.48, 37.5 , 37.52,
       37.54, 37.56, 37.58, 37.6 , 37.62, 37.64])

In [5]:
raw_feature = raw_df.as_matrix()
raw_feature_mean = raw_feature.mean(axis = 0)
raw_feature_std = raw_feature.std(axis = 0)
raw_feature_normalized = (raw_feature - raw_feature_mean)/raw_feature_std

  """Entry point for launching an IPython kernel.


In [6]:
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=7)

In [8]:
grd = GradientBoostingRegressor(n_estimators=100, learning_rate= 0.01, loss= 'ls', max_depth=3)
grd.fit(raw_feature_normalized, raw_label)
grd_enc_rlt = grd.apply(raw_feature_normalized)

grd_enc = OneHotEncoder()
grd_enc.fit(grd_enc_rlt)

enc_onehot = grd_enc.transform(grd_enc_rlt).toarray()
X_train_lr = np.append(raw_feature_normalized, enc_onehot, axis=1)

In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


In [11]:
lr = LogisticRegression(solver='newton-cg', multi_class='multinomial', C=4, tol=1e-6, max_iter=20)

In [12]:
SVM = SVC(kernel='rbf',decision_function_shape='ovo',C=20,shrinking =False,tol =1e-6)

In [17]:
count_CV = 0
test_acc_record = []
test_pre_record = []
accuracy_list = []
precision_list = []
recall_list = []
f1_list = []

for train_index,test_index in kfold.split(X_train_lr, cls_label):
    lr.fit(X_train_lr[train_index], cls_label[train_index])
    pred_smile_label = lr.predict(X_train_lr[test_index])
    real_label = cls_label[test_index]
    
    TP_num = 0
    FP_num = 0
    FN_num = 0
    TN_num = 0

    
    test_count_num = 0
    real_label_index = 0
    pre_label_num = 0
    
    for label in pred_smile_label:
        if label == real_label[real_label_index]:
            if label == 0:
                TN_num += 1
            if label == 1:
                TP_num += 1
            pre_label_num += 1
        else:
            if label == 0:
                FN_num += 1
            if label == 1:
                FP_num += 1
            
        real_label_index += 1
        test_count_num += 1
    
    print('#### In Cross Validation %d: ####'% count_CV)
    count_CV += 1
    print('NumofIns Precisely Classified : ',pre_label_num,'\t',
          'NumofIns : ',test_count_num,'\t',
          'Pre_Accuracy : ',pre_label_num/test_count_num,'\t',)
    
    test_pre_record.append(pre_label_num/test_count_num)
    FP_num += 1
    print("TP:", TP_num)
    print("FP:", FP_num)
    print("FN:", FN_num)
    print("TN:", TN_num)
    accuracy = (TP_num + TN_num)/(TP_num + FP_num + FN_num + TN_num)
    precision = TP_num/(TP_num + FP_num)
    recall = TP_num/(TP_num + FN_num)
    f1 = (2 * precision * recall)/(precision + recall)
    print(accuracy)
    print(precision)
    print(recall)
    print(f1)
    accuracy_list.append(accuracy)
    precision_list.append(precision)
    recall_list.append(recall)
    f1_list.append(f1)
    
print('mean of NumofIns precisely classified',np.mean(test_pre_record))
print('mean of accuracy',np.mean(accuracy_list))
print('mean of precision',np.mean(precision_list))
print('mean of recall',np.mean(recall_list))
print('mean of f1',np.mean(f1_list))

#### In Cross Validation 0: ####
NumofIns Precisely Classified :  9 	 NumofIns :  9 	 Pre_Accuracy :  1.0 	
TP: 6
FP: 1
FN: 0
TN: 3
0.9
0.8571428571428571
1.0
0.923076923076923
#### In Cross Validation 1: ####
NumofIns Precisely Classified :  9 	 NumofIns :  9 	 Pre_Accuracy :  1.0 	
TP: 6
FP: 1
FN: 0
TN: 3
0.9
0.8571428571428571
1.0
0.923076923076923
#### In Cross Validation 2: ####
NumofIns Precisely Classified :  9 	 NumofIns :  9 	 Pre_Accuracy :  1.0 	
TP: 6
FP: 1
FN: 0
TN: 3
0.9
0.8571428571428571
1.0
0.923076923076923
#### In Cross Validation 3: ####
NumofIns Precisely Classified :  9 	 NumofIns :  9 	 Pre_Accuracy :  1.0 	
TP: 6
FP: 1
FN: 0
TN: 3
0.9
0.8571428571428571
1.0
0.923076923076923
#### In Cross Validation 4: ####
NumofIns Precisely Classified :  9 	 NumofIns :  9 	 Pre_Accuracy :  1.0 	
TP: 6
FP: 1
FN: 0
TN: 3
0.9
0.8571428571428571
1.0
0.923076923076923
#### In Cross Validation 5: ####
NumofIns Precisely Classified :  9 	 NumofIns :  9 	 Pre_Accuracy :  1.0 	
TP: 6
F