In [2]:
import xlrd
import numpy as np
import pandas as pd
from xgboost.sklearn import XGBRegressor
from xgboost.sklearn import XGBClassifier
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

In [3]:
# Load the headerless CSV and replace the positional column labels 0..4
# with readable names. `rename` returns a new frame, so re-running this
# cell always starts again from the file on disk.
COLUMN_NAMES = {0: 'Ae', 1: 'Ap', 2: 'Rs', 3: 'Fz', 4: 'label'}
raw_df = pd.read_csv("./dataset/data_entropy.csv", header=None).rename(columns=COLUMN_NAMES)

In [4]:
# Split the loaded table into a feature matrix and a label vector, z-score
# the features column-wise, and build the cross-validation splitter.
raw_matrix = raw_df.values

# NOTE(review): `:-2` drops the last TWO columns. With the column names
# assigned at load time ('Ae', 'Ap', 'Rs', 'Fz', 'label') this leaves 'Fz'
# out of the feature set as well as 'label' -- confirm that is intended
# (`:-1` would keep it).
raw_feature = raw_matrix[:, :-2]
raw_label = raw_matrix[:, -1]

# Per-column statistics computed over all rows.
raw_feature_mean = np.mean(raw_feature, axis=0)
raw_feature_std = np.std(raw_feature, axis=0)
raw_feature_normalized = (raw_feature - raw_feature_mean) / raw_feature_std

# Row shuffling is left to the splitter (shuffle=True with a fixed seed), so
# features and labels stay aligned and the folds are reproducible.
kfold = StratifiedKFold(
    n_splits=10,
    shuffle=True,
    random_state=7,
)

In [5]:
# Gradient-boosted regression trees: 100 shallow trees (depth 3) with a small
# learning rate.
#
# `loss` is deliberately not passed. Squared error is the default loss in
# every scikit-learn release, whereas the explicit spelling 'ls' was
# deprecated in scikit-learn 1.0 (renamed 'squared_error') and removed in 1.2,
# so passing it makes this cell fail on current versions.
grd = GradientBoostingRegressor(n_estimators=100, learning_rate=0.01, max_depth=3)

In [6]:
# Logistic-regression classifier with the Newton-CG solver and a multinomial
# (softmax) formulation.
# NOTE(review): max_iter=20 is a small iteration budget -- check for
# ConvergenceWarning when fitting. Recent scikit-learn releases also deprecate
# the `multi_class` argument; confirm against the installed version.
lr = LogisticRegression(
    C=4,
    tol=1e-6,
    max_iter=20,
    solver='newton-cg',
    multi_class='multinomial',
)

In [7]:
# RBF-kernel support vector classifier with a one-vs-one decision function,
# the shrinking heuristic disabled, and a tight stopping tolerance.
SVM = SVC(
    C=20,
    kernel='rbf',
    tol=1e-6,
    shrinking=False,
    decision_function_shape='ovo',
)

In [8]:
# GBDT leaf encoding + logistic regression, evaluated with stratified K-fold CV.
#
# Steps performed by this cell:
#   1. Fit `grd` on the normalized features and, for every sample, take the
#      index of the leaf it falls into in each tree (`grd.apply`).
#   2. One-hot encode those leaf indices and append them to the normalized
#      features, giving `X_train_lr`.
#   3. Binarize the raw label (>= LABEL_THRESHOLD -> 1, otherwise 0).
#   4. For every fold of `kfold`, fit `lr` on the training rows, predict the
#      held-out rows, and record the confusion counts plus
#      accuracy / precision / recall / F1.
#
# NOTE(review): `grd` and `grd_enc` are fitted on ALL rows (including each
# fold's held-out rows and their labels) before the split, so the reported
# scores are probably optimistic. Fitting both inside the loop on the training
# rows only would remove that leakage; it is left unchanged here because
# `X_train_lr` is also used by later cells.

LABEL_THRESHOLD = 1.6  # raw labels >= this value form the positive class


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero.

    Keeps a fold with no predicted positives (or no actual positives) from
    aborting the whole evaluation with ZeroDivisionError.
    """
    return numerator / denominator if denominator else 0.0


count_CV = 0
test_acc_record = []  # not filled by this cell; kept so the name still exists afterwards
test_pre_record = []
accuracy_list = []
precision_list = []
recall_list = []
f1_list = []

grd.fit(raw_feature_normalized, raw_label)
grd_enc_rlt = grd.apply(raw_feature_normalized)

grd_enc = OneHotEncoder()
grd_enc.fit(grd_enc_rlt)

enc_onehot = grd_enc.transform(grd_enc_rlt).toarray()
X_train_lr = np.append(raw_feature_normalized, enc_onehot, axis=1)

# Binary target used for both stratification and classification.
accuracy_label = np.where(raw_label >= LABEL_THRESHOLD, 1, 0)

for train_index, test_index in kfold.split(X_train_lr, accuracy_label):
    lr.fit(X_train_lr[train_index], accuracy_label[train_index])
    pred_smile_label = lr.predict(X_train_lr[test_index])
    real_label = accuracy_label[test_index]

    # Confusion counts, keyed on the predicted class and whether it was right.
    predicted_pos = pred_smile_label == 1
    predicted_neg = pred_smile_label == 0
    correct = pred_smile_label == real_label

    TP_num = int(np.sum(predicted_pos & correct))
    TN_num = int(np.sum(predicted_neg & correct))
    FP_num = int(np.sum(predicted_pos & ~correct))
    FN_num = int(np.sum(predicted_neg & ~correct))

    pre_label_num = int(np.sum(correct))
    test_count_num = len(pred_smile_label)

    print('#### In Cross Validation %d: ####'% count_CV)
    count_CV += 1
    print('NumofIns Precisely Classified : ',pre_label_num,'\t',
          'NumofIns : ',test_count_num,'\t',
          'Pre_Accuracy : ',pre_label_num/test_count_num,'\t',)

    test_pre_record.append(pre_label_num/test_count_num)

    print("TP:", TP_num)
    print("FP:", FP_num)
    print("FN:", FN_num)
    print("TN:", TN_num)

    # Undefined ratios (zero denominator) are reported as 0.0 instead of raising.
    accuracy = _safe_ratio(TP_num + TN_num, TP_num + FP_num + FN_num + TN_num)
    precision = _safe_ratio(TP_num, TP_num + FP_num)
    recall = _safe_ratio(TP_num, TP_num + FN_num)
    f1 = _safe_ratio(2 * precision * recall, precision + recall)
    print(accuracy)
    print(precision)
    print(recall)
    print(f1)
    accuracy_list.append(accuracy)
    precision_list.append(precision)
    recall_list.append(recall)
    f1_list.append(f1)

# Averages over all folds.
print('mean of NumofIns precisely classified',np.mean(test_pre_record))
print('mean of accuracy',np.mean(accuracy_list))
print('mean of precision',np.mean(precision_list))
print('mean of recall',np.mean(recall_list))
print('mean of f1',np.mean(f1_list))

In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


#### In Cross Validation 0: ####
NumofIns Precisely Classified :  35 	 NumofIns :  39 	 Pre_Accuracy :  0.8974358974358975 	
TP: 31
FP: 4
FN: 0
TN: 4
0.8974358974358975
0.8857142857142857
1.0
0.9393939393939393
#### In Cross Validation 1: ####
NumofIns Precisely Classified :  35 	 NumofIns :  39 	 Pre_Accuracy :  0.8974358974358975 	
TP: 31
FP: 4
FN: 0
TN: 4
0.8974358974358975
0.8857142857142857
1.0
0.9393939393939393
#### In Cross Validation 2: ####
NumofIns Precisely Classified :  35 	 NumofIns :  39 	 Pre_Accuracy :  0.8974358974358975 	
TP: 30
FP: 3
FN: 1
TN: 5
0.8974358974358975
0.9090909090909091
0.967741935483871
0.9374999999999999
#### In Cross Validation 3: ####
NumofIns Precisely Classified :  28 	 NumofIns :  38 	 Pre_Accuracy :  0.7368421052631579 	
TP: 26
FP: 6
FN: 4
TN: 2
0.7368421052631579
0.8125
0.8666666666666667
0.8387096774193549




#### In Cross Validation 4: ####
NumofIns Precisely Classified :  30 	 NumofIns :  38 	 Pre_Accuracy :  0.7894736842105263 	
TP: 26
FP: 4
FN: 4
TN: 4
0.7894736842105263
0.8666666666666667
0.8666666666666667
0.8666666666666667
#### In Cross Validation 5: ####
NumofIns Precisely Classified :  33 	 NumofIns :  38 	 Pre_Accuracy :  0.868421052631579 	
TP: 28
FP: 3
FN: 2
TN: 5
0.868421052631579
0.9032258064516129
0.9333333333333333
0.9180327868852459
#### In Cross Validation 6: ####
NumofIns Precisely Classified :  33 	 NumofIns :  38 	 Pre_Accuracy :  0.868421052631579 	
TP: 29
FP: 4
FN: 1
TN: 4
0.868421052631579
0.8787878787878788
0.9666666666666667
0.9206349206349207
#### In Cross Validation 7: ####
NumofIns Precisely Classified :  32 	 NumofIns :  37 	 Pre_Accuracy :  0.8648648648648649 	
TP: 25
FP: 0
FN: 5
TN: 7
0.8648648648648649
1.0
0.8333333333333334
0.9090909090909091




#### In Cross Validation 8: ####
NumofIns Precisely Classified :  32 	 NumofIns :  37 	 Pre_Accuracy :  0.8648648648648649 	
TP: 29
FP: 4
FN: 1
TN: 3
0.8648648648648649
0.8787878787878788
0.9666666666666667
0.9206349206349207
#### In Cross Validation 9: ####
NumofIns Precisely Classified :  30 	 NumofIns :  37 	 Pre_Accuracy :  0.8108108108108109 	
TP: 26
FP: 3
FN: 4
TN: 4
0.8108108108108109
0.896551724137931
0.8666666666666667
0.8813559322033899
mean of NumofIns precisely classified 0.8496006127585074
mean of accuracy 0.8496006127585074
mean of precision 0.8917039435351448
mean of recall 0.9267741935483871
mean of f1 0.9071413692323287


In [12]:
# Take a fixed window of rows (indices 38 through 56, i.e. 19 samples) from
# the combined feature matrix.
# NOTE(review): the reason for choosing exactly these rows is not visible
# here -- confirm and document where the bounds come from.
PRED_ROWS = slice(38, 57)
pred_feature = X_train_lr[PRED_ROWS]

In [13]:
# Display (n_rows, n_columns) of the selected slice; the recorded run shows (19, 803).
pred_feature.shape

(19, 803)