In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn
from sklearn import tree
from sklearn.linear_model import LinearRegression
from scipy import stats
import pylab as pl
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, recall_score, precision_score
import math
from sklearn.datasets.samples_generator import make_blobs
from sklearn.svm import SVC

# English rendering of the summary string below:
#   Purpose: read the feature file and the label file, obtain training and test
#   sets through cross-validation, classify with KNN to get the accuracy, and
#   present the results as a confusion matrix.
#   Output: the confusion matrix, the accuracy of every split, and the average
#   accuracy over ten runs.
# NOTE(review): the summary does not match the code in this notebook — the
# evaluation cell instantiates tree.DecisionTreeClassifier (not KNN) and draws
# its splits with train_test_split(test_size=0.1), i.e. repeated random hold-out
# rather than k-fold cross-validation. The string is left untouched because it is
# a runtime expression whose value is echoed as this cell's output.
''' Summary of the file.

    功能：
        读取特征值文件和标签文件，通过交叉验证获得训练集和测试集，以KNN方法划分并获得准确率，通过混淆矩阵标示结果。

    输出：
        结果的混淆矩阵，每次划分的准确率，十次的平均准确率
'''



' Summary of the file.\n\n    功能：\n        读取特征值文件和标签文件，通过交叉验证获得训练集和测试集，以KNN方法划分并获得准确率，通过混淆矩阵标示结果。\n\n    输出：\n        结果的混淆矩阵，每次划分的准确率，十次的平均准确率\n'

In [2]:
# LBP_block: load the feature table and the class numbers.
# Both files are read with header=None, so their first row is treated as data
# and the columns receive default integer labels.
# NOTE(review): the file name suggests LBP k-means histogram features — confirm.
subfeatures = pd.read_csv(
    r'E:\test\process_lpb_block2\lbp_kmeans_hist_feature.csv',
    header=None,
)
labels = pd.read_csv(
    r'E:\test\process_lpb_block2\ClassNo_full.txt',
    header=None,
)

In [3]:
# Move the rows of family 10 (the family whose features were extracted first)
# to the end of the data set.
result = pd.concat([subfeatures, labels], axis=1, ignore_index=True)

# Pick the family-10 rows by their label (last column) instead of assuming they
# are exactly the first 1431 rows. This gives the same result when that
# assumption holds, but it also:
#   * stays correct if the row count or order of the input files changes, and
#   * makes the cell safe to re-run (after one run family 10 is no longer on
#     top, so slicing the first 1431 rows would duplicate other families and
#     drop family 10 altogether).
# It also avoids binding the name `list`, which shadowed the builtin for the
# rest of the kernel session.
is_family_10 = result.iloc[:, -1] == 10
result2 = result[is_family_10]
result = pd.concat([result[~is_family_10], result2], axis=0, ignore_index=True)

# The last column holds the class label; every column before it is a feature.
subfeatures = result.iloc[:, :-1]
labels = result.iloc[:, -1]

In [4]:

# Number of evaluation runs, and the running totals (accuracy, macro recall,
# macro precision) that the evaluation loop adds to — all starting at zero.
N = 10
avgscore = recallscore = precisionscore = 0





In [5]:
# Train and evaluate a decision tree N times, each time on a fresh random
# 90% / 10% train/test split (repeated random hold-out, not k-fold
# cross-validation), and add each run's metrics to the running totals.

# Start the totals from zero here so that re-running this cell does not add to
# the totals left behind by a previous run.
avgscore = 0
recallscore = 0
precisionscore = 0

# The print options do not change between runs, so set them once: a threshold
# of 10000 elements keeps the confusion matrix from being summarised with "...".
np.set_printoptions(threshold=10000, precision=2)

for i in range(N):
    # Hold out 10% of the samples for testing. Seeding with the run index makes
    # every run reproducible while still giving each run a different split.
    X_train, X_test, y_train, y_test = train_test_split(
        subfeatures, labels, test_size=0.1, random_state=i)

    # scikit-learn expects 1-D label arrays; ravel() also copes with labels
    # that are still a single-column DataFrame.
    y_train_flat = y_train.values.ravel()
    y_test_flat = y_test.values.ravel()

    # Training (an SVC with a linear or RBF kernel was tried here previously).
    print('train...')
    clf = tree.DecisionTreeClassifier(random_state=i)
    clf.fit(X_train.values, y_train_flat)

    # Prediction on the held-out samples.
    print('test...')
    c_test = clf.predict(X_test.values)

    # Accuracy is the fraction of correct predictions. It is computed from the
    # predictions made above rather than via clf.score, which would predict the
    # whole test set a second time.
    print('accurary...')
    score = float(np.mean(c_test == y_test_flat))
    print(classification_report(y_test_flat, c_test))
    avgscore += score
    recallscore += recall_score(y_test_flat, c_test, average="macro")
    precisionscore += precision_score(y_test_flat, c_test, average="macro")
    print(score)

    # Show the raw (unnormalised) confusion matrix for this run.
    cm = confusion_matrix(y_test_flat, c_test)
    print('Confusion matrix, without normalization')
    print(str(cm))



train...
test...
accurary...
              precision    recall  f1-score   support

           1       0.63      0.51      0.57       125
           2       0.90      0.90      0.90       143
           3       0.72      0.74      0.73       116
           4       0.88      0.82      0.85       138
           5       0.94      0.99      0.97       153
           6       0.93      0.99      0.96       130
           7       0.82      0.91      0.87       149
           8       1.00      0.99      0.99        70
           9       0.82      0.78      0.80       153
          10       0.88      0.92      0.90       138

   micro avg       0.85      0.85      0.85      1315
   macro avg       0.85      0.85      0.85      1315
weighted avg       0.85      0.85      0.85      1315

0.8539923954372624
Confusion matrix, without normalization
[[ 64   6  12   6   5   3   6   0  12  11]
 [  4 128   4   0   0   1   4   0   1   1]
 [  8   2  86   1   2   2  10   0   1   4]
 [  5   1   6 113   0   

test...
accurary...
              precision    recall  f1-score   support

           1       0.63      0.47      0.54       140
           2       0.87      0.84      0.86       147
           3       0.70      0.72      0.71       103
           4       0.88      0.83      0.86       154
           5       0.95      0.99      0.97       158
           6       0.96      0.99      0.97       116
           7       0.83      0.91      0.87       149
           8       1.00      0.96      0.98        82
           9       0.71      0.75      0.73       130
          10       0.85      0.93      0.89       136

   micro avg       0.84      0.84      0.84      1315
   macro avg       0.84      0.84      0.84      1315
weighted avg       0.83      0.84      0.84      1315

0.8395437262357415
Confusion matrix, without normalization
[[ 66  10  13   3   4   1  11   0  16  16]
 [ 11 124   5   0   1   1   2   0   2   1]
 [  4   2  74   3   2   0   3   0  10   5]
 [ 10   0   3 128   1   2   0   0

In [6]:
# Report the metrics averaged over the N runs.
# The averages are stored under new names and the accumulated totals are left
# untouched, so re-running this cell prints the same numbers instead of
# dividing the totals by N a second time.
mean_accuracy = avgscore / N
mean_recall = recallscore / N
mean_precision = precisionscore / N

print('avgscore....')
print(mean_accuracy)
# 1 - precision = FP / (TP + FP) is the false discovery rate; the false
# positive rate would be FP / (FP + TN), which is not what is computed here.
print('False discovery rate')
print(1 - mean_precision)
# 1 - recall = FN / (TP + FN) is the false negative rate (macro-averaged).
print('false negative rate')
print(1 - mean_recall)

avgscore....
0.8476045627376425
False positive rate
0.15519534073583263
false negative rate
0.1524216396430944
