In [None]:
import os

# Set TF_CPP_MIN_LOG_LEVEL before TensorFlow is imported so that the native
# logger sees it from its first message; '2' hides INFO and WARNING output.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf

# Also drop Python-side TensorFlow log records below ERROR.
tf.logging.set_verbosity(tf.logging.ERROR)
print(tf.__version__)
import numpy as np
from sklearn.decomposition import PCA

import pandas as pd
from collections import Counter
from sklearn.model_selection import train_test_split
import pickle   #对象存储用

from sklearn import preprocessing

import matplotlib.pyplot as plt
import matplotlib
import math

%matplotlib inline
# Use the SimHei font for sans-serif text (presumably so CJK characters in
# plot labels render — confirm the font is installed on the machine).
matplotlib.rcParams['font.sans-serif'] = ['SimHei']
# Paths of the two UNSW-NB15 CSV partitions, relative to the working directory.
dataroot_1 = "./UNSW-NB15 - CSV Files/a part of training and testing set/UNSW_NB15_training-set.csv"
dataroot_2 = "./UNSW-NB15 - CSV Files/a part of training and testing set/UNSW_NB15_testing-set.csv"

1.13.1


In [None]:
# Class names in label-id order: the position of a name in this list is the
# integer id used for that class elsewhere in the notebook.
labelList = [
    "Normal",
    "Fuzzers",
    "Analysis",
    "Backdoor",
    "DoS",
    "Exploits",
    "Generic",
    "Reconnaissance",
    "Shellcode",
    "Worms",
]
# Names of the generated score features: one "<class>_Score" column per class.
distribution_feature = [class_name + "_Score" for class_name in labelList]
def load_obj(name):
    """Unpickle and return the object stored in ``./<name>.pkl``.

    NOTE(review): ``pickle.load`` can execute arbitrary code, so only load
    files that come from a trusted source.
    """
    pickle_path = ''.join(('./', name, '.pkl'))
    with open(pickle_path, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded

# Per-class distribution dictionaries keyed by class name, each unpickled
# from "./<stem>.pkl". Note that the stem for Normal starts with a lower-case
# "n", unlike the others.
distributions = {
    'Normal': load_obj("normal_Distribution_set"),
    'Fuzzers': load_obj("Fuzzers_Distribution_set"),
    'Analysis': load_obj("Analysis_Distribution_set"),
    'Backdoor': load_obj("Backdoor_Distribution_set"),
    'DoS': load_obj("DoS_Distribution_set"),
    'Exploits': load_obj("Exploits_Distribution_set"),
    'Generic': load_obj("Generic_Distribution_set"),
    'Reconnaissance': load_obj("Reconnaissance_Distribution_set"),
    'Shellcode': load_obj("Shellcode_Distribution_set"),
    'Worms': load_obj("Worms_Distribution_set"),
}


# Total number of known attack categories.
attack_types = 9
# Number of leading feature columns scored against the distribution
# dictionaries (get_score loops over range(feature_numbers)).
# NOTE(review): the original comment spoke of "the top 35 features", which
# does not match the value 23 — confirm which one is intended.
feature_numbers = 23


def calculate_dw(distribution_1, distribution_2, feature_index):
    """Difference weight of one feature between two distribution dictionaries.

    Each dictionary maps ``str(feature_index)`` to a histogram with the 20
    keys "0-5", "5-10", ..., "95-100". Both histograms are divided by their
    own totals and the result is half the sum of the absolute per-bin
    differences (0 for identical shapes, 1 for disjoint ones when the counts
    are non-negative).
    """
    total_1 = calculate_count(distribution_1, feature_index)
    total_2 = calculate_count(distribution_2, feature_index)
    histogram_1 = distribution_1[str(feature_index)]
    histogram_2 = distribution_2[str(feature_index)]
    bin_keys = [str(low) + '-' + str(low + 5) for low in range(0, 100, 5)]
    gaps = [
        abs(histogram_1[key] / total_1 - histogram_2[key] / total_2)
        for key in bin_keys
    ]
    return sum(gaps) / 2
def calculate_count(distribution, feature_index):
    """Return the number of instances recorded for one feature.

    This is the sum of the 20 bin counts stored under the keys "0-5",
    "5-10", ..., "95-100" of ``distribution[str(feature_index)]``.
    """
    histogram = distribution[str(feature_index)]
    return sum(
        histogram[str(low) + '-' + str(low + 5)]
        for low in range(0, 100, 5)
    )
# Memo for calculate_hdw, keyed by (feature_index, label). The value depends
# only on those two arguments and on the module-level `distributions`, so it
# is computed once instead of once per scored example. Call
# `_hdw_cache.clear()` if `distributions` is ever replaced or modified.
_hdw_cache = {}


def calculate_hdw(feature_index, label):
    """Hybrid difference weight of one feature for one class.

    Returns the largest ``calculate_dw`` value between the distribution of
    ``label`` and the distribution of every other class in ``distributions``
    for the given feature, or 0 when no pair differs.

    Args:
        feature_index: index of the feature (hashable, normally an int).
        label: key in ``distributions`` naming the class being scored.
    """
    cache_key = (feature_index, label)
    if cache_key not in _hdw_cache:
        base = 0
        for key in distributions:
            if key == label:
                continue
            dw = calculate_dw(distributions[label], distributions[key], feature_index)
            if dw > base:
                base = dw
        _hdw_cache[cache_key] = base
    return _hdw_cache[cache_key]


def get_score(example, distributeObj, label):
    """Score one example against the distribution dictionary of one class.

    For each of the first ``feature_numbers`` features, the example's value
    is mapped to one of 20 bins of width 0.05, and the fraction of the class's
    instances falling into that bin is looked up. The score is the sum of
    ``log(fraction) * calculate_hdw(feature, label) / feature_numbers``.
    A bin with fraction 0 uses the smallest non-zero fraction found for this
    example instead (or 1 when every fraction is 0).

    Args:
        example: indexable row of feature values; values are expected to lie
            in [0, 1] — presumably already normalized, verify against caller.
        distributeObj: distribution dictionary of the class.
        label: class name passed on to ``calculate_hdw``.
    """
    # The original notes that any feature gives the same total; feature 0 is used.
    total = calculate_count(distributeObj, 0)
    smallest = 1
    fractions = []
    for idx in range(feature_numbers):
        low = int(example[idx] / 0.05 // 1) * 5   # lower edge of the bin, in percent
        if low == 100:     # a value of exactly 1.0 belongs to the last bin
            low = 95
        bin_key = '{0}-{1}'.format(low, low + 5)
        fraction = distributeObj[str(idx)][bin_key] / total
        if fraction != 0 and fraction < smallest:
            smallest = fraction
        fractions.append(fraction)

    score = 0
    for idx, fraction in enumerate(fractions):
        effective = fraction if fraction != 0 else smallest
        score += math.log(effective) * calculate_hdw(idx, label) / feature_numbers
    return score

def get_score_threshold(dataset_x, dataset_y):
    """Compute one score threshold per class from labelled examples.

    Every example is scored against the distribution of its own class only.
    For each class, the scores are sorted in ascending order and the value at
    index ``len(scores) // 10`` is taken, so roughly one tenth of the class's
    own scores lie below the threshold.

    Args:
        dataset_x: iterable of feature rows, aligned with ``dataset_y``.
        dataset_y: object exposing ``.values`` (e.g. a pandas Series) holding
            the integer class id of each row.

    Returns:
        List of thresholds in ``labelList`` order.

    Raises:
        ValueError: if a class has no examples, since no threshold exists.
    """
    # Convert the labels once instead of re-reading them for every branch.
    label_ids = dataset_y.values.astype('int')
    scores = {name: [] for name in labelList}
    for i, item in enumerate(dataset_x):
        label_id = label_ids[i]
        # Ids outside the known range (e.g. the -1 that labels_map returns for
        # unknown names) are skipped; the range check also keeps a negative
        # id from wrapping around to the end of labelList.
        if 0 <= label_id < len(labelList):
            name = labelList[label_id]
            scores[name].append(get_score(item, distributions[name], name))

    thresholds = []
    # Iterate labelList explicitly so the order does not rely on dict ordering.
    for name in labelList:
        ranked = sorted(scores[name])
        if not ranked:
            raise ValueError(
                "no examples with label '{0}'; cannot derive its score threshold".format(name)
            )
        thresholds.append(ranked[len(ranked) // 10])   # splits the scores about 1:9
    return thresholds

def sigmoid(x):
    """Logistic function 1 / (1 + e**-x); works element-wise on NumPy arrays."""
    decay = np.exp(-x)
    return 1 / (1 + decay)

def fill_score(i, item, Score_revert, label, label_num, score_thresholds=None):
    """Write the normalized score of one example for one class into a table.

    Args:
        i: row label in ``Score_revert`` to write to.
        item: feature row passed to ``get_score``.
        Score_revert: DataFrame with a ``"<label>_Score"`` column.
        label: class name (key in ``distributions``).
        label_num: position of the class in the threshold list.
        score_thresholds: per-class thresholds; when omitted, the module-level
            ``thresholds`` is used, which is what the original always did.
    """
    if score_thresholds is None:
        score_thresholds = thresholds
    # Raw score of the example under this class's distribution.
    score = get_score(item, distributions[label], label)
    # Normalize: the sigmoid of the distance to the class threshold lies in (0, 1).
    score_norm = sigmoid(score - score_thresholds[label_num])
    Score_revert.loc[i, (label + '_Score')] = score_norm
    return


def create_distribution_score(dataset_x, thresholds):
    """Append one normalized distribution score per class to every row.

    Args:
        dataset_x: 2-D NumPy array of feature rows.
        thresholds: per-class thresholds in ``labelList`` order. This argument
            is now the one actually used; previously the module-level
            ``thresholds`` was read regardless of what was passed in.

    Returns:
        ``dataset_x`` with ``len(distribution_feature)`` extra columns, in
        ``distribution_feature`` order.
    """
    column_of = {name: idx for idx, name in enumerate(distribution_feature)}
    # Fill a plain array: per-cell DataFrame.loc writes are far slower.
    scores = np.zeros(shape=(dataset_x.shape[0], len(distribution_feature)))
    for row_idx, item in enumerate(dataset_x):
        for label_num, label in enumerate(labelList):
            raw_score = get_score(item, distributions[label], label)
            scores[row_idx, column_of[label + '_Score']] = sigmoid(
                raw_score - thresholds[label_num]
            )
    return np.hstack((dataset_x, scores))


In [None]:
def feature_selection(dataset):
    """Return the columns kept for modelling, in a fixed order.

    The result holds 23 flow features, three one-hot columns ('proto_udp',
    'service_-', 'service_dns') and the 'attack_cat' label as last column.
    """
    selected_columns = [
        'dur', 'dpkts', 'sbytes', 'dbytes', 'rate', 'sttl', 'dttl', 'sload', 'dload',
        'sloss', 'sinpkt', 'sjit', 'tcprtt', 'synack', 'ackdat', 'smean', 'dmean',
        'ct_srv_src', 'ct_state_ttl', 'ct_src_dport_ltm', 'ct_dst_sport_ltm',
        'ct_dst_src_ltm', 'ct_srv_dst',
        'proto_udp', 'service_-', 'service_dns',
        'attack_cat',
    ]
    return dataset[selected_columns]

def split_valid_from_train(train_dataset, valid_size, random_state=None):
    """Split a frame into a shuffled training part and a held-out part.

    Args:
        train_dataset: DataFrame to split.
        valid_size: passed to ``train_test_split`` as ``test_size``.
        random_state: optional seed that makes both the split and the row
            order reproducible. The default ``None`` keeps the previous
            non-deterministic behaviour.

    Returns:
        ``(train_part, valid_part)``. The training part gets a fresh 0..n-1
        index and is then reordered, so its index labels appear permuted; the
        held-out part keeps the index labels it had in ``train_dataset``.
    """
    train_part, valid_part = train_test_split(
        train_dataset, test_size=valid_size, random_state=random_state
    )
    # Reset the index first, then permute the rows: otherwise only the row
    # order would change while the old index labels stayed attached.
    train_part = train_part.reset_index(drop=True)
    # One permutation is as random as the former ten consecutive shuffles.
    if random_state is None:
        order = np.random.permutation(len(train_part))
    else:
        order = np.random.RandomState(random_state).permutation(len(train_part))
    train_part = train_part.iloc[order]

    return train_part, valid_part

def shuffle(dataset):
    """Return the rows of ``dataset`` in random order with a fresh 0..n-1 index."""
    order = np.arange(len(dataset))
    # Ten in-place passes over the same permutation, as in the original.
    for _ in range(10):
        np.random.shuffle(order)
    return dataset.iloc[order].reset_index(drop=True)

def filter_useless_feature(dataset):
    """Return a copy of ``dataset`` without the 'label' and 'id' columns."""
    without_label = dataset.drop(['label'], axis=1)
    return without_label.drop(['id'], axis=1)

def filter_labels(dataset):
    """Replace the 'attack_cat' column by its integer class ids.

    The column is overwritten on the frame that was passed in, and that same
    frame is returned.
    """
    encoded = dataset['attack_cat'].apply(labels_map)
    dataset['attack_cat'] = encoded
    return dataset

# Class name -> integer id used for 'attack_cat'.
_LABEL_IDS = {
    'Normal': 0,
    'Fuzzers': 1,
    'Analysis': 2,
    'Backdoor': 3,
    'DoS': 4,
    'Exploits': 5,
    'Generic': 6,
    'Reconnaissance': 7,
    'Shellcode': 8,
    'Worms': 9,
}


def labels_map(label):
    """Map an attack category to its integer class id.

    Args:
        label: class name, or an integer id that is already encoded.

    Returns:
        ``label`` unchanged when it is an integer (Python ``int`` or a NumPy
        integer such as ``np.int64``), the id of a known class name, or -1
        for anything else.
    """
    # NumPy integers count as "already encoded" too; the previous
    # `type(label) == int` test turned them into -1. bool is excluded because
    # it subclasses int but is not a class id.
    if isinstance(label, (int, np.integer)) and not isinstance(label, bool):
        return label
    if not isinstance(label, str):
        return -1
    return _LABEL_IDS.get(label, -1)

def labels_separate(dataset):
    """Split a frame into a feature matrix and an integer label series.

    Args:
        dataset: DataFrame containing an 'attack_cat' column.

    Returns:
        ``(x_, y_)`` where ``x_`` is the NumPy array of all columns except
        'attack_cat' and ``y_`` is 'attack_cat' cast to int.

    The input frame is left untouched. The previous in-place drop removed
    'attack_cat' from the caller's frame, so a second call on the same frame
    failed with a KeyError.
    """
    y_ = dataset['attack_cat'].astype('int')
    x_ = dataset.drop(['attack_cat'], axis=1).values
    return x_, y_


In [None]:
def one_hot_process(dataset):
    """One-hot encode the 'state', 'proto' and 'service' columns.

    For each of the three columns in that order, indicator columns named
    ``<column>_<value>`` are appended on the right and the source column is
    dropped. A new frame is returned.
    """
    for column in ('state', 'proto', 'service'):
        indicators = pd.get_dummies(dataset[column], prefix=column)
        dataset = pd.concat([dataset, indicators], axis=1)
        dataset = dataset.drop([column], axis=1)
    return dataset
def normalization(dataset):
    """Min-max scale the known numeric columns of ``dataset``.

    The frame is first cast to float (which yields a new frame). Every column
    that is listed in ``target_features`` is replaced by
    ``(x - min) / (max - min + epsilon)``; other columns are only cast.

    Returns:
        ``(scaled, norm_params)`` where ``norm_params`` has one column per
        target feature and three rows — mean, max, min — so the same scaling
        can be applied to new data later. Target features that are absent
        from ``dataset`` keep zeros.
    """
    epsilon = 0.0001   # keeps the denominator non-zero for constant columns
    dataset = dataset.astype('float')
    target_features = [
        'dur', 'spkts', 'dpkts', 'sbytes', 'dbytes', 'rate', 'sttl', 'dttl',
        'sload', 'dload', 'sloss', 'dloss', 'sinpkt', 'dinpkt', 'sjit', 'djit',
        'swin', 'stcpb', 'dtcpb', 'dwin', 'tcprtt', 'synack', 'ackdat',
        'smean', 'dmean', 'trans_depth', 'response_body_len', 'ct_srv_src',
        'ct_state_ttl', 'ct_dst_ltm', 'ct_src_dport_ltm', 'ct_dst_sport_ltm',
        'ct_dst_src_ltm', 'ct_ftp_cmd', 'ct_flw_http_mthd', 'ct_src_ltm',
        'ct_srv_dst',
    ]
    # Rows: mean, max, min.
    norm_params = pd.DataFrame(np.zeros([3, len(target_features)]), columns=target_features)

    for feature in dataset.columns.values.tolist():
        if feature not in target_features:
            continue
        column = dataset[feature]
        column_mean = column.mean()
        column_max = np.max(column)
        column_min = np.min(column)

        norm_params[feature] = [column_mean, column_max, column_min]
        dataset[feature] = (column - column_min) / (column_max - column_min + epsilon)
    return dataset, norm_params
    
def combine_dataset(trainDF, testDF):
    """Stack two frames row-wise and return them with their row counts.

    Returns:
        ``(stacked, (n_train_rows, n_test_rows))``; the original index labels
        are kept.
    """
    row_counts = (trainDF.shape[0], testDF.shape[0])
    stacked = pd.concat([trainDF, testDF], axis=0)
    return stacked, row_counts

def separate_dataset(comb, num_comb):
    """Cut a stacked frame back into its two parts by position.

    Args:
        comb: frame whose first rows belong to the first part.
        num_comb: ``(n_first, n_second)`` row counts.

    Returns:
        ``(first, second)``: the first ``n_first`` rows and the following
        ``n_second`` rows.
    """
    n_first, n_second = num_comb
    boundary = n_first + n_second
    return comb.iloc[:n_first], comb.iloc[n_first:boundary]

   
def data_process_full(trainDF, testDF):
    """One-hot encode and min-max scale the train and test frames together.

    Both frames are stacked, encoded and scaled as one table, then cut apart
    again by row count, so they share the same columns and scaling.
    NOTE(review): the scaling parameters are therefore computed over the test
    rows as well — confirm that this is intended.

    Returns:
        ``(trainDF, testDF, norm_params)``.
    """
    merged, row_counts = combine_dataset(trainDF, testDF)
    encoded = one_hot_process(merged)
    scaled, norm_params = normalization(encoded)
    train_part, test_part = separate_dataset(scaled, row_counts)
    return train_part, test_part, norm_params

In [None]:
# Read both CSV partitions; they are stacked into one table and re-split later.
df_train_1 = pd.read_csv(dataroot_1, header=0, low_memory=False)
df_train_2 = pd.read_csv(dataroot_2, header=0, low_memory=False)
# pd.concat gives the same result as DataFrame.append (index labels kept) and
# still exists in pandas 2.x, where DataFrame.append was removed.
dataset = pd.concat([df_train_1, df_train_2])

In [None]:
# Drop the 'label' and 'id' columns, then encode 'attack_cat' as integer ids.
# This cell cannot be run twice on the same `dataset`: the second run would
# find no 'label' column to drop.
dataset = filter_labels(filter_useless_feature(dataset))

In [None]:
def _sample_and_split(label_id, n_rows, with_replacement):
    """Draw a fixed-size sample of one class and split it 80/20.

    Args:
        label_id: integer value of 'attack_cat' selecting the class.
        n_rows: number of rows to draw (seeded with random_state=1).
        with_replacement: passed to ``DataFrame.sample`` as ``replace``.

    Returns:
        ``(sampled, train_part, test_part)``.
    """
    class_rows = dataset.loc[dataset['attack_cat'] == label_id]
    sampled = class_rows.sample(n=n_rows, random_state=1, replace=with_replacement)
    train_part, test_part = split_valid_from_train(sampled, 0.2)
    return sampled, train_part, test_part


# 90000 rows for the Normal class and 5000 for each attack class.
# NOTE(review): the classes drawn with replacement are split only after
# sampling, so a duplicated row can land in both the train and the test part.
normal_df, df_normal_train, df_normal_test = _sample_and_split(0, 90000, False)
Fuzzers_df, df_Fuzzers_train, df_Fuzzers_test = _sample_and_split(1, 5000, False)
Analysis_df, df_Analysis_train, df_Analysis_test = _sample_and_split(2, 5000, True)
Backdoor_df, df_Backdoor_train, df_Backdoor_test = _sample_and_split(3, 5000, True)
DoS_df, df_DoS_train, df_DoS_test = _sample_and_split(4, 5000, False)
Exploits_df, df_Exploits_train, df_Exploits_test = _sample_and_split(5, 5000, False)
Generic_df, df_Generic_train, df_Generic_test = _sample_and_split(6, 5000, False)
Reconnaissance_df, df_Reconnaissance_train, df_Reconnaissance_test = _sample_and_split(7, 5000, False)
Shellcode_df, df_Shellcode_train, df_Shellcode_test = _sample_and_split(8, 5000, True)
Worms_df, df_Worms_train, df_Worms_test = _sample_and_split(9, 5000, True)


In [None]:
# Merge the per-class parts into one training and one test frame.
# pd.concat gives the same result as DataFrame.append (index labels kept) and
# still exists in pandas 2.x, where DataFrame.append was removed.
df_train = pd.concat([
    df_normal_train, df_Fuzzers_train, df_Analysis_train, df_Backdoor_train,
    df_DoS_train, df_Exploits_train, df_Generic_train, df_Reconnaissance_train,
    df_Shellcode_train, df_Worms_train,
])
df_test = pd.concat([
    df_normal_test, df_Fuzzers_test, df_Analysis_test, df_Backdoor_test,
    df_DoS_test, df_Exploits_test, df_Generic_test, df_Reconnaissance_test,
    df_Shellcode_test, df_Worms_test,
])


In [None]:
# One-hot encode the categorical columns and min-max normalize the numeric
# ones (the original comment said "regularization"; the step is normalization).
df_train, df_test, norm_params = data_process_full(
    df_train, df_test
)

In [None]:
# Feature selection: keep only the columns listed in feature_selection.
df_train = feature_selection(df_train)
df_test = feature_selection(df_test)

In [None]:
# Separate the labels: X is the feature matrix, y the integer class ids.
df_train_X, df_train_y = labels_separate(df_train)
df_test_X, df_test_y = labels_separate(df_test)

In [None]:
# Per-class score thresholds, derived from the training rows only.
thresholds = get_score_threshold(df_train_X, df_train_y)

In [None]:
# Show the thresholds, one value per class.
print(thresholds)

In [None]:
# Append the per-class distribution scores to the training features.
# Re-running this cell would append the score columns a second time.
df_train_X = create_distribution_score(df_train_X, thresholds)

In [None]:
# Append the same score columns to the test features, using the thresholds
# computed from the training rows.
df_test_X = create_distribution_score(df_test_X, thresholds)

In [None]:
# Cast both label series to int.
df_train_y = df_train_y.astype('int')
df_test_y = df_test_y.astype('int')

In [None]:
# Sanity check: shapes of the feature matrices and of the training labels.
print(df_train_X.shape)
print(df_test_X.shape)
print(df_train_y.shape)

In [None]:
import tensorflow as tf
from tensorflow import keras
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.metrics import classification_report
from tensorflow.keras.layers import BatchNormalization
# Hyperparameters
learning_rate = 0.001
epochs = 30
batch_size = 256
activation = tf.nn.relu         # activation of every layer except the output layer

In [None]:
class LossHistory(keras.callbacks.Callback):
    """Keras callback that records the losses reported during training.

    Attributes (reset at the start of every ``fit`` call):
        train_losses_per_batch: 'loss' reported at the end of each batch.
        train_losses_per_epoch: 'loss' reported at the end of each epoch.
        valid_losses_per_epoch: 'val_loss' reported at the end of each epoch.
    """

    def on_train_begin(self, logs=None):
        self.train_losses_per_batch = []
        self.train_losses_per_epoch = []
        self.valid_losses_per_epoch = []

    def on_batch_end(self, batch, logs=None):
        logs = logs or {}
        self.train_losses_per_batch.append(logs.get('loss'))

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        self.train_losses_per_epoch.append(logs.get('loss'))
        # 'val_loss' is already a mean over the validation samples, so it is
        # on the same scale as 'loss' whatever the size of the validation
        # set. It was previously multiplied by 4, which inflated the curve
        # (and raised TypeError when no validation data was given).
        self.valid_losses_per_epoch.append(logs.get('val_loss'))
class Metrics(keras.callbacks.Callback):
    """Keras callback recording macro recall, precision and F1 per epoch.

    After every epoch the current model predicts the module-level
    ``df_test_X``; the predictions are compared with ``df_test_y``.
    """

    def on_train_begin(self, logs={}):
        self.val_f1s = []
        self.val_recalls = []
        self.val_precisions = []

    def on_epoch_end(self, epoch, logs={}):
        class_probabilities = self.model.predict(df_test_X)
        # Index of the largest output = predicted class id.
        predicted = np.argmax(class_probabilities, axis=1)
        actual = df_test_y
        epoch_recall = recall_score(actual, predicted, average='macro')
        epoch_precision = precision_score(actual, predicted, average='macro')
        epoch_f1 = f1_score(actual, predicted, average='macro')
        self.val_f1s.append(epoch_f1)
        self.val_recalls.append(epoch_recall)
        self.val_precisions.append(epoch_precision)
# Callback: after each epoch, save the model to ./UNSW-NB15_model.h5 only if
# the monitored validation loss is the lowest seen so far.
checkpoint = keras.callbacks.ModelCheckpoint(      
    "./UNSW-NB15_model.h5",
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    mode='min',
    period=1
)
def simple_model():
    """Build, compile and train the dense classifier.

    Architecture: a Dense layer as wide as the input, then Dense layers of
    128, 64 and 32 units — each followed by BatchNormalization — and a
    10-way softmax output. Training uses the module-level training arrays,
    with the test arrays as validation data.

    Returns:
        ``(model, history, metrics)``: the trained model and the two callback
        objects holding the recorded losses and per-epoch metrics.
    """
    n_inputs = df_train_X.shape[1]

    model = keras.Sequential()
    model.add(keras.layers.Dense(n_inputs, activation=activation, input_shape=(n_inputs,)))
    model.add(BatchNormalization())
    for width in (128, 64, 32):
        model.add(keras.layers.Dense(width, activation=activation))
        model.add(BatchNormalization())
    model.add(keras.layers.Dense(10, activation=tf.nn.softmax))

    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    model.compile(
        optimizer=optimizer,
        loss="sparse_categorical_crossentropy",
        metrics=['accuracy']
    )
    model.summary()

    history = LossHistory()
    metrics = Metrics()
    model.fit(
        df_train_X, df_train_y,
        validation_data=[df_test_X, df_test_y],
        batch_size=batch_size,
        epochs=epochs,
        callbacks=[history, metrics, checkpoint]
    )
    return model, history, metrics


model, history, metrics = simple_model()

In [None]:
# Plot the training loss recorded after every batch.
plt.title('The Cost with batchs runs',fontsize=30)
plt.xlabel('batchs',fontsize=20)
plt.ylabel('Cost',fontsize=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.plot(history.train_losses_per_batch)
plt.gcf().set_size_inches(15,4)
plt.show()
# Plot the per-epoch training loss against the per-epoch validation loss.
plt.title('The Cost with epoches runs',fontsize=30)
plt.xlabel('epoch',fontsize=20)
plt.ylabel('Cost',fontsize=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.plot(history.train_losses_per_epoch, '-o', label='train')
plt.plot(history.valid_losses_per_epoch, '-o', label='valid')
plt.legend(fontsize=30,loc='upper right')
plt.gcf().set_size_inches(15,4)
plt.show()

In [None]:
# Reload the model saved by the checkpoint callback and compile it again.
# NOTE(review): the original comment said "highest validation accuracy", but
# the checkpoint callback monitors val_loss (mode='min').
model = keras.models.load_model('./UNSW-NB15_model.h5') 
model.compile(optimizer =tf.train.AdamOptimizer(learning_rate=learning_rate),
    loss="sparse_categorical_crossentropy",
    metrics=['accuracy']
)
def use_evaluate_test():
    """Evaluate the module-level ``model`` on the test set and print its accuracy."""
    _, test_acc = model.evaluate(df_test_X, df_test_y)
    print('Test accuracy:', test_acc)


use_evaluate_test()

In [None]:
# Per-class precision / recall / F1 of the neural network on the test set;
# argmax turns the softmax outputs into class ids.
predictions = model.predict(df_test_X)
print(classification_report(df_test_y, np.argmax(predictions, axis=1) , target_names=labelList))

In [None]:
##############

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier


# Fit a decision tree (entropy criterion, at least 3 samples per leaf).
clf = DecisionTreeClassifier(criterion='entropy', min_samples_leaf=3)
dt_clf = clf.fit(df_train_X, df_train_y)

In [None]:
# Predict the test set with the fitted tree and print its accuracy.
predictions = dt_clf.predict(df_test_X)
print(predictions)
print(accuracy_score(df_test_y, predictions))

In [None]:
# Fit a gini decision tree five times; whenever a run beats the best test
# accuracy seen so far, print that accuracy and the per-class report.
# NOTE(review): no random_state is set, so the printed runs are not
# reproducible, and the best run is chosen on the test set itself.
score = 0
for i in range(5):
    clf = DecisionTreeClassifier(criterion='gini', min_samples_leaf=3)
    clf.fit(df_train_X, df_train_y)
    y_pred = clf.predict(df_test_X)
    result = clf.score(df_test_X, df_test_y)
    if(score < result):
        score = result
        print(score)
        print(classification_report(df_test_y, y_pred, target_names=labelList))
        

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Fit a default random forest five times; whenever a run beats the best test
# accuracy seen so far, print that accuracy and the per-class report.
# NOTE(review): no random_state is set, and the best run is chosen on the
# test set itself.
score = 0
for i in range(5):
    rfc = RandomForestClassifier()
    rfc.fit(df_train_X, df_train_y)
    y_pred = rfc.predict(df_test_X)
    result = rfc.score(df_test_X, df_test_y)
    if(score < result):
        score = result
        print(score)
        print(classification_report(df_test_y, y_pred, target_names=labelList))
        
# Printed above: test-set accuracy
# and the per-class test-set metrics.

In [None]:
from xgboost import XGBClassifier
# Fit a default XGBoost classifier five times; whenever a run beats the best
# test accuracy seen so far, print that accuracy and the per-class report.
# The classifier is created inside the loop only: the extra instance that
# used to be built before the loop was never used.
score = 0
for i in range(5):
    xgb = XGBClassifier()
    xgb.fit(df_train_X, df_train_y)
    y_pred = xgb.predict(df_test_X)
    result = xgb.score(df_test_X, df_test_y)
    if(score < result):
        score = result
        print(score)
        print(classification_report(df_test_y, y_pred, target_names=labelList))

# Printed above: test-set accuracy
# and the per-class test-set metrics.

In [None]:
from sklearn.svm import SVC
# Fit a support vector classifier with default settings on the training data.
clf = SVC()
clf.fit(df_train_X, df_train_y) 


In [None]:
# Predict the test set with the fitted SVC; print the predictions, the
# accuracy and the per-class report.
predictions = clf.predict(df_test_X)
print(predictions)
print(accuracy_score(df_test_y, predictions))
print(classification_report(df_test_y, predictions , target_names=labelList))

In [None]:
# from sklearn.ensemble import RandomForestClassifier
# score = 0
# for i in range(5):
#     rfc = RandomForestClassifier()
#     rfc.fit(T_train_X, df_train_y)
#     y_pred = rfc.predict(T_test_X)
#     result = rfc.score(T_test_X, df_test_y)
#     if(score < result):
#         score = result
#         print(score)
#         print(classification_report(df_test_y, y_pred, target_names=labelList))
        
# #测试集的正确率
# #测试集的各个参数