In [1]:
import tensorflow as tf
import os
import pickle
import numpy as np
DataSet = "model_input.npy"  # dataset file name (not referenced by any cell visible in this notebook)

In [2]:
def load_data(filename):
    """Load (sample, label) records from a .npy file.

    Each record in the stored array is indexed as record[0] (the sample)
    and record[1] (its label).

    Args:
        filename: path handed to np.load.

    Returns:
        A tuple (samples, labels, count): two Python lists of equal length
        and the number of records.

    Note:
        allow_pickle=True unpickles objects stored in the file, which can
        execute arbitrary code -- only load files from a trusted source.
    """
    records = np.load(filename, allow_pickle=True)
    samples = [record[0] for record in records]
    labels = [record[1] for record in records]
    return samples, labels, len(samples)
    
class CifarData:
    """In-memory dataset that serves (data, labels) mini-batches, optionally shuffled."""

    def __init__(self, filenames, need_shuffle):
        """Load one file through load_data and prepare it for batching.

        Args:
            filenames: a single path forwarded to load_data (despite the plural name).
            need_shuffle: when true, shuffle now and again each time the data
                is exhausted (intended for training sets).
        """
        samples, targets, count = load_data(filenames)
        self._size = count
        # Wrapping the sample list in another list makes vstack stack the
        # samples along a new leading axis; each sample is then flattened
        # into one row.
        stacked = np.vstack([samples])
        self._data = np.reshape(stacked, (len(stacked), -1))
        self._labels = np.hstack([targets])  # 1-D vector of labels

        self._num_examples = self._data.shape[0]  # number of rows available
        self._need_shuffle = need_shuffle
        self._indicator = 0  # start offset of the next batch
        if self._need_shuffle:
            self._shuffle_data()

    def getSize(self):
        """Return the record count reported by load_data."""
        return self._size

    def _shuffle_data(self):
        """Apply one random permutation to data rows and labels together."""
        order = np.random.permutation(self._num_examples)
        self._data, self._labels = self._data[order], self._labels[order]

    def next_batch(self, batch_size):
        """Return the next batch_size examples as (batch_data, batch_labels).

        When the request runs past the end of the data: a shuffling dataset
        reshuffles and restarts from offset 0 (the unread tail is skipped);
        a non-shuffling dataset raises.

        Raises:
            Exception: "have no more examples" when a non-shuffling dataset
                is exhausted; "batch size is larger than all examples" when
                batch_size exceeds the dataset size.
        """
        stop = self._indicator + batch_size
        if stop > self._num_examples:
            if not self._need_shuffle:
                raise Exception("have no more examples")
            # Start a new pass over freshly shuffled data.
            self._shuffle_data()
            self._indicator = 0
            stop = batch_size
        if stop > self._num_examples:
            # Still out of range after restarting: the batch cannot fit at all.
            raise Exception("batch size is larger than all examples")
        window = slice(self._indicator, stop)
        batch_data = self._data[window]
        batch_labels = self._labels[window]
        self._indicator = stop
        return batch_data, batch_labels
    
# Training-set loader, left disabled here:
# train_data = CifarData("train_file_1.npy", True)
# The test set is served in file order (no shuffling).
test_data = CifarData("数据包/test_file_1.npy", need_shuffle=False)

In [3]:
def conv_wrapper(inputs, name, output_channel=32,
                 kernel_size=(3, 3), strides=1, activation=tf.nn.relu):
    """Frozen convolution block: conv2d -> batch normalization -> activation.

    Args:
        inputs: input tensor for the convolution.
        name: name scope for the block; the conv layer is named name + '/conv2d'.
        output_channel: number of convolution filters.
        kernel_size: convolution kernel size.
        strides: convolution stride.
        activation: callable applied to the batch-normalized output.

    Returns:
        The activated output tensor.

    Both layers are created with trainable=False and batch normalization runs
    with training=False, so this block is set up for inference only.
    """
    with tf.name_scope(name):
        # No activation inside the conv layer: it is applied after batch norm.
        # NOTE(review): the conv uses data_format='channels_first' while
        # batch_normalization keeps its default axis -- confirm this matches
        # the training graph before changing either, since it affects the
        # variable shapes stored in the checkpoint.
        conv_out = tf.layers.conv2d(
            inputs=inputs,
            filters=output_channel,
            kernel_size=kernel_size,
            strides=strides,
            padding='same',
            activation=None,
            trainable=False,
            data_format='channels_first',
            name=name+'/conv2d',
        )
        normalized = tf.layers.batch_normalization(
            conv_out,
            training=False,
            trainable=False,
        )
        return activation(normalized)
        
def pooling_wrapper(inputs, name):
    """Max-pool `inputs` with a 2x2 window, stride 2, 'same' padding, channels_first layout.

    Args:
        inputs: input tensor.
        name: name given to the pooling layer.

    Returns:
        The pooled tensor.
    """
    window = (2, 2)
    return tf.layers.max_pooling2d(
        inputs,
        pool_size=window,
        strides=window,
        padding='same',
        data_format='channels_first',
        name=name,
    )


In [4]:
# Define the computation graph.
# NOTE: layers are created in a fixed order; auto-generated layer names depend on
# that order, so keep it unchanged if the checkpoint must remain restorable.
x = tf.placeholder(tf.float32, [None, 11264])  # flattened input, 11264 = 1*88*128
x_image=tf.reshape(x,[-1,1,88,128])  # reshape to [batch, 1, 88, 128]
# Move the size-1 axis to the end -> [batch, 88, 128, 1].
# NOTE(review): conv_wrapper/pooling_wrapper use data_format='channels_first', so
# after this transpose axis 1 (size 88) is what the layers treat as channels --
# confirm this matches how the checkpoint was trained before changing it.
x_image=tf.transpose(x_image,perm=[0,2,3,1])
y = tf.placeholder(tf.int64, [None])  # integer class labels, one per sample

# Block 1: three conv layers (default 32 filters) followed by max pooling.
conv1_1=conv_wrapper(x_image,'conv1_1')
conv1_2=conv_wrapper(conv1_1,'conv1_2')
conv1_3=conv_wrapper(conv1_2,'conv1_3')
pooling1=pooling_wrapper(conv1_3,'pool1')
# Block 2: three conv layers with 64 filters followed by max pooling.
conv2_1=conv_wrapper(pooling1,'conv2_1',output_channel=64)
conv2_2=conv_wrapper(conv2_1,'conv2_2',output_channel=64)
conv2_3=conv_wrapper(conv2_2,'conv2_3',output_channel=64)
pooling2=pooling_wrapper(conv2_3,'pool2')
# Block 3: three conv layers with 64 filters followed by max pooling.
conv3_1=conv_wrapper(pooling2,'conv3_1',output_channel=64)
conv3_2=conv_wrapper(conv3_1,'conv3_2',output_channel=64)
conv3_3=conv_wrapper(conv3_2,'conv3_3',output_channel=64)
pooling3=pooling_wrapper(conv3_3,'pool3')

# Flatten everything except the batch axis.
flatten=tf.layers.flatten(pooling3)
# Fully connected output layer producing 10 logits per sample ([?, 10]).
# NOTE(review): the evaluation cells allocate 40 class slots while only 10 logits
# are produced here -- confirm the intended number of classes.
logits=tf.layers.dense(flatten,10)

# Sparse softmax cross-entropy between the integer labels y and the logits.
loss=tf.losses.sparse_softmax_cross_entropy(labels=y,logits=logits)

predict = tf.argmax(logits,1)  # index of the largest logit along axis 1 = predicted class

correct_prediction = tf.equal(predict, y)  # element-wise: prediction equals label
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float64))  # mean of the 0/1 matches = accuracy

with tf.name_scope('train_op'):
    # Adam optimizer (learning rate 1e-3) minimizing the loss. The evaluation
    # cell visible in this notebook only runs `predict`, never this op.
    train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)


In [5]:
# Run the graph: restore the trained weights and classify the test set one
# sample at a time.
#   correct_result[t] collects [sample_id, predicted_label] for every sample whose true label is t.
#   predict_result[p] collects [sample_id, true_label] for every sample predicted as p.
# The comprehension variable is `_` so it cannot be confused with (or, under
# Python 2 scoping, overwrite) the placeholder `x` fed below.
correct_result=[[] for _ in range(40)]
predict_result=[[] for _ in range(40)]
saver = tf.train.Saver()
with tf.Session() as sess:
    init = tf.global_variables_initializer()  # initialise variables, then load checkpoint values
    sess.run(init)
    saver.restore(sess, "./VGGModel/VGG19999.ckpt")
    corr_cnt=0  # number of correctly classified samples
    for i in range(test_data.getSize()):
        datas,labels=test_data.next_batch(1)
        pre_val = sess.run(
            predict,
            feed_dict={
                x: datas,
                y: labels})
        # Both arrays hold exactly one element (batch size 1). Extract the
        # scalars once instead of relying on implicit size-1-array -> scalar
        # conversion in string formatting and comparisons.
        true_label=labels[0]
        pred_label=pre_val[0]
        print('ID:%d correct_label:%d predict_label:%d' % (i,true_label,pred_label))
        correct_result[true_label].append([i,pred_label])
        predict_result[pred_label].append([i,true_label])
        if true_label==pred_label:
            corr_cnt=corr_cnt+1
    print(corr_cnt)


INFO:tensorflow:Restoring parameters from ./VGGModel/VGG19999.ckpt
ID:0 correct_label:1 predict_label:1
ID:1 correct_label:1 predict_label:1
ID:2 correct_label:1 predict_label:1
ID:3 correct_label:1 predict_label:1
ID:4 correct_label:1 predict_label:1
ID:5 correct_label:2 predict_label:1
ID:6 correct_label:1 predict_label:1
ID:7 correct_label:1 predict_label:1
ID:8 correct_label:1 predict_label:1
ID:9 correct_label:1 predict_label:1
ID:10 correct_label:2 predict_label:1
ID:11 correct_label:1 predict_label:1
ID:12 correct_label:2 predict_label:1
ID:13 correct_label:1 predict_label:1
ID:14 correct_label:1 predict_label:1
ID:15 correct_label:1 predict_label:1
ID:16 correct_label:1 predict_label:1
ID:17 correct_label:1 predict_label:1
ID:18 correct_label:1 predict_label:1
ID:19 correct_label:1 predict_label:1
ID:20 correct_label:1 predict_label:1
ID:21 correct_label:1 predict_label:1
ID:22 correct_label:1 predict_label:1
ID:23 correct_label:1 predict_label:1
ID:24 correct_label:1 predict_l

ID:211 correct_label:1 predict_label:1
ID:212 correct_label:1 predict_label:1
ID:213 correct_label:2 predict_label:1
ID:214 correct_label:1 predict_label:1
ID:215 correct_label:1 predict_label:1
ID:216 correct_label:2 predict_label:1
ID:217 correct_label:1 predict_label:1
ID:218 correct_label:2 predict_label:1
ID:219 correct_label:2 predict_label:1
ID:220 correct_label:2 predict_label:1
ID:221 correct_label:1 predict_label:1
ID:222 correct_label:2 predict_label:1
ID:223 correct_label:2 predict_label:1
ID:224 correct_label:2 predict_label:1
ID:225 correct_label:1 predict_label:1
ID:226 correct_label:1 predict_label:1
ID:227 correct_label:1 predict_label:1
ID:228 correct_label:1 predict_label:1
ID:229 correct_label:2 predict_label:1
ID:230 correct_label:1 predict_label:1
ID:231 correct_label:2 predict_label:1
ID:232 correct_label:1 predict_label:1
ID:233 correct_label:0 predict_label:1
ID:234 correct_label:0 predict_label:1
ID:235 correct_label:1 predict_label:1
ID:236 correct_label:1 pr

In [23]:
# Protocol dictionary; the [()] index unwraps the 0-d object array that np.load returns.
dic=np.load("数据包/protocol_dictionary.npy",allow_pickle=True)[()]
sample_total=test_data.getSize()
print("数据报文数目：%d" % sample_total)
print("整体准确率：%.4f" % (corr_cnt/sample_total))
# Per-protocol TP / FN / FP, recall, precision and F1.
# NOTE(review): the i-th dictionary key is used as the display name of class i,
# which assumes key order matches the label ids -- confirm against the data preparation.
protocol_names=list(dic.keys())
for i in range(40):
    # correct_result[i]: [sample_id, predicted] pairs whose true label is i.
    TP=sum(1 for _, predicted in correct_result[i] if predicted==i)  # predicted correctly
    FN=sum(1 for _, predicted in correct_result[i] if predicted!=i)  # class i predicted as something else
    # predict_result[i]: [sample_id, actual] pairs that were predicted as i.
    FP=sum(1 for _, actual in predict_result[i] if actual!=i)  # something else predicted as class i
    if TP==0:
        # No true positives: print zeros directly and avoid dividing by zero.
        print("%-8s \tTP:0 \tFN:%d \tFP:%d \t召回率:0 \t精准度:0 \tf1值:0" % (protocol_names[i],FN,FP))
    else:
        recall=TP/(TP+FN)
        prec=TP/(TP+FP)
        f1=2*recall*prec/(recall+prec)
        print("%-8s \tTP:%d \tFN:%d \tFP:%d \t召回率:%.4f \t精准度:%.4f \tf1值:%.4f" % (protocol_names[i],TP,FN,FP,recall,prec,f1))

数据报文数目：249
整体准确率：0.7149
SSL      	TP:0 	FN:2 	FP:0 	召回率:0 	精准度:0 	f1值:0
TCP      	TP:178 	FN:0 	FP:71 	召回率:1.0000 	精准度:0.7149 	f1值:0.8337
TLSv1.2  	TP:0 	FN:69 	FP:0 	召回率:0 	精准度:0 	f1值:0
ARP      	TP:0 	FN:0 	FP:0 	召回率:0 	精准度:0 	f1值:0
NBNS     	TP:0 	FN:0 	FP:0 	召回率:0 	精准度:0 	f1值:0
LLMNR    	TP:0 	FN:0 	FP:0 	召回率:0 	精准度:0 	f1值:0
DNS      	TP:0 	FN:0 	FP:0 	召回率:0 	精准度:0 	f1值:0
NTP      	TP:0 	FN:0 	FP:0 	召回率:0 	精准度:0 	f1值:0
TLSv1    	TP:0 	FN:0 	FP:0 	召回率:0 	精准度:0 	f1值:0
UDP      	TP:0 	FN:0 	FP:0 	召回率:0 	精准度:0 	f1值:0
SNMP     	TP:0 	FN:0 	FP:0 	召回率:0 	精准度:0 	f1值:0
HTTP     	TP:0 	FN:0 	FP:0 	召回率:0 	精准度:0 	f1值:0
SSDP     	TP:0 	FN:0 	FP:0 	召回率:0 	精准度:0 	f1值:0
MDNS     	TP:0 	FN:0 	FP:0 	召回率:0 	精准度:0 	f1值:0
DHCPv6   	TP:0 	FN:0 	FP:0 	召回率:0 	精准度:0 	f1值:0
DB-LSP-DISC 	TP:0 	FN:0 	FP:0 	召回率:0 	精准度:0 	f1值:0
STP      	TP:0 	FN:0 	FP:0 	召回率:0 	精准度:0 	f1值:0
LLC      	TP:0 	FN:0 	FP:0 	召回率:0 	精准度:0 	f1值:0
BROWSER  	TP:0 	FN:0 	FP:0 	召回率:0 	精准度:0 	f1值:0
LLDP     	TP:0 	FN:0 	FP:0 	召回率:0 	精准度:0 	