In [None]:
# Install the Weights & Biases client (imported further down as `wandb`).
!pip install wandb

In [None]:
# Change into the project directory (relative path) and print the GPU status of this runtime.
%cd drive/MyDrive/TOR_classfication/
!nvidia-smi

/content/drive/MyDrive/TOR_classfication
Mon May 31 04:17:47 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.19.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   35C    P0    24W / 300W |      0MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+----------------------------------------------

In [None]:
import pandas as pd
import pickle
import torch
from torch.utils.data import TensorDataset, DataLoader
from tqdm import tqdm
from torch import nn
from torch.nn import functional as F
import wandb
import copy
import math
from torch.optim.lr_scheduler import ReduceLROnPlateau

In [None]:
# SECURITY: never store an API key in a notebook. The key that used to be
# hard-coded on this line is exposed to every reader of the file and should be
# revoked in the W&B account settings.
# wandb.login() uses the WANDB_API_KEY environment variable (or previously saved
# credentials) and otherwise prompts for the key interactively.
wandb.login()

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [None]:
class CausalConv1d(nn.Module):
    """
    Dilated causal 1-D convolution.

    The wrapped ``nn.Conv1d`` pads ``(kernel_size - 1) * dilation`` positions on
    both sides; ``forward`` then drops the surplus trailing outputs. With
    ``stride=1`` the output length therefore equals the input length.
    The trimming formula is only meant for ``stride`` 1 or 2; for other strides
    the length relation may not hold.
    """

    def __init__(self, in_size, out_size, kernel_size, stride=1, dilation=1):
        """
        :param in_size: int, number of input channels
        :param out_size: int, number of output channels
        :param kernel_size: int, convolution kernel size
        :param stride: int, convolution stride
        :param dilation: int, dilation rate
        """
        super(CausalConv1d, self).__init__()
        # Padding applied on both sides by nn.Conv1d.
        self.pad = (kernel_size - 1) * dilation
        # Number of trailing output positions to cut off in forward().
        self.bias = self.pad // stride
        self.conv1 = nn.Conv1d(in_size, out_size, kernel_size, padding=self.pad, stride=stride, dilation=dilation)

    def forward(self, x):
        """Apply the padded convolution and trim the trailing ``self.bias`` positions."""
        x = self.conv1(x)
        # ``x[..., :-0]`` would be an EMPTY slice, so only trim when there is
        # something to remove (e.g. kernel_size=1 needs no trimming at all).
        if self.bias == 0:
            return x
        return x[..., :-self.bias]


class Residual(nn.Module):
    """Residual block made of four causal convolutions (dilations 1, 2, 4, 8).

    The block contains two skip connections: the first adds the (optionally
    1x1-projected) input to the output of convolutions 1-2, the second adds
    that intermediate result to the output of convolutions 4-5.
    """

    def __init__(self, input_channels, output_channels, use_1x1conv=False, strides=1):
        """
        :param input_channels: int, number of input channels
        :param output_channels: int, number of output channels
        :param use_1x1conv: bool, whether to project the shortcut with a 1x1 convolution
        :param strides: int, stride of the first causal convolution (default 1)
        """
        super().__init__()
        width = output_channels
        self.conv1 = CausalConv1d(input_channels, width, kernel_size=3, stride=strides, dilation=1)
        self.conv2 = CausalConv1d(width, width, kernel_size=3, dilation=2)
        # Shortcut projection; it uses the same stride as conv1.
        self.conv3 = nn.Conv1d(input_channels, width, kernel_size=1, stride=strides) if use_1x1conv else None
        self.conv4 = CausalConv1d(width, width, kernel_size=3, dilation=4)
        self.conv5 = CausalConv1d(width, width, kernel_size=3, dilation=8)

        self.bn1 = nn.BatchNorm1d(width, eps=1e-5)
        self.bn2 = nn.BatchNorm1d(width, eps=1e-5)
        self.bn3 = nn.BatchNorm1d(width, eps=1e-5)
        self.bn4 = nn.BatchNorm1d(width, eps=1e-5)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, X):
        """Run both residual stages and return the activated result."""
        # Stage 1: conv1 -> bn1 -> relu -> conv2 -> bn2, plus shortcut.
        branch = self.relu(self.bn1(self.conv1(X)))
        branch = self.bn2(self.conv2(branch))
        shortcut = X if self.conv3 is None else self.conv3(X)
        stage_one = self.relu(branch + shortcut)

        # Stage 2: conv4 -> bn3 -> relu -> conv5 -> bn4, plus stage-1 output.
        branch = self.relu(self.bn3(self.conv4(stage_one)))
        branch = self.bn4(self.conv5(branch))
        return self.relu(branch + stage_one)


class TCN(nn.Module):
    """Convolutional feature extractor: a strided stem followed by four Residual stages.

    Takes a single-channel sequence and returns one 512-value feature vector per
    sample (global average pooling followed by flattening).
    """

    def __init__(self):
        super().__init__()
        stem_layers = [
            nn.Conv1d(1, 64, kernel_size=9, padding=4, dilation=1, stride=2),
            nn.BatchNorm1d(64, eps=1e-5),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=5, stride=2, padding=2),
            nn.Dropout(p=0.1),
        ]
        self.residual0 = nn.Sequential(*stem_layers)
        self.residual1 = nn.Sequential(
            Residual(input_channels=64, output_channels=64),
            nn.Dropout(p=0.1),
        )
        # Each later stage doubles the channel count and uses stride 2.
        self.residual2 = nn.Sequential(
            Residual(input_channels=64, output_channels=128, use_1x1conv=True, strides=2),
            nn.Dropout(p=0.1),
        )
        self.residual3 = nn.Sequential(
            Residual(input_channels=128, output_channels=256, use_1x1conv=True, strides=2),
            nn.Dropout(p=0.1),
        )
        self.residual4 = nn.Sequential(
            Residual(input_channels=256, output_channels=512, use_1x1conv=True, strides=2),
            nn.Dropout(p=0.1),
        )
        self.out = nn.Sequential(nn.AdaptiveAvgPool1d(1), nn.Flatten())

    def forward(self, X):
        """Pass ``X`` through the stem and the residual stages, then pool to a vector."""
        features = X
        stages = (self.residual0, self.residual1, self.residual2, self.residual3, self.residual4)
        for stage in stages:
            features = stage(features)
        return self.out(features)


class Combine_net(nn.Module):
    """Classifier that fuses TCN sequence features with a small metadata MLP.

    The last 5 values along the final axis of the input are treated as metadata;
    everything before them is fed to the TCN. The head emits 95 scores per sample.
    """

    def __init__(self):
        super().__init__()
        self.tcn = TCN()
        metadata_layers = [nn.Flatten(), nn.Linear(5, 32), nn.BatchNorm1d(32), nn.ReLU()]
        self.mlp = nn.Sequential(*metadata_layers)
        # 512 TCN features + 32 metadata features -> 95 outputs.
        head_layers = [
            nn.Linear(512 + 32, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(256, 95),
        ]
        self.combine = nn.Sequential(*head_layers)

    def forward(self, X):
        """Split ``X`` into sequence and metadata parts, encode both, and classify."""
        sequence_part, metadata_part = X[..., :-5], X[..., -5:]
        sequence_features = self.tcn(sequence_part)
        metadata_features = self.mlp(metadata_part)
        fused = torch.cat((sequence_features, metadata_features), dim=1)
        return self.combine(fused)


# Module-level model instance; the training cell further down passes it to train().
net = Combine_net()

In [None]:
def train(net, train_iter, dev_iter, num_epochs, lr, save_name, device):
    """Train ``net`` with Adam and keep the weights with the best dev accuracy.

    The weights of every ``nn.Linear`` / ``nn.Conv1d`` layer are re-initialised
    (Xavier uniform) before training starts. Metrics are logged to wandb. The
    learning rate is reduced when dev accuracy plateaus, and training stops early
    after 10 consecutive epochs without a new best dev accuracy.

    :param net: model to train (moved to ``device`` in place)
    :param train_iter: iterable of ``(X, y)`` training batches; must support ``len()``
    :param dev_iter: iterable of ``(X, y)`` validation batches
    :param num_epochs: int, maximum number of epochs
    :param lr: float, initial learning rate
    :param save_name: path the best ``state_dict`` is written to
    :param device: device to train on
    :return: None
    """
    def init_weights(m):
        if type(m) in (nn.Linear, nn.Conv1d):
            nn.init.xavier_uniform_(m.weight)

    net.apply(init_weights)
    net.to(device)
    wandb.watch(net, log="all")
    optimizer = torch.optim.Adam(net.parameters(), lr=lr, weight_decay=1e-3)
    # mode='max' because the scheduler is stepped with dev accuracy.
    scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=math.sqrt(0.1), patience=2, verbose=True, min_lr=1e-5)
    loss = nn.CrossEntropyLoss()
    num_batchs = len(train_iter)
    # Log roughly five times per epoch; max(1, ...) avoids a modulo-by-zero when
    # an epoch has fewer than five batches.
    log_every = max(1, num_batchs // 5)
    best_dev_acc = 0
    best_epoch = 0
    best_state = None
    count = 0
    for epoch in range(num_epochs):
        # Sums of: batch loss * batch size, number of correct predictions, number of samples.
        metric = Accumulator(3)
        net.train()
        for i, (X, y) in enumerate(train_iter):
            optimizer.zero_grad()
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            optimizer.step()
            with torch.no_grad():
                metric.add(l * X.shape[0], accuracy(y_hat, y), X.shape[0])
            if (i + 1) % log_every == 0 or i == num_batchs - 1:
                train_l = metric[0] / metric[2]
                train_acc = metric[1] / metric[2]
                # Log the running epoch averages as plain floats (the loss is
                # averaged the same way as the accuracy).
                wandb.log({"Train Accuracy": train_acc, "Training Loss": train_l,
                           "Epoch": epoch + (i + 1) / num_batchs})
        dev_l, dev_acc = evaluate_accuracy_gpu(net, loss, dev_iter)
        scheduler.step(dev_acc)
        wandb.log({"Dev Accuracy": dev_acc, "Dev Loss": dev_l, "Epoch": epoch + 1})
        if dev_acc > best_dev_acc:
            # Snapshot only the weights; this is what gets written to disk.
            best_state = copy.deepcopy(net.state_dict())
            best_dev_acc = dev_acc
            best_epoch = epoch + 1
            count = 0
        else:
            count += 1
            if count == 10:
                print("early stop!")
                break

    if best_state is None:
        # No epoch ever beat the initial accuracy of 0 (or num_epochs was 0).
        print("no improvement over the initial dev accuracy; nothing saved")
        return
    # Save whether training stopped early or ran through all epochs.
    print("best model epoch:%d dev_acc:%.4f" % (best_epoch, best_dev_acc))
    torch.save(best_state, save_name)

In [None]:
def proc_x_metadata(in_data):
    """Append five packet-count statistics to each sequence.

    For every sample the entries equal to -1 ("in") and +1 ("out") are counted and
    ``[total, in, out, in / total, out / total]`` is appended along the last axis.
    Samples that contain neither value get ratios of 0 instead of NaN (0 / 0).

    :param in_data: tensor of shape (N, 1, L)
    :return: float32 tensor of shape (N, 1, L + 5)
    """
    # Count over every non-batch axis in one vectorised pass.
    in_packet = in_data.eq(-1).sum(dim=(1, 2)).to(torch.float32)
    out_packet = in_data.eq(1).sum(dim=(1, 2)).to(torch.float32)
    total_packet = in_packet + out_packet
    # Guard the division: a sample without any +/-1 entries has total == 0.
    safe_total = total_packet.clamp(min=1.0)
    metadata = torch.stack(
        (total_packet, in_packet, out_packet, in_packet / safe_total, out_packet / safe_total),
        dim=1,
    ).unsqueeze(1)  # (N, 5) -> (N, 1, 5)
    return torch.cat((in_data.to(torch.float32), metadata), dim=2)


def LoadDataNoDefCW(batch_size):
    """Load the non-defended closed-world train/validation splits as DataLoaders.

    X files hold sequences of traffic directions, y files the matching website
    labels. Each X tensor is reshaped to (N, 1, L) and extended by
    ``proc_x_metadata``.

    :param batch_size: int, batch size of both loaders
    :return: tuple ``(train_loader, valid_loader)``; both shuffle and use 2 workers
    """
    print("Loading non-defended dataset for closed-world scenario")
    # Directory that stores the pickled data.
    dataset_dir = 'ClosedWorld/NoDef/'

    def read_tensor(file_name, dtype):
        # NOTE(review): pickle.load can execute arbitrary code - only use it on trusted files.
        with open(dataset_dir + file_name, 'rb') as handle:
            return torch.tensor(pickle.load(handle, encoding='bytes'), dtype=dtype)

    def read_features(file_name):
        raw = read_tensor(file_name, torch.float32)
        # Insert a channel axis of size 1 before appending the metadata.
        return proc_x_metadata(raw.reshape(raw.shape[0], 1, raw.shape[1]))

    # Training split
    X_train = read_features('X_train_NoDef.pkl')
    y_train = read_tensor('y_train_NoDef.pkl', torch.int64)

    # Validation split
    X_valid = read_features('X_valid_NoDef.pkl')
    y_valid = read_tensor('y_valid_NoDef.pkl', torch.int64)

    print("Data dimensions:")
    report = (
        ("X: Training data's shape : ", X_train),
        ("y: Training data's shape : ", y_train),
        ("X: Validation data's shape : ", X_valid),
        ("y: Validation data's shape : ", y_valid),
    )
    for caption, tensor in report:
        print(caption, tensor.shape)

    train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True, num_workers=2)
    valid_loader = DataLoader(TensorDataset(X_valid, y_valid), batch_size=batch_size, shuffle=True, num_workers=2)
    return (train_loader, valid_loader)

In [None]:
class Accumulator:
    """Keeps running float sums for `n` quantities that are updated together."""

    def __init__(self, n):
        self.data = [0.0 for _ in range(n)]

    def add(self, *args):
        """Add each positional argument (converted with ``float``) to its slot."""
        updated = []
        for running_total, increment in zip(self.data, args):
            updated.append(running_total + float(increment))
        self.data = updated

    def reset(self):
        """Set every slot back to 0.0, keeping the current number of slots."""
        self.data = [0.0 for _ in self.data]

    def __getitem__(self, idx):
        return self.data[idx]


# Accuracy helper: returns the NUMBER of correct predictions (a count, not a ratio).
def accuracy(y_hat, y):
    """Count how many predictions in ``y_hat`` match the labels ``y``.

    :param y_hat: either per-class scores of shape (N, C) or already-predicted
        class indices of shape (N,)
    :param y: tensor of N integer labels
    :return: float, number of correct predictions
    """
    # Take the argmax whenever there is more than one class column. The check is
    # on shape[1] (classes), not shape[0] (batch size), so a batch that contains
    # a single sample is handled correctly as well.
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        y_hat = y_hat.argmax(axis=1)
    cmp = y_hat.type(y.dtype) == y
    return float(cmp.type(y.dtype).sum())


def evaluate_accuracy_gpu(net, loss, data_iter, device=None):
    """Evaluate mean loss and accuracy of ``net`` over ``data_iter``.

    The model is switched to eval mode and is left in eval mode on return.

    :param net: model to evaluate
    :param loss: callable ``loss(y_hat, y)`` returning the mean loss of a batch
    :param data_iter: iterable of ``(X, y)`` batches
    :param device: device to evaluate on; defaults to the device of the model's
        first parameter
    :return: tuple ``(mean_loss, accuracy)`` over all samples
    :raises ZeroDivisionError: if ``data_iter`` yields no samples
    """
    net.eval()
    if device is None:
        device = next(iter(net.parameters())).device
    # Sums of: batch loss * batch size, number of correct predictions, number of samples.
    metric = Accumulator(3)
    # No gradients are needed for evaluation; skipping autograd saves memory and time.
    with torch.no_grad():
        for X, y in data_iter:
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            metric.add(l * X.shape[0], accuracy(y_hat, y), X.shape[0])
    return metric[0] / metric[2], metric[1] / metric[2]


def try_gpu(i=0):
    """Pick CUDA device `i` when at least `i + 1` GPUs are visible, else the CPU."""
    if i + 1 > torch.cuda.device_count():
        return torch.device('cpu')
    return torch.device(f'cuda:{i}')


# Top-k prediction: returns the number of correct samples (top-1 by default).
def top_k_accuracy(y_hat, y, k=1):
    """Count the samples whose label is among the ``k`` highest-scoring classes.

    :param y_hat: tensor of per-class scores, shape (N, C)
    :param y: tensor of N integer labels
    :param k: int, number of top classes to consider (default 1)
    :return: float, number of samples whose label is within the top ``k``
    """
    # Always compute the top-k indices; guarding this on the batch size left
    # `indices` undefined for a batch holding a single sample.
    _, indices = y_hat.topk(k, dim=1)
    # Compare every top-k index of a row with that row's label.
    cmp = indices.type(y.dtype) == y.reshape(y.shape[0], 1)
    return float(cmp.type(y.dtype).sum())


# Defaults to top-1.
def evaluate_top_k_accuracy_gpu(net, data_iter, device=None, k=1):
    """Evaluate the top-``k`` accuracy of ``net`` over ``data_iter``.

    The model is switched to eval mode and is left in eval mode on return.

    :param net: model to evaluate
    :param data_iter: iterable of ``(X, y)`` batches
    :param device: device to evaluate on; defaults to the device of the model's
        first parameter
    :param k: int, a sample counts as correct if its label is in the top ``k`` scores
    :return: float, fraction of correctly classified samples
    :raises ZeroDivisionError: if ``data_iter`` yields no samples
    """
    net.eval()
    if device is None:
        device = next(iter(net.parameters())).device
    # Sums of: number of correct predictions, number of labels seen.
    metric = Accumulator(2)
    # No gradients are needed for evaluation; skipping autograd saves memory and time.
    with torch.no_grad():
        for X, y in data_iter:
            X = X.to(device)
            y = y.to(device)
            metric.add(top_k_accuracy(net(X), y, k), y.numel())
    return metric[0] / metric[1]

In [None]:
# Start a Weights & Biases run in the "NoDef" project and store the
# hyperparameters on the run's config object; later cells read them from there.
wandb.init(project="NoDef")
config = wandb.config
config.batch_size = 256  # batch size handed to LoadDataNoDefCW
config.epochs = 100  # upper bound on training epochs (train() may stop early)
config.lr = 0.001  # initial learning rate handed to train()
config.save_name = "NoDef.pkl"  # file train() writes the best state_dict to

[34m[1mwandb[0m: Currently logged in as: [33mfuwafuwa[0m (use `wandb login --relogin` to force relogin)


In [None]:
# Build the training and validation DataLoaders from the pickled NoDef data.
train_iter, dev_iter = LoadDataNoDefCW(config.batch_size)

Loading non-defended dataset for closed-world scenario
Data dimensions:
X: Training data's shape :  torch.Size([76000, 1, 5005])
y: Training data's shape :  torch.Size([76000])
X: Validation data's shape :  torch.Size([9500, 1, 5005])
y: Validation data's shape :  torch.Size([9500])


In [None]:
# Train on the first GPU if one is visible, otherwise on the CPU (see try_gpu).
train(net, train_iter, dev_iter, config.epochs, config.lr, config.save_name, try_gpu())

Epoch    11: reducing learning rate of group 0 to 3.1623e-04.
Epoch    17: reducing learning rate of group 0 to 1.0000e-04.
Epoch    23: reducing learning rate of group 0 to 3.1623e-05.
Epoch    29: reducing learning rate of group 0 to 1.0000e-05.
early stop!
best model epoch:26 dev_acc:0.9860


In [None]:
dataset_dir = 'ClosedWorld/NoDef/'
# Load testing data
# NOTE(review): pickle.load can execute arbitrary code - only use it on trusted files.
with open(dataset_dir + 'X_test_NoDef.pkl', 'rb') as handle:
    X_test = torch.tensor(pickle.load(handle, encoding='bytes'), dtype=torch.float32)
    # Insert a channel axis of size 1, then append the packet-count metadata.
    X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
    X_test = proc_x_metadata(X_test)
with open(dataset_dir + 'y_test_NoDef.pkl', 'rb') as handle:
    y_test = torch.tensor(pickle.load(handle, encoding='bytes'), dtype=torch.int64)
print("Data dimensions:")
print("X: Testing data's shape : ", X_test.shape)
print("y: Testing data's shape : ", y_test.shape)

# Rebuild a fresh model and restore the saved weights.
net = Combine_net()
batch_size = 256
save_name = "NoDef.pkl"
device = try_gpu()
# Order does not matter for accuracy, so the test set is not shuffled.
test_iter = DataLoader(TensorDataset(X_test, y_test), batch_size=batch_size, shuffle=False, num_workers=0)
# map_location lets a checkpoint that was saved from a GPU run load on a
# CPU-only runtime as well.
net.load_state_dict(torch.load(save_name, map_location=device))
net.to(device)
test_acc = evaluate_top_k_accuracy_gpu(net, test_iter, k=1)
print(f'test acc {test_acc:.4f}')

Data dimensions:
X: Testing data's shape :  torch.Size([9500, 1, 5005])
y: Testing data's shape :  torch.Size([9500])
test acc 0.9857
