# In [None]:
import torch.nn as nn
import torch.optim as optim
import torch
import numpy as np
from torch.utils.data import TensorDataset, Dataset, DataLoader
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
import pandas as pd

'''
borrowed from https://blog.csdn.net/fengdu78/article/details/104337519
'''

class mynet(nn.Module):
    """Fully connected network mapping 41 input features to one sigmoid output.

    The linear layers narrow 41 -> 36 -> 24 -> 12 -> 6 -> 1. The same
    ``act_layer`` module instance is placed after each of the first four
    linear layers; the last linear layer is followed by ``nn.Sigmoid``, so
    every output value lies strictly between 0 and 1.

    Args:
        act_layer: activation module reused between the hidden layers.
    """

    def __init__(self, act_layer=nn.ReLU()):
        super().__init__()
        self.act_layer = act_layer

        # Build the stack pairwise from the list of layer widths; the layers
        # are created in the same order as a hand-written Sequential would be.
        widths = (41, 36, 24, 12, 6)
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(self.act_layer)
        stack.append(nn.Linear(widths[-1], 1))
        stack.append(nn.Sigmoid())
        self.net = nn.Sequential(*stack)

    def forward(self, x):
        """Flatten each sample to one row, run the stack, return (batch, -1)."""
        rows = x.size(0)
        flat = x.view(rows, -1)
        scores = self.net(flat)
        # Keep the output two-dimensional: one row per input sample.
        return scores.view(rows, -1)

def fit(x, y, act_layer = nn.ReLU()):
    """Train a ``mynet`` model on features ``x`` against labels ``y``.

    Runs 10 epochs of Adam (lr=1e-3) with an MSE loss between the model's
    single sigmoid output and the label, in unshuffled batches of 2000 rows.
    The model and the summed loss of every epoch are printed.

    Args:
        x: NumPy array of features, one row per sample; cast to float32.
            ``mynet`` expects 41 values per row.
        y: NumPy array with one label per row of ``x``; cast to float32.
            Assumed to be 0/1 targets matching the sigmoid output range —
            TODO confirm against the caller.
        act_layer: activation module forwarded to ``mynet``.

    Returns:
        The trained ``mynet`` instance.
    """
    epochs = 10
    lr = 1e-3
    # Forward the caller's activation instead of always building a new ReLU.
    model = mynet(act_layer=act_layer)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    print(model)

    tensor_x = torch.from_numpy(x.astype(np.float32))
    # The model emits shape (batch, 1); give the labels the same shape so the
    # loss is element-wise rather than silently broadcast.
    tensor_y = torch.from_numpy(y.astype(np.float32)).reshape(-1, 1)
    my_dataset = TensorDataset(tensor_x, tensor_y)
    my_dataset_loader = DataLoader(my_dataset, batch_size=2000, shuffle=False)

    for epoch in range(epochs):
        print(epoch)
        total_loss = 0.0
        for batch_x, batch_y in my_dataset_loader:
            pred = model(batch_x)
            # Compare the prediction with the label, not with the input batch.
            loss = criterion(pred, batch_y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # .item() detaches the scalar so no autograd graph is kept alive,
            # and the sum stays a plain float even when there are no batches.
            total_loss += loss.item()

        print(total_loss)

    return model


# --- Load the KDD Cup 99 train/test splits and the list of attack names ---
train_df = pd.read_csv("data/kddcup99_train.csv", on_bad_lines='skip', header=None)
test_df = pd.read_csv("data/kddcup99_test.csv", on_bad_lines='skip', header=None)
# sep=r"\s+" is the documented replacement for the deprecated delim_whitespace=True.
attack_types = pd.read_table("data/trainning_attach_types", header=None, sep=r"\s+")

# Binary target: every listed attack name (with the trailing '.') -> 1, 'normal.' -> 0.
attack_dict = {name + '.': 1 for name in attack_types[0]}
attack_dict['normal.'] = 0
train_df[41] = train_df[41].replace(attack_dict)
test_df[41] = test_df[41].replace(attack_dict)

# --- Integer-encode every object-typed training column, remembering the codes ---
map_dict = {}
for column in train_df.columns:
    if pd.api.types.is_object_dtype(train_df[column]):
        codes, uniques = train_df[column].factorize()
        train_df[column] = codes
        map_dict[column] = {value: code for code, value in enumerate(uniques)}

# Hand-assigned codes for two column-2 values; presumably these occur only in
# the test split and so have no training code — verify against the data.
map_dict[2]['http_2784']=68
map_dict[2]['aol']=69

# Apply the training encodings to the test split so both share one code space.
for column in test_df.columns:
    if column in map_dict:
        test_df[column] = test_df[column].replace(map_dict[column])

# The last column is the label; everything before it is a feature.
train_features_raw, train_labels = train_df.iloc[:,:-1], train_df.iloc[:,-1]
test_features_raw, test_labels = test_df.iloc[:,:-1], test_df.iloc[:,-1]


def _model_outputs(model, features):
    """Run ``model`` on a NumPy feature matrix and return its outputs as NumPy."""
    inputs = torch.from_numpy(features.astype(np.float32))
    # no_grad: inference only, and required before converting outputs to NumPy.
    with torch.no_grad():
        return model(inputs).numpy()


ae = fit(train_features_raw.to_numpy(), train_labels.to_numpy())
# mynet defines no `encode` method; the model's forward pass is used instead,
# which yields one sigmoid score per row.
train_features = _model_outputs(ae, train_features_raw.to_numpy())
test_features = _model_outputs(ae, test_features_raw.to_numpy())
train_features

# NOTE(review): y_pred was never defined. Thresholding the sigmoid score at 0.5
# is an assumed decision rule — confirm it matches the intended evaluation.
y_pred = (test_features.ravel() >= 0.5).astype(int)

precision, recall, F1_score, _ = precision_recall_fscore_support(test_labels, y_pred, average=None)
acc = accuracy_score(test_labels, y_pred)
print("precision: {} \nrecall: {} \nF1 score: {} \naccuracy: {}".format(precision, recall, F1_score, acc))