In [1]:
import pickle
import os
from easydict import EasyDict
from utils import *
import matplotlib.pyplot as plt
import torch
import numpy as np
import random
# Experiment-level settings, collected in one attribute-style dictionary.
opt = EasyDict(
    {
        # dataset name; the config cell recognizes "quarter-circle",
        # "half-circle" and "sine"
        "data": "half-circle",
        # model name, one of
        # ["CIDA", "PCIDA", "ADDA", "SO", "DANN", "CDANN", "MDD", "CUA"]
        "model": "ADDA",
        # device the model is moved to: "cuda" or "cpu"
        "device": "cpu",
        # seed later passed to numpy, random and torch
        "seed": 2333,
    }
)

# Record the library versions this run was produced with.
print(f"numpy version: {np.__version__}")
print(f"pytorch version: {torch.__version__}")

numpy version: 1.18.5
pytorch version: 1.5.0


In [2]:
from imblearn.datasets import make_imbalance

# Directory holding the pickled train/test splits.  The default is the
# machine-specific location this notebook was written against; set the
# INTRUSION_DATA_DIR environment variable to run it elsewhere without
# editing this cell.
DATA_DIR = os.environ.get('INTRUSION_DATA_DIR', '/Users/avin/Desktop/Intrusion Detection')

# Per-class sample counts passed to make_imbalance as `sampling_strategy`.
TRAIN_CLASS_COUNTS = {'normal': 1500, 'injection': 500, 'impersonation': 500, 'flooding': 500}
TEST_CLASS_COUNTS = {'normal': 2700, 'injection': 100, 'impersonation': 100, 'flooding': 100}


def _load_xy(path):
    """Unpickle a two-element ``(features, labels)`` pair stored at ``path``.

    SECURITY: ``pickle.load`` can execute arbitrary code embedded in the file,
    so only point this at pickle files you created or otherwise trust.
    """
    with open(path, 'rb') as f:
        features, labels = pickle.load(f)
    return features, labels


xx, yy = _load_xy(os.path.join(DATA_DIR, 'train.pk'))
xx_test, yy_test = _load_xy(os.path.join(DATA_DIR, 'test.pk'))

# Resample each split to the fixed class counts above (3000 rows per split);
# random_state pins which rows are kept.
X, Y = make_imbalance(xx, yy, sampling_strategy=TRAIN_CLASS_COUNTS, random_state=0)
X_test, Y_test = make_imbalance(xx_test, yy_test, sampling_strategy=TEST_CLASS_COUNTS, random_state=0)
#xx, yy = make_imbalance(X, Y, sampling_strategy={'normal':3000, 'injection':1000, 'impersonation':1000, 'flooding':1000},random_state=0)


In [3]:
import keras
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import LabelBinarizer

encoder = LabelEncoder()
binarizer = LabelBinarizer()  # instantiated here; not used by the cells shown in this notebook

# Learn the label -> integer mapping from the training labels only.
encoded_y = encoder.fit_transform(Y)

# Reuse the mapping fitted on the training labels instead of re-fitting on the
# test labels: re-fitting would silently produce a different integer mapping
# whenever the two splits do not contain exactly the same set of classes.
# transform() raises ValueError if the test split contains an unseen label.
encoded_y_test = encoder.transform(Y_test)


In [4]:
#for i in range(0,encoded_y.shape[0]):
#    if encoded_y[i]==0:
#        encoded_y[i]=1
#    if encoded_y[i]==2:
#        encoded_y[i]=1
#for i in range(0,encoded_y.shape[0]):
#    if encoded_y[i]==3:
#        encoded_y[i]=0
#print(np.count_nonzero(encoded_y == 0))
#print(np.count_nonzero(encoded_y == 1))#outlier

#for i in range(0,encoded_y_test.shape[0]):
#    if encoded_y_test[i]==0:
#        encoded_y_test[i]=1
#    if encoded_y_test[i]==2:
#        encoded_y_test[i]=1
#for i in range(0,encoded_y_test.shape[0]):
#    if encoded_y_test[i]==3:
#        encoded_y_test[i]=0
#print(np.count_nonzero(encoded_y_test == 0))
#print(np.count_nonzero(encoded_y_test == 1))#outlier

In [5]:
# ---------------------------------------------------------------------------
# Training, model-size, dataset and model-specific settings.
# Extends the `opt` object created in the first cell and creates the output
# directory `opt.outf` as a side effect.
# ---------------------------------------------------------------------------

# training configs
opt.num_epoch = 100
opt.batch_size = 10
opt.lr = 1e-4
opt.lr_T = 1e-4
opt.gamma = 100
opt.beta1 = 0.9
opt.weight_decay = 5e-4
opt.wgan = False
opt.no_bn = True  # True: do not use batch normalization

# model size configs
opt.nx = 90  # dimension of the input data
opt.nh = 800  # dimension of hidden
opt.nc = 4  # number of label class

# dataset configs
opt.dim_domain = 1  # dimension of domain index

# number of domains in the dataset
if opt.data == "quarter-circle":
    opt.num_domain = 15
    opt.num_source = 6
    opt.normalize_domain = False
elif opt.data == "half-circle":
    opt.num_domain = 2
    opt.num_source = 1
    opt.normalize_domain = False
elif opt.data == "sine":
    opt.num_domain = 12
    opt.num_source = 5
    opt.normalize_domain = True  # normalize data per domain
else:
    # Raise explicitly: an `assert` would be stripped when Python runs with -O,
    # letting an unknown dataset name slip through to the lines below.
    raise ValueError("Can't find data: " + repr(opt.data))
opt.num_target = opt.num_domain - opt.num_source

# model specific configs
opt.cond_disc = False  # use conditional discriminator
opt.continual_da = False  # use continual domain adaptation

# default, overridden per model below
opt.lambda_gan = 2.0

if opt.model == 'CIDA':
    opt.lambda_gan = 0.4

elif opt.model == 'PCIDA':
    opt.lambda_gan = 1.0
    opt.nmix = 1  # number of mixture Gaussians for the discriminator prediction
    # opt.no_bn = False

elif opt.model == "MDD":
    opt.lambda_src = 1.0
    opt.lambda_tgt = 1.0
    opt.lambda_gan = 2.0
    opt.num_epoch = 160  # early stop (optional)

elif opt.model == 'CDANN':
    opt.cond_disc = True

elif opt.model == 'CUA':
    opt.continual_da = True
    opt.num_da_step = 5  # number of steps of domain adaptation
    opt.num_epoch_pre = 10  # number of epochs of pretraining in source domain
    opt.num_epoch_sub = 50  # number of epochs of adapting to a new sub target domain
    opt.lr_decay_period = 500
    opt.lambda_gan = 1.0
    opt.lambda_rpy = 0.3

# experiment name and output directory
opt.exp = opt.data + '_' + opt.model
opt.outf = './dump/' + opt.exp
# Create the directory (and missing parents) without going through a shell:
# portable beyond POSIX and safe for names containing spaces or shell
# metacharacters.  exist_ok=True keeps the cell re-runnable, like `mkdir -p`.
os.makedirs(opt.outf, exist_ok=True)

opt.use_resample = False

In [6]:
from dataset import *
from plot import plot_dataset
import pandas as pd

def select_columns(data_frame, column_names):
    """Return the columns of ``data_frame`` selected by ``column_names``.

    The selection is a label-based ``.loc`` lookup over all rows, so the
    result keeps every row and follows the order given in ``column_names``.
    """
    return data_frame.loc[:, column_names]

#data_pkl = read_pickle(f'./data/{opt.data}.pkl')
#data_pkl = X
# Feature columns kept for the model, one per line (90 entries).  The order
# defines the column order of the resulting arrays, so do not re-sort it.
selected_columns = [
    # frame.*
    'frame.time_epoch',
    'frame.time_delta',
    'frame.time_delta_displayed',
    'frame.time_relative',
    'frame.len',
    'frame.cap_len',
    # radiotap.*
    'radiotap.length',
    'radiotap.present.tsft',
    'radiotap.present.flags',
    'radiotap.present.channel',
    'radiotap.present.dbm_antsignal',
    'radiotap.present.antenna',
    'radiotap.present.rxflags',
    'radiotap.mactime',
    'radiotap.flags.fcs',
    'radiotap.datarate',
    'radiotap.channel.freq',
    'radiotap.channel.type.cck',
    'radiotap.channel.type.ofdm',
    'radiotap.channel.type.2ghz',
    'radiotap.dbm_antsignal',
    'radiotap.antenna',
    # wlan.*
    'wlan.fc.type_subtype',
    'wlan.fc.type',
    'wlan.fc.subtype',
    'wlan.fc.ds',
    'wlan.fc.frag',
    'wlan.fc.retry',
    'wlan.fc.pwrmgt',
    'wlan.fc.moredata',
    'wlan.fc.protected',
    'wlan.duration',
    'wlan.ra',
    'wlan.da',
    'wlan.ta',
    'wlan.sa',
    'wlan.bssid',
    'wlan.frag',
    'wlan.seq',
    'wlan.bar.type',
    'wlan.ba.control.ackpolicy',
    'wlan.ba.control.cbitmap',
    'wlan.ba.bm',
    'wlan.fcs_good',
    # wlan_mgt.*
    'wlan_mgt.fixed.capabilities.ess',
    'wlan_mgt.fixed.capabilities.ibss',
    'wlan_mgt.fixed.capabilities.cfpoll.ap',
    'wlan_mgt.fixed.capabilities.privacy',
    'wlan_mgt.fixed.capabilities.preamble',
    'wlan_mgt.fixed.capabilities.pbcc',
    'wlan_mgt.fixed.capabilities.agility',
    'wlan_mgt.fixed.capabilities.spec_man',
    'wlan_mgt.fixed.capabilities.short_slot_time',
    'wlan_mgt.fixed.capabilities.apsd',
    'wlan_mgt.fixed.capabilities.radio_measurement',
    'wlan_mgt.fixed.capabilities.dsss_ofdm',
    'wlan_mgt.fixed.capabilities.del_blk_ack',
    'wlan_mgt.fixed.capabilities.imm_blk_ack',
    'wlan_mgt.fixed.listen_ival',
    'wlan_mgt.fixed.current_ap',
    'wlan_mgt.fixed.status_code',
    'wlan_mgt.fixed.timestamp',
    'wlan_mgt.fixed.beacon',
    'wlan_mgt.fixed.aid',
    'wlan_mgt.fixed.reason_code',
    'wlan_mgt.fixed.auth.alg',
    'wlan_mgt.fixed.auth_seq',
    'wlan_mgt.fixed.sequence',
    'wlan_mgt.tagged.all',
    'wlan_mgt.ds.current_channel',
    'wlan_mgt.tim.dtim_count',
    'wlan_mgt.tim.dtim_period',
    'wlan_mgt.tim.bmapctl.multicast',
    'wlan_mgt.country_info.environment',
    'wlan_mgt.rsn.version',
    'wlan_mgt.rsn.gcs.type',
    'wlan_mgt.rsn.pcs.count',
    'wlan_mgt.rsn.akms.count',
    'wlan_mgt.rsn.akms.type',
    'wlan_mgt.rsn.capabilities.preauth',
    'wlan_mgt.rsn.capabilities.ptksa_replay_counter',
    'wlan_mgt.tcprep.trsmt_pow',
    # wlan.* (encryption / QoS)
    'wlan.wep.iv',
    'wlan.wep.key',
    'wlan.wep.icv',
    'wlan.tkip.extiv',
    'wlan.ccmp.extiv',
    'wlan.qos.tid',
    'wlan.qos.priority',
    # data.*
    'data.len',
]

# Training split: keep the selected columns, convert to a NumPy array and
# pair it with the integer-encoded training labels.
data_df = select_columns(X, selected_columns)
data = data_df.to_numpy()
label = encoded_y

# Test split: same column selection, paired with the encoded test labels.
data_df1 = select_columns(X_test, selected_columns)
data1 = data_df1.to_numpy()
label1 = encoded_y_test


#domain= data_pkl['domain']
# Per-sample domain index: every row of the training split is tagged with
# domain 0 and every row of the test split with domain 1.
#
# The arrays are allocated in one call each.  The previous version grew them
# with 3000 np.append calls apiece, and each np.append copies the whole array.
# Their lengths follow the data arrays rather than a hard-coded 15 * 200, so
# they stay aligned if the per-class sample counts change; with the current
# counts both are still (3000,) float64 arrays.
# NOTE(review): assumes ToyDataset needs one domain entry per data row -- confirm.
domain = np.zeros(len(data))
domain1 = np.ones(len(data1))

print(domain.shape)
print(domain1.shape)

#print(awid.data)
#save to pickle file
#with open('awid50.pkl', 'wb') as f:
#    pickle.dump(awid, f)



#create dictionary

# Bundle each split into the dict handed to ToyDataset: feature matrix,
# integer labels and per-sample domain index.
awid = dict(data=data, label=label, domain=domain)
awid1 = dict(data=data1, label=label1, domain=domain1)

# One single-element list of sub-datasets per split: domain id 0 for the
# training split, domain id 1 for the test split.
datasets = [ToyDataset(awid, 0, opt)]
datasets1 = [ToyDataset(awid1, 1, opt)]

# Each list holds exactly one dataset, so the concatenation is
# [datasets[0], datasets1[0]].
datasets_combined = datasets + datasets1

# Mix the sub-datasets into one large dataset, sized like the first of them.
dataset = SeqToyDataset(datasets_combined, size=len(datasets_combined[0]))

# Shuffled mini-batch loader over the combined dataset.
dataloader = DataLoader(dataset=dataset, batch_size=opt.batch_size, shuffle=True)


(3000,)
(3000,)
SeqDataset Size 3000 Sub Size [3000, 3000]


In [7]:
from model import get_model

# Training driver: seeds the RNGs, builds the model selected by `opt.model`,
# then runs either one-step adaptation or (when `opt.continual_da` is set)
# source pretraining followed by step-by-step continual adaptation.

# set random seed (for reproducibility)
# NOTE: the seeds are set here, i.e. after `dataset` / `dataloader` were
# already constructed in the previous cell.
np.random.seed(opt.seed)
random.seed(opt.seed)
torch.manual_seed(opt.seed)

# build the model
modelClass = get_model(opt.model)
print(modelClass)
model = modelClass(opt)
model.to(opt.device)
print(model)

# Hand per-domain data statistics to the model when per-domain normalization
# is enabled (only the "sine" dataset config sets `opt.normalize_domain`).
# NOTE(review): in this notebook `datasets` holds only the domain-0 dataset
# (the domain-1 dataset lives in `datasets1` / `datasets_combined`), so only
# one domain's statistics would be passed here -- confirm this is intended.
if opt.normalize_domain:
    model.set_data_stats(
        dm=[d.data_m for d in datasets],
        ds=[d.data_s for d in datasets],
    )
    
# train the model
if not opt.continual_da:
    # one-step adaptation
    for epoch in range(opt.num_epoch):
        model.learn(epoch, dataloader)
        # checkpoint every 100 epochs and always after the final epoch
        if (epoch + 1) % 100 == 0 or (epoch + 1) == opt.num_epoch:
            model.save()
            # model.visualize_D()
            # model.visualize_F()
            # model.visualize_E()
        # evaluate every 50 epochs
        # NOTE(review): evaluation uses the same `dataloader` as training.
        if (epoch + 1) % 50 == 0:    
            model.test(epoch, dataloader)
else:
    # pretrain on source
    print('===> pretrain the classifer')
    model.prepare_trainer(init=True)
    for epoch in range(opt.num_epoch_pre):
        model.learn(epoch, dataloader, init=True)
        # save, visualize and evaluate every 10 pretraining epochs
        if (epoch + 1) % 10 == 0:
            model.save()
            model.visualize_F()
            model.test(epoch, dataloader)
    # step-by-step domain adapt
    ds_size = len(datasets[0])
    # the replay pool starts with the first `opt.num_source` entries of `datasets`
    replay_datasets = [datasets[i] for i in range(opt.num_source)]
    print('===> start continual DA')
    model.prepare_trainer(init=False)
    # one phase per remaining domain index (num_source .. num_domain - 1)
    for phase in range(opt.num_source, opt.num_domain):
        # rebuild the replay loader each phase so it includes every replay
        # dataset appended so far
        continual_dataset = SeqToyDataset(replay_datasets, size=ds_size)
        continual_dataloader = DataLoader(
            dataset=continual_dataset,
            shuffle=True,
            batch_size=opt.batch_size,
            num_workers=4,
        )
        model.set_phase(phase)
        for epoch in range(opt.num_epoch_sub):
            model.learn(epoch, (dataloader, continual_dataloader), init=False)
        # model.visualize_F(phase)
        model.save()
        # `epoch` here is the last index of the inner loop above
        model.test(epoch, dataloader)
        # generate a replay dataset from the model and add it to the pool
        # used by the next phase
        replay_data, replay_label = model.gen_replay_dataset(dataloader)
        replay_datasets.append(ReplayDataset(replay_data, replay_label, opt))

<class 'model.ADDA'>
===> Discrinimator Output Activation: sigmoid
ADDA(
  (netE): FeatureNet(
    (fc1): Linear(in_features=90, out_features=800, bias=True)
    (fc2): Linear(in_features=1600, out_features=1600, bias=True)
    (fc3): Linear(in_features=1600, out_features=1600, bias=True)
    (fc4): Linear(in_features=1600, out_features=1600, bias=True)
    (fc_final): Linear(in_features=1600, out_features=800, bias=True)
    (fc1_var): Linear(in_features=1, out_features=800, bias=True)
    (fc2_var): Linear(in_features=800, out_features=800, bias=True)
  )
  (netF): PredNet(
    (fc3): Linear(in_features=800, out_features=800, bias=True)
    (bn3): Identity()
    (fc4): Linear(in_features=800, out_features=800, bias=True)
    (bn4): Identity()
    (fc_final): Linear(in_features=800, out_features=4, bias=True)
  )
  (netD): DiscNet(
    (fc3): Linear(in_features=800, out_features=800, bias=True)
    (bn3): Identity()
    (fc4): Linear(in_features=800, out_features=800, bias=True)
    (

##### 