In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rpy2.robjects as robjects
import tensorflow.keras as keras
from sklearn import preprocessing
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split

In [None]:
#read data
path_train = "F:/数据集/UNSW_NB15/UNSW_NB15_CSV/UNSW_NB15_training-set.csv"
path_test = "F:/数据集/UNSW_NB15/UNSW_NB15_CSV/UNSW_NB15_testing-set.csv"

# Both files are loaded the same way: first row is the header and
# dtype='unicode' asks pandas to keep every column as text.
df_train, df_test = (
    pd.read_csv(csv_path, header=0, dtype='unicode')
    for csv_path in (path_train, path_test)
)
columns_full = list(df_train.columns)

# The feature frames are every column except the last one.
x_train, x_test = df_train.iloc[:, :-1], df_test.iloc[:, :-1]
columns = list(x_train.columns)
for feature_frame in (x_train, x_test):
    print(feature_frame.shape)

In [None]:
#divide training data to 1 or 0
# Labels are compared as strings because the CSV was loaded with a text dtype.
df_class_Normal = df_train.loc[df_train['label'].eq('0')]
df_class_Attack = df_train.loc[df_train['label'].eq('1')]

In [None]:
#define MinMaxScaler normalize
def numerical_minmax_normalization(df, name, scaler=None):
    """Rescale one column of ``df`` in place with a min-max scaler.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that owns the column; the column is overwritten in place.
    name : str
        Label of the column to rescale.
    scaler : object with a ``transform`` method, optional
        An already-fitted scaler to apply. When omitted (the default), a new
        ``preprocessing.MinMaxScaler`` is fitted on this very column, which is
        the original behaviour. Passing the scaler fitted on training data
        lets held-out data be scaled with the training minimum/maximum.

    Returns
    -------
    The scaler that was applied, so the caller can reuse it on another frame.
    """
    # scikit-learn scalers expect a 2-D array, hence the single-column reshape.
    x = df[name].values.reshape(-1, 1)
    if scaler is None:
        scaler = preprocessing.MinMaxScaler()
        x_scaled = scaler.fit_transform(x)
    else:
        x_scaled = scaler.transform(x)
    # Flatten back to 1-D before writing the column.
    df[name] = x_scaled.ravel()
    return scaler

In [None]:
# Hold out 20% of each class; the fixed random_state keeps the split repeatable.
# (train_test_split is already imported in the first cell, so it is not re-imported here.)
x_train_normal, x_test_normal = train_test_split(df_class_Normal, test_size=0.2, random_state=920)
x_train_attack, x_test_attack = train_test_split(df_class_Attack, test_size=0.2, random_state=920)

In [None]:
# Strip the 'label' column from all four partitions so only the remaining
# columns are fed to the autoencoders.
x_train_normal, x_test_normal, x_train_attack, x_test_attack = (
    partition.drop(columns='label')
    for partition in (x_train_normal, x_test_normal, x_train_attack, x_test_attack)
)

In [None]:
# Min-max scale every column of every partition in place. Each call is made
# per partition and per column, in the same order as before.
for feature in columns:
    for partition in (x_train_normal, x_train_attack, x_test_attack, x_test_normal):
        numerical_minmax_normalization(partition, feature)

In [None]:
#show the number of normal and attack
num_Normal, num_Attack = len(df_class_Normal), len(df_class_Attack)
plt.bar(['Normal', 'Attack'], [num_Normal, num_Attack], color='dodgerblue')
plt.show()

# Row counts of the four train/held-out partitions.
x_train_attack_len, x_train_normal_len = len(x_train_attack), len(x_train_normal)
x_test_attack_len, x_test_normal_len = len(x_test_attack), len(x_test_normal)

partition_sizes = {
    'x_train_attack': x_train_attack_len,
    'x_train_normal': x_train_normal_len,
    'x_test_attack': x_test_attack_len,
    'x_test_normal': x_test_normal_len,
}
plt.bar(list(partition_sizes.keys()), list(partition_sizes.values()), color='dodgerblue')
plt.show()

In [None]:
# Feature matrix of the full testing set: label removed, then every column
# min-max scaled in place.
test = df_test.drop(columns='label')

for feature in columns:
    numerical_minmax_normalization(test, feature)

In [None]:
#normal-AE
# Autoencoder fitted on the normal-traffic partition only.
# The input/output width is read from the training frame instead of being
# hard-coded to 42, so the network always matches the number of feature columns.
n_features = x_train_normal.shape[1]

input_img = keras.Input(shape=(n_features,))
# Encoder: n_features -> 30 -> 20. The first layer carries an L1 activity
# penalty (note that 10e-5 equals 1e-4).
encoded = keras.layers.Dense(30, activation='relu',activity_regularizer=keras.regularizers.l1(10e-5))(input_img)
encoded = keras.layers.Dense(20, activation='relu')(encoded)

# Decoder mirrors the encoder: 20 -> 30 -> n_features.
decoded = keras.layers.Dense(30, activation='relu')(encoded)
decoded = keras.layers.Dense(n_features, activation='relu')(decoded)

autoencoder_normal = keras.Model(input_img, decoded)

autoencoder_normal.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Input and target are the same frame (reconstruction); the held-out normal
# rows are used for validation.
autoencoder_normal.fit(x_train_normal, x_train_normal,
                epochs=100,
                batch_size=256,
                shuffle=True,
                validation_data=(x_test_normal, x_test_normal))

# Reconstruct the whole scaled testing set with the normal-traffic model.
nor = autoencoder_normal.predict(test)
nor = pd.DataFrame(nor, columns=columns)
print(nor)


In [None]:
#attack-AE
# Autoencoder fitted on the attack-traffic partition only.
# The input/output width is read from the training frame instead of being
# hard-coded to 42, so the network always matches the number of feature columns.
n_features = x_train_attack.shape[1]

input_img = keras.Input(shape=(n_features,))
# Encoder: n_features -> 30 -> 20. The first layer carries an L1 activity
# penalty (note that 10e-5 equals 1e-4).
encoded = keras.layers.Dense(30, activation='relu',activity_regularizer=keras.regularizers.l1(10e-5))(input_img)
encoded = keras.layers.Dense(20, activation='relu')(encoded)

# Decoder mirrors the encoder: 20 -> 30 -> n_features.
decoded = keras.layers.Dense(30, activation='relu')(encoded)
decoded = keras.layers.Dense(n_features, activation='relu')(decoded)

autoencoder_attack = keras.Model(input_img, decoded)

autoencoder_attack.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Input and target are the same frame (reconstruction); the held-out attack
# rows are used for validation.
autoencoder_attack.fit(x_train_attack, x_train_attack,
                epochs=100,
                batch_size=256,
                shuffle=True,
                validation_data=(x_test_attack, x_test_attack))

# Reconstruct the whole scaled testing set with the attack-traffic model.
att = autoencoder_attack.predict(test)
att = pd.DataFrame(att, columns=columns)
print(att)


In [None]:
#combine sample of AE with testing_set
# Element-wise mean of each reconstruction and the scaled test features,
# widened to the full column list so a 'label' column exists.
df_normal = pd.DataFrame((nor + test) / 2, columns=columns_full)
df_attack = pd.DataFrame((att + test) / 2, columns=columns_full)

# Attach the testing-set labels and write each frame without its index.
for blended, out_path in (
    (df_normal, "F:/数据集/UNSW_NB15/UNSW_NB15_CSV/test_nor.csv"),
    (df_attack, "F:/数据集/UNSW_NB15/UNSW_NB15_CSV/test_att.csv"),
):
    blended['label'] = df_test['label']
    blended.to_csv(out_path, index=False)

In [None]:
#train AddTree
robjects.r('library(rtemis)')


def _fit_addtree(name, csv_path):
    """Run the AddTree pipeline in the embedded R session for one dataset.

    The R statements read ``csv_path`` into an R data frame called ``name``,
    turn its ``label`` column into a factor ``Label`` with levels (1, 0),
    split the rows with a 3-fold ``resample`` (``Fold_1`` indexes the training
    rows, the remaining rows are the test part) and fit ``s.ADDTREE``. The
    fitted object is left in R as ``<name>.addtree``.

    Parameters
    ----------
    name : str
        Prefix of the R variables created (``<name>``, ``<name>.train``,
        ``<name>.test``, ``<name>.addtree``).
    csv_path : str
        Path of the CSV file that R reads.

    Returns
    -------
    The rpy2 object for ``<name>.addtree$error.test$Overall``.
    """
    r_statements = (
        f'{name} <- read.csv("{csv_path}")',
        f'{name}$Label <- factor({name}$label, levels = c(1,0))',
        f'{name}$label <- NULL',
        # `res` is a shared R variable; each run overwrites the previous one.
        f'res <- resample({name}, n.resamples = 3, resampler = "kfold", verbose = TRUE)',
        f'{name}.train <- {name}[res$Fold_1, ]',
        f'{name}.test <- {name}[-res$Fold_1, ]',
        f'{name}.addtree <- s.ADDTREE({name}.train, x.test = {name}.test, gamma=.9,learning.rate=.01)',
    )
    for statement in r_statements:
        robjects.r(statement)
    return robjects.r(f'{name}.addtree$error.test$Overall')


#normal
normal_mat = _fit_addtree('normal', "F:/数据集/UNSW_NB15/UNSW_NB15_CSV/test_nor.csv")
#attack
attack_mat = _fit_addtree('attack', "F:/数据集/UNSW_NB15/UNSW_NB15_CSV/test_att.csv")

print(normal_mat)
print(attack_mat)

In [None]:
#show the tree
# Evaluate rtemis' dplot3.addtree in the R session on each fitted AddTree
# object (normal.addtree / attack.addtree live in R, not in Python) and print
# whatever rpy2 hands back. The statement order is kept so the output for the
# normal model appears before that of the attack model.
img_normal = robjects.r('dplot3.addtree(normal.addtree)')
print(img_normal)
img_attack = robjects.r('dplot3.addtree(attack.addtree)')
print(img_attack)

In [None]:
def voting_machine(data=None):
    """Train and evaluate a small dense network on the two AddTree votes.

    Parameters
    ----------
    data : pandas.DataFrame, optional
        Frame whose first two columns are the model inputs and whose third
        column is the binary target. When omitted, the notebook-level ``df``
        is used, which is how the function originally obtained its data.

    The frame is split 60/40 (``random_state=0``), the target is one-hot
    encoded into two classes, and a Dense(4, relu) -> Dropout -> Dense(2,
    softmax) model is trained for 50 epochs. Accuracy, precision, recall and
    F1 on the 40% split are printed. Nothing is returned.
    """
    # Fall back to the notebook-level `df` so zero-argument calls keep working.
    frame = df if data is None else data
    train_data = frame.iloc[:, :2]
    train_target = frame.iloc[:, 2]

    x_train, x_test, y_train, y_test = train_test_split(train_data, train_target, test_size=0.4, random_state=0)
    y_train = keras.utils.to_categorical(y_train, 2)
    y_test = keras.utils.to_categorical(y_test, 2)

    #model
    model = keras.Sequential()
    model.add(keras.layers.Dense(4, activation='relu', input_shape=(2,), name="Dense_1"))
    model.add(keras.layers.Dropout(0.001))
    model.add(keras.layers.Dense(2, activation='softmax', name="Dense_2"))
    model.summary()
    model.compile(optimizer=keras.optimizers.Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    model.fit(x_train, y_train, batch_size=64, epochs=50, validation_data=(x_test, y_test))

    # Collapse softmax outputs and one-hot targets to class indices. argmax
    # picks the first largest entry per row, as the per-row list search did.
    pre = model.predict(x_test)
    pre_l = np.argmax(pre, axis=1)
    y_test_l = np.argmax(y_test, axis=1)

    print("accuracy is %.4f"%accuracy_score(y_test_l, pre_l))
    print("precision is %.4f"%precision_score(y_test_l, pre_l))
    print("recall is %.4f"%recall_score(y_test_l, pre_l))
    print("f1_score is %.4f"%f1_score(y_test_l, pre_l))

In [None]:
# Ask each AddTree model (held in the R session) for its predictions.
# NOTE(review): `test` inside these strings is resolved by R, not Python, and
# no R statement in this notebook creates a variable called `test` (only
# `normal.test` / `attack.test`). Confirm which data frame is intended and make
# sure it exists in the R session before running this cell.
normal_pre = robjects.r('predict(normal.addtree, test)')
attack_pre = robjects.r('predict(attack.addtree, test)')

# Column names must be string literals; the bare names normal/attack/label are
# not defined in Python and raised NameError.
df = pd.DataFrame(columns=['normal', 'attack', 'label'])
df['normal'] = normal_pre
df['attack'] = attack_pre
df['label'] = df_test['label']

# voting_machine picks up the notebook-level `df` assembled above.
voting_machine()