In [None]:
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder, normalize
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import RFE
from sklearn.tree import DecisionTreeClassifier

# !pip install tensorflow==2.1.0
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, BatchNormalization, Dropout, Conv1D, MaxPool1D
from tensorflow.keras import regularizers
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
from tensorflow.keras.losses import KLDivergence, mean_squared_error

import keras
from keras.utils import np_utils

In [None]:
# Tunable hyperparameters shared by the cells below.
feature_dim = 32   # number of features kept by RFE; also the classifier's input width
encoding_dim = 16  # width of the autoencoder's bottleneck (encoder output) layer
ae_epoch = 30      # training epochs for the autoencoder
clf_epoch = 30     # training epochs for the classifier
batch_size = 32    # mini-batch size used by both fit() calls

In [None]:
# Read the training CSV into a DataFrame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='../input/nslkdd/kdd_train.csv')
df.head(n=5)

In [None]:
# Integer-encode the target column and the three string-valued feature columns.
# The single encoder is re-fitted for every column, so after this cell `le`
# only remembers the mapping of the last column ('flag').
le = LabelEncoder()

for column_name in ('labels', 'protocol_type', 'service', 'flag'):
    df[column_name] = le.fit_transform(df[column_name])

In [None]:
# Separate features from the target and hold out 20% of the rows for testing.
x = df.drop('labels', axis=1)
y = df.loc[:, ['labels']]

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=.2, random_state=42)

# Standardise features. The scaler is fitted on the training split ONLY and
# then applied unchanged to the test split: re-fitting on the test data would
# scale both splits with different statistics and leak test information into
# preprocessing.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_train = pd.DataFrame(X_train)
X_test = sc.transform(X_test)
X_test = pd.DataFrame(X_test)

In [None]:
# One-hot encode the targets with an explicit, shared width. Without
# `num_classes`, to_categorical infers the width from the largest label present
# in each split, so y_train and y_test could end up with different numbers of
# columns if the highest class id is missing from one of them.
# The labels were integer-encoded to 0..k-1 above, so nunique() == k.
num_classes = int(df['labels'].nunique())
y_train = np_utils.to_categorical(y_train, num_classes=num_classes)
y_test = np_utils.to_categorical(y_test, num_classes=num_classes)

In [None]:
# Display (rows, columns) of the scaled training frame before feature reduction.
X_train.shape

### Feature Reduction

In [None]:
# Recursive feature elimination with a decision tree as the ranking estimator,
# keeping `feature_dim` columns. The tree is seeded (same seed as the
# train/test split) so the selected feature set is reproducible across runs.
DTC = DecisionTreeClassifier(random_state=42)
rfe = RFE(DTC, n_features_to_select=feature_dim).fit(X_train, y_train)

# `support_` is a boolean mask over the input columns; map it to column labels
# and restrict both splits to the same selected columns.
indices = np.where(rfe.support_)[0]
features = X_train.columns.values[indices]
X_train, X_test = X_train[features], X_test[features]

## Utils

### Custom Loss (Not in use, need to tweak it)

In [None]:
class HybridHuberLoss(tf.keras.losses.Loss):
    """Loss that returns the larger of the MSE and the KL divergence.

    For each element the result is `kld_loss` where `mse < kld_loss`,
    otherwise `mse`.

    NOTE(review): `KLDivergence()` is a Loss *object*, so calling it applies
    that object's own reduction; the value is then broadcast against the MSE
    tensor inside `tf.where`. Confirm that this mixing of a reduced value with
    unreduced MSE values is what is intended before putting the loss to use.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def call(self, y_true, y_pred):
        """Compute the element-wise maximum of MSE and KL divergence."""
        mse = mean_squared_error(y_true, y_pred)
        kld = KLDivergence()
        kld_loss = kld(y_true, y_pred)
        return tf.where(mse < kld_loss, kld_loss, mse)

    def get_config(self):
        """Return the serialisable config.

        This loss has no hyperparameters of its own, so the base config is
        returned as-is (the previous version read a non-existent
        `self.threshold` attribute and raised AttributeError).
        """
        return super().get_config()


### Learning Rate Scheduler

In [None]:
def LRschedulerCLF(epoch, lr=0.01):
    """Learning-rate schedule for the classifier.

    Keeps the learning rate unchanged for the first 10 epochs, then multiplies
    the current rate by exp(-0.1) on every subsequent epoch.

    Args:
        epoch: Zero-based epoch index supplied by the scheduler callback.
        lr: Current learning rate.

    Returns:
        The learning rate to use for this epoch, as a plain Python float
        (the scheduler callback expects a float, not a TensorFlow tensor).
    """
    lr = float(lr)
    if epoch < 10:
        return lr
    return lr * math.exp(-0.1)

def LRschedulerAE(epoch):
    """Step-decay learning-rate schedule for the autoencoder.

    Starts at 0.01 and multiplies the rate by 0.005 once for every 5 epochs
    completed (counting from `epoch + 1`).

    NOTE(review): a factor of 0.005 per step drives the rate towards zero
    within a few steps — confirm this is intended rather than e.g. 0.5.
    """
    base_rate, decay_factor, step_size = 0.01, 0.005, 5.0
    completed_steps = math.floor((1 + epoch) / step_size)
    return base_rate * math.pow(decay_factor, completed_steps)
    
# Wrap each schedule function in a Keras callback so it can be passed to fit().
clf_lr = tf.keras.callbacks.LearningRateScheduler(schedule=LRschedulerCLF)
ae_lr = tf.keras.callbacks.LearningRateScheduler(schedule=LRschedulerAE)

# Autoencoder

In [None]:
# Dense autoencoder: input -> 32 -> encoding_dim -> 32 -> input.
input_dim = X_train.shape[1]
ae_input_layer = Input(shape=(input_dim, ))

# Encoder: the L1 activity regulariser penalises the first hidden layer's activations.
enc = Dense(32, activation="swish", activity_regularizer=regularizers.l1(10e-5))(ae_input_layer)
enc = BatchNormalization()(enc)
enc = Dense(encoding_dim, activation="swish")(enc)

# Decoder: each layer must consume the previous *decoder* tensor. The second
# BatchNormalization previously took `enc` as its input, which silently
# dropped the Dense(32) decoder layer from the model graph.
dec = BatchNormalization()(enc)
dec = Dense(32, activation="swish")(dec)
dec = BatchNormalization()(dec)
# NOTE(review): swish is bounded below, so the reconstruction cannot reach
# strongly negative standardised values — consider a linear output; confirm.
dec = Dense(input_dim, activation="swish")(dec)

# `autoencoder` is trained end to end; `encoder` shares its layers and exposes
# the bottleneck output for reuse by the classifier.
autoencoder = Model(inputs=ae_input_layer, outputs=dec)
encoder = Model(inputs=ae_input_layer, outputs=enc)

In [None]:
# The autoencoder is a regression (reconstruction) model, so classification
# accuracy is not a meaningful metric; track mean absolute error alongside the
# MSE loss instead.
autoencoder.compile(optimizer='adam', loss="mean_squared_error", metrics=['mae'])

In [None]:
# Train the autoencoder to reconstruct its own input (features are both x and
# y), validating on the held-out split; keep only the per-epoch metrics dict.
history = autoencoder.fit(
    x=X_train,
    y=X_train,
    batch_size=batch_size,
    epochs=ae_epoch,
    verbose=1,
    callbacks=[ae_lr],
    validation_data=(X_test, X_test),
    shuffle=True,
).history

# Classifier + Autoencoder

In [None]:
# Freeze the pre-trained encoder so only the classification head is trained.
encoder.trainable = False

input_layer = Input(shape=(feature_dim, ))

# training=False keeps the encoder's BatchNormalization layers in inference mode.
encoding = encoder(input_layer, training=False)

# Classification head: four Dense -> BatchNorm -> Dropout stages.
layer1 = Dense(64, activation="relu")(encoding)
layer1 = BatchNormalization()(layer1)
layer1 = Dropout(0.2)(layer1)

layer2 = Dense(64, activation="relu")(layer1)
layer2 = BatchNormalization()(layer2)
layer2 = Dropout(0.3)(layer2)

layer3 = Dense(128, activation="relu")(layer2)
layer3 = BatchNormalization()(layer3)
layer3 = Dropout(0.3)(layer3)

layer4 = Dense(128, activation="relu")(layer3)
layer4 = BatchNormalization()(layer4)
layer4 = Dropout(0.2)(layer4)

# Size the softmax output from the one-hot training targets instead of a
# hard-coded class count, so the model always matches the encoded labels.
output_layer = Dense(y_train.shape[1], activation="softmax")(layer4)

classifier = Model(inputs=input_layer, outputs=output_layer)

In [None]:
# Multi-class setup: categorical cross-entropy on one-hot targets, reporting
# accuracy, precision and AUC during training.
classifier.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy', 'Precision', 'AUC'],
)

In [None]:
# Train the classification head on top of the frozen encoder. Use the
# classifier's own schedule (`clf_lr`); the autoencoder schedule `ae_lr` was
# passed here before, leaving `clf_lr` defined but never used.
history = classifier.fit(X_train, y_train,
                    epochs=clf_epoch,
                    batch_size=batch_size,
                    shuffle=True,
                    validation_data=(X_test, y_test),
                    callbacks=[clf_lr],
                    verbose=1).history

## Model Evaluation

In [None]:
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score

In [None]:
# Turn softmax probabilities into one-hot predictions by taking the most
# probable class per row. Rounding the probabilities (the previous approach,
# via `np.round_`, which was removed in NumPy 2.0) leaves rows with no
# predicted class whenever no probability reaches 0.5.
y_probs = classifier.predict(X_test)
y_preds = np.eye(y_probs.shape[1])[np.argmax(y_probs, axis=1)]

### Precision

In [None]:
# Print micro-averaged, then support-weighted precision on the test split.
for averaging in ('micro', 'weighted'):
    print(precision_score(y_test, y_preds, average=averaging, zero_division=0))

### F1 Score

In [None]:
# Print micro-averaged, then support-weighted F1 on the test split.
for averaging in ('micro', 'weighted'):
    print(f1_score(y_test, y_preds, average=averaging, zero_division=0))