In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.metrics import *
import tensorflow as tf
from tensorflow import keras
tf.config.set_visible_devices([], 'GPU')

In [2]:
import pandas as pd
import numpy as np
def load_dataset(path):
    """Read one CAN-log CSV and attach the derived columns used downstream.

    The file is read with ``pd.read_csv`` and must contain the columns
    ``Timestamp``, ``Arbitration_ID`` and ``Class``.

    Columns added, in this order:
      * ``abstime``       - ``Timestamp`` read as seconds and converted to
                            datetimes, rounded to microseconds.
      * ``monotime``      - ``Timestamp`` minus the smallest ``Timestamp``.
      * ``aid_int``       - ``Arbitration_ID`` parsed as a base-16 integer.
      * ``y``             - ``Class`` mapped to 0 ('Normal') or 1 ('Attack').
      * ``time_interval`` - difference between a row's ``Timestamp`` and the
                            previous row with the same ``Arbitration_ID``
                            (NaN for the first row of each ID).

    Raises:
        AssertionError: if any cell is missing, or if ``Timestamp`` is not
            monotonically increasing.
    """
    frame = pd.read_csv(path)

    # Sanity checks on the raw file before any feature is derived from it.
    assert not frame.isna().any().any(), 'There is at least one missing value.'
    timestamps = frame['Timestamp']
    assert timestamps.is_monotonic_increasing, 'Timestamp is not sorted.'

    def _hex_to_int(text):
        return int(text, 16)

    class_codes = {'Normal': 0, 'Attack': 1}

    frame['abstime'] = pd.to_datetime(timestamps, unit='s').round('us')
    frame['monotime'] = timestamps - timestamps.min()
    frame['aid_int'] = frame['Arbitration_ID'].map(_hex_to_int)
    frame['y'] = frame['Class'].map(class_codes)

    # Per-ID inter-arrival time: diff within each Arbitration_ID group.
    frame['time_interval'] = timestamps.groupby(frame['Arbitration_ID']).diff()

    return frame
# Read the three training captures (Pre_train_D_0 .. Pre_train_D_2), one frame per file.
dfx=[]
for i in range(3):
    df_stationary_path="./0_Preliminary/0_Training/Pre_train_D_{t}.csv".format(t=i)
    dfx+=[load_dataset(df_stationary_path)]

# Stack the per-file frames; ignore_index gives the combined frame a fresh 0..N-1 index.
df_stationary=pd.concat(dfx,ignore_index=True)

def get_H(series_aid):
    """Return the Shannon entropy (natural log) of the values in `series_aid`.

    Each distinct value's probability is its count divided by the total
    number of entries in the series; the result is ``-sum(p * ln(p))``.
    """
    n_total = series_aid.shape[0]
    probabilities = series_aid.value_counts() / n_total
    weighted_logs = probabilities * np.log(probabilities)
    return -weighted_logs.sum()

# Entropy of the arbitration IDs over a sliding window of 2402 frames, evaluated on
# every 10th window (step=10); rows without their own value are NaN after assignment.
df_stationary['entropy']=(
    df_stationary['aid_int']
    .rolling(window=2402,min_periods=2402,step=10)
    .apply(get_H)
)
# Carry the most recent computed entropy forward onto the rows in between.
df_stationary['entropy']=df_stationary['entropy'].ffill()

# Keep only rows where both the inter-arrival time and the entropy are defined.
df_stationary_except_nan=df_stationary[~(df_stationary['time_interval'].isna()|df_stationary['entropy'].isna())]

In [3]:
# Peek at one raw payload: a string of space-separated hex bytes.
print(df_stationary_except_nan['Data'].iloc[0])

# Parse every payload into integer byte values. Payloads shorter than 8 bytes are
# right-padded with -1 so each row has exactly 8 data fields. Each string is split once.
datafield=[
    [int(byte,16) for byte in tokens]+[-1]*(8-len(tokens))
    for tokens in (payload.split(' ') for payload in df_stationary_except_nan['Data'])
]

df_df=pd.DataFrame(datafield,columns=['datafield{i}'.format(i=z) for z in range(0,8)])

# reset_index() keeps the original row labels in an 'index' column and gives df_1 a
# 0..N-1 index. df_df is freshly built and already has that index, so the two frames
# line up row-for-row; resetting df_df as well would only add a second, duplicate
# 'index' column to df_final.
df_1=df_stationary_except_nan.reset_index()
df_final=pd.concat([df_1,df_df],axis=1)

# Model inputs: arbitration ID, inter-arrival time, window entropy and the 8 payload bytes.
features_stationary=df_final[[
    'aid_int','time_interval','entropy',
    'datafield0','datafield1','datafield2','datafield3',
    'datafield4','datafield5','datafield6','datafield7',
]]
labels_stationary=df_final['y']

00 00 00 24 76 01 45 30


In [4]:
# Load the submission capture and derive the same windowed-entropy feature for it:
# a 2402-frame window evaluated on every 10th window, then forward-filled.
df_test=load_dataset("./0_Preliminary/1_Submission/Pre_submit_D.csv")
df_test['entropy']=(
    df_test['aid_int']
    .rolling(window=2402,min_periods=2402,step=10)
    .apply(get_H)
)
df_test['entropy']=df_test['entropy'].ffill()

# Keep only rows where both the inter-arrival time and the entropy are defined.
df_test_except_nan=df_test[~(df_test['time_interval'].isna()|df_test['entropy'].isna())]

In [5]:
# Parse every payload (space-separated hex bytes) into integer byte values. Payloads
# shorter than 8 bytes are right-padded with -1 so each row has exactly 8 data fields.
# Each string is split once.
datafieldt=[
    [int(byte,16) for byte in tokens]+[-1]*(8-len(tokens))
    for tokens in (payload.split(' ') for payload in df_test_except_nan['Data'])
]

df_dft=pd.DataFrame(datafieldt,columns=['datafield{i}'.format(i=z) for z in range(0,8)])

# reset_index() keeps the original row labels in an 'index' column and gives df_1t a
# 0..N-1 index. df_dft is freshly built and already has that index, so the two frames
# line up row-for-row; resetting df_dft as well would only add a second, duplicate
# 'index' column to df_final_test.
df_1t=df_test_except_nan.reset_index()
df_final_test=pd.concat([df_1t,df_dft],axis=1)

# Same feature columns, in the same order, as the training feature frame.
features_stationary_t=df_final_test[[
    'aid_int','time_interval','entropy',
    'datafield0','datafield1','datafield2','datafield3',
    'datafield4','datafield5','datafield6','datafield7',
]]
labels_stationary_t=df_final_test['y']

In [10]:
# Add a trailing axis of size 1 so the feature matrix matches the (11, 1) input shape
# declared on the first LSTM layer below.
X=features_stationary.values
X=X.reshape(*X.shape,1)

# Two stacked LSTMs (the first returns the full sequence for the second to consume),
# a 16-unit dense layer, and a single sigmoid output for the binary label.
model=keras.Sequential([
    keras.layers.LSTM(32, input_shape=(11, 1), return_sequences=True),
    keras.layers.LSTM(16),
    keras.layers.Dense(16),
    keras.layers.Dense(1, activation='sigmoid'),
])

model.summary()
model.compile(
    optimizer=keras.optimizers.legacy.Adam(learning_rate=0.001),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_2 (LSTM)               (None, 11, 32)            4352      
                                                                 
 lstm_3 (LSTM)               (None, 16)                3136      
                                                                 
 dense_2 (Dense)             (None, 16)                272       
                                                                 
 dense_3 (Dense)             (None, 1)                 17        
                                                                 
Total params: 7777 (30.38 KB)
Trainable params: 7777 (30.38 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [11]:
def train(model,data,target,epochs=5,batch_size=16):
    """Fit `model` on `data` / `target` and return the same model object.

    Args:
        model: object exposing ``fit(data, target, epochs=..., batch_size=...)``.
        data: training inputs, passed to ``model.fit`` unchanged.
        target: training labels, passed to ``model.fit`` unchanged.
        epochs: number of epochs forwarded to ``model.fit`` (default 5).
        batch_size: batch size forwarded to ``model.fit`` (default 16).

    Returns:
        The `model` that was passed in, after ``fit`` has been called on it.
    """
    model.fit(data,target,epochs=epochs,batch_size=batch_size)
    return model

In [12]:
# Fit on the reshaped training matrix X and the training labels as a NumPy array.
# `train` returns the object it was given, so `model` stays bound to that same object.
model=train(model,X,labels_stationary.values)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [9]:
def evaluations(model, data_test, target_test):
    """Print binary-classification metrics for `model` on a held-out set.

    The output of ``model.predict(data_test)`` is flattened to 1-D and
    thresholded: scores strictly greater than 0.5 become class 1, all others
    class 0. The confusion matrix, accuracy, precision, recall and F1 score
    against `target_test` are then printed. Nothing is returned.
    """
    scores = model.predict(data_test).flatten()
    is_positive = scores > 0.5
    prediction = np.where(is_positive, 1, 0)

    # (label, metric function) pairs, printed in this order.
    report = (
        ('Confusion matrix: \n', confusion_matrix),
        ('Accuracy: ', accuracy_score),
        ('Precision: ', precision_score),
        ('Recall: ', recall_score),
        ('F1 score: ', f1_score),
    )
    for label, metric in report:
        print("{}:".format(label), metric(target_test, prediction))
# Give the model test inputs in the same explicit 3-D layout (rows, features, 1) that
# the training matrix was reshaped to, rather than relying on a 2-D DataFrame being
# adapted implicitly.
X_test=features_stationary_t.values
X_test=X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
evaluations(model,X_test,labels_stationary_t)

Confusion matrix: 
: [[1790123    6513]
 [  36635  165044]]
Accuracy: : 0.9784078085787276
Precision: : 0.962035941407229
Recall: : 0.8183499521516866
F1 score: : 0.884394860088523
