In [1]:
import pandas as pd
import numpy as np

In [2]:
!pip install matplotlib



In [3]:
import tensorflow as tf

from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, losses
from tensorflow.keras.models import Model

In [4]:
from sklearn import preprocessing

In [5]:
def labelencoder(ds):
    encoded_ds=ds.copy()
    le = preprocessing.LabelEncoder()
    for col in encoded_ds.select_dtypes(include=['object']).columns:
        encoded_ds[col]=le.fit_transform(encoded_ds[col])
    
    return encoded_ds

In [6]:
def encoding_for_anomalous(ds):
    encoded_ds=ds.copy()
    for i in range(len(ds)):
        if encoded_ds.iloc[i]=='BENIGN':
            encoded_ds.iloc[i]= 0
        else:
            encoded_ds.iloc[i]= 1
            
    return encoded_ds  

In [7]:
def scaling(ds):
    scaler=preprocessing.StandardScaler()
    scaled_ds=scaler.fit_transform(ds)
    return scaled_ds

In [8]:
def drop_infs(ds):
    ds.replace([np.inf, -np.inf], np.nan, inplace=True)
    ds.dropna(how='any', inplace=True)
    return ds

In [9]:
pd.set_option("display.max_rows", None,
             "display.max_columns", None)

In [10]:
PATH="/Users/ecemdenizbabaoglan/Desktop/TOBBETU/yap470/MachineLearningCVE/Monday-WorkingHours.pcap_ISCX.csv"

In [11]:
normal_ds=pd.read_csv(PATH) #Bu veri setini tamamen train için kullanıcaz

In [12]:
normal_ds=drop_infs(normal_ds)
normal_ds=labelencoder(normal_ds)

In [13]:
normal_y_train=normal_ds[' Label']

In [14]:
normal_x_train=normal_ds.drop(' Label', axis=1) #Label'ı yani y'yi dropluyoruz

In [15]:
normal_x_train=scaling(normal_x_train)

In [16]:
test_path="/Users/ecemdenizbabaoglan/Desktop/TOBBETU/yap470/MachineLearningCVE/Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv"

In [17]:
test_ds=pd.read_csv(test_path)

In [18]:
test_ds=drop_infs(test_ds)

In [19]:
y_test=test_ds[' Label']

In [20]:
x_test=test_ds.drop(' Label', axis=1)

In [21]:
y_test=encoding_for_anomalous(y_test)
x_test=labelencoder(x_test)
x_test=scaling(x_test)

In [22]:
y_test.value_counts()

1    128025
0     97686
Name:  Label, dtype: int64

In [23]:
#train.head()

In [24]:
#normal_ds.info()

In [25]:
#normal_ds.describe(include='all')

In [26]:
normal_x_train = np.asarray(normal_x_train).astype(np.float32)
x_test = np.asarray(x_test).astype(np.float32)

normal_x_train = tf.cast(normal_x_train, tf.float32)
x_test = tf.cast(x_test, tf.float32)

In [27]:
input = tf.keras.layers.Input(shape=(78,))

In [28]:
encoder = tf.keras.Sequential([
    layers.Dense(32, activation='relu'),
    layers.Dense(16, activation='relu'),
    layers.Dense(8, activation='relu')])(input)


In [29]:
decoder = tf.keras.Sequential([
    layers.Dense(16, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(78, activation='sigmoid')])(encoder)

In [30]:
autoencoder = tf.keras.Model(inputs=input, outputs=decoder)


In [31]:
autoencoder.compile(optimizer='adam', loss='mae')


In [32]:
history = autoencoder.fit(normal_x_train, normal_x_train, 
          epochs=10, 
          batch_size=64,
          validation_data=(x_test, x_test),
          shuffle=True)

Epoch 1/10
   1/8274 [..............................] - ETA: 22:15 - loss: 0.7229

2022-12-07 16:03:19.484952: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2022-12-07 16:03:19.485089: W tensorflow/core/platform/profile_utils/cpu_utils.cc:126] Failed to get CPU frequency: 0 Hz


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [33]:
#train_loss=history.history["loss"]

In [34]:
reconstructions = autoencoder.predict(normal_x_train)
train_loss = tf.keras.losses.mae(reconstructions, normal_x_train)

In [35]:
threshold = np.mean(train_loss) + np.std(train_loss)
print("Threshold: ", threshold)

Threshold:  0.5025517


In [36]:
def predict(model, data, threshold):
  reconstructions = model(data)
  loss = tf.keras.losses.mae(reconstructions, data)
  return tf.math.less(loss, threshold)

def print_stats(predictions, labels):
  print("Accuracy = {}".format(accuracy_score(labels, predictions)))
  print("Precision = {}".format(precision_score(labels, predictions)))
  print("Recall = {}".format(recall_score(labels, predictions)))

In [40]:
preds = predict(autoencoder, x_test, threshold)
print_stats(preds, y_test.astype(bool))

Accuracy = 0.6332522562037296
Precision = 0.6142806627601536
Recall = 0.9498379222808045
