<a href="https://colab.research.google.com/github/jeawsdrfgt/Anomaly-Detection/blob/main/Anomaly_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import necessary libraries
from tensorflow import keras
from tensorflow.keras import layers
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest  # Import Isolation Forest

In [None]:
# Make the dataset available in the working directory.
# Only prompt for an upload when the CSV is not already present, so that
# "Restart & Run All" does not block on an interactive dialog (and so the
# notebook can also run outside Colab when the file is already in place).
import os

uploaded = {}  # stays empty when no upload was needed
if not os.path.exists('synthetic_network_traffic.csv'):
    # Imported lazily: the google.colab package is only importable inside a Colab runtime.
    from google.colab import files
    uploaded = files.upload()


Saving synthetic_network_traffic.csv to synthetic_network_traffic.csv


In [None]:
# Load the network-traffic dataset and preview the first rows.
data = pd.read_csv('synthetic_network_traffic.csv')
# Preview the frame that was just loaded; the original referenced `df`, which is
# never defined in this notebook and raises NameError on a fresh kernel.
print(data.head())


   SourceIP  DestinationIP  SourcePort  DestinationPort  Protocol  BytesSent  \
0  0.496714      -0.138264    0.647689         1.523030 -0.234153  -0.234137   
1 -0.463418      -0.465730    0.241962        -1.913280 -1.724918  -0.562288   
2  1.465649      -0.225776    0.067528        -1.424748 -0.544383   0.110923   
3 -0.601707       1.852278   -0.013497        -1.057711  0.822545  -1.220844   
4  0.738467       0.171368   -0.115648        -0.301104 -1.478522  -0.719844   

   BytesReceived  PacketsSent  PacketsReceived  Duration  IsAnomaly  
0       1.579213     0.767435        -0.469474  0.542560          0  
1      -1.012831     0.314247        -0.908024 -1.412304          0  
2      -1.150994     0.375698        -0.600639 -0.291694          0  
3       0.208864    -1.959670        -1.328186  0.196861          0  
4      -0.460639     1.057122         0.343618 -1.763040          0  


In [None]:
# Feature engineering: add combined sent+received totals as extra columns
# (extend the tuple below to derive further aggregates).
for total_col, sent_col, received_col in (
    ('TotalBytes', 'BytesSent', 'BytesReceived'),
    ('TotalPackets', 'PacketsSent', 'PacketsReceived'),
):
    data[total_col] = data[sent_col] + data[received_col]

In [None]:
# Oversample the 'Anomaly' class by appending one extra copy of every anomalous row.
# NOTE(review): a single duplication only doubles the anomaly count; whether that
# balances the classes depends on the original class ratio — confirm.
is_anomaly = data['IsAnomaly'] == 1
anomaly_data = data.loc[is_anomaly]
oversampled_data = pd.concat([data, anomaly_data])


In [None]:
# Separate the target column from the predictor columns
y = oversampled_data['IsAnomaly']               # Labels
X = oversampled_data.drop('IsAnomaly', axis=1)  # Features (every remaining column)

In [None]:
# Split the dataset into training (70%), validation (15%), and testing (15%) sets.
# Both splits are stratified on the label so that every partition keeps the same
# anomaly rate; with a rare positive class an unstratified split can leave the
# validation/test sets with a skewed share of anomalies.
# NOTE(review): if X contains exact duplicate rows, copies of the same sample can
# land in different partitions and inflate the evaluation — verify upstream.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)


In [None]:
# Standardize the features: learn the scaling statistics from the training split only,
# then apply that same transformation to all three splits.
scaler = StandardScaler().fit(X_train)
X_train, X_val, X_test = (
    scaler.transform(split) for split in (X_train, X_val, X_test)
)

In [None]:
# Build the Isolation Forest from an explicit parameter set, then fit it on the
# scaled training features (labels are not passed to fit).
iforest_params = {'contamination': 0.1, 'random_state': 42}
isolation_forest = IsolationForest(**iforest_params)
isolation_forest.fit(X_train)

In [None]:
# Score the test set with the Isolation Forest and turn its output into a boolean
# mask: True where the forest returned -1 (anomaly), False otherwise.
y_pred_iforest = isolation_forest.predict(X_test) == -1

In [None]:
# Build the feed-forward classifier layer by layer
n_features = X_train.shape[1]  # one input unit per feature column

model = keras.Sequential()
model.add(layers.Input(shape=(n_features,)))
model.add(layers.Dense(units=64, activation='relu'))
model.add(layers.Dense(units=32, activation='relu'))
# Single sigmoid unit: outputs a value in (0, 1) for the binary anomaly label
model.add(layers.Dense(units=1, activation='sigmoid'))

In [None]:
# Configure the optimizer, loss and tracked metric
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit on the training split, monitoring the validation split after every epoch
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=5,
    batch_size=32,
)

# Predict on the test split and threshold the sigmoid outputs at 0.5
# (adjust the cut-off as needed) to obtain boolean class predictions
y_pred = model.predict(X_test) > 0.5

Epoch 1/5
[1m21985/21985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 2ms/step - accuracy: 0.9884 - loss: 0.0643 - val_accuracy: 0.9900 - val_loss: 0.0563
Epoch 2/5
[1m21985/21985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 2ms/step - accuracy: 0.9901 - loss: 0.0562 - val_accuracy: 0.9900 - val_loss: 0.0561
Epoch 3/5
[1m21985/21985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 2ms/step - accuracy: 0.9899 - loss: 0.0566 - val_accuracy: 0.9900 - val_loss: 0.0563
Epoch 4/5
[1m21985/21985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 2ms/step - accuracy: 0.9899 - loss: 0.0567 - val_accuracy: 0.9900 - val_loss: 0.0562
Epoch 5/5
[1m21985/21985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 2ms/step - accuracy: 0.9900 - loss: 0.0563 - val_accuracy: 0.9900 - val_loss: 0.0562
[1m4711/4711[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1ms/step


In [None]:
# Confusion matrix computed from the thresholded class predictions
cm = confusion_matrix(y_test, y_pred)

# Classification report. zero_division=0 scores a class that is never predicted
# with precision 0 instead of 1, so a model that always answers 'Normal' cannot
# show a perfect-looking 'Anomaly' precision.
report = classification_report(y_test, y_pred, target_names=['Normal', 'Anomaly'], zero_division=0)

# ROC curve and AUC must be computed from continuous scores: feeding the already
# thresholded booleans in `y_pred` collapses the curve to a single operating point.
# The probabilities are recomputed here because `y_pred` only holds the
# thresholded values.
y_score = model.predict(X_test, verbose=0).ravel()
fpr, tpr, roc_thresholds = roc_curve(y_test, y_score)
roc_auc = auc(fpr, tpr)

In [None]:
# Plot the ROC curve using the explicit figure/axes interface
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {roc_auc:.2f})')
# Diagonal reference line from (0, 0) to (1, 1)
ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
ax.set_xlim(0.0, 1.0)
ax.set_ylim(0.0, 1.05)
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic')
ax.legend(loc='lower right')

In [None]:
# Draw the confusion matrix as an annotated heatmap on an explicit axes
class_names = ['Normal', 'Anomaly']
fig, ax = plt.subplots(figsize=(6, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    cbar=False,
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')

In [None]:
# Display the classification report built earlier. `report` is the pre-formatted
# text returned by classification_report, so print() is used to keep its line
# breaks and column alignment intact.
print(report)