<a href="https://colab.research.google.com/github/Uttpal-Tripathy/DDoS-Attack-detection/blob/main/RL_based_DDoS_Detection_using_SAC%2C_PPO_Clip_and_A2C_and_TD3_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install stable-baselines3

Collecting stable-baselines3
  Downloading stable_baselines3-2.5.0-py3-none-any.whl.metadata (4.8 kB)
Collecting gymnasium<1.1.0,>=0.29.1 (from stable-baselines3)
  Downloading gymnasium-1.0.0-py3-none-any.whl.metadata (9.5 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (f

In [None]:
!pip install shimmy>=2.0

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from gym import Env
from gym.spaces import Box, Discrete
from stable_baselines3 import PPO
import time

# ✅ Load Dataset
file_path = '/content/drive/MyDrive/cicddos2019_dataset.csv'
data = pd.read_csv(file_path)
data.dropna(inplace=True)

columns_to_drop = ['Flow ID', 'Src IP', 'Dst IP', 'Timestamp']
for column in columns_to_drop:
    if column in data.columns:
        data.drop([column], axis=1, inplace=True)
    else:
        print(f"Column '{column}' not found in DataFrame. Skipping.")


# ✅ Encode Labels
label_encoder = LabelEncoder()
data['Label'] = label_encoder.fit_transform(data['Label'])

# ✅ Scale Data
X = data.drop('Label', axis=1)
y = data['Label']

for column in X.select_dtypes(include=['object']).columns:
    try:
        X[column] = pd.to_numeric(X[column])
    except ValueError:
        print(f"Column '{column}' could not be converted to numeric and will be dropped.")
        X = X.drop(columns=[column])  # Drop the problematic column

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# ✅ Split Data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

state_size = X_train.shape[1]
action_size = 2

# ✅ Custom Gym Environment
class DDoSDetectionEnv(Env):
    def __init__(self):
        super(DDoSDetectionEnv, self).__init__()
        self.observation_space = Box(low=-1, high=1, shape=(state_size,))
        self.action_space = Discrete(2)
        self.data = X_train
        self.labels = y_train
        self.index = 0

    def reset(self):
        self.index = 0
        return self.data[self.index]

    def step(self, action):
        label = self.labels.iloc[self.index]
        reward = 1 if action == label else -1
        self.index += 1
        done = (self.index >= len(self.data) - 1)
        next_state = self.data[self.index] if not done else np.zeros(state_size)
        return next_state, reward, done, {}

    def render(self, mode='human'):
        pass

env = DDoSDetectionEnv()

# ✅ PPO Configuration
model = PPO(
    "MlpPolicy",
    env,
    learning_rate=0.0003,  # Lower learning rate for stable learning
    n_steps=1024,          # Reduced steps per update to save time
    batch_size=64,         # Efficient batch size
    n_epochs=5,            # Fewer epochs → faster training
    clip_range=0.1,        # Reduced clipping range for better learning
    verbose=1,
    device="cuda"          # Use GPU for faster training
)

# ✅ Start Training
start_time = time.time()

model.learn(total_timesteps=10000)  # Reduced timesteps for faster training

end_time = time.time()
print(f"Training Time: {end_time - start_time:.2f} seconds")

# ✅ Save Model
model.save("optimized_ppo_ddos")

# ✅ Test Model
state = env.reset()
total_reward = 0
done = False

while not done:
    action, _ = model.predict(state)
    state, reward, done, _ = env.step(action)
    total_reward += reward

print(f"Total Reward: {total_reward}")

# ✅ Evaluation
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

y_true = []
y_pred = []
state = env.reset()
done = False

while not done:
    action, _ = model.predict(state)
    y_pred.append(action)
    y_true.append(y_train.iloc[env.index])
    state, reward, done, _ = env.step(action)

# ✅ Metrics
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)
confusion = confusion_matrix(y_true, y_pred)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")
print(f"Confusion Matrix:\n{confusion}")


Column 'Flow ID' not found in DataFrame. Skipping.
Column 'Src IP' not found in DataFrame. Skipping.
Column 'Dst IP' not found in DataFrame. Skipping.
Column 'Timestamp' not found in DataFrame. Skipping.
Column 'Class' could not be converted to numeric and will be dropped.
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




-----------------------------
| time/              |      |
|    fps             | 682  |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 1024 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 575         |
|    iterations           | 2           |
|    time_elapsed         | 3           |
|    total_timesteps      | 2048        |
| train/                  |             |
|    approx_kl            | 0.004318704 |
|    clip_fraction        | 0.297       |
|    clip_range           | 0.1         |
|    entropy_loss         | -0.69       |
|    explained_variance   | -0.0081     |
|    learning_rate        | 0.0003      |
|    loss                 | 4.9         |
|    n_updates            | 5           |
|    policy_gradient_loss | -0.0179     |
|    value_loss           | 8.74        |
-----------------------------------------
----------------------------------

SAC - Soft actor critic

In [None]:
from stable_baselines3 import A2C
from stable_baselines3.common.env_util import make_vec_env

# Create environment with parallel processing
env = make_vec_env(DDoSDetectionEnv, n_envs=4)

# Define A2C model
model = A2C(
    "MlpPolicy",
    env,
    learning_rate=0.0003,
    n_steps=5, # Update the policy every 5 steps
    # Other hyperparameters can be added here...
    verbose=1,
    device="cuda"
)

# Train model
model.learn(total_timesteps=50000)

# Save model
model.save("a2c_ddos")

# Test model
state = env.reset()
done = [False] * env.num_envs  # Initialize done as a list of False values
total_reward = np.zeros(env.num_envs)  # Initialize total_reward for each environment
while not all(done):  # Continue until all environments are done
    action, _ = model.predict(state)
    state, reward, done_array, _ = env.step(action)
    total_reward += reward
    done = [d for d in done_array]  # Update done list from the array

print(f"Total Reward: {total_reward}")

  deprecation(


Using cpu device
------------------------------------
| time/                 |          |
|    fps                | 1004     |
|    iterations         | 100      |
|    time_elapsed       | 1        |
|    total_timesteps    | 2000     |
| train/                |          |
|    entropy_loss       | -0.224   |
|    explained_variance | 0.0805   |
|    learning_rate      | 0.0003   |
|    n_updates          | 99       |
|    policy_loss        | -0.00469 |
|    value_loss         | 7.35     |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 1103     |
|    iterations         | 200      |
|    time_elapsed       | 3        |
|    total_timesteps    | 4000     |
| train/                |          |
|    entropy_loss       | -0.114   |
|    explained_variance | -0.187   |
|    learning_rate      | 0.0003   |
|    n_updates          | 199      |
|    policy_loss        | 0.06     |
|    value_loss      

In [None]:

accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)
confusion = confusion_matrix(y_true, y_pred)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")
print(f"Confusion Matrix:\n{confusion}")


Accuracy: 0.7985
Precision: 0.9942
Recall: 0.8014
F1-Score: 0.8874
Confusion Matrix:
[[  20   22]
 [ 928 3745]]


Twin Delayed Deep Deterministic Policy Gradient (TD3)

In [None]:


from stable_baselines3 import TD3, DDPG, A2C  # Import DDPG, A2C
from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise # Import OrnsteinUhlenbeckActionNoise

# Reset the environment to a single environment, as TD3/DDPG expects.
env = DDoSDetectionEnv()

# If you still want to use TD3 (or DDPG) with discrete actions,
# you'll have to adapt it significantly, which is outside the scope
# of a quick fix. Here's an example with DDPG and discrete actions:

# Instead of TD3 or DDPG, use A2C which is suitable for discrete action spaces.
# A2C is an actor-critic algorithm, often better for discrete problems.
model = A2C("MlpPolicy", env, verbose=1, device="cuda")

# Train the model (A2C in this case)
model.learn(total_timesteps=10000)

# Save the model
model.save("a2c_ddos_for_discrete_actions") # Change save name

# Test the model (similar to PPO testing)
state = env.reset()
total_reward = 0
done = False
while not done:
    action, _ = model.predict(state)
    state, reward, done, _ = env.step(action)
    total_reward += reward
print(f"Total Reward (A2C): {total_reward}")


# Evaluation (similar to PPO evaluation)

y_true = []
y_pred = []
state = env.reset()
done = False

while not done:
    action, _ = model.predict(state)
    y_pred.append(action)
    y_true.append(y_train.iloc[env.index])
    state, reward, done, _ = env.step(action)

# Metrics (same as before)
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)
confusion = confusion_matrix(y_true, y_pred)

print(f"Accuracy (A2C): {accuracy:.4f}")
print(f"Precision (A2C): {precision:.4f}")
print(f"Recall (A2C): {recall:.4f}")
print(f"F1-Score (A2C): {f1:.4f}")
print(f"Confusion Matrix (A2C):\n{confusion}")

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




------------------------------------
| time/                 |          |
|    fps                | 299      |
|    iterations         | 100      |
|    time_elapsed       | 1        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -0.139   |
|    explained_variance | 0.293    |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | 0.0864   |
|    value_loss         | 8.63     |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 279      |
|    iterations         | 200      |
|    time_elapsed       | 3        |
|    total_timesteps    | 1000     |
| train/                |          |
|    entropy_loss       | -0.143   |
|    explained_variance | -0.144   |
|    learning_rate      | 0.0007   |
|    n_updates          | 199      |
|    policy_loss        | 0.0716   |
|    value_loss         | 7.22     |
-

Proximal Policy Optimization with Clipping (PPO) - Enhanced

In [None]:

model = PPO(
    "MlpPolicy",
    env,
    learning_rate=0.0003,
    n_steps=2048,  # Increased steps per update
    batch_size=128,  # Larger batch size
    n_epochs=10,     # More epochs for better convergence
    clip_range=0.2,   # Slightly increased clipping range
    ent_coef=0.01,    # Entropy coefficient for exploration
    vf_coef=0.5,      # Value function coefficient
    verbose=1,
    device="cuda"
)

# Start Training
start_time = time.time()
model.learn(total_timesteps=50000)  # Increased timesteps
end_time = time.time()
print(f"Training Time: {end_time - start_time:.2f} seconds")

# ... (rest of your code)


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




-----------------------------
| time/              |      |
|    fps             | 999  |
|    iterations      | 1    |
|    time_elapsed    | 2    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 840         |
|    iterations           | 2           |
|    time_elapsed         | 4           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.018710515 |
|    clip_fraction        | 0.301       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.679      |
|    explained_variance   | -0.0589     |
|    learning_rate        | 0.0003      |
|    loss                 | 4.28        |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.0414     |
|    value_loss           | 7.7         |
-----------------------------------------
----------------------------------

In [None]:


y_true = []
y_pred = []
# Use X_test and y_test for evaluation
for i in range(len(X_test)):
    state = X_test[i]  # Use data from X_test directly
    action, _ = model.predict(state)
    y_pred.append(action)
    y_true.append(y_test.iloc[i])  # Use y_test for true labels


# Metrics (same as before)
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average='weighted',zero_division=1) # Handle potential zero divisions
recall = recall_score(y_true, y_pred, average='weighted',zero_division=1)
f1 = f1_score(y_true, y_pred, average='weighted',zero_division=1)
confusion = confusion_matrix(y_true, y_pred)

print(f"Accuracy (A2C): {accuracy:.4f}")
print(f"Precision (A2C): {precision:.4f}")
print(f"Recall (A2C): {recall:.4f}")
print(f"F1-Score (A2C): {f1:.4f}")
print(f"Confusion Matrix (A2C):\n{confusion}")


Accuracy (A2C): 0.9881
Precision (A2C): 0.9894
Recall (A2C): 0.9881
F1-Score (A2C): 0.9887
Confusion Matrix (A2C):
[[   3    6]
 [   8 1163]]


In [None]:


from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Create a dictionary to store the evaluation metrics for each model
evaluation_metrics = {}

# PPO metrics
evaluation_metrics['PPO'] = {
    'Accuracy': accuracy,
    'Precision': precision,
    'Recall': recall,
    'F1-Score': f1
}

# A2C metrics
evaluation_metrics['A2C'] = {
    'Accuracy': accuracy,
    'Precision': precision,
    'Recall': recall,
    'F1-Score': f1
}

# A2C (for discrete actions) metrics
evaluation_metrics['A2C_discrete'] = {
    'Accuracy': accuracy,
    'Precision': precision,
    'Recall': recall,
    'F1-Score': f1
}

# PPO Enhanced metrics
evaluation_metrics['PPO_Enhanced'] = {
    'Accuracy': accuracy,
    'Precision': precision,
    'Recall': recall,
    'F1-Score': f1
}


# Convert the dictionary to a Pandas DataFrame
metrics_df = pd.DataFrame.from_dict(evaluation_metrics, orient='index')

# Display the DataFrame
metrics_df


Unnamed: 0,Accuracy,Precision,Recall,F1-Score
PPO,0.988136,0.98936,0.988136,0.988724
A2C,0.988136,0.98936,0.988136,0.988724
A2C_discrete,0.988136,0.98936,0.988136,0.988724
PPO_Enhanced,0.988136,0.98936,0.988136,0.988724


In [None]:
# prompt: evauation matrics of the models used here

# ... (Your existing code)

# Create a dictionary to store the evaluation metrics for each model
evaluation_metrics = {}

# Function to calculate and store metrics
def evaluate_model(model_name, y_true, y_pred):
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted', zero_division=1)
    recall = recall_score(y_true, y_pred, average='weighted', zero_division=1)
    f1 = f1_score(y_true, y_pred, average='weighted', zero_division=1)
    evaluation_metrics[model_name] = {
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1-Score': f1
    }
    print(f"--- {model_name} Evaluation ---")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-Score: {f1:.4f}")


# Evaluate PPO
y_true_ppo = []
y_pred_ppo = []
state = env.reset()
done = False
while not done:
    action, _ = model.predict(state)
    y_pred_ppo.append(action)
    y_true_ppo.append(y_train.iloc[env.index])
    state, reward, done, _ = env.step(action)

evaluate_model('PPO', y_true_ppo, y_pred_ppo)


# Evaluate A2C (you'll need to re-run the A2C training and prediction parts)
# ... (A2C training and prediction code) ...

evaluate_model('A2C', y_true, y_pred) # Assuming y_true and y_pred are from A2C predictions


# Evaluate A2C for discrete actions (similar to A2C, use the corresponding y_true and y_pred)
# ... (A2C discrete training and prediction code) ...
evaluate_model('A2C_discrete', y_true, y_pred) # Assuming correct y_true and y_pred

# Evaluate Enhanced PPO (Use the correct y_true and y_pred for Enhanced PPO)
# ... (Enhanced PPO prediction code using X_test and y_test) ...
evaluate_model("PPO_Enhanced", y_true, y_pred)  # Make sure y_true and y_pred are from enhanced PPO


# Convert to DataFrame
metrics_df = pd.DataFrame.from_dict(evaluation_metrics, orient='index')
metrics_df


--- PPO Evaluation ---
Accuracy: 0.9890
Precision: 0.9879
Recall: 0.9890
F1-Score: 0.9884
--- A2C Evaluation ---
Accuracy: 0.9881
Precision: 0.9894
Recall: 0.9881
F1-Score: 0.9887
--- A2C_discrete Evaluation ---
Accuracy: 0.9881
Precision: 0.9894
Recall: 0.9881
F1-Score: 0.9887
--- PPO_Enhanced Evaluation ---
Accuracy: 0.9881
Precision: 0.9894
Recall: 0.9881
F1-Score: 0.9887


Unnamed: 0,Accuracy,Precision,Recall,F1-Score
PPO,0.988971,0.987884,0.988971,0.988396
A2C,0.988136,0.98936,0.988136,0.988724
A2C_discrete,0.988136,0.98936,0.988136,0.988724
PPO_Enhanced,0.988136,0.98936,0.988136,0.988724
