In [1]:
import torch

# Environment report: PyTorch build, CUDA visibility and the device used by the notebook.
print("PyTorch version:", torch.__version__)
cuda_available = torch.cuda.is_available()
print("CUDA Available:", cuda_available)
print("Device Count:", torch.cuda.device_count())
# torch.cuda.current_device() initialises CUDA and raises on a CPU-only machine,
# so the index/name are only queried when a CUDA device is actually available.
if cuda_available:
    current_index = torch.cuda.current_device()
    print("Current Device Index:", current_index)
    print("Device Name:", torch.cuda.get_device_name(current_index))
else:
    print("Current Device Index:", "N/A")
    print("Device Name:", "CPU Only")

device = torch.device("cuda" if cuda_available else "cpu")
print("Active Device:", device)


PyTorch version: 2.7.1+cu118
CUDA Available: True
Device Count: 1
Current Device Index: 0
Device Name: NVIDIA GeForce GTX 1650
Active Device: cuda


In [2]:
# Print the path of the Python interpreter that backs this kernel, to confirm
# which environment the notebook is running in.
import sys
print(sys.executable)



c:\Users\T2430479\miniconda3\envs\MAFL\python.exe


In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split, StratifiedKFold
from imblearn.over_sampling import SMOTE
from collections import Counter
import torch
import torch.nn as nn
import torch.utils.data as data_utils
import torch.nn.functional as F


from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    classification_report,
    roc_auc_score
)
import warnings
# NOTE(review): this silences every warning category for the whole session,
# including deprecation/convergence warnings that may point at real problems.
warnings.filterwarnings("ignore")

# Do not truncate columns when a DataFrame is displayed.
pd.set_option('display.max_columns', None)


In [4]:
# Dataset Initialization
# Raw string literal: the Windows path contains "\T", "\Y", "\D" and "\C", which a
# normal string treats as (invalid) escape sequences and newer Python versions warn about.
DATA_PATH = r"D:\T24\Yeasin's Model\Dataset\CIC-ToN-IoT-V2.parquet"

# pd.read_parquet needs a parquet engine; install `pyarrow` (or `fastparquet`)
# into the kernel's environment, otherwise this call raises ImportError.
df = pd.read_parquet(DATA_PATH)
print(f"Dataset loaded successfully with shape: {df.shape}")
df.head(10)


ImportError: Unable to find a usable engine; tried using: 'pyarrow', 'fastparquet'.
A suitable version of pyarrow or fastparquet is required for parquet support.
Trying to import the above resulted in these errors:
 - Missing optional dependency 'pyarrow'. pyarrow is required for parquet support. Use pip or conda to install pyarrow.
 - Missing optional dependency 'fastparquet'. fastparquet is required for parquet support. Use pip or conda to install fastparquet.

In [None]:
#Exploratory Data Analysis (EDA)

# Null count per column; only columns with at least one null are printed.
missing = df.isnull().sum()
print("Missing values per column:")
print(missing[missing > 0])

# Binary target balance.
plt.figure(figsize=(6, 4))
sns.countplot(x='Label', data=df)
plt.title("Binary Class Distribution (Benign vs Attack)")
plt.xlabel("Label")
plt.ylabel("Count")
plt.show()

# Multi-class target balance.
plt.figure(figsize=(12, 5))
df['Attack'].value_counts().plot(kind='bar')
plt.title("Multiclass Attack Type Distribution")
plt.ylabel("Count")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# Correlation Matrix with Label.
# Select every numeric dtype (including unsigned ints / float16, which a hand-written
# dtype list misses) and exclude the target by filtering. list.remove('Label') would
# raise ValueError whenever 'Label' is stored with a dtype outside the selection.
numeric_cols = [col for col in df.select_dtypes(include='number').columns if col != 'Label']
corr_with_label = df[numeric_cols].corrwith(df['Label']).abs().sort_values(ascending=False)

plt.figure(figsize=(14, 4))
corr_with_label.plot(kind='bar')
plt.title("Feature Correlation with Label")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


In [None]:
# PreProcessing

# Encode the attack names as integer class ids for the multi-class task.
le_attack = LabelEncoder()
df['Attack_encoded'] = le_attack.fit_transform(df['Attack'])


# Predictors: every column except the two targets and the derived encoded target.
features = df.drop(columns=['Label', 'Attack', 'Attack_encoded'])


# Fit the scaler on training rows only so that min/max statistics of held-out rows
# do not leak into training. The index split mirrors the parameters of the binary
# train/test split used later in the notebook (test_size=0.2, random_state=42,
# stratified on 'Label'), so it selects the same training rows.
# NOTE(review): the multi-class split stratifies on a different target, so its
# partition may differ from this one — confirm whether a single shared split is wanted.
train_idx, _ = train_test_split(
    np.arange(len(features)), test_size=0.2, random_state=42, stratify=df['Label']
)
scaler = MinMaxScaler()
scaler.fit(features.iloc[train_idx])
# Held-out values outside the training range map slightly outside [0, 1].
X_scaled = scaler.transform(features)

# Final feature matrix (as DataFrame); keep the original index so X stays aligned
# with the target Series taken from df.
X = pd.DataFrame(X_scaled, columns=features.columns, index=features.index)

# Binary and multi-class targets
y_bin = df['Label']
y_multi = df['Attack_encoded']

print(f"Features shape: {X.shape}")
print(f"Binary target shape: {y_bin.shape}")
print(f"Multiclass target shape: {y_multi.shape}")


In [None]:
# Stratified 80/20 train/test split for the binary task.
X_train_bin, X_test_bin, y_train_bin, y_test_bin = train_test_split(
    X, y_bin, test_size=0.2, random_state=42, stratify=y_bin
)
print("Class distribution before SMOTE:", Counter(y_train_bin))

#SMOTE
# Oversampling is switched off by default (the resampling call was previously
# commented out). Flip APPLY_SMOTE to True to balance the training split.
APPLY_SMOTE = False
smote = SMOTE(random_state=42)
if APPLY_SMOTE:
    X_train_bin_smote, y_train_bin_smote = smote.fit_resample(X_train_bin, y_train_bin)
    print("Class distribution after SMOTE:", Counter(y_train_bin_smote))
else:
    # Downstream cells use the *_smote names, so alias the untouched training split
    # and report honestly that no resampling took place.
    X_train_bin_smote, y_train_bin_smote = X_train_bin, y_train_bin
    print("SMOTE disabled; class distribution unchanged:", Counter(y_train_bin_smote))

print(f"Train shape: {X_train_bin_smote.shape}, Test shape: {X_test_bin.shape}")

In [None]:
# Pick the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Float32 copies of the scaled train/test features, moved to the selected device.
X_train_tensor = torch.tensor(X_train_bin_smote.values, dtype=torch.float32)
X_train_tensor = X_train_tensor.to(device)
X_test_tensor = torch.tensor(X_test_bin.values, dtype=torch.float32)
X_test_tensor = X_test_tensor.to(device)

batch_size = 512
# The autoencoder reconstructs its own input, so the same tensor is used as both
# the input and the target of the dataset.
train_loader = data_utils.DataLoader(
    data_utils.TensorDataset(X_train_tensor, X_train_tensor),
    shuffle=True,
    batch_size=batch_size,
)

class Autoencoder(nn.Module):
    """Fully connected autoencoder: input_dim -> 64 -> 32 -> 64 -> input_dim.

    The decoder ends in a sigmoid, so reconstructions lie in [0, 1].

    Args:
        input_dim: number of features per sample.
    """

    def __init__(self, input_dim):
        super().__init__()
        hidden_units, latent_units = 64, 32
        # Encoder compresses the features to the 32-dimensional code.
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, latent_units),
        )
        # Decoder mirrors the encoder and squashes the output with a sigmoid.
        self.decoder = nn.Sequential(
            nn.Linear(latent_units, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, input_dim),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return the reconstruction of ``x`` (same trailing dimension as the input)."""
        return self.decoder(self.encoder(x))

# Build the autoencoder for the current feature width and train it to minimise
# the mean squared reconstruction error.
input_dim = X_train_bin_smote.shape[1]
autoencoder = Autoencoder(input_dim).to(device)
optimizer = torch.optim.Adam(autoencoder.parameters(), lr=1e-3)
criterion = nn.MSELoss()

epochs = 50
autoencoder.train()
for epoch in range(epochs):
    running_loss = 0
    for xb, _ in train_loader:
        optimizer.zero_grad()
        reconstruction = autoencoder(xb)
        loss = criterion(reconstruction, xb)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Average of the per-batch losses for this epoch.
    mean_loss = running_loss / len(train_loader)
    print(f"Epoch {epoch+1}/{epochs}, Loss: {mean_loss:.6f}")

In [None]:
# Reconstruct the held-out split and summarise the per-sample reconstruction error.
autoencoder.eval()

with torch.no_grad():
    reconstructed_test = autoencoder(X_test_tensor).cpu().numpy()

# Mean squared error across features, one value per sample.
squared_diff = np.square(X_test_bin.values - reconstructed_test)
reconstruction_error = squared_diff.mean(axis=1)

print(f"Mean Reconstruction Error: {reconstruction_error.mean():.6f}")
print(f"Std of Reconstruction Error: {reconstruction_error.std():.6f}")


plt.figure(figsize=(8, 4))
sns.histplot(reconstruction_error, kde=True, bins=50)
plt.title("Distribution of Reconstruction Error (MSE per Sample)")
plt.xlabel("MSE")
plt.ylabel("Frequency")
plt.tight_layout()
plt.show()


In [None]:
# Run both splits through the encoder half only and bring the codes back to NumPy.
with torch.no_grad():
    X_train_encoded, X_test_encoded = (
        autoencoder.encoder(split).cpu().numpy()
        for split in (X_train_tensor, X_test_tensor)
    )

print("Encoded feature shapes:")
for tag, encoded in (("Train:", X_train_encoded), ("Test :", X_test_encoded)):
    print(tag, encoded.shape)


In [None]:
# Dataset Preparation.

from torch.utils.data import TensorDataset, DataLoader


def encode_as_sequence(frame):
    """Encode a scaled feature frame with the trained encoder and add a channel axis.

    Args:
        frame: DataFrame of scaled features (same columns the autoencoder was trained on).

    Returns:
        CPU float32 tensor of shape [N, code_dim, 1].
    """
    inputs = torch.tensor(frame.values, dtype=torch.float32).to(device)
    # Inference only: without no_grad the encoder records an autograd graph for the
    # whole split, and calling .detach() afterwards does not give that memory back.
    with torch.no_grad():
        codes = autoencoder.encoder(inputs)
    return codes.cpu().unsqueeze(-1)


# Reshape encoded features: [N, 32] → [N, 32, 1]
X_train_seq = torch.tensor(X_train_encoded[:, :, np.newaxis], dtype=torch.float32)
X_test_seq = torch.tensor(X_test_encoded[:, :, np.newaxis], dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_bin_smote.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test_bin.values, dtype=torch.float32)


train_dataset = TensorDataset(X_train_seq, y_train_tensor)
test_dataset = TensorDataset(X_test_seq, y_test_tensor)

batch_size = 512
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size)


# Multi-class split, stratified on the encoded attack type.
# NOTE(review): this partition is drawn independently of the binary split the
# autoencoder was trained on, so multi-class test rows may include rows the
# encoder has already seen — confirm whether a shared split is intended.
X_train_mc, X_test_mc, y_train_mc, y_test_mc = train_test_split(
    X, y_multi, test_size=0.2, random_state=42, stratify=y_multi
)


X_train_mc_tensor = encode_as_sequence(X_train_mc)
X_test_mc_tensor = encode_as_sequence(X_test_mc)
y_train_mc_tensor = torch.tensor(y_train_mc.values, dtype=torch.long)
y_test_mc_tensor = torch.tensor(y_test_mc.values, dtype=torch.long)

# Multi-class loaders
train_dataset_mc = TensorDataset(X_train_mc_tensor, y_train_mc_tensor)
test_dataset_mc = TensorDataset(X_test_mc_tensor, y_test_mc_tensor)
train_loader_mc = DataLoader(train_dataset_mc, batch_size=batch_size, shuffle=True)
test_loader_mc = DataLoader(test_dataset_mc, batch_size=batch_size)


In [None]:
# Hybrid Model (Binary)

class CNN_BiLSTM_Binary(nn.Module):
    """Conv1d -> BatchNorm -> ReLU -> bidirectional LSTM -> MLP head with sigmoid output.

    Args:
        input_size: number of channels per time step of the input sequence.
        seq_len: kept for interface compatibility; not used by any layer.
        hidden_dim: hidden size of each LSTM direction.
        lstm_layers: number of stacked LSTM layers.
    """

    def __init__(self, input_size=1, seq_len=32, hidden_dim=64, lstm_layers=1):
        super(CNN_BiLSTM_Binary, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=input_size, out_channels=16, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm1d(16)
        self.relu = nn.ReLU()
        self.lstm = nn.LSTM(input_size=16, hidden_size=hidden_dim, num_layers=lstm_layers, 
                            batch_first=True, bidirectional=True)
        # The head consumes the concatenated forward/backward LSTM features.
        self.fc = nn.Sequential(
            nn.Linear(2 * hidden_dim, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Return one probability per sample.

        Args:
            x: tensor of shape [batch_size, seq_len, channels].

        Returns:
            Tensor of shape [batch_size] with values in [0, 1].
        """
        # x: [batch_size, seq_len, channels] -> transpose for Conv1d
        x = x.transpose(1, 2)  # [batch_size, channels, seq_len]
        x = self.relu(self.bn1(self.conv1(x)))
        x = x.transpose(1, 2)  # back to [batch_size, seq_len, channels]
        out, _ = self.lstm(x)
        out = out[:, -1, :]  # last time step output
        out = self.fc(out)
        # Drop only the trailing size-1 feature axis. A bare squeeze() would also
        # remove the batch axis for a batch of one, yielding a 0-d tensor that no
        # longer matches the [batch_size] targets expected by BCELoss.
        return out.squeeze(-1)


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from tqdm.notebook import tqdm
import copy

# Validation Split from test
# 20% of the held-out split drives early stopping; the remaining 80% is kept as
# the final test set and is never used during training.
X_val_seq, X_test_final_seq, y_val_tensor, y_test_final_tensor = train_test_split(
    X_test_seq, y_test_tensor, test_size=0.8, random_state=42, stratify=y_test_tensor
)

val_loader = DataLoader(TensorDataset(X_val_seq, y_val_tensor), batch_size=batch_size)
test_loader_final = DataLoader(TensorDataset(X_test_final_seq, y_test_final_tensor), batch_size=batch_size)

# Model, Optimizer with Weight Decay, Loss
model_bin = CNN_BiLSTM_Binary().to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model_bin.parameters(), lr=1e-3, weight_decay=1e-5)

# Training parameters
epochs = 100
train_losses = []
val_losses = []

# Early stopping
best_val_loss = float('inf')
patience = 5
no_improve_epochs = 0
best_model_state = None

for epoch in range(epochs):
    model_bin.train()
    total_train_loss = 0
    train_loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs} [Train]", leave=False)

    for xb, yb in train_loop:
        xb, yb = xb.to(device), yb.to(device)
        preds = model_bin(xb)
        loss = criterion(preds, yb)
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model_bin.parameters(), max_norm=2.0)  # ⬅ Gradient Clipping
        optimizer.step()
        total_train_loss += loss.item()

    # Mean of the per-batch training losses.
    train_loss_epoch = total_train_loss / len(train_loader)
    train_losses.append(train_loss_epoch)

    # Validation
    model_bin.eval()
    total_val_loss = 0
    val_loop = tqdm(val_loader, desc=f"Epoch {epoch+1}/{epochs} [Val]", leave=False)

    with torch.no_grad():
        for xb, yb in val_loop:
            xb, yb = xb.to(device), yb.to(device)
            preds = model_bin(xb)
            loss = criterion(preds, yb)
            total_val_loss += loss.item()

    val_loss_epoch = total_val_loss / len(val_loader)
    val_losses.append(val_loss_epoch)

    print(f"Epoch {epoch+1} | Train Loss: {train_loss_epoch:.6f} | Val Loss: {val_loss_epoch:.6f}")

    # Early stopping logic
    if val_loss_epoch < best_val_loss:
        best_val_loss = val_loss_epoch
        # Deep copy: state_dict() returns references to the live parameters, which
        # keep changing as training continues.
        best_model_state = copy.deepcopy(model_bin.state_dict())
        no_improve_epochs = 0
    else:
        no_improve_epochs += 1
        if no_improve_epochs >= patience:
            print(f"Early stopping at epoch {epoch+1} (no improvement in {patience} epochs).")
            break

# Load best model
# Without this restore the evaluation below would use the weights of the last
# epoch (up to `patience` epochs past the best validation loss), not the checkpoint.
if best_model_state is not None:
    model_bin.load_state_dict(best_model_state)
    print("Loaded best model weights from early stopping checkpoint.")

# Plot training vs validation loss
plt.figure(figsize=(8, 4))
plt.plot(range(1, len(train_losses) + 1), train_losses, label='Train Loss')
plt.plot(range(1, len(val_losses) + 1), val_losses, label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training vs Validation Loss')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

# Final Evaluation
model_bin.eval()
all_preds, all_labels = [], []
with torch.no_grad():
    for xb, yb in test_loader_final:
        xb = xb.to(device)
        preds = model_bin(xb).cpu().numpy()
        # Threshold the predicted probabilities at 0.5 to obtain hard labels.
        preds = (preds > 0.5).astype(int)
        all_preds.extend(preds)
        all_labels.extend(yb.numpy())

print("\nClassification Report on Final Test Set:")
print(classification_report(all_labels, all_preds, digits=4))

cm = confusion_matrix(all_labels, all_preds)
plt.figure(figsize=(5, 4))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix - Binary Classification")
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Class distribution in training set
unique_classes, counts = np.unique(y_train_mc_tensor.numpy(), return_counts=True)

# Bar chart of samples per encoded class id.
fig, ax = plt.subplots(figsize=(10, 4))
ax.bar(unique_classes, counts)
ax.set_title("Class Distribution in y_train_mc")
ax.set_xlabel("Class Index")
ax.set_ylabel("Frequency")
ax.grid(True)
plt.show()

print("Class Counts:\n", dict(zip(unique_classes, counts)))


In [None]:
# Sanity-check the multi-class targets: dtype, value range and number of distinct ids.
distinct_labels = np.unique(y_train_mc_tensor.numpy())
print(f"Label dtype: {y_train_mc_tensor.dtype}")
print(f"Min label: {y_train_mc_tensor.min().item()}")
print(f"Max label: {y_train_mc_tensor.max().item()}")
print(f"Number of Classes: {len(distinct_labels)}")


In [None]:
class CNN_BiLSTM_Multiclass(nn.Module):
    """Conv1d -> BatchNorm -> ReLU -> bidirectional LSTM -> MLP head emitting class logits.

    Args:
        input_size: number of channels per time step of the input sequence.
        seq_len: kept for interface compatibility; not used by any layer.
        hidden_dim: hidden size of each LSTM direction.
        lstm_layers: number of stacked LSTM layers.
        num_classes: width of the output layer.
    """

    def __init__(self, input_size=1, seq_len=32, hidden_dim=64, lstm_layers=1, num_classes=10):
        super().__init__()
        conv_channels = 16
        self.conv1 = nn.Conv1d(
            in_channels=input_size, out_channels=conv_channels, kernel_size=3, padding=1
        )
        self.bn1 = nn.BatchNorm1d(conv_channels)
        self.relu = nn.ReLU()
        self.lstm = nn.LSTM(
            input_size=conv_channels,
            hidden_size=hidden_dim,
            num_layers=lstm_layers,
            batch_first=True,
            bidirectional=True,
        )
        # Raw logits are returned (no softmax); CrossEntropyLoss expects them.
        self.fc = nn.Sequential(
            nn.Linear(2 * hidden_dim, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, num_classes),
        )

    def forward(self, x):
        """Map ``x`` of shape [batch_size, seq_len, channels] to logits [batch_size, num_classes]."""
        channels_first = x.permute(0, 2, 1)  # [batch_size, channels, seq_len]
        conv_features = self.relu(self.bn1(self.conv1(channels_first)))
        sequence = conv_features.permute(0, 2, 1)  # [batch_size, seq_len, channels]
        lstm_out, _ = self.lstm(sequence)
        final_step = lstm_out[:, -1]  # features at the last time step
        return self.fc(final_step)


In [None]:
# import torch.nn.functional as F

# class FocalLoss(nn.Module):
#     def __init__(self, gamma=2.0, weight=None):
#         super(FocalLoss, self).__init__()
#         self.gamma = gamma
#         self.weight = weight  # Tensor of shape [num_classes]

#     def forward(self, input, target):
#         ce_loss = F.cross_entropy(input, target, weight=self.weight, reduction='none')
#         pt = torch.exp(-ce_loss)  # pt = softmax prob of correct class
#         focal_loss = ((1 - pt) ** self.gamma) * ce_loss
#         return focal_loss.mean()


In [None]:
import torch

# Get class frequencies
class_counts = np.bincount(y_train_mc_tensor.numpy())

# Inverse-frequency weights, rescaled so they sum to one.
class_weights = np.reciprocal(class_counts.astype(np.float64))
class_weights = class_weights / class_weights.sum()

# Convert to tensor for CrossEntropyLoss
weights_tensor = torch.tensor(class_weights, dtype=torch.float32)
weights_tensor = weights_tensor.to(device)


In [None]:
# Multi-class model with one output unit per entry of the class-weight vector.
model_mc = CNN_BiLSTM_Multiclass(num_classes=len(class_weights))
model_mc = model_mc.to(device)

# Class-weighted cross entropy.
criterion = nn.CrossEntropyLoss(weight=weights_tensor)
# criterion = FocalLoss(gamma=2.0, weight=weights_tensor)

optimizer = torch.optim.Adam(
    model_mc.parameters(),
    lr=1e-3,
    weight_decay=1e-5,
)

# Halve the learning rate when the monitored loss has not improved for 3 epochs.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=3,
)



In [None]:
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

# (Optional) Validation split from test set
# 20% of the held-out multi-class split becomes validation data, 80% the final test set.
from sklearn.model_selection import train_test_split
X_val_mc, X_test_final_mc, y_val_mc, y_test_final_mc = train_test_split(
    X_test_mc_tensor,
    y_test_mc_tensor,
    test_size=0.8,
    random_state=42,
    stratify=y_test_mc_tensor,
)

val_dataset_mc = TensorDataset(X_val_mc, y_val_mc)
test_final_dataset_mc = TensorDataset(X_test_final_mc, y_test_final_mc)
val_loader_mc = DataLoader(val_dataset_mc, batch_size=batch_size)
test_loader_mc_final = DataLoader(test_final_dataset_mc, batch_size=batch_size)

# # Training
# epochs = 100
# train_losses = []
# val_losses = []

# for epoch in range(epochs):
#     model_mc.train()
#     train_loss = 0.0
#     train_loop = tqdm(train_loader_mc, desc=f"Epoch {epoch+1}/{epochs} [Train]", leave=False)

#     for xb, yb in train_loop:
#         xb, yb = xb.to(device), yb.to(device).long()
#         optimizer.zero_grad()
#         preds = model_mc(xb)
#         loss = criterion(preds, yb)
#         loss.backward()
#         optimizer.step()
#         train_loss += loss.item()

#     train_losses.append(train_loss / len(train_loader_mc))
    
#     for param_group in optimizer.param_groups:  # (print the learning rate each epoch to watch the scheduler's adjustments)
#         print(f"Current Learning Rate: {param_group['lr']:.6e}")


#     # Validation
#     model_mc.eval()
#     val_loss = 0.0
#     val_loop = tqdm(val_loader_mc, desc=f"Epoch {epoch+1}/{epochs} [Val]", leave=False)

#     with torch.no_grad():
#         for xb, yb in val_loop:
#             xb, yb = xb.to(device), yb.to(device).long()
#             preds = model_mc(xb)
#             loss = criterion(preds, yb)
#             val_loss += loss.item()

#     val_losses.append(val_loss / len(val_loader_mc))

#     # Step the scheduler
#     scheduler.step(val_losses[-1])  # Pass the latest val_loss

#     print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_losses[-1]:.6f}, Val Loss: {val_losses[-1]:.6f}")


In [None]:
import copy

# Training budget and loss history for the multi-class model.
# These must be initialised here: otherwise `epochs`, `train_losses` and
# `val_losses` leak in from the binary training cell and the multi-class losses
# are appended to the binary model's history (corrupting the loss plot).
epochs = 100
train_losses = []
val_losses = []

# Early Stopping Setup
best_val_loss = float('inf')
patience = 5
no_improve_count = 0
best_model_state = None  # Will store the best model weights

# Training loop
for epoch in range(epochs):
    model_mc.train()
    train_loss = 0.0
    train_loop = tqdm(train_loader_mc, desc=f"Epoch {epoch+1}/{epochs} [Train]", leave=False)

    for xb, yb in train_loop:
        xb, yb = xb.to(device), yb.to(device).long()
        optimizer.zero_grad()
        preds = model_mc(xb)
        loss = criterion(preds, yb)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    # Mean of the per-batch training losses.
    train_losses.append(train_loss / len(train_loader_mc))

    # Report the current learning rate so scheduler reductions are visible.
    for param_group in optimizer.param_groups:
        print(f"Current Learning Rate: {param_group['lr']:.6e}")

    # Validation loop
    model_mc.eval()
    val_loss = 0.0
    val_loop = tqdm(val_loader_mc, desc=f"Epoch {epoch+1}/{epochs} [Val]", leave=False)

    with torch.no_grad():
        for xb, yb in val_loop:
            xb, yb = xb.to(device), yb.to(device).long()
            preds = model_mc(xb)
            loss = criterion(preds, yb)
            val_loss += loss.item()

    val_loss_epoch = val_loss / len(val_loader_mc)
    val_losses.append(val_loss_epoch)

    # Scheduler step
    scheduler.step(val_loss_epoch)

    print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_losses[-1]:.6f}, Val Loss: {val_loss_epoch:.6f}")

    # Early stopping logic
    if val_loss_epoch < best_val_loss:
        best_val_loss = val_loss_epoch
        # Deep copy is required: state_dict() returns references to the live
        # parameter tensors, so without a copy the "best" snapshot would silently
        # follow later updates and the restore below would be a no-op.
        best_model_state = copy.deepcopy(model_mc.state_dict())
        no_improve_count = 0
    else:
        no_improve_count += 1
        if no_improve_count >= patience:
            print(f"Early stopping at epoch {epoch+1} (no improvement in {patience} epochs).")
            break

# Restore best model
if best_model_state is not None:
    model_mc.load_state_dict(best_model_state)
    print("Loaded best model weights from early stopping checkpoint.")


In [None]:
# Loss curves recorded in train_losses / val_losses, one point per completed epoch.
fig, ax = plt.subplots(figsize=(10, 4))
for history, curve_label in ((train_losses, 'Train Loss'), (val_losses, 'Validation Loss')):
    ax.plot(range(1, len(history) + 1), history, label=curve_label)
ax.set_title("Training vs Validation Loss")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()


In [None]:
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

# Evaluate the multi-class model on the final test loader.
model_mc.eval()
all_preds, all_labels = [], []

with torch.no_grad():
    for xb, yb in test_loader_mc_final:
        logits = model_mc(xb.to(device))
        # Predicted class = index of the largest logit.
        predicted_ids = logits.argmax(dim=1).cpu().numpy()
        all_preds.extend(predicted_ids)
        all_labels.extend(yb.numpy())

# Classification Report
print("Classification Report on Final Test Set:")
print(classification_report(all_labels, all_preds, digits=4))

# Confusion Matrix
cm = confusion_matrix(all_labels, all_preds)

plt.figure(figsize=(10, 8))
sns.heatmap(cm, cmap='Blues', annot=True, fmt='d')
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix - Multi-class Classification")
plt.tight_layout()
plt.show()


In [None]:
import numpy as np
# How often each class id was predicted, as a (class ids, counts) pair.
predicted_ids, predicted_counts = np.unique(all_preds, return_counts=True)
print("Predicted class distribution:", (predicted_ids, predicted_counts))
