<a href="https://colab.research.google.com/github/fadhluibnu/ANOMALY_IOT_NETWORK_DETECTION/blob/main/ANOMALY_IOT_NETWORK_DETECTION.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from scipy.stats import zscore
from imblearn.over_sampling import SMOTE
from sklearn.decomposition import PCA
import torch
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
import time
import warnings
warnings.filterwarnings("ignore")

In [2]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.preprocessing import OneHotEncoder, StandardScaler, RobustScaler
from sklearn.feature_selection import mutual_info_classif, SelectKBest
from imblearn.over_sampling import SMOTE
from scipy.stats import zscore
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

def complete_improved_preprocessing(train_data, test_data):
    """
    Complete preprocessing pipeline with enhancements for IoT network anomaly detection.
    Includes all original steps plus improvements for better model performance.

    Args:
        train_data: Original training dataframe
        test_data: Original testing dataframe

    Returns:
        processed_train_data, processed_test_data: Enhanced datasets ready for modeling
    """
    print("\n🔍 Starting enhanced preprocessing pipeline...")

    # Make copies to avoid modifying originals
    train_data = train_data.copy()
    test_data = test_data.copy()

    # 1. Data Exploration
    print("\n📊 Dataset Overview:")
    print(f"Training data shape: {train_data.shape}")
    print(f"Testing data shape: {test_data.shape}")

    # Check for missing values
    train_missing = train_data.isnull().sum().sum()
    test_missing = test_data.isnull().sum().sum()
    print(f"Missing values - Training: {train_missing}, Testing: {test_missing}")

    # 2. Drop rows with missing attack_cat (target variable)
    print("\n🔍 Handling missing target values...")
    train_data = train_data.dropna(subset=['attack_cat'])
    test_data = test_data.dropna(subset=['attack_cat'])
    print(f"After dropping rows with missing targets - Train: {train_data.shape}, Test: {test_data.shape}")

    # 3. Handle remaining missing values
    print("\n🔍 Handling remaining missing values...")
    # Identify numerical and categorical columns
    numerical_cols = train_data.select_dtypes(include=['int64', 'float64']).columns.tolist()
    numerical_cols = [col for col in numerical_cols if col not in ['id', 'label', 'attack_cat']]
    categorical_cols = train_data.select_dtypes(include=['object']).columns.tolist()

    # Enhanced missing value imputation
    for col in numerical_cols:
        # Use median for numerical features (more robust than mean)
        train_data[col] = train_data[col].fillna(train_data[col].median())
        test_data[col] = test_data[col].fillna(train_data[col].median())

    for col in categorical_cols:
        if col != 'attack_cat':
            # Use mode (most frequent) for categorical features
            train_data[col] = train_data[col].fillna(train_data[col].mode()[0])
            test_data[col] = test_data[col].fillna(train_data[col].mode()[0])

    # 4. Remove duplicate rows
    print("\n🔍 Removing duplicate entries...")
    train_data = train_data.drop_duplicates()
    test_data = test_data.drop_duplicates()
    print(f"After removing duplicates - Train: {train_data.shape}, Test: {test_data.shape}")

    # 5. Encode attack_cat (target variable)
    print("\n🔍 Encoding target variable...")
    attack_mapping = {
        'Normal': 0, 'Generic': 1, 'Exploits': 2, 'Fuzzers': 3, 'DoS': 4,
        'Reconnaissance': 5, 'Analysis': 6, 'Backdoor': 7, 'Shellcode': 8, 'Worms': 9
    }
    train_data['attack_cat'] = train_data['attack_cat'].map(attack_mapping)
    test_data['attack_cat'] = test_data['attack_cat'].map(attack_mapping)

    # Check class distribution
    class_counts = train_data['attack_cat'].value_counts().sort_index()
    print("Class distribution after encoding:")
    for class_id, count in class_counts.items():
        print(f"  Class {class_id}: {count} samples ({100*count/len(train_data):.2f}%)")

    # 6. Advanced feature transformation for skewed features
    print("\n🔍 Applying transformations for skewed numerical features...")
    for col in numerical_cols:
        # Check if data is significantly skewed
        skewness = train_data[col].skew()
        if abs(skewness) > 1:  # If moderately or highly skewed
            # Apply log transformation (adding a constant to handle zeros/negatives)
            train_min = train_data[col].min()
            offset = 1 - min(0, train_min)  # Ensure all values are positive

            train_data[col] = np.log1p(train_data[col] + offset)
            test_data[col] = np.log1p(test_data[col] + offset)
            print(f"  Applied log transform to {col} (skewness: {skewness:.2f})")

    # 7. Encoding categorical features with enhanced handling
    print("\n🔍 Encoding categorical features...")
    # Use OneHotEncoder with improved handling for test data
    if len(categorical_cols) > 0:
        encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
        encoded_train = encoder.fit_transform(train_data[categorical_cols])
        encoded_test = encoder.transform(test_data[categorical_cols])

        # Get feature names
        feature_names = encoder.get_feature_names_out(categorical_cols)

        # Create DataFrames with encoded features
        encoded_train_df = pd.DataFrame(encoded_train, columns=feature_names, index=train_data.index)
        encoded_test_df = pd.DataFrame(encoded_test, columns=feature_names, index=test_data.index)

        # Drop original categorical columns and join encoded ones
        train_data = train_data.drop(columns=categorical_cols).reset_index(drop=True)
        test_data = test_data.drop(columns=categorical_cols).reset_index(drop=True)

        train_data = pd.concat([train_data, encoded_train_df.reset_index(drop=True)], axis=1)
        test_data = pd.concat([test_data, encoded_test_df.reset_index(drop=True)], axis=1)

        print(f"  Encoded {len(categorical_cols)} categorical features into {encoded_train.shape[1]} binary features")

    # 8. Feature selection using mutual information
    print("\n🔍 Performing feature selection...")
    # Exclude target and ID columns
    X_train = train_data.drop(columns=['id', 'label', 'attack_cat'])
    y_train = train_data['attack_cat']

    # Use mutual information for selecting most informative features
    k = int(X_train.shape[1] * 0.8)  # Keep top 80% of features
    selector = SelectKBest(mutual_info_classif, k=k)
    selector.fit(X_train, y_train)

    # Get selected feature names
    selected_features = X_train.columns[selector.get_support()].tolist()
    print(f"  Selected {len(selected_features)} features out of {X_train.shape[1]}")

    # Keep only selected features
    features_to_keep = ['id', 'label', 'attack_cat'] + selected_features
    train_data = train_data[features_to_keep]
    test_data = test_data[features_to_keep]

    # Update numerical columns list to reflect selected features only
    numerical_cols = [col for col in selected_features if col in numerical_cols]

    # 9. Enhanced normalization using RobustScaler
    print("\n🔍 Applying robust scaling to numerical features...")
    scaler = RobustScaler()  # More robust to outliers than StandardScaler
    train_data[numerical_cols] = scaler.fit_transform(train_data[numerical_cols])
    test_data[numerical_cols] = scaler.transform(test_data[numerical_cols])

    # 10. Improved outlier handling with Winsorization
    print("\n🔍 Handling outliers with Winsorization...")
    for col in numerical_cols:
        # Use more conservative percentiles for winsorization
        lower_bound = train_data[col].quantile(0.01)
        upper_bound = train_data[col].quantile(0.99)

        # Apply clipping to both train and test data
        train_data[col] = train_data[col].clip(lower=lower_bound, upper=upper_bound)
        test_data[col] = test_data[col].clip(lower=lower_bound, upper=upper_bound)

    # 11. Enhanced SMOTE for imbalanced data
    print("\n🔍 Applying SMOTE with improved parameters...")
    # Prepare data for SMOTE
    X_train = train_data.drop(columns=['id', 'label', 'attack_cat'])
    y_train = train_data['attack_cat']

    # Print class distribution before SMOTE
    print("  Class distribution before SMOTE:")
    for class_id, count in y_train.value_counts().sort_index().items():
        print(f"    Class {class_id}: {count} samples ({100*count/len(y_train):.2f}%)")

    # Apply SMOTE with improved parameters
    smote = SMOTE(random_state=42, k_neighbors=7, sampling_strategy='auto')
    X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

    # Print class distribution after SMOTE
    print("  Class distribution after SMOTE:")
    for class_id, count in pd.Series(y_train_resampled).value_counts().sort_index().items():
        print(f"    Class {class_id}: {count} samples ({100*count/len(y_train_resampled):.2f}%)")

    # Create new DataFrame with resampled data
    train_data_resampled = pd.DataFrame(X_train_resampled, columns=X_train.columns)
    train_data_resampled['attack_cat'] = y_train_resampled
    train_data_resampled['id'] = range(len(train_data_resampled))
    train_data_resampled['label'] = train_data_resampled['attack_cat'] != 0  # Binary label (0=Normal)

    # 12. Check for any remaining issues
    print("\n🔍 Final data quality check...")
    # Check for infinities
    train_data_resampled = train_data_resampled.replace([np.inf, -np.inf], np.nan)
    test_data = test_data.replace([np.inf, -np.inf], np.nan)

    # Check for NaN and fill if any
    if train_data_resampled.isnull().sum().sum() > 0:
        print(f"  Found {train_data_resampled.isnull().sum().sum()} NaN values in training data. Filling with column medians.")
        for col in train_data_resampled.columns:
            if train_data_resampled[col].isnull().sum() > 0:
                if train_data_resampled[col].dtype in ['int64', 'float64']:
                    train_data_resampled[col] = train_data_resampled[col].fillna(train_data_resampled[col].median())

    if test_data.isnull().sum().sum() > 0:
        print(f"  Found {test_data.isnull().sum().sum()} NaN values in test data. Filling with column medians.")
        for col in test_data.columns:
            if test_data[col].isnull().sum() > 0:
                if test_data[col].dtype in ['int64', 'float64']:
                    test_data[col] = test_data[col].fillna(test_data[col].median())

    # 13. Prepare data in PyTorch format
    print("\n🔍 Preparing data for PyTorch...")
    # Extract features and targets
    X_train_final = train_data_resampled.drop(columns=['id', 'label', 'attack_cat'])
    y_train_final = train_data_resampled['attack_cat']
    X_test_final = test_data.drop(columns=['id', 'label', 'attack_cat'])
    y_test_final = test_data['attack_cat']

    # Convert to tensors
    X_train_tensor = torch.tensor(X_train_final.values, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train_final.values, dtype=torch.long)
    X_test_tensor = torch.tensor(X_test_final.values, dtype=torch.float32)
    y_test_tensor = torch.tensor(y_test_final.values, dtype=torch.long)

    # Create TensorDatasets
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

    # 14. Create DataLoaders with optimized parameters
    batch_size = 64  # Adjust based on available memory
    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=2,
        pin_memory=True,
        drop_last=False
    )

    test_loader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=2,
        pin_memory=True,
        drop_last=False
    )

    print(f"\n✅ Preprocessing complete!")
    print(f"  Final training set: {len(train_dataset)} samples with {X_train_final.shape[1]} features")
    print(f"  Final test set: {len(test_dataset)} samples with {X_test_final.shape[1]} features")

    # 15. Visualize class distribution
    try:
        plt.figure(figsize=(12, 5))

        plt.subplot(1, 2, 1)
        train_class_counts = pd.Series(y_train_final).value_counts().sort_index()
        plt.bar(train_class_counts.index.astype(str), train_class_counts.values)
        plt.title('Training Data Class Distribution')
        plt.xlabel('Attack Category')
        plt.ylabel('Count')
        plt.xticks(rotation=45)

        plt.subplot(1, 2, 2)
        test_class_counts = pd.Series(y_test_final).value_counts().sort_index()
        plt.bar(test_class_counts.index.astype(str), test_class_counts.values)
        plt.title('Test Data Class Distribution')
        plt.xlabel('Attack Category')
        plt.ylabel('Count')
        plt.xticks(rotation=45)

        plt.tight_layout()
        plt.savefig('class_distribution.png')
        plt.show()
    except:
        print("  Could not create visualization (possibly running in non-graphical environment)")

    # Return processed data and PyTorch loaders
    return {
        'train_data': train_data_resampled,
        'test_data': test_data,
        'X_train': X_train_final,
        'y_train': y_train_final,
        'X_test': X_test_final,
        'y_test': y_test_final,
        'train_loader': train_loader,
        'test_loader': test_loader,
        'input_dim': X_train_final.shape[1],
        'num_classes': len(np.unique(y_train_final))
    }

# Example usage
if __name__ == "__main__":
    # This section runs when the script is executed directly
    print("This is a preprocessing module for IoT Network Anomaly Detection.")
    print("Import this module and call the complete_improved_preprocessing function.")

This is a preprocessing module for IoT Network Anomaly Detection.
Import this module and call the complete_improved_preprocessing function.


In [3]:
# Improved CNN Model with Residual Connections
class ImprovedCNN(nn.Module):
    def __init__(self, input_dim, output_dim, dropout_rate=0.4):
        super(ImprovedCNN, self).__init__()

        # Initial convolutional layer
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=128, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm1d(128)
        self.pool1 = nn.MaxPool1d(kernel_size=2, stride=2)
        self.dropout1 = nn.Dropout(dropout_rate)

        # Residual block 1
        self.conv2a = nn.Conv1d(in_channels=128, out_channels=256, kernel_size=3, padding=1)
        self.bn2a = nn.BatchNorm1d(256)
        self.conv2b = nn.Conv1d(in_channels=256, out_channels=256, kernel_size=3, padding=1)
        self.bn2b = nn.BatchNorm1d(256)
        # Skip connection
        self.skip_conn1 = nn.Conv1d(in_channels=128, out_channels=256, kernel_size=1)

        self.pool2 = nn.MaxPool1d(kernel_size=2, stride=2)
        self.dropout2 = nn.Dropout(dropout_rate)

        # Residual block 2
        self.conv3a = nn.Conv1d(in_channels=256, out_channels=512, kernel_size=3, padding=1)
        self.bn3a = nn.BatchNorm1d(512)
        self.conv3b = nn.Conv1d(in_channels=512, out_channels=512, kernel_size=3, padding=1)
        self.bn3b = nn.BatchNorm1d(512)
        # Skip connection
        self.skip_conn2 = nn.Conv1d(in_channels=256, out_channels=512, kernel_size=1)

        self.pool3 = nn.AdaptiveMaxPool1d(output_size=1)
        self.dropout3 = nn.Dropout(dropout_rate)

        # Attention mechanism for feature importance
        self.attention = nn.Sequential(
            nn.Linear(512, 128),
            nn.Tanh(),
            nn.Linear(128, 1),
            nn.Softmax(dim=1)
        )

        # Fully connected layers
        self.fc1 = nn.Linear(512, 256)
        self.bn_fc1 = nn.BatchNorm1d(256)
        self.dropout_fc1 = nn.Dropout(dropout_rate)

        self.fc2 = nn.Linear(256, 128)
        self.bn_fc2 = nn.BatchNorm1d(128)
        self.dropout_fc2 = nn.Dropout(dropout_rate)

        self.fc3 = nn.Linear(128, output_dim)

    def forward(self, x):
        # Initial layer
        x = self.conv1(x)
        x = self.bn1(x)
        x = F.leaky_relu(x, negative_slope=0.1)
        x = self.pool1(x)
        x = self.dropout1(x)

        # Residual block 1
        residual = self.skip_conn1(x)
        x = self.conv2a(x)
        x = self.bn2a(x)
        x = F.leaky_relu(x, negative_slope=0.1)
        x = self.conv2b(x)
        x = self.bn2b(x)
        # Add residual connection
        x = x + residual
        x = F.leaky_relu(x, negative_slope=0.1)
        x = self.pool2(x)
        x = self.dropout2(x)

        # Residual block 2
        residual = self.skip_conn2(x)
        x = self.conv3a(x)
        x = self.bn3a(x)
        x = F.leaky_relu(x, negative_slope=0.1)
        x = self.conv3b(x)
        x = self.bn3b(x)
        # Add residual connection
        x = x + residual
        x = F.leaky_relu(x, negative_slope=0.1)
        x = self.pool3(x)
        x = self.dropout3(x)

        # Flatten
        x = x.view(x.size(0), -1)

        # Apply attention
        att_weights = self.attention(x)
        x = x * att_weights

        # Fully connected layers
        x = self.fc1(x)
        x = self.bn_fc1(x)
        x = F.leaky_relu(x, negative_slope=0.1)
        x = self.dropout_fc1(x)

        x = self.fc2(x)
        x = self.bn_fc2(x)
        x = F.leaky_relu(x, negative_slope=0.1)
        x = self.dropout_fc2(x)

        x = self.fc3(x)

        return x


# Improved RBM with enhanced training
class ImprovedRBM(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(ImprovedRBM, self).__init__()
        self.W = nn.Parameter(torch.randn(input_size, hidden_size) * 0.01)  # Smaller init for better stability
        self.b = nn.Parameter(torch.zeros(hidden_size))
        self.c = nn.Parameter(torch.zeros(input_size))
        self.input_size = input_size
        self.hidden_size = hidden_size

        # Initialize parameters properly - Xavier/Glorot initialization
        nn.init.xavier_uniform_(self.W)

    def forward(self, v):
        batch_size = v.size(0)

        # Reshape input to 2D if needed
        if v.dim() > 2:
            v = v.view(batch_size, -1)

        # Handle dimension mismatch more gracefully
        if v.size(1) != self.input_size:
            v_resized = torch.zeros(batch_size, self.input_size, device=v.device)
            min_size = min(v.size(1), self.input_size)
            v_resized[:, :min_size] = v[:, :min_size]
            v = v_resized

        # Apply normalization for better stability
        v = F.normalize(v, p=2, dim=1)

        # Propagate visible to hidden with LeakyReLU for better gradients
        h_activation = torch.matmul(v, self.W) + self.b
        h = torch.sigmoid(h_activation)

        return h

    def reconstruct(self, h):
        # Reconstruct visible from hidden
        v_activation = torch.matmul(h, self.W.t()) + self.c
        v_reconstructed = torch.sigmoid(v_activation)
        return v_reconstructed

    def free_energy(self, v):
        # Calculate free energy for monitoring convergence
        wx_b = torch.matmul(v, self.W) + self.b
        hidden_term = torch.sum(F.softplus(wx_b), dim=1)
        visible_term = torch.matmul(v, self.c)
        return -hidden_term - visible_term


# Improved DBN with enhanced architecture
class ImprovedDBN(nn.Module):
    def __init__(self, input_dim, hidden_dims, output_dim, dropout_rate=0.4):
        super(ImprovedDBN, self).__init__()
        self.input_dim = input_dim
        self.hidden_dims = hidden_dims

        # Create a stack of RBMs with more layers for better feature hierarchy
        self.rbm_layers = nn.ModuleList()

        # First RBM
        self.rbm_layers.append(ImprovedRBM(input_dim, hidden_dims[0]))

        # Additional RBM layers
        for i in range(1, len(hidden_dims)):
            self.rbm_layers.append(ImprovedRBM(hidden_dims[i-1], hidden_dims[i]))

        # Fully connected layers with dropout for regularization
        self.fc_layers = nn.ModuleList()

        # First FC layer from last RBM's output
        self.fc_layers.append(nn.Sequential(
            nn.Linear(hidden_dims[-1], 256),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(0.1),
            nn.Dropout(dropout_rate)
        ))

        # Second FC layer
        self.fc_layers.append(nn.Sequential(
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(0.1),
            nn.Dropout(dropout_rate)
        ))

        # Output layer
        self.output_layer = nn.Linear(128, output_dim)

        # Apply better initialization for fully connected layers
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        # Flatten input if not already flat
        batch_size = x.size(0)
        if x.dim() > 2:
            x = x.view(batch_size, -1)

        # Apply RBM layers sequentially
        h = x
        for rbm in self.rbm_layers:
            h = rbm(h)

        # Apply fully connected layers
        for fc in self.fc_layers:
            h = fc(h)

        # Output layer
        output = self.output_layer(h)

        return output


# Improved Ensemble model with attention mechanisms
class ImprovedEnsembleModel(nn.Module):
    def __init__(self, cnn_model, dbn_model, output_dim):
        super(ImprovedEnsembleModel, self).__init__()
        self.cnn_model = cnn_model
        self.dbn_model = dbn_model

        # Dynamic weighting of CNN and DBN outputs using attention
        self.attention = nn.Sequential(
            nn.Linear(output_dim * 2, 64),
            nn.LeakyReLU(0.1),
            nn.Linear(64, 2),
            nn.Softmax(dim=1)
        )

        # Improved combiner network
        self.combiner = nn.Sequential(
            nn.Linear(output_dim * 2, 256),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(0.1),
            nn.Dropout(0.3),

            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(0.1),
            nn.Dropout(0.3),

            nn.Linear(128, output_dim)
        )

    def forward(self, x):
        # Save original input for DBN
        x_original = x.clone()

        # Input for CNN (add channel dimension)
        x_cnn = x.unsqueeze(1)

        # Forward pass through CNN and DBN
        cnn_output = self.cnn_model(x_cnn)
        dbn_output = self.dbn_model(x_original)

        # Concatenate outputs
        combined = torch.cat((cnn_output, dbn_output), dim=1)

        # Calculate attention weights
        weights = self.attention(combined)

        # Apply attention weights
        weighted_cnn = cnn_output * weights[:, 0].unsqueeze(1)
        weighted_dbn = dbn_output * weights[:, 1].unsqueeze(1)

        # Combine weighted outputs
        weighted_combined = torch.cat((weighted_cnn, weighted_dbn), dim=1)

        # Final output through combiner network
        final_output = self.combiner(weighted_combined)

        return final_output

In [4]:
# Advanced training function with additional enhancements
def train_improved_ensemble(model, train_loader, val_loader, test_loader, device, epochs=30):
    print("Starting advanced training process...")

    # 1. Learning rate scheduler for better optimization
    optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.5, patience=3, verbose=True, min_lr=1e-6
    )

    # 2. Use label smoothing for better generalization
    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)

    # Tracking metrics
    train_losses = []
    val_losses = []
    train_accs = []
    val_accs = []
    best_val_acc = 0.0
    best_val_loss = float('inf')
    patience_counter = 0

    # For confusion matrix visualization
    from sklearn.metrics import confusion_matrix, classification_report

    # Training loop with early stopping
    for epoch in range(epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        # Use tqdm for progress visualization if available
        try:
            from tqdm import tqdm
            loader_iterator = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
        except ImportError:
            loader_iterator = train_loader

        for inputs, labels in loader_iterator:
            inputs, labels = inputs.to(device), labels.to(device)

            # Forward pass
            optimizer.zero_grad()
            outputs = model(inputs)

            # Calculate loss
            loss = criterion(outputs, labels)

            # Backward pass and optimize
            loss.backward()

            # Gradient clipping to prevent exploding gradients
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

            optimizer.step()

            # Update statistics
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        # Calculate epoch metrics
        epoch_train_loss = running_loss / len(train_loader)
        epoch_train_acc = 100.0 * correct / total
        train_losses.append(epoch_train_loss)
        train_accs.append(epoch_train_acc)

        # Validation phase
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0
        all_preds = []
        all_targets = []

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                # Forward pass
                outputs = model(inputs)

                # Calculate loss
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                # Calculate accuracy
                _, predicted = torch.max(outputs.data, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

                # Save predictions and targets for confusion matrix
                all_preds.extend(predicted.cpu().numpy())
                all_targets.extend(labels.cpu().numpy())

        # Calculate validation metrics
        epoch_val_loss = val_loss / len(val_loader)
        epoch_val_acc = 100.0 * val_correct / val_total
        val_losses.append(epoch_val_loss)
        val_accs.append(epoch_val_acc)

        # Update learning rate based on validation loss
        scheduler.step(epoch_val_loss)

        # Print epoch results
        print(f"Epoch {epoch+1}/{epochs} | "
              f"Train Loss: {epoch_train_loss:.4f} | Train Acc: {epoch_train_acc:.2f}% | "
              f"Val Loss: {epoch_val_loss:.4f} | Val Acc: {epoch_val_acc:.2f}%")

        # Check if this is the best model so far
        if epoch_val_acc > best_val_acc:
            best_val_acc = epoch_val_acc
            best_val_loss = epoch_val_loss
            patience_counter = 0

            # Save the best model
            torch.save(model.state_dict(), 'best_improved_ensemble.pt')
            print(f"✅ New best model saved with Val Acc: {epoch_val_acc:.2f}%")

            # Generate confusion matrix for best model
            try:
                import matplotlib.pyplot as plt
                import seaborn as sns

                # Calculate confusion matrix
                cm = confusion_matrix(all_targets, all_preds)
                plt.figure(figsize=(10, 8))
                sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
                plt.title(f'Confusion Matrix - Epoch {epoch+1}')
                plt.xlabel('Predicted')
                plt.ylabel('True')
                plt.savefig(f'confusion_matrix_epoch_{epoch+1}.png')
                plt.close()

                # Print detailed classification report
                print("\nClassification Report:")
                print(classification_report(all_targets, all_preds))
            except Exception as e:
                print(f"Could not generate confusion matrix: {e}")
        else:
            patience_counter += 1

        # Early stopping check
        if patience_counter >= 7:  # Stop if no improvement for 7 epochs
            print(f"Early stopping triggered after {epoch+1} epochs")
            break

    # Final evaluation on test set
    print("\nEvaluating final model on test set...")
    model.load_state_dict(torch.load('best_improved_ensemble.pt'))
    model.eval()
    test_correct = 0
    test_total = 0
    all_test_preds = []
    all_test_targets = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            test_total += labels.size(0)
            test_correct += (predicted == labels).sum().item()
            all_test_preds.extend(predicted.cpu().numpy())
            all_test_targets.extend(labels.cpu().numpy())

    test_acc = 100.0 * test_correct / test_total
    print(f"Test Accuracy: {test_acc:.2f}%")

    # Generate final confusion matrix and report
    try:
        cm = confusion_matrix(all_test_targets, all_test_preds)
        plt.figure(figsize=(12, 10))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
        plt.title('Final Test Confusion Matrix')
        plt.xlabel('Predicted')
        plt.ylabel('True')
        plt.savefig('final_test_confusion_matrix.png')
        plt.show()

        print("\nFinal Classification Report:")
        print(classification_report(all_test_targets, all_test_preds))
    except Exception as e:
        print(f"Could not generate final confusion matrix: {e}")

    # Plot training history
    plt.figure(figsize=(15, 6))

    plt.subplot(1, 2, 1)
    plt.plot(train_losses, label='Training Loss')
    plt.plot(val_losses, label='Validation Loss')
    plt.title('Loss over Epochs')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)

    plt.subplot(1, 2, 2)
    plt.plot(train_accs, label='Training Accuracy')
    plt.plot(val_accs, label='Validation Accuracy')
    plt.title('Accuracy over Epochs')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.legend()
    plt.grid(True)

    plt.tight_layout()
    plt.savefig('training_history.png')
    plt.show()

    return model, test_acc

In [5]:
# Main implementation with all improvements integrated
def run_improved_model(train_data, test_data):
    print("\n🚀 Starting improved IoT anomaly detection pipeline...")

    # Step 1: Enhanced preprocessing
    print("\n📊 Performing enhanced data preprocessing...")
    # train_data_processed, test_data_processed = improved_preprocessing(train_data, test_data)

    # # Prepare data for model
    # X_train = train_data_processed.drop(columns=['id', 'label', 'attack_cat'])
    # y_train = train_data_processed['attack_cat']
    # X_test = test_data_processed.drop(columns=['id', 'label', 'attack_cat'])
    # y_test = test_data_processed['attack_cat']

    processed_data = complete_improved_preprocessing(train_data, test_data)

    X_train = processed_data['X_train']
    y_train = processed_data['y_train']
    X_test = processed_data['X_test']
    y_test = processed_data['y_test']

    # Step 2: Convert to tensors
    X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
    X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
    y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)

    # Step 3: Create datasets and data loaders
    # Split training data into train and validation sets
    from sklearn.model_selection import train_test_split

    X_train_final, X_val, y_train_final, y_val = train_test_split(
        X_train_tensor, y_train_tensor, test_size=0.15, random_state=42, stratify=y_train_tensor
    )

    # Create data loaders with appropriate batch sizes
    batch_size = 128  # Larger batch size for faster training

    train_dataset = TensorDataset(X_train_final, y_train_final)
    val_dataset = TensorDataset(X_val, y_val)
    test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)

    print(f"Train set: {len(train_dataset)} samples")
    print(f"Validation set: {len(val_dataset)} samples")
    print(f"Test set: {len(test_dataset)} samples")

    # Step 4: Check device and set up
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"\n🔧 Using device: {device}")

    # Step 5: Create improved models
    input_dim = X_train.shape[1]
    num_classes = len(torch.unique(y_train_tensor))

    # CNN with improved architecture
    cnn_model = ImprovedCNN(input_dim=input_dim, output_dim=num_classes)

    # DBN with multiple hidden layers
    dbn_model = ImprovedDBN(
        input_dim=input_dim,
        hidden_dims=[512, 384, 256],  # Deeper architecture
        output_dim=num_classes
    )

    # Ensemble model
    ensemble_model = ImprovedEnsembleModel(cnn_model, dbn_model, output_dim=num_classes)
    ensemble_model = ensemble_model.to(device)

    # Print model summary if pytorch_model_summary is available
    try:
        from pytorch_model_summary import summary
        print("\n📋 Model Architecture Summary:")
        print(summary(ensemble_model, torch.zeros((1, input_dim)).to(device), show_input=True))
    except ImportError:
        print("\nCould not print model summary. Install pytorch_model_summary for detailed architecture view.")

    # Step 6: Train the model with improved training loop
    print("\n🏋️‍♀️ Starting advanced training process...")
    trained_model, test_accuracy = train_improved_ensemble(
        model=ensemble_model,
        train_loader=train_loader,
        val_loader=val_loader,
        test_loader=test_loader,
        device=device,
        epochs=30  # Train for more epochs with early stopping
    )

    print(f"\n🏁 Final test accuracy: {test_accuracy:.2f}%")
    return trained_model, test_accuracy

In [6]:
# Import the improved modules
# First, save the code snippets above as Python files

# Example usage:
if __name__ == "__main__":
    # Load your data
    # Assuming train_data and test_data are already loaded from your CSV files

    train_data = pd.read_csv("/content/drive/MyDrive/ANOMALI DING DING/UNSW_NB15_training-set.csv")
    test_data = pd.read_csv("/content/drive/MyDrive/ANOMALI DING DING/UNSW_NB15_testing-set.csv")

    # Run the improved model
    trained_model, accuracy = run_improved_model(train_data, test_data)

    print(f"Final model accuracy: {accuracy:.2f}%")

    # Save the final model
    # torch.save(trained_model.state_dict(), "final_anomaly_detection_model.pt")
    # print("Model saved successfully!")


🚀 Starting improved IoT anomaly detection pipeline...

📊 Performing enhanced data preprocessing...

🔍 Starting enhanced preprocessing pipeline...

📊 Dataset Overview:
Training data shape: (82332, 45)
Testing data shape: (175341, 45)
Missing values - Training: 0, Testing: 0

🔍 Handling missing target values...
After dropping rows with missing targets - Train: (82332, 45), Test: (175341, 45)

🔍 Handling remaining missing values...

🔍 Removing duplicate entries...
After removing duplicates - Train: (82332, 45), Test: (175341, 45)

🔍 Encoding target variable...
Class distribution after encoding:
  Class 0: 37000 samples (44.94%)
  Class 1: 18871 samples (22.92%)
  Class 2: 11132 samples (13.52%)
  Class 3: 6062 samples (7.36%)
  Class 4: 4089 samples (4.97%)
  Class 5: 3496 samples (4.25%)
  Class 6: 677 samples (0.82%)
  Class 7: 583 samples (0.71%)
  Class 8: 378 samples (0.46%)
  Class 9: 44 samples (0.05%)

🔍 Applying transformations for skewed numerical features...
  Applied log tran

KeyError: "['attack_cat'] not found in axis"