In [None]:
import pandas as pd
import numpy as np

In [None]:
# Loading the dataset: one CSV per capture session, read in the order listed
csv_paths = (
    'MachineLearningCVE/Monday-WorkingHours.pcap_ISCX.csv',
    'MachineLearningCVE/Tuesday-WorkingHours.pcap_ISCX.csv',
    'MachineLearningCVE/Wednesday-workingHours.pcap_ISCX.csv',
    'MachineLearningCVE/Thursday-WorkingHours-Morning-WebAttacks.pcap_ISCX.csv',
    'MachineLearningCVE/Thursday-WorkingHours-Afternoon-Infilteration.pcap_ISCX.csv',
    'MachineLearningCVE/Friday-WorkingHours-Morning.pcap_ISCX.csv',
    'MachineLearningCVE/Friday-WorkingHours-Afternoon-PortScan.pcap_ISCX.csv',
    'MachineLearningCVE/Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv',
)
# Later cells refer to the frames by these individual names, so keep them
data1, data2, data3, data4, data5, data6, data7, data8 = map(pd.read_csv, csv_paths)

In [None]:
# Collect the per-file frames so they can be inspected and concatenated together
data_list = [
    data1, data2, data3, data4,
    data5, data6, data7, data8,
]

# One line per source file with its (rows, columns) shape
print('Data dimensions: ')
for i, data in enumerate(data_list, start=1):
    rows, cols = data.shape[0], data.shape[1]
    print(f'Data{i} -> {rows} rows, {cols} columns')

In [None]:
# Stack the per-file frames into one dataset.
# ignore_index=True gives the result a fresh 0..n-1 index. Without it every
# source file keeps its own 0-based row labels, so labels repeat across files
# and any label-based lookup (df.loc[i]) would match rows from several files.
df = pd.concat(data_list, ignore_index=True)
rows, cols = df.shape

print('New dimension:')
print(f'Number of rows: {rows}')
print(f'Number of columns: {cols}')
print(f'Total cells: {rows * cols}')

In [None]:
# Release the per-file dataframes now that `df` holds the concatenated data.
# A loop of the form `for d in data_list: del d` only unbinds the loop
# variable `d` on each iteration: data_list and data1..data8 keep referencing
# every frame, so no memory is released. Drop the real references instead.
del data1, data2, data3, data4, data5, data6, data7, data8, data_list
# The loop variable of the dimension printout may still point at the last frame.
data = None

In [None]:
# Column headers in the CSVs may carry leading/trailing spaces; map every
# current name to its stripped form and rename in place
stripped_names = map(str.strip, df.columns)
col_names = dict(zip(df.columns, stripped_names))
df.rename(columns=col_names, inplace=True)

In [None]:
# Show the column names as they stand after the whitespace-stripping rename
print(df.columns)

In [None]:
# Separate features from labels
# NOTE(review): this runs before the ±inf/NaN cleaning and label encoding in the
# following cell, which rebinds both X and y; the values assigned here are not
# read by any cell in between.
X = df.drop(columns=['Label'])
y = df['Label']

In [None]:
# Replace ±∞ with NaN so that dropna() also removes rows holding infinite values
df.replace([np.inf, -np.inf], np.nan, inplace=True)
df.dropna(inplace=True)

# Encode the 'Label' column as integer category codes.
# Keep the category names as well: label_names[code] is the original label
# text. Without this the code -> class mapping is lost once the column is
# overwritten, and later metrics cannot be related back to a class name.
label_categories = df['Label'].astype('category')
label_names = list(label_categories.cat.categories)
df['Label'] = label_categories.cat.codes

# Separate features & labels
X = df.drop(columns=['Label'])
y = df['Label'].values

print("Features shape:", X.shape)
print("Unique labels in y:", np.unique(y))
print("Label mapping (code -> name):", dict(enumerate(label_names)))

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Split FIRST, scale SECOND.
# Fitting the scaler on the full dataset before splitting lets the mean and
# variance of the validation and test rows leak into the training features.
# The scaler is therefore fit on the training split only and then applied
# unchanged to the validation and test splits.

# 15% of all rows are held out as the test set (stratified by label)
X_train_val_raw, X_test_raw, y_train_val, y_test = train_test_split(
    X, y, test_size=0.15, random_state=42, stratify=y
)

# 0.15 / 0.85 of the remainder equals 15% of the full dataset -> 70/15/15 overall
val_ratio = 0.15 / 0.85
X_train_raw, X_val_raw, y_train, y_val = train_test_split(
    X_train_val_raw, y_train_val, test_size=val_ratio,
    random_state=42, stratify=y_train_val
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)  # statistics come from training rows only
X_val = scaler.transform(X_val_raw)
X_test = scaler.transform(X_test_raw)

print("Train:", X_train.shape, y_train.shape)
print("Val:  ", X_val.shape, y_val.shape)
print("Test: ", X_test.shape, y_test.shape)


Justification for 70-15-15 split:

70% Training: Enough data to learn robust patterns.

15% Validation: A separate set for hyperparameter tuning (e.g., the learning rate or the DP noise multiplier).

15% Test: Kept strictly for final evaluation. This prevents overfitting to the validation set and provides an unbiased measure of performance.

In [None]:
#%pip install torch opacus

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from opacus import PrivacyEngine

In [None]:
# Turn every split into tensors: float32 features, int64 (long) class labels
X_train_t, y_train_t = (
    torch.tensor(X_train, dtype=torch.float32),
    torch.tensor(y_train, dtype=torch.long),
)
X_val_t, y_val_t = (
    torch.tensor(X_val, dtype=torch.float32),
    torch.tensor(y_val, dtype=torch.long),
)
X_test_t, y_test_t = (
    torch.tensor(X_test, dtype=torch.float32),
    torch.tensor(y_test, dtype=torch.long),
)

# Pair features with labels so a loader yields (features, label) batches
train_dataset, val_dataset, test_dataset = (
    TensorDataset(X_train_t, y_train_t),
    TensorDataset(X_val_t, y_val_t),
    TensorDataset(X_test_t, y_test_t),
)

batch_size = 256
# Only the training loader shuffles; validation and test keep dataset order
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)


In [None]:
class SimpleMLP(nn.Module):
    """Fully connected classifier with two ReLU hidden layers.

    Layout: Linear(input_dim -> hidden_dim) -> ReLU ->
            Linear(hidden_dim -> hidden_dim) -> ReLU ->
            Linear(hidden_dim -> num_classes).
    forward() returns the raw output of the last linear layer (no softmax).
    """

    def __init__(self, input_dim, hidden_dim=64, num_classes=2):
        super().__init__()
        # Layers are created in forward order and stored under `net`, so the
        # parameter names are net.0.*, net.2.* and net.4.*
        layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, num_classes),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        logits = self.net(x)
        return logits

# Size the network from the data: one input per feature column, and one output
# per distinct label value present in the training split
input_dim = X_train.shape[1]
num_classes = np.unique(y_train).size
model = SimpleMLP(input_dim=input_dim, hidden_dim=64, num_classes=num_classes)

In [None]:
# DP hyperparameters
noise_multiplier, max_grad_norm = 1.0, 1.0
epochs = 5
delta = 1e-5

# Use the GPU when one is available; the model is moved before the optimizer
# is created so the optimizer holds the on-device parameters
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# make_private returns wrapped versions of the model, optimizer and loader;
# all three names are rebound to those wrapped objects
privacy_engine = PrivacyEngine()
model, optimizer, train_loader = privacy_engine.make_private(
    module=model,
    optimizer=optimizer,
    data_loader=train_loader,
    noise_multiplier=noise_multiplier,
    max_grad_norm=max_grad_norm,
)


In [None]:
# Centralized training: one pass over train_loader per epoch, reporting the
# sample-weighted mean loss and the training accuracy
for epoch in range(epochs):
    model.train()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        logits = model(inputs)
        batch_loss = criterion(logits, labels)
        batch_loss.backward()
        optimizer.step()

        # criterion returns a batch mean; weight it by the batch size so the
        # epoch figure is a per-sample average
        loss_sum += batch_loss.item() * inputs.size(0)
        predicted = torch.max(logits, 1).indices
        n_correct += (predicted == labels).sum().item()
        n_seen += labels.size(0)

    epoch_loss = loss_sum / n_seen
    epoch_acc = n_correct / n_seen
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}")


In [None]:
from sklearn.metrics import (
    confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score
)

def evaluate(model, loader, device=None):
    """Run `model` over `loader` without gradients and collect results.

    Args:
        model: module mapping a feature batch to logits with one column per class.
        loader: iterable of (features, labels) batches.
        device: device the batches are moved to. When omitted, the device of
            the model's first parameter is used (CPU if the model has none),
            so the function does not depend on a notebook-level global.

    Returns:
        (accuracy, probs, labels):
          accuracy - fraction of samples whose argmax prediction equals the label
                     (NaN when the loader yields no samples);
          probs    - 1-D array with the softmax probability of class index 1 only;
          labels   - 1-D array of true labels in loader order.

    Note:
        `probs` describes class 1 alone. Thresholding it yields valid
        predictions only for a two-class problem; for more classes, derive
        predictions from the argmax of the logits instead.
        The model is left in eval mode.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    correct, total = 0, 0
    all_probs = []
    all_labels = []
    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            outputs = model(data)

            # outputs has shape [batch_size, num_classes]; keep P(class == 1)
            probs = torch.softmax(outputs, dim=1)[:, 1]
            all_probs.append(probs.cpu().numpy())
            all_labels.append(target.cpu().numpy())

            _, pred = torch.max(outputs, 1)
            correct += (pred == target).sum().item()
            total += target.size(0)

    # An empty loader would otherwise fail on 0/0 and on concatenating nothing
    if total == 0:
        return float("nan"), np.empty(0, dtype=np.float32), np.empty(0, dtype=np.int64)

    acc = correct / total
    return acc, np.concatenate(all_probs), np.concatenate(all_labels)

test_acc, y_prob_test, y_test_true = evaluate(model, test_loader)
print("Test Accuracy:", test_acc)

# Predicted class = argmax over the logits.
# y_prob_test only holds P(class == 1); thresholding it at 0.5 can only ever
# produce the labels 0 and 1, which makes precision / recall / F1 and the
# confusion matrix wrong as soon as the data has more than two classes
# (num_classes is derived from the labels, not fixed at 2).
# test_loader does not shuffle, so these predictions line up with y_test_true.
model.eval()
with torch.no_grad():
    y_pred_test = np.concatenate([
        model(features.to(device)).argmax(dim=1).cpu().numpy()
        for features, _ in test_loader
    ])

prec = precision_score(y_test_true, y_pred_test, average='weighted')
rec  = recall_score(y_test_true, y_pred_test, average='weighted')
f1   = f1_score(y_test_true, y_pred_test, average='weighted')
cm   = confusion_matrix(y_test_true, y_pred_test)

print("Precision:", prec)
print("Recall:", rec)
print("F1-Score:", f1)
print("Confusion Matrix:\n", cm)

In [None]:
# Ask the PrivacyEngine attached to the centralized model for the ε it has
# accounted for so far, at the δ chosen in the DP setup cell
epsilon = privacy_engine.get_epsilon(delta=delta)
print(f"ε = {epsilon:.2f} for δ = {delta}")

In [None]:
import copy
from torch.utils.data import DataLoader, Subset

def remove_opacus_prefix(state_dict, prefix="_module."):
    """Return a new dict whose keys have one leading `prefix` stripped.

    Keys that do not start with `prefix` are kept unchanged; values are passed
    through as-is (not copied). Entry order follows `state_dict`.
    """
    cut = len(prefix)
    return {
        (name[cut:] if name.startswith(prefix) else name): value
        for name, value in state_dict.items()
    }

# 2.1 Federated Learning Setup 

def split_dataset_for_federation(train_dataset, num_clients=3, seed=42):
    """
    Randomly partition `train_dataset` into `num_clients` disjoint shards.

    The global NumPy RNG is seeded with `seed`, the sample indices are
    shuffled, and consecutive slices are handed out. Shard sizes differ by at
    most one: the first `len(train_dataset) % num_clients` shards receive the
    extra sample. Returns a list of Subset objects over `train_dataset`.
    """
    np.random.seed(seed)
    total = len(train_dataset)
    shuffled = np.arange(total)
    np.random.shuffle(shuffled)

    base_size, extras = divmod(total, num_clients)

    shards = []
    offset = 0
    for client in range(num_clients):
        # Early clients absorb the remainder, one extra sample each
        size = base_size + 1 if client < extras else base_size
        shards.append(Subset(train_dataset, shuffled[offset:offset + size]))
        offset += size
    return shards


def train_one_client_federated(model, train_data, device, dp_params, local_epochs=1):
    """Train a copy of `model` on one client's data and return its weights.

    Args:
        model: current global model; it is deep-copied and not modified.
        train_data: the client's dataset of (features, label) samples.
        device: device each batch is moved to before the forward pass.
        dp_params: dict read for 'lr', 'noise_multiplier' and 'max_grad_norm'.
        local_epochs: number of passes over the client's loader.

    Returns:
        (state_dict, n_samples): the trained copy's state_dict with the
        "_module." key prefix removed, and len(train_data).
    """
    client_model = copy.deepcopy(model)
    client_model.train()

    # Per-client loader, loss and optimizer (batch size fixed at 128)
    client_loader = DataLoader(train_data, shuffle=True, batch_size=128)
    loss_fn = nn.CrossEntropyLoss()
    sgd = optim.SGD(client_model.parameters(), lr=dp_params['lr'])

    # A fresh PrivacyEngine per call wraps the copy, its optimizer and its loader
    engine = PrivacyEngine()
    client_model, sgd, client_loader = engine.make_private(
        module=client_model,
        optimizer=sgd,
        data_loader=client_loader,
        noise_multiplier=dp_params['noise_multiplier'],
        max_grad_norm=dp_params['max_grad_norm'],
    )

    for _epoch in range(local_epochs):
        for features, labels in client_loader:
            features = features.to(device)
            labels = labels.to(device)
            sgd.zero_grad()
            batch_loss = loss_fn(client_model(features), labels)
            batch_loss.backward()
            sgd.step()

    # The wrapped model's keys carry a "_module." prefix; strip it so the
    # weights can be loaded into a plain, unwrapped model
    return remove_opacus_prefix(client_model.state_dict()), len(train_data)


def federated_avg(state_dicts, data_counts):
    """
    Performs Federated Averaging (FedAvg).

    Each entry of the result is sum_i(state_dicts[i][key] * data_counts[i])
    divided by sum(data_counts), i.e. a sample-count-weighted mean.

    Args:
        state_dicts: list of parameter dictionaries, one per client; the keys
            of the first dictionary define the keys of the result.
        data_counts: number of samples per client, in the same order.

    Returns:
        A new dict with the averaged entries. The inputs are not modified.
        Tensor entries keep the dtype they have in the first state_dict, so
        integer entries do not silently turn into floats.

    Raises:
        ValueError: if no state_dict is given, if the two lists differ in
            length, or if the total sample count is not positive (dividing by
            it would otherwise yield inf/NaN weights without any error).
    """
    if len(state_dicts) == 0:
        raise ValueError("federated_avg needs at least one client state_dict")
    if len(state_dicts) != len(data_counts):
        raise ValueError(
            f"got {len(state_dicts)} state_dicts but {len(data_counts)} data_counts"
        )

    total_data_points = sum(data_counts)
    if total_data_points <= 0:
        raise ValueError("total number of client samples must be positive")

    global_model_dict = {}
    for key, reference in state_dicts[0].items():
        # Weighted sum of the parameter across all clients, then normalise
        averaged = sum(
            client_dict[key] * count
            for client_dict, count in zip(state_dicts, data_counts)
        ) / total_data_points

        # True division promotes integer tensors to float; restore the dtype
        if torch.is_tensor(reference) and torch.is_tensor(averaged):
            averaged = averaged.to(reference.dtype)
        global_model_dict[key] = averaged

    return global_model_dict


def federated_training(
    global_model,
    train_dataset,
    test_loader,
    device,
    dp_params,
    num_clients=3,
    global_rounds=3,
    local_epochs=1
):
    """
    Simulate federated training with FedAvg.

    `train_dataset` is split once into `num_clients` shards. In every global
    round each client trains a copy of the current global model on its shard
    (train_one_client_federated), the returned weights are combined with
    federated_avg and loaded back into the global model, and the accuracy on
    `test_loader` is printed.

    `global_model` itself is not modified: a deep copy is trained and returned.
    """
    shards = split_dataset_for_federation(train_dataset, num_clients=num_clients)

    fed_model = copy.deepcopy(global_model)
    fed_model.to(device)

    for round_no in range(1, global_rounds + 1):
        # Every client starts the round from the same global weights
        client_results = [
            train_one_client_federated(fed_model, shard, device, dp_params, local_epochs)
            for shard in shards
        ]
        client_state_dicts = [weights for weights, _ in client_results]
        client_data_counts = [n_samples for _, n_samples in client_results]

        # The sample-weighted average becomes the next global model
        fed_model.load_state_dict(federated_avg(client_state_dicts, client_data_counts))

        # Progress report only; the result does not feed back into training
        test_acc, _, _ = evaluate(fed_model, test_loader)
        print(f"[Round {round_no}/{global_rounds}] Federated Model Test Accuracy: {test_acc:.4f}")

    return fed_model


# Federated Training + Comparison

# The federated simulation trains on train_dataset and val_dataset combined
# (the federated run does no validation-based tuning, so those rows are used
# as extra training data) and is tested on the same test set as the
# centralized model.
combined_dataset = torch.utils.data.ConcatDataset([train_dataset, val_dataset])
# Positions 0..N-1 of the combined dataset (train rows first, then val rows)
combined_indices = list(range(len(train_dataset) + len(val_dataset)))

# DP parameters for the federated approach
federated_dp_params = {
    'noise_multiplier': 1.0,
    'max_grad_norm': 1.0,
    'lr': 0.01,
    'delta': 1e-5
}

# Simulation size: federated nodes, global aggregation rounds, and local
# epochs each client trains per round
num_clients = 3
global_rounds = 3
local_epochs = 1

# Fresh, untrained model with the same architecture as the centralized one
federated_model = SimpleMLP(input_dim, hidden_dim=64, num_classes=num_classes)

# Run the simulation; the name is rebound to the trained global model
federated_model = federated_training(
    federated_model,
    combined_dataset,
    test_loader,
    device,
    federated_dp_params,
    num_clients=num_clients,
    global_rounds=global_rounds,
    local_epochs=local_epochs,
)

# Final evaluation of the federated model
fed_acc, fed_prob_test, fed_test_true = evaluate(federated_model, test_loader)

# Predicted class = argmax over the logits.
# fed_prob_test only holds P(class == 1); thresholding it at 0.5 can only ever
# produce the labels 0 and 1, which makes precision / recall / F1 and the
# confusion matrix wrong as soon as the data has more than two classes.
# test_loader does not shuffle, so these predictions line up with fed_test_true.
federated_model.eval()
with torch.no_grad():
    fed_pred_test = np.concatenate([
        federated_model(features.to(device)).argmax(dim=1).cpu().numpy()
        for features, _ in test_loader
    ])

fed_prec = precision_score(fed_test_true, fed_pred_test, average='weighted')
fed_rec  = recall_score(fed_test_true, fed_pred_test, average='weighted')
fed_f1   = f1_score(fed_test_true, fed_pred_test, average='weighted')
fed_cm   = confusion_matrix(fed_test_true, fed_pred_test)

print("\n=== Federated Model (with DP) Final Evaluation ===")
print(f"Accuracy:      {fed_acc:.4f}")
print(f"Precision:     {fed_prec:.4f}")
print(f"Detection Rate (Recall): {fed_rec:.4f}")
print(f"F1-Score:      {fed_f1:.4f}")
print("Confusion Matrix:\n", fed_cm)


# Compare Federated vs Centralized
#
# Prints the test-set metrics of both models in the same layout so they can be
# read side by side: test_acc / rec / prec / f1 come from the centralized
# evaluation cell, fed_acc / fed_rec / fed_prec / fed_f1 from the federated one.
def _print_metric_block(title, accuracy, recall, precision, f1_value):
    """Print one model's metrics under `title`, four decimals each."""
    print(title)
    print(f"  Accuracy:      {accuracy:.4f}")
    print(f"  Recall:        {recall:.4f}")
    print(f"  Precision:     {precision:.4f}")
    print(f"  F1-Score:      {f1_value:.4f}")


print("\n=== Comparison: Centralized vs Federated ===")
_print_metric_block("Centralized Model:", test_acc, rec, prec, f1)

_print_metric_block("Federated Model:", fed_acc, fed_rec, fed_prec, fed_f1)

print("\nDone.")


Adversarial Threats:

Model/Data Poisoning

Malicious clients submit crafted updates or train on manipulated data to degrade or backdoor the global model.

Inference Attacks

Attackers try to recover private information (e.g., membership inference) from aggregated updates.

Free-Rider Attacks

Malicious participants exploit the system’s resources without contributing meaningful data or updates.


-----------------------------


Defenses:

Differential Privacy

Limits per-client leakage by adding noise to gradients.

Robust Aggregation

Methods like Krum, Bulyan, or Trimmed Mean filter out or downweight outlier (potentially poisoned) updates.

Secure Aggregation

Cryptographic techniques ensure server only sees aggregated data, protecting individual updates from exposure.

Anomaly Detection

Screens client updates for suspicious deviations, large gradients, or performance drops on a small validation set.

Trusted Execution Environments (TEEs)

Protect aggregation and model parameters from tampering at server-level.

By combining these measures—especially DP + robust aggregation—you can better secure federated learning against poisoning, privacy leaks, and free-riders while preserving model utility.

3.1 Feature Importance

To identify the top five features contributing to intrusion detection, we can use techniques such as feature-importance scores (e.g., from Random Forest, XGBoost) or SHAP values. Below are five features that commonly play a critical role in detecting cyberattacks:

Source and Destination IP Flow Counts

Helps detect unusual communication patterns, which are common in DDoS attacks.
Packet Size Distribution

Abnormal packet sizes may indicate an attempt to exfiltrate data or flood a network.
Connection Duration

Extremely short or long connections may indicate scanning behavior or persistent infiltration techniques.
Request/Response Ratios

Sudden spikes in request rates could be a sign of brute-force or credential-stuffing attacks.
Entropy of Payload Data

High entropy can suggest encrypted malicious payloads or obfuscated attack traffic.
These features help detect various cyberattacks, such as:

DDoS Attacks: Characterized by abnormally high packet counts and connection requests.
Phishing: Revealed by unusual request-response patterns to malicious domains.
Data Exfiltration: Indicated by large payload sizes over extended connections.
Attackers may also try to recover private information (e.g., through membership inference) from aggregated model updates, or mount free-rider attacks, in which they consume the system’s resources without contributing meaningful data.

3.2 Custom Feature Design

To further improve intrusion detection, we propose two additional cybersecurity-relevant features:

Behavioral Anomaly Scores

Extraction: Use statistical baselines for normal network behavior (e.g., clustering techniques) and compute deviations for each connection.
Impact: Improves classification accuracy by detecting novel (zero-day) and adaptive threats.
Temporal Traffic Patterns

Extraction: Perform time-series analysis on log timestamps to detect periodic or time-based attack strategies (e.g., slow brute-force attempts).
Impact: Enhances detection of stealthy attacks that operate under the radar over extended periods.
Defenses

Differential Privacy: Limits per-client data leakage by adding noise to gradients in federated or distributed learning.
Robust Aggregation: Methods like Krum, Bulyan, or Trimmed Mean filter out or downweight outlier (potentially poisoned) updates.
Secure Aggregation: Uses cryptographic techniques so that the server only sees aggregated data, protecting individual updates.

4.1 Real-World Deployment Challenges

Deploying a privacy-preserving ML system in large enterprises comes with several challenges:

Scalability

Issue: Differential privacy (DP) often increases computational overhead, making large-scale deployment resource-intensive.
Solution: Adopt model-compression techniques and distributed computing frameworks to manage computational loads.
Latency Constraints

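Issue: DP-SGD and federated learning both add latency: DP-SGD has to compute and clip per-sample gradients before adding noise, and federated learning needs repeated communication rounds between the clients and the server.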
Solution: Implement hybrid models that combine local DP training with periodic centralized refinements, balancing performance and efficiency.
Adaptability to Emerging Threats

Issue: Cyber threats evolve quickly, and privacy-preserving models may be slow to adapt.
Solution: Use continual learning techniques with DP, allowing models to learn from new threats while preserving privacy.
By combining advanced feature engineering, robust aggregation methods, and privacy-preserving techniques, organizations can improve their intrusion detection capabilities while maintaining the confidentiality of sensitive data.