In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.optim import Adam
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, ParameterGrid
from sklearn.metrics import f1_score, accuracy_score, recall_score

# Load the dataset
df = pd.read_csv("preProcessed_FeatureClean_AttackTypes.csv")

# Separate the target column from the model inputs. Every remaining column is
# passed to StandardScaler below, so all of them are expected to be numeric.
features = df.drop(columns=['Label'])
target = df['Label']

# Split BEFORE scaling so that the scaler statistics are computed from the
# training rows only; fitting on the full table would leak information about
# the validation rows into the training features.
features_train, features_val = train_test_split(features, test_size=0.2, random_state=42)

# Normalize the data with statistics learned from the training split
scaler = StandardScaler()
X_train = torch.tensor(scaler.fit_transform(features_train), dtype=torch.float32)
X_val = torch.tensor(scaler.transform(features_val), dtype=torch.float32)

# Full scaled feature matrix, kept because later code reads X.shape[1] to size
# the network input layer.
X = torch.tensor(scaler.transform(features), dtype=torch.float32)

# Wrap the splits as datasets; each item is a 1-tuple holding one feature row
train_dataset = TensorDataset(X_train)
val_dataset = TensorDataset(X_val)
num_workers = 64  # Number of workers for DataLoader

# Define the auto-encoder architecture
class Autoencoder(nn.Module):
    """Fully connected autoencoder with a two-layer encoder and decoder.

    The encoder maps ``input_dim -> hidden_dim1 -> hidden_dim2`` with a ReLU
    after each linear layer. The decoder maps ``hidden_dim2 -> hidden_dim1 ->
    input_dim`` with a ReLU between its two linear layers and no activation on
    the final output.
    """

    def __init__(self, input_dim, hidden_dim1, hidden_dim2):
        super().__init__()

        # Compress the input down to the hidden_dim2-wide code
        encoder_layers = [
            nn.Linear(input_dim, hidden_dim1),
            nn.ReLU(),
            nn.Linear(hidden_dim1, hidden_dim2),
            nn.ReLU(),
        ]
        # Expand the code back to the original feature width
        decoder_layers = [
            nn.Linear(hidden_dim2, hidden_dim1),
            nn.ReLU(),
            nn.Linear(hidden_dim1, input_dim),
        ]

        self.encoder = nn.Sequential(*encoder_layers)
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Return the reconstruction of ``x`` after encoding and decoding."""
        return self.decoder(self.encoder(x))

# Hyperparameter search space: layer widths, optimizer step size and batch size
param_grid = dict(
    hidden_dim1=[64, 128],
    hidden_dim2=[32, 64],
    learning_rate=[1e-3, 1e-4],
    batch_size=[32, 64],
)

# Reconstruction error between the model output and its own input
loss_fn = nn.MSELoss()

# Record of the best configuration found so far; nothing has been tried yet,
# so the score starts at infinity and the other slots are empty.
best_params = None
best_model = None
best_score = float('inf')

# Perform grid search: train one autoencoder per hyperparameter combination
# and remember the one with the lowest validation loss after its last epoch.
for params in ParameterGrid(param_grid):
    # Initialize the model with given parameters
    model = Autoencoder(input_dim=X.shape[1], hidden_dim1=params['hidden_dim1'], hidden_dim2=params['hidden_dim2'])
    optimizer = Adam(model.parameters(), lr=params['learning_rate'])
    batch_size = params['batch_size']

    # Create DataLoader instances with multiple workers
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    # Training loop
    num_epochs = 20
    for epoch in range(num_epochs):
        model.train()
        train_loss_sum = 0.0
        for batch in train_loader:
            batch = batch[0]  # Extract tensor from dataset tuple
            optimizer.zero_grad()
            outputs = model(batch)
            loss = loss_fn(outputs, batch)
            loss.backward()
            optimizer.step()
            # loss_fn returns a per-batch mean; weight it by the batch size so
            # a smaller final batch does not distort the epoch average.
            train_loss_sum += loss.item() * batch.size(0)
        # Report a per-sample mean so the value is on the same scale as the
        # validation loss and independent of the number of batches.
        epoch_loss = train_loss_sum / len(train_dataset)

        # Validation
        model.eval()
        val_loss_sum = 0.0
        with torch.no_grad():
            for batch in val_loader:
                batch = batch[0]
                val_outputs = model(batch)
                val_loss_sum += loss_fn(val_outputs, batch).item() * batch.size(0)
        # Per-sample mean over the whole validation set
        val_loss = val_loss_sum / len(val_dataset)

        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Val Loss: {val_loss:.4f}")

    # Save the best model based on validation loss (value from the last epoch).
    # A new model object is built for every configuration, so the stored
    # state dict is not modified by later iterations.
    if val_loss < best_score:
        best_score = val_loss
        best_params = params
        best_model = model.state_dict()

# Print best parameters and best score
print("Best Parameters:", best_params)
print("Best Validation Loss:", best_score)

# Fail with a clear message if no configuration was ever recorded as best
# (for example, every validation loss was NaN).
if best_model is None:
    raise RuntimeError("Grid search did not record a best model; nothing to evaluate.")

# Rebuild the network with the winning layer sizes before loading its weights.
# `model` still refers to the last configuration tried by the grid search,
# whose layer shapes may differ from the best one, which would make
# load_state_dict fail with a size mismatch.
model = Autoencoder(
    input_dim=X_val.shape[1],
    hidden_dim1=best_params['hidden_dim1'],
    hidden_dim2=best_params['hidden_dim2'],
)
model.load_state_dict(best_model)

# Evaluate the model on validation data (one pass over the full tensor)
model.eval()
with torch.no_grad():
    val_outputs = model(X_val)
    val_loss = loss_fn(val_outputs, X_val).item()
print("Final Validation Loss:", val_loss)

Epoch 1/20, Loss: 43676.6209, Val Loss: 0.0933
Epoch 2/20, Loss: 27124.4065, Val Loss: 0.0977
Epoch 3/20, Loss: 24854.3658, Val Loss: 0.0822
Epoch 4/20, Loss: 22832.9364, Val Loss: 0.0833
Epoch 5/20, Loss: 26678.8785, Val Loss: 0.0813
Epoch 6/20, Loss: 26477.5563, Val Loss: 0.1089
Epoch 7/20, Loss: 20810.0554, Val Loss: 0.0859
Epoch 8/20, Loss: 23368.6646, Val Loss: 0.0873
Epoch 9/20, Loss: 21424.4339, Val Loss: 0.0530
Epoch 10/20, Loss: 23758.5531, Val Loss: 0.0674
