### Unsupervised GRU-VAE training  

In [17]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
import numpy as np # For standard deviation calculation
from modbus import ModbusDataset,CSVDataset
from sklearn.metrics import f1_score, accuracy_score
import torch.optim as optim
import pickle
import os 
from torch.utils.data import DataLoader

def compute_threshold(mse_values):
    """
    Computes the anomaly detection threshold (for marking sample as Intrusion if the IS was greater )
    based on the mean and standard deviation of Mean Squared Error (MSE) values.
    Formula: thr = mean(MSE) + std(MSE)

    Args:
        mse_values (torch.Tensor or list/np.array): A tensor or list of MSE values
                                                    obtained from the validation set.

    Returns:
        float: The calculated threshold.
    """
    if not isinstance(mse_values, torch.Tensor):
        mse_values = torch.tensor(mse_values, dtype=torch.float32)

    if mse_values.numel() == 0:
        return 0.0 
    mean_mse = torch.mean(mse_values)
    std_mse = torch.std(mse_values)

    threshold = mean_mse + std_mse
    return threshold.item() 




In [18]:

# AutoEncoder (AE)
class AE(nn.Module):
    """
    Encoder: (87-64-32)
    Decoder: (32-64-87)
    """
    def __init__(self):
        super(AE, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(87, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(32, 64),
            nn.ReLU(),
            nn.Linear(64, 87),
            nn.ReLU()
        )

    def forward(self, x):
        z = self.encoder(x)
        x_recon = self.decoder(z)
        return x_recon

# Variational AutoEncoder (VAE)
class VAE(nn.Module):
    """
    Encoder: (87-64-64-32 for mu and log_var)
    Decoder: (32-64-64-87)
    return x_recon, mu, logvar
    """
    def __init__(self):
        super(VAE, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(87, 64),
            nn.ReLU(),
            nn.Linear(64, 64),
            nn.ReLU()
        )
        self.fc_mu = nn.Linear(64, 32)
        self.fc_logvar = nn.Linear(64, 32)
        self.decoder = nn.Sequential(
            nn.Linear(32, 64),
            nn.ReLU(),
            nn.Linear(64, 64),
            nn.ReLU(),
            nn.Linear(64, 87),
            nn.ReLU()
        )

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        h = self.encoder(x)
        mu = self.fc_mu(h)
        logvar = self.fc_logvar(h)
        z = self.reparameterize(mu, logvar)
        x_recon = self.decoder(z)
        return x_recon, mu, logvar


# Adversarial AutoEncoder (AAE)
class AAE(nn.Module):
    """
    Encoder:(87-16-4-2)
    Decoder: (2-4-16-87)
    Discriminator: (16-4-2)
    Activation: LeakyReLU
    """
    def __init__(self):
        super(AAE, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(87, 16),
            nn.LeakyReLU(),
            nn.Linear(16, 4),
            nn.LeakyReLU(),
            nn.Linear(4, 2),
            nn.LeakyReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(2, 4),
            nn.LeakyReLU(),
            nn.Linear(4, 16),
            nn.LeakyReLU(),
            nn.Linear(16, 87),
            nn.LeakyReLU()
        )
        self.discriminator = nn.Sequential(
            nn.Linear(2, 16),
            nn.LeakyReLU(),
            nn.Linear(16, 4),
            nn.LeakyReLU(),
            nn.Linear(4, 2), # Output for binary classification (real/fake)
            nn.Sigmoid()
        )

    def forward(self, x):
        z = self.encoder(x)
        x_recon = self.decoder(z)
        return x_recon, z

    def discriminate(self, z):
        return self.discriminator(z)
# GRU-VAE
class GRUVAE(nn.Module):
    """
    Gated Recurrent Unit : num_layers=2, hidden_size=256, dropout=0.01,window size (seq_len)= 40
    """
    def __init__(self, input_dim=87, hidden_dim=256, latent_dim=32, num_layers=2, dropout=0.01):
        super(GRUVAE, self).__init__()
        self.encoder_gru = nn.GRU(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            dropout=dropout,
            batch_first=True
        )
        self.fc_mu = nn.Linear(hidden_dim, latent_dim)
        self.fc_logvar = nn.Linear(hidden_dim, latent_dim)
        self.fc_z_to_hidden = nn.Linear(latent_dim, hidden_dim)
        self.decoder_gru = nn.GRU(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            dropout=dropout,
            batch_first=True
        )
        self.fc_out = nn.Linear(hidden_dim, input_dim)

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        # x shape: [batch_size, seq_len, input_dim=87]
        _, hidden = self.encoder_gru(x)  # hidden: [num_layers, batch_size, hidden_dim] h0 Defaults to zeros (not provided).
        h = hidden[-1]  # [batch_size, hidden_dim]
        mu = self.fc_mu(h)  # [batch_size, latent_dim]
        logvar = self.fc_logvar(h)  # [batch_size, latent_dim]
        z = self.reparameterize(mu, logvar)  # [batch_size, latent_dim]
        h0 = self.fc_z_to_hidden(z).unsqueeze(0).repeat(self.encoder_gru.num_layers, 1, 1)  # [num_layers, batch_size, hidden_dim]
        # Initialize decoder input with zeros 
        decoder_input = torch.zeros_like(x)
        output, _ = self.decoder_gru(decoder_input, h0)  # [batch_size, seq_len, hidden_dim]
        x_recon = self.fc_out(output)  # [batch_size, seq_len, input_dim]
        return x_recon, mu, logvar

In [19]:
dataset_directory = "./dataset" # change this to the folder contain benign and attack subdirs
modbus = ModbusDataset(dataset_directory,"ready")
modbus.summary_print()

 The CIC Modbus Dataset contains network (pcap) captures and attack logs from a simulated substation network.
                The dataset is categorized into two groups: an attack dataset and a benign dataset
                The attack dataset includes network traffic captures that simulate various types of Modbus protocol attacks in a substation environment.
                The attacks are reconnaissance, query flooding, loading payloads, delay response, modify length parameters, false data injection, stacking Modbus frames, brute force write and baseline replay.
                These attacks are based of some techniques in the MITRE ICS ATT&CK framework.
                On the other hand, the benign dataset consists of normal network traffic captures representing legitimate Modbus communication within the substation network.
                The purpose of this dataset is to facilitate research, analysis, and development of intrusion detection systems, anomaly detection algorithms and

In [20]:

def load_scalers(scaler_dir='fitted_scalers'):
    """
    Loads fitted scaler models from disk.

    Args:
        scaler_dir (str, optional): The directory where scalers are saved. Defaults to 'fitted_scalers'.

    Returns:
        dict: A dictionary containing the loaded scalers, organized by subdirectory.
              Returns an empty dictionary if the directory doesn't exist.
    """
    loaded_scalers = {}
    if not os.path.exists(scaler_dir):
        print(f"Scaler directory '{scaler_dir}' not found.")
        return loaded_scalers

    for subdir_name in os.listdir(scaler_dir):
        subdir_path = os.path.join(scaler_dir, subdir_name)
        if os.path.isdir(subdir_path):
            try:
                min_max_path = os.path.join(subdir_path, 'min_max_scalers.pkl')
                standard_path = os.path.join(subdir_path, 'standard_scalers.pkl')

                if os.path.exists(min_max_path) and os.path.exists(standard_path):
                    # In the load_scalers function, replace joblib.load() with:
                    with open(min_max_path, 'rb') as f:
                        min_max_scalers = pickle.load(f)
                    with open(standard_path, 'rb') as f:
                        standard_scalers = pickle.load(f)
                    loaded_scalers[subdir_name] = {
                        'min_max_scalers': min_max_scalers,
                        'standard_scalers': standard_scalers
                    }
                    print(f"Successfully loaded scalers for '{subdir_name}'")
            except Exception as e:
                print(f"Could not load scalers for '{subdir_name}'. Error: {e}")
                
    return loaded_scalers


In [21]:
# In the load_scalers function, replace joblib.load() with:
print("\n--- Loading scalers from disk ---")
loaded_scalers = load_scalers("fitted_scalers")

# #Example of iterating
# for sequences, labels in lstm_dataloader:
#     print(f"Batch of sequences shape: {sequences.shape}")
#     print(f"Batch of labels shape: {labels.shape}")
#     # Your LSTM model training logic here...
#     break



--- Loading scalers from disk ---
Successfully loaded scalers for 'network-wide'


In [22]:
#Example of iterating
lstm_dataset=CSVDataset(
    shuffle=False,chunk_size=1,batch_size=64,csv_files=modbus.dataset["benign_dataset_dir"][0:2],scalers=loaded_scalers['network-wide']['min_max_scalers'],window_size=1
)

lstm_dataloader=DataLoader(lstm_dataset,batch_size=1,shuffle=False)

In [23]:
print(len(lstm_dataloader))

4779


In [None]:
import time
time_1 = time.time()
for epoch in range(2):
    print("time",time.time()-time_1 )
    i =0
    time_1 = time.time()
    for sequences, labels in lstm_dataloader:
        # print(f"Batch of sequences shape: {sequences.shape}")
        # print(f"Batch of labels shape: {labels.shape}")
        # # Your LSTM model training logic here...
        # i+=1
        # print(i)
        pass
    print("time",time.time()-time_1,i)


time 6.413459777832031e-05
here 0 64
Batch of sequences shape: torch.Size([1, 64, 89])
Batch of labels shape: torch.Size([1, 64])
1
here 64 128
Batch of sequences shape: torch.Size([1, 64, 89])
Batch of labels shape: torch.Size([1, 64])
2
here 128 192
Batch of sequences shape: torch.Size([1, 64, 89])
Batch of labels shape: torch.Size([1, 64])
3
here 192 256
Batch of sequences shape: torch.Size([1, 64, 89])
Batch of labels shape: torch.Size([1, 64])
4
here 256 320
Batch of sequences shape: torch.Size([1, 64, 89])
Batch of labels shape: torch.Size([1, 64])
5
here 320 384
Batch of sequences shape: torch.Size([1, 64, 89])
Batch of labels shape: torch.Size([1, 64])
6
here 384 448
Batch of sequences shape: torch.Size([1, 64, 89])
Batch of labels shape: torch.Size([1, 64])
7
here 448 512
Batch of sequences shape: torch.Size([1, 64, 89])
Batch of labels shape: torch.Size([1, 64])
8
here 512 576
Batch of sequences shape: torch.Size([1, 64, 89])
Batch of labels shape: torch.Size([1, 64])
9
here 

KeyboardInterrupt: 