### Download and make the dataset ready in Kaggle 


In [1]:

# ## uncomment if The zip file of the dataset isn't downloaded,extraced 
# !pip install gdown
# Copy the link. The file ID is the long string of characters between d/ and /view.
#For example, in the URL https://drive.google.com/file/d/1aBcDeFgHiJkLmNoPqRsTuVwXyZ/view?usp=sharing, 
#the file ID is 1aBcDeFgHiJkLmNoPqRsTuVwXyZ
# !mkdir /kaggle/tmp
# !gdown  1pzXpA5Cz0DJmjRsLxlqRNnJq-kOUvojb -O /kaggle/tmp/Labeled_CICMODBUS2023.zip
# !unzip /kaggle/tmp/Labeled_CICMODBUS2023.zip -d /kaggle/working/

# # ## uncomment if the python modules (modbus.py,utils.py ,...) not cloned  and added to the path 

# !git clone https://github.com/hamid-rd/FLBased-ICS-NIDS.git
# import sys
# # Add the repository folder to the Python path
# repo_path = '/kaggle/working/FLBased-ICS-NIDS'
# repo_input_path = '/kaggle/input/training/FLBased-ICS-NIDS'
# dataset_path = '/kaggle/input/training/'

# for path in {repo_path,repo_input_path,dataset_path}:
#     if path not in sys.path:
#         sys.path.append(path)


In [2]:
# To test if every thing is okay (modbus.py class and correct number of founded csv files )
from modbus import ModbusDataset,ModbusFlowStream

# dataset_directory = "/kaggle/working/ModbusDataset" 
# dataset_directory = "/kaggle/input/training/ModbusDataset" 
dataset_directory = "dataset" 

modbus = ModbusDataset(dataset_directory,"ready")
modbus.summary_print()

# Don't forget to save version in kaggle (to save outputs written on the disk (/kaggle/working/))  

 The CIC Modbus Dataset contains network (pcap) captures and attack logs from a simulated substation network.
                The dataset is categorized into two groups: an attack dataset and a benign dataset
                The attack dataset includes network traffic captures that simulate various types of Modbus protocol attacks in a substation environment.
                The attacks are reconnaissance, query flooding, loading payloads, delay response, modify length parameters, false data injection, stacking Modbus frames, brute force write and baseline replay.
                These attacks are based of some techniques in the MITRE ICS ATT&CK framework.
                On the other hand, the benign dataset consists of normal network traffic captures representing legitimate Modbus communication within the substation network.
                The purpose of this dataset is to facilitate research, analysis, and development of intrusion detection systems, anomaly detection algorithms and

### Unsupervised Autoencoder training  

In [3]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
import numpy as np # For standard deviation calculation
from modbus import ModbusDataset,ModbusFlowStream
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix,recall_score
import torch.optim as optim
from torch.utils.data import DataLoader
import time
from utils import load_scalers
import random
from random import SystemRandom
from sklearn.model_selection import train_test_split
import itertools
import torch.nn.init as init
from collections import Counter

def compute_threshold(mse_values,k=1):

    """
    K-SIGMA
    Computes the anomaly detection threshold (for marking sample as Intrusion if the IS was greater )
    based on the mean and standard deviation of Mean Squared Error (MSE) values.
    Formula: thr = mean(MSE) + std(MSE)
    Args:
    mse_values (torch.Tensor or list/np.array): A tensor or list of MSE values

                            obtained from the validation set.
    Returns:
    float: The calculated threshold.
    float: The calculated std.

    """
    if not isinstance(mse_values, torch.Tensor):
        mse_values = torch.tensor(mse_values, dtype=torch.float32)
    if mse_values.numel() == 0:
        return 0.0
    mean_mse = torch.mean(mse_values)
    std_mse = torch.std(mse_values)
    print("-----------mse_loss mean : ",f"{mean_mse.item():.4f}","std:",f"{std_mse.item():.4f}")
    threshold = mean_mse + k*std_mse
    return threshold.item(),std_mse.item()

def vae_loss_function(recon_x, x, mu, logvar,beta=0.05):
    """
    VAE loss function.
    """
    #equivalent to BCE loss
    BCE = nn.functional.mse_loss(recon_x, x, reduction='sum')
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    # print(BCE.item(),KLD.item())
    return (BCE + beta*KLD)

def _init_weights( module):
    ## for one layer apply Xavier Initialization
    if isinstance(module, nn.Linear):
        init.xavier_normal_(module.weight)
        if module.bias is not None:
            init.zeros_(module.bias)
    return module


In [4]:
# dataset_directory = "/kaggle/input/training/ModbusDataset" # change this to the folder contain benign and attack subdirs
# dataset_directory = "dataset" 
dataset_directory = "./ModbusDataset/" 

modbus = ModbusDataset(dataset_directory,"ready")
modbus.summary_print()

 The CIC Modbus Dataset contains network (pcap) captures and attack logs from a simulated substation network.
                The dataset is categorized into two groups: an attack dataset and a benign dataset
                The attack dataset includes network traffic captures that simulate various types of Modbus protocol attacks in a substation environment.
                The attacks are reconnaissance, query flooding, loading payloads, delay response, modify length parameters, false data injection, stacking Modbus frames, brute force write and baseline replay.
                These attacks are based of some techniques in the MITRE ICS ATT&CK framework.
                On the other hand, the benign dataset consists of normal network traffic captures representing legitimate Modbus communication within the substation network.
                The purpose of this dataset is to facilitate research, analysis, and development of intrusion detection systems, anomaly detection algorithms and

In [5]:

# AutoEncoder (AE)
class AE(nn.Module):
    """
    Encoder: (76-32-16-4-2)
    Decoder: (2-4-16-32-76)
    """
    def __init__(self,input_dim=76):
        super(AE, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 32),
            nn.ReLU(),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Linear(16, 4),
            nn.ReLU(),
            nn.Linear(4, 2),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(2, 4),
            nn.ReLU(),
            nn.Linear(4, 16),
            nn.ReLU(),
            nn.Linear(16, 32),
            nn.ReLU(),
            nn.Linear(32, input_dim),
            nn.Sigmoid()
        )

    def forward(self, x):
        z = self.encoder(x)
        x_recon = self.decoder(z)
        return x_recon


# Variational AutoEncoder (VAE)
class VAE(nn.Module):
    """
    Encoder: (76-16-4-2 for mu and log_var)
    Decoder: (2-4-16-76)
    return x_recon, mu, logvar
    """
    def __init__(self,input_dim=76):
        super(VAE, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 16),
            nn.ReLU(),
            nn.Linear(16, 4),
            nn.ReLU(),
        )
        self.fc_mu = nn.Linear(4, 2)
        self.fc_logvar = nn.Linear(4, 2)
        self.decoder = nn.Sequential(
            nn.Linear(2, 4),
            nn.ReLU(),
            nn.Linear(4, 16),
            nn.ReLU(),
            nn.Linear(16, input_dim),
            nn.Sigmoid()
                    )

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        h = self.encoder(x)
        mu = self.fc_mu(h)
        logvar = self.fc_logvar(h)
        z = self.reparameterize(mu, logvar)
        x_recon = self.decoder(z)
        return x_recon, mu, logvar

    
class AAE_Encoder(nn.Module):
    def __init__(self,input_dim=76):
        """
        Encoder(Generator):(76-16-4-2)
        """
        super(AAE_Encoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 16),
            nn.LeakyReLU(0.2),
            nn.Linear(16, 4),
            nn.LeakyReLU(0.2),
            nn.Linear(4, 2))
    def forward(self, x):
        return self.encoder(x)
class AAE_Decoder(nn.Module):
    def __init__(self,input_dim=76):
        super(AAE_Decoder, self).__init__()
        self.decoder = nn.Sequential(
            nn.Linear(2, 4),
            nn.LeakyReLU(),
            nn.Linear(4, 16),
            nn.LeakyReLU(),
            nn.Linear(16, input_dim),
            nn.Sigmoid()
        )
    def forward(self, x):
        return self.decoder(x)
class AAE_Discriminator(nn.Module):
    def __init__(self):
        super(AAE_Discriminator, self).__init__()
        # corrected to 2-16-4-1
        self.discriminator = nn.Sequential(
            nn.Linear(2, 16),
            nn.LeakyReLU(),
            nn.Linear(16, 4),
            nn.LeakyReLU(),
            nn.Linear(4, 1), 
            nn.Sigmoid()
        )    
    def forward(self, x):
        return self.discriminator(x)
 
class AdversarialAutoencoder(nn.Module):
    def __init__(self):
        super(AdversarialAutoencoder, self).__init__()
        self.encoder = AAE_Encoder()
        self.decoder = AAE_Decoder()
        self.discriminator = AAE_Discriminator()
    def forward(self, x):
        fake_z = self.encoder(x)
        x_recon = self.decoder(fake_z)
        return fake_z,x_recon


In [6]:
from torch.utils.data import Dataset
class InMemoryDataset(Dataset):
    """A simple dataset that serves data from pre-loaded tensors."""
    def __init__(self, data, labels):
        # This dataset holds references to the data, which should already be in shared memory.
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

### Part a: Centralized learning 

##### You can go from here right to the FL part


In [95]:
def make_dataloader(load_all_chunks=True):

    ## must be called after that the global train_files,val_files and test_files plus loades scalers are initilized
    ## return train, valid and test dataloaders
    if load_all_chunks==True:
        large_chunk_size = modbus.dataset["metadata"]["founded_files_num"]["total_dataset_num"]

        dataset_configs = {
            "train": {"files": train_files},
            "val": {"files": val_files},
            "test": {"files": test_files},
        }
        datasets = {}
        ae_datasets = {}
        dataloaders = {}

        print("Cow Processing datasets...")

        for name, config in dataset_configs.items():
            print(f"  - Creating '{name}' dataset...")
            
            # 1. Create the primary ModbusFlowStream dataset
            datasets[name] = ModbusFlowStream(
                shuffle=False,
                chunk_size=large_chunk_size,
                batch_size=1,
                csv_files=config["files"],
                scalers=loaded_scalers['network-wide']['min_max_scalers'],
                window_size=window_size
            )
            
            # 2. Call __getitem__(0) once to load the entire dataset chunk into memory
            datasets[name].__getitem__(0)

            # 2. Extract the raw tensors
            all_data = datasets[name].current_chunk_data
            all_labels = datasets[name].current_chunk_labels
            
            # all_data.share_memory_()
            # all_labels.share_memory_()
            
            # 4. Create an instance of our SIMPLE dataset using the SHARED tensors.
            shared_dataset = InMemoryDataset(all_data, all_labels)

            # 5. Create the DataLoader from the simple dataset. This will work correctly with workers.
            if name=="train":
                shuffle_samples =True
            else:
                shuffle_samples =False
            # dataloaders[name] = DataLoader(
            #     shared_dataset,
            #     batch_size=64,
            #     shuffle=shuffle_samples,
            #     num_workers=4
            #     persistent_workers=True,
            #     pin_memory=True
            # )
            dataloaders[name] = DataLoader(
                shared_dataset,
                batch_size=64,
                shuffle=shuffle_samples,
                num_workers=0, # You can now use multiple workers effectively.
                persistent_workers=False,
                pin_memory=False
            )
                    
            
            print(f"  - Finished '{name}' dataset.")
        train_dataloader = dataloaders['train']
        val_dataloader = dataloaders['val']
        test_dataloader = dataloaders['test']

    else :
        train_dataloader=DataLoader(ModbusFlowStream( 
            shuffle=True,chunk_size=1,batch_size=64,csv_files=train_files,scalers=loaded_scalers['network-wide']['min_max_scalers'],window_size=window_size
        ),  batch_size=1,shuffle=False)
        val_dataloader=DataLoader(ModbusFlowStream( 
            shuffle=False,chunk_size=1,batch_size=64,csv_files=val_files,scalers=loaded_scalers['network-wide']['min_max_scalers'],window_size=window_size
        ),batch_size=1,shuffle=False)
        test_dataloader=DataLoader(ModbusFlowStream(shuffle=False,chunk_size=1,batch_size=64,csv_files=test_files,scalers=loaded_scalers['network-wide']['min_max_scalers'],window_size=window_size),
                                    batch_size=1,shuffle=False)
    return train_dataloader,val_dataloader,test_dataloader


In [117]:

def train_eval(model,train_dataloader,val_dataloader,test_dataloader,learning_rates= [5e-6,1e-7,5e-5,1e-5,1e-6],
               weight_decays=[1e-5,1e-4,1e-7],shuffle_files=True,num_epochs=20,eval_epoch=4,criterion_method="mse", k_range=[1,3],train_model=True):
    device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model=model.to(device)
    if criterion_method=="bce":
        criterion = nn.BCELoss(reduction='sum').to(device)
        eval_criterion = nn.BCELoss(reduction='none').to(device)
    else: #mse
        criterion = nn.MSELoss(reduction='sum').to(device)
        eval_criterion = nn.MSELoss(reduction='none').to(device)

    for lr, wd in itertools.product(learning_rates, weight_decays):
        if model._get_name()=="AdversarialAutoencoder":
            adversarial_criterion= nn.BCELoss(reduction="sum")
            optimizer_D = optim.Adam(model.discriminator.parameters(), lr=lr, weight_decay=wd)
            optimizer_G =  optim.Adam(list(model.encoder.parameters()) + list(model.decoder.parameters()), lr=lr, weight_decay=wd)
        else:
            AE_optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
            ### new code
            # AE_optimizer = optim.SGD(model.parameters(), lr=lr, weight_decay=wd)

        print(f"\n==================  lr={lr}, wd={wd} ==================")
        if train_model==True:
            model.apply(_init_weights)
        for epoch in range(num_epochs):
            if train_model==True:
                time_1 = time.time()
                model.train()
                train_loss = 0
                ## for AAE
                Discriminator_loss = 0
                for sequences, labels in train_dataloader:
                    sequences=sequences.squeeze().to(device)
                    if labels.sum()!=0:
                        continue
                    if model._get_name()=="AdversarialAutoencoder":
                        # 1) generator loss

                        target_ones= torch.ones(sequences.size(0), 1,device=device,dtype=torch.float)
                        target_zeros= torch.zeros(sequences.size(0), 1,device=device,dtype=torch.float)
                        random_latent = torch.randn(sequences.size(0), 2, device=device)
                        optimizer_G.zero_grad()
                        fake_z,decoded_seq = model(sequences)
                        G_loss = 0.001*adversarial_criterion(model.discriminator(fake_z),target_ones ) + 0.999*criterion(decoded_seq, sequences)
                        G_loss.backward()
                        optimizer_G.step()
                        # 2) discriminator loss
                        optimizer_D.zero_grad()
                        real_loss = adversarial_criterion(model.discriminator(random_latent), target_ones)
                        fake_loss = adversarial_criterion(model.discriminator(fake_z.detach()),  target_zeros)
                        D_loss = 0.5*(real_loss + fake_loss)
                        D_loss.backward()
                        optimizer_D.step()
                        train_loss+=G_loss.item()
                        Discriminator_loss+=D_loss.item()   
                    else:
                        AE_optimizer.zero_grad()
                        if model._get_name()=="AE":
                            recon = model(sequences)
                            loss = criterion(recon, sequences) / sequences.size(0)
                        elif model._get_name()=="VAE" :
                            recon, mu, logvar = model(sequences)
                            loss = vae_loss_function(recon, sequences, mu, logvar) /sequences.size(0)
                        loss.backward()
                        AE_optimizer.step()
                        train_loss += loss.item()
                print(f"Train : time {(time.time()-time_1):.2f} s",
                f"Epoch {epoch+1}")
                if model._get_name()=="AdversarialAutoencoder":
                    print(f"Generator Loss: {train_loss / len(train_dataloader):.4f}",
                        f"Discriminator Loss: {Discriminator_loss / len(train_dataloader):.4f}")
                else:
                    print(f"Train Loss: {train_loss / len(train_dataloader):.4f}")
            # Evaluate part
            if (epoch + 1) % eval_epoch == 0:
                model.eval() 
                all_val_losses = []
                all_val_labels = []
                print(f"--- Running Evaluation for Epoch {epoch+1} lr ={lr} wd {wd} ---")
                with torch.no_grad():
                    for sequences, labels in val_dataloader:
                        sequences = sequences.squeeze().to(device) 
                        if criterion_method=="bce":
                            ## may test features be greater than 1 after scaling 
                            sequences=torch.clamp(sequences, min=0.0, max=1.0)      
                        if model._get_name()=="AE":
                            recon = model(sequences)
                        elif model._get_name()=="VAE" :
                            recon, _, _ = model(sequences)
                        elif model._get_name()=="AdversarialAutoencoder":
                            _,recon= model(sequences)
                        val_loss = eval_criterion(recon, sequences)
                        if val_loss.dim() > 1:
                            val_loss = val_loss
                        else:
                            val_loss = val_loss.unsqueeze(dim=0)
                            labels = labels.unsqueeze(dim=0)
                        val_loss = val_loss.sum(dim=1)
                        all_val_losses.extend(val_loss.cpu().numpy())
                        all_val_labels.extend(labels.flatten().cpu().numpy())
                threshold_1,std_mse = compute_threshold(all_val_losses,k=0)

                all_val_losses = np.array(all_val_losses).squeeze()  
                all_val_labels = np.array(all_val_labels).squeeze()  
                # If intrusion score > threshold, predict 1 (intrusion), else 0 (benign)
                # For FDR, get True Positives (TP) and False Positives (FP)
                
                predictions = (all_val_losses > threshold_1).astype(int)

                accuracy = accuracy_score(all_val_labels, predictions)
                print(f"Val: Accuracy: {accuracy:.4f}  ")
                model.eval() 
                all_test_losses = []
                all_test_labels = []
                with torch.no_grad():
                    for sequences, labels in test_dataloader:
                        sequences = sequences.squeeze().to(device)
                        labels = labels.squeeze().to(device)
                        if criterion_method=="bce":
                            ## may test features be greater than 1 after scaling 
                            sequences=torch.clamp(sequences, min=0.0, max=1.0)
                        if model._get_name()=="AE":
                            recon = model(sequences)
                        elif model._get_name()=="VAE"  :
                            recon, mu, logvar = model(sequences)
                        elif model._get_name()=="AdversarialAutoencoder":
                            _,recon= model(sequences)

                        intrusion_scores = eval_criterion(recon, sequences)
                        if intrusion_scores.dim() > 1:
                            intrusion_scores = intrusion_scores
                        else:
                            intrusion_scores = intrusion_scores.unsqueeze(dim=0)
                            labels = labels.unsqueeze(dim=0)
                        if intrusion_scores.dim()==3:
                            ##GRU : mean of window
                            intrusion_scores = intrusion_scores.mean(dim=1)
                        intrusion_scores = intrusion_scores.sum(dim=1)
                        all_test_losses.extend(intrusion_scores.cpu().numpy())
                        all_test_labels.extend(labels.cpu().numpy())

                all_test_losses = np.array(all_test_losses)
                all_test_labels = np.array(all_test_labels)
                for k in k_range:
                    threshold=threshold_1+k*std_mse
                    print(f" K: {k} K-SIGMA Threshold : ---thr {threshold:.4}")
                    predictions = (all_test_losses > threshold).astype(int)
                    binary_test_labels = (all_test_labels != 0).astype(int)

                    # Find the indices where the prediction was incorrect
                    misclassified_indices = np.where(binary_test_labels != predictions)[0]

                    # Get the original labels for those misclassified instances
                    misclassified_original_labels = all_test_labels[misclassified_indices]

                    # To get a summary count of which labels were misclassified
                    print("Counts of : original binary labels",Counter(binary_test_labels),"predicted binary labels",Counter(predictions))
                    print(f"Counts of  original  labels: {dict(sorted(Counter(all_test_labels).items()))}")
                    print(f"Counts of misclassified original labels: {dict(sorted(Counter(misclassified_original_labels).items()))}")
                    accuracy = accuracy_score(binary_test_labels, predictions)
                    f1 = f1_score(binary_test_labels, predictions, zero_division=0)
                    recall = recall_score(binary_test_labels, predictions,zero_division=0)
                    _, fp, _, tp = confusion_matrix(binary_test_labels, predictions, labels=[0, 1]).ravel()
                    # FDR = FP / (FP + TP) 
                    if (fp + tp) == 0:
                        fdr = 0.0 
                    else:
                        fdr = fp / (fp + tp)
                    print(f"Test : Accuracy: {accuracy:.4f} Recall : {recall:.4f} FDR: {fdr:.4f}  F1-score: {f1:.4f}  ")
                    !mkdir best_center_models -p
                    if train_model:
                        save_path ="best_center_models/"+model._get_name()+"_f1_"+f"{f1:.4f}" +"_recall_"+f"{recall:.4f}" +"_.pth"
                        torch.save(model.state_dict(),save_path)
                        print("model",model._get_name(),"is saved in" ,save_path )


#### Train on network-wdie
#### evaluate on comrpomised-scada IED node ied1b during centralized training.

In [None]:
train_files=[col for col in modbus.dataset["benign_dataset_dir"] if col.find("network-wide")!=-1][:]
test_files= [col for col in modbus.dataset["attack_dataset_dir"]["compromised-scada"] if col.find("ied1b")!=-1]


### missed attack logs files for the day 21 for ied1b which can reduce the accuracy.
test_files.remove(dataset_directory+"attack/compromised-scada/ied1b/ied1b-network-captures/ready/vethc76bd3f-6-labeled.csv")
# test_files.remove(dataset_directory+"/attack/compromised-scada/ied1b/ied1b-network-captures/ready/vethc76bd3f-6-labeled.csv")
SEED=20
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)
random.shuffle(train_files)
random.shuffle(test_files)

train_files = train_files[:-3]
val_files = train_files[-3:]
test_files = test_files[:2]
loaded_scalers=load_scalers("fitted_scalers")
print("ied1b comp ied attack ->\n test: ",len(test_files),test_files)
print("----------- Network-wide number of csv files -> \n ----------- train :",len(train_files),train_files,"\n -------- valid:",len(val_files),val_files)

Successfully loaded scalers for 'network-wide'
ied1b comp ied attack ->
 test:  2 ['./ModbusDataset/attack/compromised-scada/ied1b/ied1b-network-captures/ready/vethc76bd3f-7-labeled.csv', './ModbusDataset/attack/compromised-scada/ied1b/ied1b-network-captures/ready/vethc76bd3f-3-labeled.csv']
----------- Network-wide number of csv files -> 
 ----------- train : 16 ['./ModbusDataset/benign/network-wide-pcap-capture/network-wide/ready/network-wide-normal-27-labeled.csv', './ModbusDataset/benign/network-wide-pcap-capture/network-wide/ready/network-wide-normal-29-labeled.csv', './ModbusDataset/benign/network-wide-pcap-capture/network-wide/ready/network-wide-normal-20-labeled.csv', './ModbusDataset/benign/network-wide-pcap-capture/network-wide/ready/network-wide-normal-19-labeled.csv', './ModbusDataset/benign/network-wide-pcap-capture/network-wide/ready/network-wide-normal-15-labeled.csv', './ModbusDataset/benign/network-wide-pcap-capture/network-wide/ready/network-wide-normal-14-labeled.csv

In [None]:
def make_dataloader(load_all_chunks=True):

    ## must be called if global train_files,val_files and test_files plus loades scalers are initilized
    ## return train, valid and test dataloaders
    if load_all_chunks:
        large_chunk_size = modbus.dataset["metadata"]["founded_files_num"]["total_dataset_num"]

        dataset_configs = {
            "train": {"files": train_files},
            "val": {"files": val_files},
            "test": {"files": test_files},
        }
        datasets = {}
        ae_datasets = {}
        dataloaders = {}

        print("Cow Processing datasets...")

        for name, config in dataset_configs.items():
            print(f"  - Creating '{name}' dataset...")
            
            # 1. Create the primary ModbusFlowStream dataset
            datasets[name] = ModbusFlowStream(
                shuffle=False,
                chunk_size=large_chunk_size,
                batch_size=1,
                csv_files=config["files"],
                scalers=loaded_scalers['network-wide']['min_max_scalers'],
                window_size=window_size
            )
            
            # 2. Call __getitem__(0) once to load the entire dataset chunk into memory
            datasets[name].__getitem__(0)

            # 2. Extract the raw tensors
            all_data = datasets[name].current_chunk_data
            all_labels = datasets[name].current_chunk_labels
            
            # all_data.share_memory_()
            # all_labels.share_memory_()
            
            # 4. Create an instance of our SIMPLE dataset using the SHARED tensors.
            shared_dataset = InMemoryDataset(all_data, all_labels)

            # 5. Create the DataLoader from the simple dataset. This will work correctly with workers.
            if name=="train":
                shuffle_samples =True
            else:
                shuffle_samples =False
            # dataloaders[name] = DataLoader(
            #     shared_dataset,
            #     batch_size=64,
            #     shuffle=shuffle_samples,
            #     num_workers=4
            #     persistent_workers=True,
            #     pin_memory=True
            # )
            dataloaders[name] = DataLoader(
                shared_dataset,
                batch_size=64,
                shuffle=shuffle_samples,
                num_workers=0, # You can now use multiple workers effectively.
                persistent_workers=False,
                pin_memory=False
            )
                    
            
            print(f"  - Finished '{name}' dataset.")
        train_dataloader = dataloaders['train']
        val_dataloader = dataloaders['val']
        test_dataloader = dataloaders['test']

    else :
        train_dataloader=DataLoader(ModbusFlowStream( 
            shuffle=True,chunk_size=1,batch_size=64,csv_files=train_files,scalers=loaded_scalers['network-wide']['min_max_scalers'],window_size=window_size
        ),  batch_size=1,shuffle=False)
        val_dataloader=DataLoader(ModbusFlowStream( 
            shuffle=False,chunk_size=1,batch_size=64,csv_files=val_files,scalers=loaded_scalers['network-wide']['min_max_scalers'],window_size=window_size
        ),batch_size=1,shuffle=False)
        test_dataloader=DataLoader(ModbusFlowStream(shuffle=False,chunk_size=1,batch_size=64,csv_files=test_files,scalers=loaded_scalers['network-wide']['min_max_scalers'],window_size=window_size),
                                    batch_size=1,shuffle=False)
    return train_dataloader,val_dataloader,test_dataloader


In [None]:
train_dataloader,val_dataloader,test_dataloader= make_dataloader(load_all_chunks=False)


In [109]:
t_1=time.time()
for seq,label in train_dataloader:
     pass
print(time.time()-t_1)

33.09274983406067


In [110]:
print(len(train_dataloader),len(val_dataloader),len(test_dataloader))

36920 7193 2174


In [112]:
AE_model = AE(input_dim=76)
train_eval(AE_model,train_dataloader,val_dataloader,test_dataloader,shuffle_files=True,num_epochs=2,eval_epoch=2,criterion_method="mse",learning_rates=[1e-2,1e-3,1e-4,1e-5],weight_decays=[1e-4,1e-5],k_range=[1,3])
# --- Running Evaluation for Epoch 6 lr =0.001 wd 1e-05 ---



Train : time 164.74 s Epoch 1
Train Loss: 0.0511
Train : time 177.71 s Epoch 2
Train Loss: 0.0156
--- Running Evaluation for Epoch 2 lr =0.01 wd 0.0001 ---
-----------mse_loss mean :  0.0093 std: 0.1554
Val: Accuracy: 0.9101  
 K: 1 K-SIGMA Threshold : ---thr 0.1647
Counts of : original binary labels Counter({0: 97486, 1: 41564}) predicted binary labels Counter({0: 102696, 1: 36354})
Counts of  original  labels: {0: 97486, 1: 41306, 2: 59, 3: 40, 4: 55, 5: 40, 6: 36, 7: 28}
Counts of misclassified original labels: {0: 94, 1: 5260, 2: 14, 3: 15, 4: 4, 5: 1, 6: 10}
Test : Accuracy: 0.9612 Recall : 0.8724 FDR: 0.0026  F1-score: 0.9307  
model AE is saved in best_models/AE_f1_0.93_recall_0.87_.pth
 K: 3 K-SIGMA Threshold : ---thr 0.4756
Counts of : original binary labels Counter({0: 97486, 1: 41564}) predicted binary labels Counter({0: 103018, 1: 36032})
Counts of  original  labels: {0: 97486, 1: 41306, 2: 59, 3: 40, 4: 55, 5: 40, 6: 36, 7: 28}
Counts of misclassified original labels: {0:

In [113]:
VAE_model = VAE(input_dim=76)
train_eval(VAE_model,train_dataloader,val_dataloader,test_dataloader,shuffle_files=True,num_epochs=2,eval_epoch=2,criterion_method="mse",learning_rates=[1e-2,1e-3,1e-4,1e-5],weight_decays=[1e-3,1e-4],k_range=[1,3])



Train : time 165.62 s Epoch 1
Train Loss: 0.2089
Train : time 184.23 s Epoch 2
Train Loss: 0.1849
--- Running Evaluation for Epoch 2 lr =0.01 wd 0.001 ---
-----------mse_loss mean :  0.0732 std: 0.4997
Val: Accuracy: 0.8659  
 K: 1 K-SIGMA Threshold : ---thr 0.5729
Counts of : original binary labels Counter({0: 97486, 1: 41564}) predicted binary labels Counter({0: 102985, 1: 36065})
Counts of  original  labels: {0: 97486, 1: 41306, 2: 59, 3: 40, 4: 55, 5: 40, 6: 36, 7: 28}
Counts of misclassified original labels: {0: 102, 1: 5530, 2: 14, 3: 16, 4: 4, 5: 1, 6: 10, 7: 26}
Test : Accuracy: 0.9590 Recall : 0.8652 FDR: 0.0028  F1-score: 0.9265  
model VAE is saved in best_models/VAE_f1_0.93_recall_0.87_.pth
 K: 3 K-SIGMA Threshold : ---thr 1.572
Counts of : original binary labels Counter({0: 97486, 1: 41564}) predicted binary labels Counter({0: 103032, 1: 36018})
Counts of  original  labels: {0: 97486, 1: 41306, 2: 59, 3: 40, 4: 55, 5: 40, 6: 36, 7: 28}
Counts of misclassified original lab


Train : time 165.04 s Epoch 1
Train Loss: 0.3184
Train : time 155.17 s Epoch 2
Train Loss: 0.1613
Train : time 158.38 s Epoch 3
Train Loss: 0.1547
Train : time 168.95 s Epoch 4
Train Loss: 0.1528
Train : time 163.59 s Epoch 5
Train Loss: 0.1508
--- Running Evaluation for Epoch 5 lr =0.001 wd 1e-05 ---
-----------mse_loss mean :  0.0367 std: 0.2408
Val: Accuracy: 0.8536  
 K: 1 K-SIGMA Threshold : ---thr 0.2775
Counts of : original binary labels Counter({0: 529457, 1: 220322}) predicted binary labels Counter({0: 555983, 1: 193796})
Counts of  original  labels: {0: 529457, 1: 218884, 2: 317, 3: 236, 4: 243, 5: 240, 6: 170, 7: 232}
Counts of misclassified original labels: {0: 13336, 1: 39597, 2: 57, 3: 95, 4: 42, 5: 2, 6: 68, 7: 1}
Test : Accuracy: 0.9290 Recall : 0.8191 FDR: 0.0688  F1-score: 0.8715  
model VAE is saved in best_models/VAE_f1_0.87_recall_0.82_.pth
Train : time 168.75 s Epoch 6
Train Loss: 0.1486
Train : time 159.53 s Epoch 7
Train Loss: 0.1466
Train : time 159.93 s Epoch

In [114]:
AAE_model = AdversarialAutoencoder()
train_eval(AAE_model,train_dataloader,val_dataloader,test_dataloader,shuffle_files=True,num_epochs=2,eval_epoch=2,criterion_method="mse",learning_rates=[1e-2,1e-3,1e-4,1e-5],weight_decays=[1e-3,1e-4],k_range=[1,3])



Train : time 260.97 s Epoch 1
Generator Loss: 2.7325 Discriminator Loss: 25.2167
Train : time 260.60 s Epoch 2
Generator Loss: 0.8408 Discriminator Loss: 26.6410
--- Running Evaluation for Epoch 2 lr =0.01 wd 0.001 ---
-----------mse_loss mean :  0.0046 std: 0.0674
Val: Accuracy: 0.9078  
 K: 1 K-SIGMA Threshold : ---thr 0.07198
Counts of : original binary labels Counter({0: 97486, 1: 41564}) predicted binary labels Counter({0: 102696, 1: 36354})
Counts of  original  labels: {0: 97486, 1: 41306, 2: 59, 3: 40, 4: 55, 5: 40, 6: 36, 7: 28}
Counts of misclassified original labels: {0: 94, 1: 5260, 2: 14, 3: 15, 4: 4, 5: 1, 6: 10}
Test : Accuracy: 0.9612 Recall : 0.8724 FDR: 0.0026  F1-score: 0.9307  
model AdversarialAutoencoder is saved in best_models/AdversarialAutoencoder_f1_0.93_recall_0.87_.pth
 K: 3 K-SIGMA Threshold : ---thr 0.2068
Counts of : original binary labels Counter({0: 97486, 1: 41564}) predicted binary labels Counter({0: 102696, 1: 36354})
Counts of  original  labels: {0:


Train : time 266.33 s Epoch 1
Generator Loss: 56.0338 Discriminator Loss: 6.7792
Train : time 264.11 s Epoch 2
Generator Loss: 9.7689 Discriminator Loss: 12.3131
Train : time 263.87 s Epoch 3
Generator Loss: 8.1489 Discriminator Loss: 10.4486
Train : time 261.28 s Epoch 4
Generator Loss: 6.0716 Discriminator Loss: 11.2788
Train : time 256.96 s Epoch 5
Generator Loss: 5.6057 Discriminator Loss: 10.5377
--- Running Evaluation for Epoch 5 lr =0.0001 wd 1e-06 ---
-----------mse_loss mean :  0.0837 std: 0.3804
Val: Accuracy: 0.9290  
 K: 1 K-SIGMA Threshold : ---thr 0.4641
Counts of : original binary labels Counter({0: 529457, 1: 220322}) predicted binary labels Counter({0: 561825, 1: 187954})
Counts of  original  labels: {0: 529457, 1: 218884, 2: 317, 3: 236, 4: 243, 5: 240, 6: 170, 7: 232}
Counts of misclassified original labels: {0: 7937, 1: 40041, 2: 58, 3: 96, 4: 39, 5: 2, 6: 68, 7: 1}
Test : Accuracy: 0.9357 Recall : 0.8171 FDR: 0.0422  F1-score: 0.8818  
 K: 3 K-SIGMA Threshold : --

KeyboardInterrupt: 

In [118]:
#lr=0.001, wd=1e-05 
AE_model = AE(input_dim=76)
train_eval(AE_model,train_dataloader,val_dataloader,test_dataloader,shuffle_files=True,num_epochs=20,eval_epoch=4,criterion_method="mse",learning_rates=[1e-3],weight_decays=[1e-5],k_range=[1])



Train : time 157.69 s Epoch 1
Train Loss: 0.3925
Train : time 153.40 s Epoch 2
Train Loss: 0.0358
Train : time 152.62 s Epoch 3
Train Loss: 0.0248
Train : time 153.83 s Epoch 4
Train Loss: 0.0219
--- Running Evaluation for Epoch 4 lr =0.001 wd 1e-05 ---
-----------mse_loss mean :  0.0232 std: 0.3594
Val: Accuracy: 0.9913  
 K: 1 K-SIGMA Threshold : ---thr 0.3826
Counts of : original binary labels Counter({0: 97486, 1: 41564}) predicted binary labels Counter({0: 103017, 1: 36033})
Counts of  original  labels: {0: 97486, 1: 41306, 2: 59, 3: 40, 4: 55, 5: 40, 6: 36, 7: 28}
Counts of misclassified original labels: {0: 93, 1: 5552, 2: 14, 3: 15, 4: 4, 5: 1, 6: 10, 7: 28}
Test : Accuracy: 0.9589 Recall : 0.8647 FDR: 0.0026  F1-score: 0.9263  
model AE is saved in best_center_models/AE_f1_0.9263_recall_0.8647_.pth
Train : time 152.49 s Epoch 5
Train Loss: 0.0216
Train : time 157.49 s Epoch 6
Train Loss: 0.0214
Train : time 160.24 s Epoch 7
Train Loss: 0.0209
Train : time 151.94 s Epoch 8
Tra

In [119]:
#==================  lr=0.0001, wd=0.0001 ==================

VAE_model = VAE(input_dim=76)
train_eval(VAE_model,train_dataloader,val_dataloader,test_dataloader,shuffle_files=True,num_epochs=20,eval_epoch=4,criterion_method="mse",learning_rates=[1e-4],weight_decays=[1e-4,],k_range=[1])



Train : time 195.29 s Epoch 1
Train Loss: 0.7271
Train : time 188.97 s Epoch 2
Train Loss: 0.1814
Train : time 196.54 s Epoch 3
Train Loss: 0.1734
Train : time 204.82 s Epoch 4
Train Loss: 0.1685
--- Running Evaluation for Epoch 4 lr =0.0001 wd 0.0001 ---
-----------mse_loss mean :  0.0537 std: 0.2896
Val: Accuracy: 0.8444  
 K: 1 K-SIGMA Threshold : ---thr 0.3433
Counts of : original binary labels Counter({0: 97486, 1: 41564}) predicted binary labels Counter({0: 102169, 1: 36881})
Counts of  original  labels: {0: 97486, 1: 41306, 2: 59, 3: 40, 4: 55, 5: 40, 6: 36, 7: 28}
Counts of misclassified original labels: {0: 711, 1: 5348, 2: 14, 3: 14, 4: 4, 5: 1, 6: 10, 7: 3}
Test : Accuracy: 0.9561 Recall : 0.8702 FDR: 0.0193  F1-score: 0.9222  
model VAE is saved in best_center_models/VAE_f1_0.9222_recall_0.8702_.pth
Train : time 200.85 s Epoch 5
Train Loss: 0.1655
Train : time 177.86 s Epoch 6
Train Loss: 0.1617
Train : time 162.78 s Epoch 7
Train Loss: 0.1580
Train : time 166.06 s Epoch 8

In [120]:
# ==================  lr=0.01, wd=0.0001 ==================

AAE_model = AdversarialAutoencoder()
train_eval(AAE_model,train_dataloader,val_dataloader,test_dataloader,shuffle_files=True,num_epochs=20,eval_epoch=4,criterion_method="mse",learning_rates=[1e-2],weight_decays=[1e-4])



Train : time 258.46 s Epoch 1
Generator Loss: 2.5820 Discriminator Loss: 23.0968
Train : time 263.93 s Epoch 2
Generator Loss: 1.0836 Discriminator Loss: 26.7439
Train : time 260.17 s Epoch 3
Generator Loss: 0.5757 Discriminator Loss: 32.0621
Train : time 259.62 s Epoch 4
Generator Loss: 1.3244 Discriminator Loss: 27.5940
--- Running Evaluation for Epoch 4 lr =0.01 wd 0.0001 ---
-----------mse_loss mean :  0.0067 std: 0.0696
Val: Accuracy: 0.8607  
 K: 1 K-SIGMA Threshold : ---thr 0.07626
Counts of : original binary labels Counter({0: 97486, 1: 41564}) predicted binary labels Counter({0: 102696, 1: 36354})
Counts of  original  labels: {0: 97486, 1: 41306, 2: 59, 3: 40, 4: 55, 5: 40, 6: 36, 7: 28}
Counts of misclassified original labels: {0: 94, 1: 5260, 2: 14, 3: 15, 4: 4, 5: 1, 6: 10}
Test : Accuracy: 0.9612 Recall : 0.8724 FDR: 0.0026  F1-score: 0.9307  
model AdversarialAutoencoder is saved in best_center_models/AdversarialAutoencoder_f1_0.9307_recall_0.8724_.pth
 K: 3 K-SIGMA Thre

#### Evaluate pre-trained autoencoders  on the compromised-ied and compromised scada scenarios 

No exact labeling for the comp ied scenario results in low performance 

In [86]:
Trained_AE_model=AE(input_dim=76)
Trained_AE_model.load_state_dict(torch.load("./best_models/AE_f1_0.88_recall_0.81_.pth"))
Trained_VAE_model=VAE(input_dim=76)
Trained_VAE_model.load_state_dict(torch.load("./best_models/VAE_f1_0.88_recall_0.82_.pth"))
Trained_AAE_model=AdversarialAutoencoder()
Trained_AAE_model.load_state_dict(torch.load("./best_models/AdversarialAutoencoder_f1_0.88_recall_0.82_.pth"))

<All keys matched successfully>

In [None]:
dataset_directory = "./ModbusDataset/" 

modbus = ModbusDataset(dataset_directory,"ready")
modbus.summary_print()


 The CIC Modbus Dataset contains network (pcap) captures and attack logs from a simulated substation network.
                The dataset is categorized into two groups: an attack dataset and a benign dataset
                The attack dataset includes network traffic captures that simulate various types of Modbus protocol attacks in a substation environment.
                The attacks are reconnaissance, query flooding, loading payloads, delay response, modify length parameters, false data injection, stacking Modbus frames, brute force write and baseline replay.
                These attacks are based of some techniques in the MITRE ICS ATT&CK framework.
                On the other hand, the benign dataset consists of normal network traffic captures representing legitimate Modbus communication within the substation network.
                The purpose of this dataset is to facilitate research, analysis, and development of intrusion detection systems, anomaly detection algorithms and

In [None]:
for scenario in {"compromised-ied","external","compromised-scada"}:
    if scenario=="compromised-scada":
        print("scenario :",scenario)
        test_files= [col for col in modbus.dataset["attack_dataset_dir"][scenario] if col.find("ied1b")!=-1]
        ### missed attack logs for the day 21 for ied1b which can reduce the accuracy.
        test_files.remove(dataset_directory+"attack/compromised-scada/ied1b/ied1b-network-captures/ready/vethc76bd3f-6-labeled.csv")    
    elif scenario=="compromised-ied":
        print("scenario :",scenario)
        test_files= [col for col in modbus.dataset["attack_dataset_dir"][scenario] if col.find("trust-scada-hmi")!=-1]
    else:
        print("scenario :",scenario)
        test_files= [col for col in modbus.dataset["attack_dataset_dir"][scenario] if col.find("network-wide")!=-1]        

    print("----------- benign valid files:",len(val_files),val_files)
    print(f"----------{scenario} attack  test files : ",len(test_files),test_files)
    val_dataloader=DataLoader(ModbusFlowStream(
                shuffle=False,
                chunk_size=1,
                batch_size=64,
                csv_files=val_files,
                scalers=loaded_scalers['network-wide']['min_max_scalers'],
            ),batch_size=1,shuffle=False)
    test_dataloader=DataLoader(ModbusFlowStream(
                shuffle=False,
                chunk_size=1,
                batch_size=64,
                csv_files=test_files,
                scalers=loaded_scalers['network-wide']['min_max_scalers'],
            ),batch_size=1,shuffle=False)
    for trained_model in {Trained_AE_model,Trained_VAE_model,Trained_AAE_model}:
        print("*"*10,trained_model._get_name(),10*"*")
        train_eval(trained_model,None,val_dataloader,test_dataloader,shuffle_files=False,num_epochs=1,eval_epoch=1,criterion_method="mse",train_model=False,learning_rates=[0],weight_decays=[0])
        

scenario : compromised-scada
----------- benign valid files: 4 ['./ModbusDataset/benign/network-wide-pcap-capture/network-wide/ready/network-wide-normal-28-labeled.csv', './ModbusDataset/benign/network-wide-pcap-capture/network-wide/ready/network-wide-normal-22-labeled.csv', './ModbusDataset/benign/network-wide-pcap-capture/network-wide/ready/network-wide-normal-25-labeled.csv', './ModbusDataset/benign/network-wide-pcap-capture/network-wide/ready/network-wide-normal-17-labeled.csv']
----------compromised-scada attack  test files :  7 ['./ModbusDataset/attack/compromised-scada/ied1b/ied1b-network-captures/ready/vethc76bd3f-3-labeled.csv', './ModbusDataset/attack/compromised-scada/ied1b/ied1b-network-captures/ready/vethc76bd3f-4-labeled.csv', './ModbusDataset/attack/compromised-scada/ied1b/ied1b-network-captures/ready/vethc76bd3f-1-labeled.csv', './ModbusDataset/attack/compromised-scada/ied1b/ied1b-network-captures/ready/vethc76bd3f-7-labeled.csv', './ModbusDataset/attack/compromised-sca

In [52]:

## TEST RESULTS WITHOUT IED1B TRAFFIC
modbus = ModbusDataset(dataset_directory,"ready")

for scenario in {"external","compromised-scada"}:

    print(scenario)
    test_files=modbus.dataset["attack_dataset_dir"][scenario]
    nodes_to_exclude = [
    "ied1b",
    "network-wide",
    "substation-wide-capture",
    "scada-hmi-network-capture",
    "trust-scada-hmi",
    "central-agent",
    "ied4c"
    ]
    exclude = [col for col in test_files if any(node in col for node in nodes_to_exclude)]
    [test_files.remove(c) for c in exclude if c in test_files]
    print("exclude",len(exclude),exclude)
    print("filtered test files without ied1b",len(test_files),test_files)
    for test_file in test_files: 
        print(test_file)
        val_dataloader=DataLoader(ModbusFlowStream(
                    shuffle=False,
                    chunk_size=1,
                    batch_size=64,
                    csv_files=val_files,
                    scalers=loaded_scalers['network-wide']['min_max_scalers'],
                ),batch_size=1,shuffle=False)
        test_dataloader=DataLoader(ModbusFlowStream(
                    shuffle=False,
                    chunk_size=1,
                    batch_size=64,
                    csv_files=[test_file],
                    scalers=loaded_scalers['network-wide']['min_max_scalers'],
                ),batch_size=1,shuffle=False)
        for trained_model in {Trained_AE_model,Trained_VAE_model,Trained_AAE_model}:
            print("*"*10,trained_model._get_name(),10*"*")
            train_eval(trained_model,None,val_dataloader,test_dataloader,shuffle_files=False,num_epochs=1,eval_epoch=1,criterion_method="mse",train_model=False,learning_rates=[0],weight_decays=[0])
            

compromised-scada
exclude 51 ['./ModbusDataset/attack/compromised-scada/ied4c/ied4c-network-captures/ready/vethe685ac9-0-labeled.csv', './ModbusDataset/attack/compromised-scada/ied4c/ied4c-network-captures/ready/vethe685ac9-5-labeled.csv', './ModbusDataset/attack/compromised-scada/ied4c/ied4c-network-captures/ready/vethe685ac9-1-labeled.csv', './ModbusDataset/attack/compromised-scada/ied4c/ied4c-network-captures/ready/vethe685ac9-2-labeled.csv', './ModbusDataset/attack/compromised-scada/ied4c/ied4c-network-captures/ready/vethe685ac9-4-labeled.csv', './ModbusDataset/attack/compromised-scada/ied4c/ied4c-network-captures/ready/vethe685ac9-3-labeled.csv', './ModbusDataset/attack/compromised-scada/ied1b/ied1b-network-captures/ready/vethc76bd3f-6-labeled.csv', './ModbusDataset/attack/compromised-scada/ied1b/ied1b-network-captures/ready/vethc76bd3f-3-labeled.csv', './ModbusDataset/attack/compromised-scada/ied1b/ied1b-network-captures/ready/vethc76bd3f-4-labeled.csv', './ModbusDataset/attack/c

### Part b: Federated learning 
####  non iid distribution of dataset (ip\node based)

In [7]:
# ==============================================================================
# 1. SETUP: INSTALL LIBRARIES AND IMPORT DEPENDENCIES
# ==============================================================================
# In a Kaggle notebook, run this cell first to install the necessary libraries.
# !pip install -q flwr[simulation] torch torchvision pandas scikit-learn matplotlib seaborn


In [7]:

from collections import OrderedDict
from typing import Dict, List, Tuple, Optional , Union
import os 
import flwr as fl
import ray
from flwr.common import FitRes, Scalar,Context, ndarrays_to_parameters, parameters_to_ndarrays
from flwr.server.client_proxy import ClientProxy

import random

# Suppress warning messages for a cleaner output
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
# Set a seed for reproducibility
SEED = 20
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

#global device
dataset_directory = "./ModbusDataset/" 
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Training on {DEVICE}")
modbus = ModbusDataset(dataset_directory,"ready")


Training on cuda:0


In [8]:

# ==============================================================================
#  FEDERATED LEARNING CLIENT: FlowerClient
# ==============================================================================
class FlowerClient(fl.client.NumPyClient):
    """Flower client for training."""
    def __init__(self, cid, model, trainloader,valloader):
        self.cid = cid
        self.model = model
        self.train_dataloader = trainloader
        self.val_dataloader = valloader
    def get_parameters(self, config):
        return [val.cpu().numpy() for _, val in self.model.state_dict().items()]

    def set_parameters(self, parameters):
        params_dict = zip(self.model.state_dict().keys(), parameters)
        state_dict = OrderedDict({k: torch.tensor(v) for k, v in params_dict})
        self.model.load_state_dict(state_dict, strict=True)

    def fit(self, parameters, config):
        self.set_parameters(parameters)
        model =self.model
        lr = cfg.LEARNING_RATE
        wd= cfg.WEIGHT_DECAY
        
        criterion = nn.MSELoss(reduction='sum').to(DEVICE)
        if model._get_name()=="AdversarialAutoencoder":
            adversarial_criterion= nn.BCELoss(reduction="sum")
            optimizer_D = optim.Adam(model.discriminator.parameters(), lr=lr, weight_decay=wd)
            optimizer_G =  optim.Adam(list(model.encoder.parameters()) + list(model.decoder.parameters()), lr=lr, weight_decay=wd)
        else:
            AE_optimizer = optim.Adam(model.parameters(), lr=lr,weight_decay=wd)

        if cfg.STRATEGY == "FED_PROX":
            global_params_dict = {
                k: torch.tensor(v, device=DEVICE) 
                for k, v in zip(self.model.state_dict().keys(), parameters)
            }

        for epoch in range(cfg.LOCAL_EPOCHS):
            time_1 = time.time()
            model.train()
            train_loss = 0
            ## for AAE
            Discriminator_loss = 0
            for sequences, _ in self.train_dataloader:
                sequences=sequences.squeeze().to(DEVICE)
                if model._get_name()=="AdversarialAutoencoder":
                    target_ones= torch.ones(sequences.size(0), 1,device=DEVICE,dtype=torch.float)
                    target_zeros= torch.zeros(sequences.size(0), 1,device=DEVICE,dtype=torch.float)
                    random_latent = torch.randn(sequences.size(0), 2, device=DEVICE)
                    optimizer_G.zero_grad()
                    fake_z,decoded_seq = model(sequences)
                    G_loss = 0.001*adversarial_criterion(model.discriminator(fake_z),target_ones ) + 0.999*criterion(decoded_seq, sequences)
                    if cfg.STRATEGY == "FED_PROX":
                        proximal_term_G = 0.0
                        # Proximal term for ENCODER
                        for name, local_param in model.encoder.named_parameters():
                            global_param = global_params_dict['encoder.' + name]
                            proximal_term_G += torch.pow((local_param - global_param).norm(2), 2)
                        # Proximal term for DECODER
                        for name, local_param in model.decoder.named_parameters():
                            global_param = global_params_dict['decoder.' + name]
                            proximal_term_G += torch.pow((local_param - global_param).norm(2), 2)
                        
                        G_loss += (cfg.PROXIMAL_MU / 2) * proximal_term_G
                    G_loss.backward()
                    optimizer_G.step()
                    # 2) discriminator loss
                    optimizer_D.zero_grad()
                    real_loss = adversarial_criterion(model.discriminator(random_latent), target_ones)
                    fake_loss = adversarial_criterion(model.discriminator(fake_z.detach()),  target_zeros)
                    D_loss =  0.5*(real_loss + fake_loss)
                    if cfg.STRATEGY == "FED_PROX":
                        proximal_term_D = 0.0
                        # Proximal term for DISCRIMINATOR
                        for name, local_param in model.discriminator.named_parameters():
                            global_param = global_params_dict['discriminator.' + name]
                            proximal_term_D += torch.pow((local_param - global_param).norm(2), 2)
                        D_loss += (cfg.PROXIMAL_MU / 2) * proximal_term_D
            
                    D_loss.backward()
                    optimizer_D.step()
                    train_loss+=G_loss.item()
                    Discriminator_loss+=D_loss.item()   
                else:
                    AE_optimizer.zero_grad()
                    if model._get_name()=="AE":
                        recon = model(sequences)
                        loss = criterion(recon, sequences) / sequences.size(0)
                    elif model._get_name()=="VAE" :
                        recon, mu, logvar = model(sequences)
                        loss = vae_loss_function(recon, sequences, mu, logvar) /sequences.size(0)
                    
                    if cfg.STRATEGY == "FED_PROX":
                        proximal_term = 0.0
                        for name, local_param in model.encoder.named_parameters():
                            global_param = global_params_dict['encoder.' + name]
                            proximal_term += torch.pow((local_param - global_param).norm(2), 2)
                        # Proximal term for DECODER
                        for name, local_param in model.decoder.named_parameters():
                            global_param = global_params_dict['decoder.' + name]
                            proximal_term += torch.pow((local_param - global_param).norm(2), 2)
                        loss+= (cfg.PROXIMAL_MU / 2) *proximal_term
                    loss.backward()
                    AE_optimizer.step()
                    train_loss += loss.item()
            print(f"Train : time {(time.time()-time_1):.2f} s",
            f"Epoch {epoch+1}")
            num_samples=len(self.train_dataloader)
            if model._get_name()=="AdversarialAutoencoder":
                print(f"Generator Loss: {train_loss / num_samples:.4f}",
                    f"Discriminator Loss: {Discriminator_loss / num_samples:.4f}")
            else:
                print(f"Train Loss: {train_loss / num_samples:.4f}")
        local_threshold,len_val_samples=self._calculate_threshold()
        metrics = {"threshold": local_threshold,"len": len_val_samples}
        return self.get_parameters(config={}), num_samples, metrics

    def _calculate_threshold(self):
        model = self.model
        model.to(DEVICE)
        model.eval()
        eval_criterion = nn.MSELoss(reduction='none').to(DEVICE)

        # Evaluate part
        all_val_losses = []
        all_val_labels = []
        with torch.no_grad():
            for sequences, labels in self.val_dataloader:
                sequences = sequences.squeeze().to(DEVICE) 
                if model._get_name()=="AE":
                    recon = model(sequences)
                elif model._get_name()=="VAE" :
                    recon, _, _ = model(sequences)
                elif model._get_name()=="AdversarialAutoencoder":
                    _,recon= model(sequences)
                val_loss = eval_criterion(recon, sequences)
                if val_loss.dim() > 1:
                    val_loss = val_loss
                else:
                    val_loss = val_loss.unsqueeze(dim=0)
                    labels = labels.unsqueeze(dim=0)
                val_loss = val_loss.sum(dim=1)
                all_val_losses.extend(val_loss.cpu().numpy())
                all_val_labels.extend(labels.flatten().cpu().numpy())     
        threshold_1,std_mse = compute_threshold(all_val_losses,k=1)
        all_val_losses = np.array(all_val_losses).squeeze()  
        all_val_labels = np.array(all_val_labels).squeeze()  
        # If intrusion score > threshold, predict 1 (intrusion), else 0 (benign)
        # For FDR, get True Positives (TP) and False Positives (FP)
        
        predictions = (all_val_losses > threshold_1).astype(int)
        accuracy = accuracy_score(all_val_labels, predictions)
        print(f"Val: Accuracy: {accuracy:.4f}  ")
        return threshold_1,len(all_val_losses)
    def evaluate(self, parameters, config):
        #focuses on server-side evaluation, so we can keep this simple
        return 0.0, 0, {}


In [123]:
class FedAnomalyStrategy(fl.server.strategy.FedAvg):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.aggregated_threshold: Optional[float] = None

    def aggregate_fit(
        self,
        server_round: int,
        results: List[Tuple[ClientProxy, FitRes]],
        failures: List[Union[Tuple[ClientProxy, FitRes], BaseException]],
    ) -> Tuple[Optional[fl.common.Parameters], Dict[str, Scalar]]:
        
        aggregated_parameters, _ = super().aggregate_fit(server_round, results, failures)
        candidates = [
            (res.metrics["threshold"],res.metrics["len"]) for _, res in results if "threshold" in res.metrics
        ]
        if candidates:
            threholds=np.array([pair[0] for pair in candidates],dtype=float)
            weights=np.array([pair[1] for pair in candidates],dtype=int)

            self.aggregated_threshold = np.average(threholds,weights=weights)
            print(f"Round {server_round}: Aggregated threshold = {self.aggregated_threshold:.4f}")
        else:
            print("Warning: No thresholds received from clients.")
        return aggregated_parameters, {}

In [124]:

# ==============================================================================
#  SERVER-SIDE LOGIC AND SIMULATION START
# ==============================================================================

def client_function(context:Context ) -> FlowerClient:
    client_id = int(context.node_config["partition-id"])
    trainloader = load_data_from_id(client_id,"client",chunk_size=1)
    valloader = load_data_from_id(client_id,"server",chunk_size=1)
    model = get_model().to(DEVICE)
    return FlowerClient(client_id, model, trainloader,valloader).to_client()

def make_client_fn_with_cache(chunk_size=10000):
    print("...")
    dataloader_cache: Dict[str, DataLoader] = {}
    def client_fn(context:Context ) -> FlowerClient:
        client_id = int(context.node_config["partition-id"])
        if client_id not in dataloader_cache:
            # If not, create it once and store it in the cache
            print(f"Round 1: Loading and caching data for client {client_id}...")
            dataloader_cache[client_id] = load_data_from_id(client_id,"client",chunk_size=chunk_size)
        else:
            print(f"Reusing cached dataloader for client {client_id}...")
        trainloader = dataloader_cache[client_id]
        model = get_model().to(DEVICE)
        return FlowerClient(client_id, model, trainloader).to_client()
    return client_fn


def get_evaluate_fn(model, test_dataloader, strategy: FedAnomalyStrategy):
    """Return an evaluation function for server-side evaluation with caching """
    eval_criterion = nn.MSELoss(reduction='none').to(DEVICE)
    best_f1=0
    best_recall=0
    def evaluate(
        server_round: int,
        parameters: fl.common.NDArrays,
        config: Dict[str, fl.common.Scalar],
        train_model=True
    ) -> Optional[Tuple[float, Dict[str, fl.common.Scalar]]]:
        nonlocal best_f1,best_recall
        params_dict = zip(model.state_dict().keys(), parameters)
        state_dict = OrderedDict({k: torch.tensor(v) for k, v in params_dict})
        model.load_state_dict(state_dict, strict=True)
        model.to(DEVICE)
        model.eval()
        all_test_losses = []
        all_test_labels = []
        temp_best_recall =best_recall
        temp_best_f1 =best_f1
        with torch.no_grad():
            for sequences, labels in test_dataloader:
                sequences = sequences.squeeze().to(DEVICE)
                labels = labels.squeeze().to(DEVICE)
                if model._get_name()=="AE":
                    recon = model(sequences)
                elif model._get_name()=="VAE" :
                    recon, mu, logvar = model(sequences)
                elif model._get_name()=="AdversarialAutoencoder":
                    _,recon= model(sequences)

                intrusion_scores = eval_criterion(recon, sequences)
                if intrusion_scores.dim() > 1:
                    intrusion_scores = intrusion_scores
                else:
                    intrusion_scores = intrusion_scores.unsqueeze(dim=0)
                    labels = labels.unsqueeze(dim=0)
                if intrusion_scores.dim()==3:
                    ##GRU : mean of window
                    intrusion_scores = intrusion_scores.mean(dim=1)
                intrusion_scores = intrusion_scores.sum(dim=1)
                all_test_losses.extend(intrusion_scores.cpu().numpy())
                all_test_labels.extend(labels.cpu().numpy())

        all_test_losses = np.array(all_test_losses)
        all_test_labels = np.array(all_test_labels)
        if strategy.aggregated_threshold is None:
            # Threshold not available yet (e.g., round 0)
            threshold=0
        else:
            threshold = strategy.aggregated_threshold
        test_result = {}

        predictions = (all_test_losses > threshold).astype(int)
        binary_test_labels = (all_test_labels != 0).astype(int)

        # Find the indices where the prediction was incorrect
        misclassified_indices = np.where(binary_test_labels != predictions)[0]

        # Get the original labels for those misclassified instances
        misclassified_original_labels = all_test_labels[misclassified_indices]

        # To get a summary count of which labels were misclassified
        print("Counts of : original binary labels",Counter(binary_test_labels),"predicted binary labels",Counter(predictions))
        print(f"Counts of  original  labels: {dict(sorted(Counter(all_test_labels).items()))}")
        print(f"Counts of misclassified original labels: {dict(sorted(Counter(misclassified_original_labels).items()))}")
        accuracy = accuracy_score(binary_test_labels, predictions)
        f1 = f1_score(binary_test_labels, predictions, zero_division=0)
        recall = recall_score(binary_test_labels, predictions,zero_division=0)
        _, fp, _, tp = confusion_matrix(binary_test_labels, predictions, labels=[0, 1]).ravel()
        # FDR = FP / (FP + TP) 
        if (fp + tp) == 0:
            fdr = 0.0 
        else:
            fdr = fp / (fp + tp)
        test_result[0] = f"threshold={threshold:.4f} ,Test : Accuracy: {accuracy:.4f} Recall : {recall:.4f} FDR: {fdr:.4f}  F1-score: {f1:.4f} "
        print(test_result)
        !mkdir fed_best_models -p
        if f1>best_f1 :
            best_f1=f1
        if recall>best_recall:
            best_recall=recall
        if ((best_recall>temp_best_recall or best_f1 > temp_best_f1) and not(strategy.aggregated_threshold is None)):
            if train_model:
                save_path ="fed_best_models/"+cfg.STRATEGY+"_"+model._get_name()+"_f1_"+f"{best_f1:.2f}" +"_recall_"+f"{best_recall:.2f}" +"_.pth"
                torch.save(model.state_dict(),save_path)
                print("model",model._get_name(),"is saved in" ,save_path )
        return np.sum(all_test_losses)/len(all_test_losses),test_result

    return evaluate


In [125]:

def get_initial_parameters(model_name: str):
    """
    Initializes the model weights using Xavier uniform distribution
    and returns them as a Flower Parameters object.
    """
    
    temp_model = get_model()
    for param in temp_model.parameters():
        if param.dim() > 1:
            nn.init.xavier_uniform_(param)
            
    ndarrays = [val.cpu().numpy() for _, val in temp_model.state_dict().items()]
    return ndarrays_to_parameters(ndarrays)


def load_data_from_id(id: int, node = "client" ,chunk_size=10000):
    """Loads the data for a specific training client."""
    if node == "client":
        file_list = TRAIN_CLIENT_DATA_MAPPING[id]
        shuffle=cfg.SHUFFLE_FILES
    else: # server
        file_list = SERVER_EVALUATION_DATA_MAPPING[id]
        shuffle = False
    ## means load all chunks of data in memory at once 
    if chunk_size==10000:
        train_loader=DataLoader(ModbusFlowStream(
                shuffle=False,
                chunk_size=chunk_size,
                batch_size=1 ,
                csv_files=file_list,
                scalers=loaded_scalers['network-wide']['min_max_scalers'],
            ),batch_size=64,shuffle=shuffle)
    else :
        train_loader=DataLoader(ModbusFlowStream(
                shuffle=shuffle,
                chunk_size=chunk_size,
                batch_size=cfg.BATCH_SIZE ,
                csv_files=file_list,
                scalers=loaded_scalers['network-wide']['min_max_scalers'],
            ),batch_size=1,shuffle=False)
    return train_loader

def get_model():
    """Returns the model specified in the config."""
    if cfg.MODEL_NAME == "VAE":
        return VAE(input_dim=cfg.INPUT_DIM)
    elif cfg.MODEL_NAME == "AE":
        return AE(input_dim=cfg.INPUT_DIM)
    elif cfg.MODEL_NAME =="AAE":
        return AdversarialAutoencoder()#76
    else:
        raise ValueError(f"Unknown model name: {cfg.MODEL_NAME}. Choose 'AE' or 'VAE' or 'AAE'.")

def set_server_strategy():
    if cfg.STRATEGY == "FED_PROX":
        print(f"Using FedProx strategy with {cfg.MODEL_NAME} model.")
    else:
        print(f"Using FedAvg strategy with {cfg.MODEL_NAME} model.")
    strategy = FedAnomalyStrategy(
        fraction_fit=1.0,
        fraction_evaluate=0.0,
        min_fit_clients=cfg.NUM_TRAIN_CLIENTS,
        min_available_clients=cfg.NUM_TRAIN_CLIENTS,
        min_evaluate_clients=0,
        initial_parameters=get_initial_parameters(cfg.MODEL_NAME)
    )
    model = get_model().to(DEVICE)
    testloader = load_data_from_id(-1,"server",chunk_size=1)
    evaluate_function = get_evaluate_fn(model, testloader, strategy)
    strategy.evaluate_fn=evaluate_function
    return strategy


#### test on compromised scada attack

In [134]:

# # ==============================================================================
# #  DATA Distribution
# # ==============================================================================


SEED=20

torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

network_train_files=[col for col in modbus.dataset["benign_dataset_dir"] if col.find("network-wide")!=-1][:]
test_files= [col for col in modbus.dataset["attack_dataset_dir"]["compromised-scada"] if col.find("ied1b")!=-1]
### missed attack logs files for the day 21 for ied1b which can reduce the accuracy.
test_files.remove(dataset_directory+"attack/compromised-scada/ied1b/ied1b-network-captures/ready/vethc76bd3f-6-labeled.csv")

random.shuffle(network_train_files)
random.shuffle(test_files)

num_splits = 4
train_files = list(np.array_split(network_train_files, num_splits))

SERVER_EVALUATION_DATA_MAPPING = [list(traffic_file[-1:]) for traffic_file in train_files ]
SERVER_EVALUATION_DATA_MAPPING.extend([test_files[:2]])
TRAIN_CLIENT_DATA_MAPPING = [list(traffic_file[:-1]) for traffic_file in train_files ]

for i in range(num_splits):
    print("node_",i+1,"train:",len(TRAIN_CLIENT_DATA_MAPPING[i]),TRAIN_CLIENT_DATA_MAPPING[i])
    print("node_",i+1,"val:",len(SERVER_EVALUATION_DATA_MAPPING[i]),SERVER_EVALUATION_DATA_MAPPING[i])

print("test",len(SERVER_EVALUATION_DATA_MAPPING[-1]),SERVER_EVALUATION_DATA_MAPPING[-1])


node_ 1 train: 4 ['./ModbusDataset/benign/network-wide-pcap-capture/network-wide/ready/network-wide-normal-27-labeled.csv', './ModbusDataset/benign/network-wide-pcap-capture/network-wide/ready/network-wide-normal-29-labeled.csv', './ModbusDataset/benign/network-wide-pcap-capture/network-wide/ready/network-wide-normal-20-labeled.csv', './ModbusDataset/benign/network-wide-pcap-capture/network-wide/ready/network-wide-normal-19-labeled.csv']
node_ 1 val: 1 ['./ModbusDataset/benign/network-wide-pcap-capture/network-wide/ready/network-wide-normal-15-labeled.csv']
node_ 2 train: 4 ['./ModbusDataset/benign/network-wide-pcap-capture/network-wide/ready/network-wide-normal-14-labeled.csv', './ModbusDataset/benign/network-wide-pcap-capture/network-wide/ready/network-wide-normal-28-labeled.csv', './ModbusDataset/benign/network-wide-pcap-capture/network-wide/ready/network-wide-normal-31-labeled.csv', './ModbusDataset/benign/network-wide-pcap-capture/network-wide/ready/network-wide-normal-16-labeled.

In [136]:

# ==============================================================================
#  CONFIGURATION: TWEAK  FEDERATED LEARNING EXPERIMENT
# ==============================================================================
class Config:
    """Global configuration class for the federated learning experiment."""
    # --- FL Parameters ---
    NUM_TRAIN_CLIENTS = num_splits
    NUM_ROUNDS = 10
    LOCAL_EPOCHS = 2
    BATCH_SIZE = 64
    LEARNING_RATE = 1e-3
    WEIGHT_DECAY = 1e-5
    
    # --- Strategy Selection ---
    # Choose from "FED_AVG", "FED_PROX"
    STRATEGY = "FED_AVG" 
    PROXIMAL_MU = 1e-2 # Proximal term for FedProx
    # --- Model Selection ---
    # Choose from "AE" (Autoencoder) or "VAE" (Variational Autoencoder) or "AdverserialAutoencoder"
    MODEL_NAME = "AE"
    INPUT_DIM = 76
    # --- Anomaly Detection ---
    SHUFFLE_FILES=  True
# Instantiate the configuration
cfg = Config()

loaded_scalers = load_scalers("fitted_scalers")




Successfully loaded scalers for 'network-wide'


In [16]:
# ## cache all client data in memory
# client_fn=make_client_fn_with_cache()

# for i in range(3):
#     context= Context(0,i,{},0,0)
#     context.node_config["partition-id"]=i
#     client_fn(context)


In [137]:
import warnings

warnings.filterwarnings("ignore", category=UserWarning, message=".*DEPRECATED FEATURE: flwr.simulation.start_simulation.*")
warnings.filterwarnings("ignore", category=DeprecationWarning)

strategy=set_server_strategy()


history = fl.simulation.start_simulation(
    client_fn=client_function,
    num_clients=cfg.NUM_TRAIN_CLIENTS,
    config=fl.server.ServerConfig(num_rounds=cfg.NUM_ROUNDS),
    strategy=strategy,
    client_resources={"num_cpus": 1, "num_gpus": 1} if DEVICE.type == "cuda" else {"num_cpus": 1},
)
print("Federated learning simulation finished.")

Using FedAvg strategy with AE model.


	Instead, use the `flwr run` CLI command to start a local simulation in your Flower app, as shown for example below:

		$ flwr new  # Create a new Flower app from a template

		$ flwr run  # Run the Flower app in Simulation Mode

	Using `start_simulation()` is deprecated.

            This is a deprecated feature. It will be removed
            entirely in future versions of Flower.
        
[92mINFO [0m:      Starting Flower simulation, config: num_rounds=10, no round_timeout
2025-07-28 00:06:10,283	INFO worker.py:1771 -- Started a local Ray instance.
[92mINFO [0m:      Flower VCE: Ray initialized with resources: {'accelerator_type:G': 1.0, 'node:__internal_head__': 1.0, 'CPU': 2.0, 'memory': 2785542144.0, 'object_store_memory': 1392771072.0, 'GPU': 1.0, 'node:172.24.78.91': 1.0}
[92mINFO [0m:      Optimize your simulation with Flower VCE: https://flower.ai/docs/framework/how-to-run-simulations.html
[92mINFO [0m:      Flower VCE: Resources for each Virtual Client: {'num_cpus':

Counts of : original binary labels Counter({0: 97486, 1: 41564}) predicted binary labels Counter({1: 139050})
Counts of  original  labels: {0: 97486, 1: 41306, 2: 59, 3: 40, 4: 55, 5: 40, 6: 36, 7: 28}
Counts of misclassified original labels: {0: 97486}
{0: 'threshold=0.0000 ,Test : Accuracy: 0.2989 Recall : 1.0000 FDR: 0.7011  F1-score: 0.4603 '}


[92mINFO [0m:      initial parameters (loss, other metrics): 17.23868932038835, {0: 'threshold=0.0000 ,Test : Accuracy: 0.2989 Recall : 1.0000 FDR: 0.7011  F1-score: 0.4603 '}
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 1]
[92mINFO [0m:      configure_fit: strategy sampled 4 clients (out of 4)


[36m(ClientAppActor pid=313681)[0m Train : time 42.54 s Epoch 1
[36m(ClientAppActor pid=313681)[0m Train Loss: 0.7803
[36m(ClientAppActor pid=313681)[0m Train : time 30.56 s Epoch 2
[36m(ClientAppActor pid=313681)[0m Train Loss: 0.1295
[36m(ClientAppActor pid=313681)[0m -----------mse_loss mean :  0.0417 std: 0.4311
[36m(ClientAppActor pid=313681)[0m Val: Accuracy: 0.9926  
[36m(ClientAppActor pid=313681)[0m Train : time 46.75 s Epoch 1
[36m(ClientAppActor pid=313681)[0m Train Loss: 0.6489
[36m(ClientAppActor pid=313681)[0m Train : time 46.29 s Epoch 2
[36m(ClientAppActor pid=313681)[0m Train Loss: 0.0286
[36m(ClientAppActor pid=313681)[0m -----------mse_loss mean :  0.0161 std: 0.2168
[36m(ClientAppActor pid=313681)[0m Val: Accuracy: 0.9944  
[36m(ClientAppActor pid=313681)[0m Train : time 49.80 s Epoch 1
[36m(ClientAppActor pid=313681)[0m Train Loss: 0.2919


: 

In [None]:

# Instantiate the configuration
cfg.STRATEGY="FED_PROX"
strategy=set_server_strategy()

# --- Start the Simulation ---
print("Starting federated learning simulation...")
history = fl.simulation.start_simulation(
    client_fn=client_function,
    num_clients=cfg.NUM_TRAIN_CLIENTS,
    config=fl.server.ServerConfig(num_rounds=cfg.NUM_ROUNDS),
    strategy=strategy,
    client_resources={"num_cpus": 1, "num_gpus": 1} if DEVICE.type == "cuda" else {"num_cpus": 1},
)
print("Federated learning simulation finished.")

	Instead, use the `flwr run` CLI command to start a local simulation in your Flower app, as shown for example below:

		$ flwr new  # Create a new Flower app from a template

		$ flwr run  # Run the Flower app in Simulation Mode

	Using `start_simulation()` is deprecated.

            This is a deprecated feature. It will be removed
            entirely in future versions of Flower.
        
[92mINFO [0m:      Starting Flower simulation, config: num_rounds=5, no round_timeout


Using FedProx strategy with AE model.
Starting federated learning simulation...


2025-07-26 23:34:59,298	INFO worker.py:1771 -- Started a local Ray instance.
[92mINFO [0m:      Flower VCE: Ray initialized with resources: {'accelerator_type:G': 1.0, 'node:__internal_head__': 1.0, 'CPU': 2.0, 'object_store_memory': 1709929267.0, 'memory': 3419858535.0, 'GPU': 1.0, 'node:172.24.78.91': 1.0}
[92mINFO [0m:      Optimize your simulation with Flower VCE: https://flower.ai/docs/framework/how-to-run-simulations.html
[92mINFO [0m:      Flower VCE: Resources for each Virtual Client: {'num_cpus': 1, 'num_gpus': 1}
[92mINFO [0m:      Flower VCE: Creating VirtualClientEngineActorPool with 1 actors
[92mINFO [0m:      [INIT]
[92mINFO [0m:      Using initial global parameters provided by strategy
[92mINFO [0m:      Starting evaluation of initial global parameters


Counts of : original binary labels Counter({0: 156058, 1: 65271}) predicted binary labels Counter({1: 221329})
Counts of  original  labels: {0: 156058, 1: 65233, 2: 1, 3: 1, 4: 1, 5: 2, 6: 32, 7: 1}
Counts of misclassified original labels: {0: 156058}
{0: 'threshold=0.0000 ,Test : Accuracy: 0.2949 Recall : 1.0000 FDR: 0.7051  F1-score: 0.4555 '}


[92mINFO [0m:      initial parameters (loss, other metrics): 16.93721112009723, {0: 'threshold=0.0000 ,Test : Accuracy: 0.2949 Recall : 1.0000 FDR: 0.7051  F1-score: 0.4555 '}
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 1]
[92mINFO [0m:      configure_fit: strategy sampled 4 clients (out of 4)


[36m(ClientAppActor pid=21780)[0m Train : time 73.86 s Epoch 1
[36m(ClientAppActor pid=21780)[0m Train Loss: 0.4775
[36m(ClientAppActor pid=21780)[0m Train : time 75.82 s Epoch 2
[36m(ClientAppActor pid=21780)[0m Train Loss: 0.1543
[36m(ClientAppActor pid=21780)[0m Train : time 72.74 s Epoch 3
[36m(ClientAppActor pid=21780)[0m Train Loss: 0.1531
[36m(ClientAppActor pid=21780)[0m -----------mse_loss mean :  0.0408 std: 0.3339
[36m(ClientAppActor pid=21780)[0m Val: Accuracy: 0.9929  
[36m(ClientAppActor pid=21780)[0m Train : time 77.81 s Epoch 1
[36m(ClientAppActor pid=21780)[0m Train Loss: 0.4767
[36m(ClientAppActor pid=21780)[0m Train : time 72.74 s Epoch 2
[36m(ClientAppActor pid=21780)[0m Train Loss: 0.1574
[36m(ClientAppActor pid=21780)[0m Train : time 74.83 s Epoch 3
[36m(ClientAppActor pid=21780)[0m Train Loss: 0.1562
[36m(ClientAppActor pid=21780)[0m -----------mse_loss mean :  0.0415 std: 0.3669
[36m(ClientAppActor pid=21780)[0m Val: Accuracy: 0.9

[92mINFO [0m:      aggregate_fit: received 4 results and 0 failures


[36m(ClientAppActor pid=21780)[0m -----------mse_loss mean :  0.0344 std: 0.3227
[36m(ClientAppActor pid=21780)[0m Val: Accuracy: 0.9932  
Round 1: Aggregated threshold = 0.3713
Counts of : original binary labels Counter({0: 156058, 1: 65271}) predicted binary labels Counter({0: 134173, 1: 87156})
Counts of  original  labels: {0: 156058, 1: 65233, 2: 1, 3: 1, 4: 1, 5: 2, 6: 32, 7: 1}
Counts of misclassified original labels: {0: 22038, 1: 153}
{0: 'threshold=0.3713 ,Test : Accuracy: 0.8997 Recall : 0.9977 FDR: 0.2529  F1-score: 0.8544 '}


[92mINFO [0m:      fit progress: (1, 0.6500179032119605, {0: 'threshold=0.3713 ,Test : Accuracy: 0.8997 Recall : 0.9977 FDR: 0.2529  F1-score: 0.8544 '}, 906.6179958290004)
[92mINFO [0m:      configure_evaluate: no clients selected, skipping evaluation
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 2]
[92mINFO [0m:      configure_fit: strategy sampled 4 clients (out of 4)


model AE is saved in fed_best_models/FED_PROX_AE_f1_0.85_recall_1.00_.pth
[36m(ClientAppActor pid=21780)[0m Train : time 73.12 s Epoch 1
[36m(ClientAppActor pid=21780)[0m Train Loss: 0.0327
[36m(ClientAppActor pid=21780)[0m Train : time 74.16 s Epoch 2
[36m(ClientAppActor pid=21780)[0m Train Loss: 0.0315
[36m(ClientAppActor pid=21780)[0m Train : time 70.83 s Epoch 3
[36m(ClientAppActor pid=21780)[0m Train Loss: 0.0314
[36m(ClientAppActor pid=21780)[0m -----------mse_loss mean :  0.0246 std: 0.2598
[36m(ClientAppActor pid=21780)[0m Val: Accuracy: 0.9928  
[36m(ClientAppActor pid=21780)[0m Train : time 54.80 s Epoch 1
[36m(ClientAppActor pid=21780)[0m Train Loss: 0.0331
[36m(ClientAppActor pid=21780)[0m Train : time 56.85 s Epoch 2
[36m(ClientAppActor pid=21780)[0m Train Loss: 0.0314
[36m(ClientAppActor pid=21780)[0m Train : time 54.40 s Epoch 3
[36m(ClientAppActor pid=21780)[0m Train Loss: 0.0314
[36m(ClientAppActor pid=21780)[0m -----------mse_loss mean : 

[92mINFO [0m:      aggregate_fit: received 4 results and 0 failures


Round 2: Aggregated threshold = 0.2641
[36m(ClientAppActor pid=21780)[0m -----------mse_loss mean :  0.0204 std: 0.2236
[36m(ClientAppActor pid=21780)[0m Val: Accuracy: 0.9953  
Counts of : original binary labels Counter({0: 156058, 1: 65271}) predicted binary labels Counter({0: 155269, 1: 66060})
Counts of  original  labels: {0: 156058, 1: 65233, 2: 1, 3: 1, 4: 1, 5: 2, 6: 32, 7: 1}
Counts of misclassified original labels: {0: 954, 1: 165}
{0: 'threshold=0.2641 ,Test : Accuracy: 0.9949 Recall : 0.9975 FDR: 0.0144  F1-score: 0.9915 '}


[92mINFO [0m:      fit progress: (2, 0.4974349201302134, {0: 'threshold=0.2641 ,Test : Accuracy: 0.9949 Recall : 0.9975 FDR: 0.0144  F1-score: 0.9915 '}, 1794.9931981460004)
[92mINFO [0m:      configure_evaluate: no clients selected, skipping evaluation
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 3]
[92mINFO [0m:      configure_fit: strategy sampled 4 clients (out of 4)


model AE is saved in fed_best_models/FED_PROX_AE_f1_0.99_recall_1.00_.pth
[36m(ClientAppActor pid=21780)[0m Train : time 72.01 s Epoch 1
[36m(ClientAppActor pid=21780)[0m Train Loss: 0.0205
[36m(ClientAppActor pid=21780)[0m Train : time 75.21 s Epoch 2
[36m(ClientAppActor pid=21780)[0m Train Loss: 0.0202
[36m(ClientAppActor pid=21780)[0m Train : time 77.52 s Epoch 3
[36m(ClientAppActor pid=21780)[0m Train Loss: 0.0201
[36m(ClientAppActor pid=21780)[0m -----------mse_loss mean :  0.0175 std: 0.1873
[36m(ClientAppActor pid=21780)[0m Val: Accuracy: 0.9928  
[36m(ClientAppActor pid=21780)[0m Train : time 58.85 s Epoch 1
[36m(ClientAppActor pid=21780)[0m Train Loss: 0.0201
[36m(ClientAppActor pid=21780)[0m Train : time 58.96 s Epoch 2
[36m(ClientAppActor pid=21780)[0m Train Loss: 0.0196
[36m(ClientAppActor pid=21780)[0m Train : time 58.11 s Epoch 3
[36m(ClientAppActor pid=21780)[0m Train Loss: 0.0195
[36m(ClientAppActor pid=21780)[0m -----------mse_loss mean : 

[92mINFO [0m:      aggregate_fit: received 4 results and 0 failures


Round 3: Aggregated threshold = 0.1922
[36m(ClientAppActor pid=21780)[0m -----------mse_loss mean :  0.0152 std: 0.1587
[36m(ClientAppActor pid=21780)[0m Val: Accuracy: 0.9953  
Counts of : original binary labels Counter({0: 156058, 1: 65271}) predicted binary labels Counter({0: 155264, 1: 66065})
Counts of  original  labels: {0: 156058, 1: 65233, 2: 1, 3: 1, 4: 1, 5: 2, 6: 32, 7: 1}
Counts of misclassified original labels: {0: 955, 1: 161}
{0: 'threshold=0.1922 ,Test : Accuracy: 0.9950 Recall : 0.9975 FDR: 0.0145  F1-score: 0.9915 '}


[92mINFO [0m:      fit progress: (3, 0.6186465482381432, {0: 'threshold=0.1922 ,Test : Accuracy: 0.9950 Recall : 0.9975 FDR: 0.0145  F1-score: 0.9915 '}, 2707.8700235740007)
[92mINFO [0m:      configure_evaluate: no clients selected, skipping evaluation
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 4]
[92mINFO [0m:      configure_fit: strategy sampled 4 clients (out of 4)


model AE is saved in fed_best_models/FED_PROX_AE_f1_0.99_recall_1.00_.pth
[36m(ClientAppActor pid=21780)[0m Train : time 63.02 s Epoch 1
[36m(ClientAppActor pid=21780)[0m Train Loss: 0.0146
[36m(ClientAppActor pid=21780)[0m Train : time 64.00 s Epoch 2
[36m(ClientAppActor pid=21780)[0m Train Loss: 0.0144
[36m(ClientAppActor pid=21780)[0m Train : time 64.27 s Epoch 3
[36m(ClientAppActor pid=21780)[0m Train Loss: 0.0144
[36m(ClientAppActor pid=21780)[0m -----------mse_loss mean :  0.0106 std: 0.1358
[36m(ClientAppActor pid=21780)[0m Val: Accuracy: 0.9953  
[36m(ClientAppActor pid=21780)[0m Train : time 75.33 s Epoch 1
[36m(ClientAppActor pid=21780)[0m Train Loss: 0.0155
[36m(ClientAppActor pid=21780)[0m Train : time 73.97 s Epoch 2
[36m(ClientAppActor pid=21780)[0m Train Loss: 0.0154
[36m(ClientAppActor pid=21780)[0m Train : time 71.95 s Epoch 3
[36m(ClientAppActor pid=21780)[0m Train Loss: 0.0154
[36m(ClientAppActor pid=21780)[0m -----------mse_loss mean : 

[92mINFO [0m:      aggregate_fit: received 4 results and 0 failures


[36m(ClientAppActor pid=21780)[0m -----------mse_loss mean :  0.0105 std: 0.1204
Round 4: Aggregated threshold = 0.1547
[36m(ClientAppActor pid=21780)[0m Val: Accuracy: 0.9944  
Counts of : original binary labels Counter({0: 156058, 1: 65271}) predicted binary labels Counter({0: 155257, 1: 66072})
Counts of  original  labels: {0: 156058, 1: 65233, 2: 1, 3: 1, 4: 1, 5: 2, 6: 32, 7: 1}
Counts of misclassified original labels: {0: 955, 1: 154}
{0: 'threshold=0.1547 ,Test : Accuracy: 0.9950 Recall : 0.9976 FDR: 0.0145  F1-score: 0.9916 '}


[92mINFO [0m:      fit progress: (4, 0.7948349798264123, {0: 'threshold=0.1547 ,Test : Accuracy: 0.9950 Recall : 0.9976 FDR: 0.0145  F1-score: 0.9916 '}, 3601.7165690419997)
[92mINFO [0m:      configure_evaluate: no clients selected, skipping evaluation
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 5]
[92mINFO [0m:      configure_fit: strategy sampled 4 clients (out of 4)


model AE is saved in fed_best_models/FED_PROX_AE_f1_0.99_recall_1.00_.pth
[36m(ClientAppActor pid=21780)[0m Train : time 71.13 s Epoch 1
[36m(ClientAppActor pid=21780)[0m Train Loss: 0.0120
[36m(ClientAppActor pid=21780)[0m Train : time 74.82 s Epoch 2
[36m(ClientAppActor pid=21780)[0m Train Loss: 0.0120
[36m(ClientAppActor pid=21780)[0m Train : time 74.42 s Epoch 3
[36m(ClientAppActor pid=21780)[0m Train Loss: 0.0119
[36m(ClientAppActor pid=21780)[0m -----------mse_loss mean :  0.0094 std: 0.1092
[36m(ClientAppActor pid=21780)[0m Val: Accuracy: 0.9944  
[36m(ClientAppActor pid=21780)[0m Train : time 62.52 s Epoch 1
[36m(ClientAppActor pid=21780)[0m Train Loss: 0.0124
[36m(ClientAppActor pid=21780)[0m Train : time 61.94 s Epoch 2
[36m(ClientAppActor pid=21780)[0m Train Loss: 0.0122
[36m(ClientAppActor pid=21780)[0m Train : time 61.64 s Epoch 3
[36m(ClientAppActor pid=21780)[0m Train Loss: 0.0123
[36m(ClientAppActor pid=21780)[0m -----------mse_loss mean : 

[92mINFO [0m:      aggregate_fit: received 4 results and 0 failures


[36m(ClientAppActor pid=21780)[0m -----------mse_loss mean :  0.0132 std: 0.1290
Round 5: Aggregated threshold = 0.1373
[36m(ClientAppActor pid=21780)[0m Val: Accuracy: 0.9928  
Counts of : original binary labels Counter({0: 156058, 1: 65271}) predicted binary labels Counter({0: 155254, 1: 66075})
Counts of  original  labels: {0: 156058, 1: 65233, 2: 1, 3: 1, 4: 1, 5: 2, 6: 32, 7: 1}
Counts of misclassified original labels: {0: 957, 1: 153}
{0: 'threshold=0.1373 ,Test : Accuracy: 0.9950 Recall : 0.9977 FDR: 0.0145  F1-score: 0.9915 '}


[92mINFO [0m:      fit progress: (5, 0.7993914743436242, {0: 'threshold=0.1373 ,Test : Accuracy: 0.9950 Recall : 0.9977 FDR: 0.0145  F1-score: 0.9915 '}, 4487.011939446001)
[92mINFO [0m:      configure_evaluate: no clients selected, skipping evaluation
[92mINFO [0m:      
[92mINFO [0m:      [SUMMARY]
[92mINFO [0m:      Run finished 5 round(s) in 4487.01s
[92mINFO [0m:      	History (loss, centralized):
[92mINFO [0m:      		round 0: 16.93721112009723
[92mINFO [0m:      		round 1: 0.6500179032119605
[92mINFO [0m:      		round 2: 0.4974349201302134
[92mINFO [0m:      		round 3: 0.6186465482381432
[92mINFO [0m:      		round 4: 0.7948349798264123
[92mINFO [0m:      		round 5: 0.7993914743436242
[92mINFO [0m:      	History (metrics, centralized):
[92mINFO [0m:      	{0: [(0,
[92mINFO [0m:      	      'threshold=0.0000 ,Test : Accuracy: 0.2949 Recall : 1.0000 FDR: 0.7051  '
[92mINFO [0m:      	      'F1-score: 0.4555 '),
[92mINFO [0m:      	     (1,
[92mINFO

Federated learning simulation finished.


### VAE

In [None]:
# Instantiate the configuration
cfg.STRATEGY="FED_AVG"
cfg.MODEL_NAME="VAE"
cfg.LEARNING_RATE=1e-4
cfg.WEIGHT_DECAY=1e-4
strategy=set_server_strategy()

# --- Start the Simulation ---
print("Starting federated learning simulation...")
history = fl.simulation.start_simulation(
    client_fn=client_function,
    num_clients=cfg.NUM_TRAIN_CLIENTS,
    config=fl.server.ServerConfig(num_rounds=cfg.NUM_ROUNDS),
    strategy=strategy,
    client_resources={"num_cpus": 1, "num_gpus": 1} if DEVICE.type == "cuda" else {"num_cpus": 4},
)
print("Federated learning simulation finished.")

	Instead, use the `flwr run` CLI command to start a local simulation in your Flower app, as shown for example below:

		$ flwr new  # Create a new Flower app from a template

		$ flwr run  # Run the Flower app in Simulation Mode

	Using `start_simulation()` is deprecated.

            This is a deprecated feature. It will be removed
            entirely in future versions of Flower.
        
[92mINFO [0m:      Starting Flower simulation, config: num_rounds=5, no round_timeout


Using FedAvg strategy with VAE model.
Starting federated learning simulation...


2025-07-27 00:44:18,687	INFO worker.py:1771 -- Started a local Ray instance.
[92mINFO [0m:      Flower VCE: Ray initialized with resources: {'accelerator_type:G': 1.0, 'node:__internal_head__': 1.0, 'CPU': 2.0, 'memory': 3333827790.0, 'object_store_memory': 1666913894.0, 'GPU': 1.0, 'node:172.24.78.91': 1.0}
[92mINFO [0m:      Optimize your simulation with Flower VCE: https://flower.ai/docs/framework/how-to-run-simulations.html
[92mINFO [0m:      Flower VCE: Resources for each Virtual Client: {'num_cpus': 1, 'num_gpus': 1}
[92mINFO [0m:      Flower VCE: Creating VirtualClientEngineActorPool with 1 actors
[92mINFO [0m:      [INIT]
[92mINFO [0m:      Using initial global parameters provided by strategy
[92mINFO [0m:      Starting evaluation of initial global parameters


Counts of : original binary labels Counter({0: 156058, 1: 65271}) predicted binary labels Counter({1: 221329})
Counts of  original  labels: {0: 156058, 1: 65233, 2: 1, 3: 1, 4: 1, 5: 2, 6: 32, 7: 1}
Counts of misclassified original labels: {0: 156058}
{0: 'threshold=0.0000 ,Test : Accuracy: 0.2949 Recall : 1.0000 FDR: 0.7051  F1-score: 0.4555 '}


[92mINFO [0m:      initial parameters (loss, other metrics): 18.036986793416137, {0: 'threshold=0.0000 ,Test : Accuracy: 0.2949 Recall : 1.0000 FDR: 0.7051  F1-score: 0.4555 '}
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 1]
[92mINFO [0m:      configure_fit: strategy sampled 4 clients (out of 4)


[36m(ClientAppActor pid=36318)[0m Train : time 30.11 s Epoch 1
[36m(ClientAppActor pid=36318)[0m Train Loss: 0.5352
[36m(ClientAppActor pid=36318)[0m Train : time 30.65 s Epoch 2
[36m(ClientAppActor pid=36318)[0m Train Loss: 0.1550
[36m(ClientAppActor pid=36318)[0m Train : time 29.61 s Epoch 3
[36m(ClientAppActor pid=36318)[0m Train Loss: 0.1380
[36m(ClientAppActor pid=36318)[0m -----------mse_loss mean :  0.0381 std: 0.1958
[36m(ClientAppActor pid=36318)[0m Val: Accuracy: 0.9852  
[36m(ClientAppActor pid=36318)[0m Train : time 42.29 s Epoch 1
[36m(ClientAppActor pid=36318)[0m Train Loss: 0.4714
[36m(ClientAppActor pid=36318)[0m Train : time 39.58 s Epoch 2
[36m(ClientAppActor pid=36318)[0m Train Loss: 0.1689
[36m(ClientAppActor pid=36318)[0m Train : time 40.69 s Epoch 3
[36m(ClientAppActor pid=36318)[0m Train Loss: 0.1602
[36m(ClientAppActor pid=36318)[0m -----------mse_loss mean :  0.0618 std: 0.5117
[36m(ClientAppActor pid=36318)[0m Val: Accuracy: 0.9

[92mINFO [0m:      aggregate_fit: received 4 results and 0 failures


Round 1: Aggregated threshold = 0.4155
[36m(ClientAppActor pid=36318)[0m -----------mse_loss mean :  0.0613 std: 0.4961
[36m(ClientAppActor pid=36318)[0m Val: Accuracy: 0.9938  
Counts of : original binary labels Counter({0: 156058, 1: 65271}) predicted binary labels Counter({0: 122723, 1: 98606})
Counts of  original  labels: {0: 156058, 1: 65233, 2: 1, 3: 1, 4: 1, 5: 2, 6: 32, 7: 1}
Counts of misclassified original labels: {0: 33487, 1: 152}
{0: 'threshold=0.4155 ,Test : Accuracy: 0.8480 Recall : 0.9977 FDR: 0.3396  F1-score: 0.7947 '}
model

[92mINFO [0m:      fit progress: (1, 0.9953139613426166, {0: 'threshold=0.4155 ,Test : Accuracy: 0.8480 Recall : 0.9977 FDR: 0.3396  F1-score: 0.7947 '}, 510.31632201999855)
[92mINFO [0m:      configure_evaluate: no clients selected, skipping evaluation
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 2]
[92mINFO [0m:      configure_fit: strategy sampled 4 clients (out of 4)


 VAE is saved in fed_best_models/FED_AVG_VAE_f1_0.79_recall_1.00_.pth
[36m(ClientAppActor pid=36318)[0m Train : time 30.22 s Epoch 1
[36m(ClientAppActor pid=36318)[0m Train Loss: 0.1704
[36m(ClientAppActor pid=36318)[0m Train : time 27.88 s Epoch 2
[36m(ClientAppActor pid=36318)[0m Train Loss: 0.1619
[36m(ClientAppActor pid=36318)[0m Train : time 30.37 s Epoch 3
[36m(ClientAppActor pid=36318)[0m Train Loss: 0.1602
[36m(ClientAppActor pid=36318)[0m -----------mse_loss mean :  0.0614 std: 0.5008
[36m(ClientAppActor pid=36318)[0m Val: Accuracy: 0.9920  
[36m(ClientAppActor pid=36318)[0m Train : time 41.43 s Epoch 1
[36m(ClientAppActor pid=36318)[0m Train Loss: 0.1663
[36m(ClientAppActor pid=36318)[0m Train : time 41.82 s Epoch 2
[36m(ClientAppActor pid=36318)[0m Train Loss: 0.1585
[36m(ClientAppActor pid=36318)[0m Train : time 39.01 s Epoch 3
[36m(ClientAppActor pid=36318)[0m Train Loss: 0.1566
[36m(ClientAppActor pid=36318)[0m -----------mse_loss mean :  0.0

[92mINFO [0m:      aggregate_fit: received 4 results and 0 failures


Round 2: Aggregated threshold = 0.5662
[36m(ClientAppActor pid=36318)[0m -----------mse_loss mean :  0.0677 std: 0.5438
[36m(ClientAppActor pid=36318)[0m Val: Accuracy: 0.9914  
Counts of : original binary labels Counter({0: 156058, 1: 65271}) predicted binary labels Counter({0: 193792, 1: 27537})
Counts of  original  labels: {0: 156058, 1: 65233, 2: 1, 3: 1, 4: 1, 5: 2, 6: 32, 7: 1}
Counts of misclassified original labels: {0: 1307, 1: 39033, 6: 8}
{0: 'threshold=0.5662 ,Test : Accuracy: 0.8177 Recall : 0.4019 FDR: 0.0475  F1-score: 0.5653 '}


[92mINFO [0m:      fit progress: (2, 0.5350337478369306, {0: 'threshold=0.5662 ,Test : Accuracy: 0.8177 Recall : 0.4019 FDR: 0.0475  F1-score: 0.5653 '}, 1013.6585695119993)
[92mINFO [0m:      configure_evaluate: no clients selected, skipping evaluation
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 3]
[92mINFO [0m:      configure_fit: strategy sampled 4 clients (out of 4)


[36m(ClientAppActor pid=36318)[0m Train : time 29.76 s Epoch 1
[36m(ClientAppActor pid=36318)[0m Train Loss: 0.1594
[36m(ClientAppActor pid=36318)[0m Train : time 31.12 s Epoch 2
[36m(ClientAppActor pid=36318)[0m Train Loss: 0.1585
[36m(ClientAppActor pid=36318)[0m Train : time 29.52 s Epoch 3
[36m(ClientAppActor pid=36318)[0m Train Loss: 0.1584
[36m(ClientAppActor pid=36318)[0m -----------mse_loss mean :  0.0600 std: 0.4995
[36m(ClientAppActor pid=36318)[0m Val: Accuracy: 0.9920  
[36m(ClientAppActor pid=36318)[0m Train : time 39.68 s Epoch 1
[36m(ClientAppActor pid=36318)[0m Train Loss: 0.1575
[36m(ClientAppActor pid=36318)[0m Train : time 41.54 s Epoch 2
[36m(ClientAppActor pid=36318)[0m Train Loss: 0.1568
[36m(ClientAppActor pid=36318)[0m Train : time 40.99 s Epoch 3
[36m(ClientAppActor pid=36318)[0m Train Loss: 0.1563
[36m(ClientAppActor pid=36318)[0m -----------mse_loss mean :  0.0639 std: 0.5285
[36m(ClientAppActor pid=36318)[0m Val: Accuracy: 0.9

[92mINFO [0m:      aggregate_fit: received 4 results and 0 failures


[36m(ClientAppActor pid=36318)[0m -----------mse_loss mean :  0.0586 std: 0.5236
[36m(ClientAppActor pid=36318)[0m Val: Accuracy: 0.9931  
Round 3: Aggregated threshold = 0.5655
Counts of : original binary labels Counter({0: 156058, 1: 65271}) predicted binary labels Counter({0: 193900, 1: 27429})
Counts of  original  labels: {0: 156058, 1: 65233, 2: 1, 3: 1, 4: 1, 5: 2, 6: 32, 7: 1}
Counts of misclassified original labels: {0: 1110, 1: 38944, 6: 8}
{0: 'threshold=0.5655 ,Test : Accuracy: 0.8190 Recall : 0.4032 FDR: 0.0405  F1-score: 0.5678 '}


[92mINFO [0m:      fit progress: (3, 0.5416102837856765, {0: 'threshold=0.5655 ,Test : Accuracy: 0.8190 Recall : 0.4032 FDR: 0.0405  F1-score: 0.5678 '}, 1522.1402817749986)
[92mINFO [0m:      configure_evaluate: no clients selected, skipping evaluation
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 4]
[92mINFO [0m:      configure_fit: strategy sampled 4 clients (out of 4)


[36m(ClientAppActor pid=36318)[0m Train : time 41.64 s Epoch 1
[36m(ClientAppActor pid=36318)[0m Train Loss: 0.1559
[36m(ClientAppActor pid=36318)[0m Train : time 39.62 s Epoch 2
[36m(ClientAppActor pid=36318)[0m Train Loss: 0.1550
[36m(ClientAppActor pid=36318)[0m Train : time 41.58 s Epoch 3
[36m(ClientAppActor pid=36318)[0m Train Loss: 0.1544
[36m(ClientAppActor pid=36318)[0m -----------mse_loss mean :  0.0597 std: 0.5216
[36m(ClientAppActor pid=36318)[0m Val: Accuracy: 0.9922  
[36m(ClientAppActor pid=36318)[0m Train : time 35.92 s Epoch 1
[36m(ClientAppActor pid=36318)[0m Train Loss: 0.1528
[36m(ClientAppActor pid=36318)[0m Train : time 34.67 s Epoch 2
[36m(ClientAppActor pid=36318)[0m Train Loss: 0.1517
[36m(ClientAppActor pid=36318)[0m Train : time 32.97 s Epoch 3
[36m(ClientAppActor pid=36318)[0m Train Loss: 0.1510
[36m(ClientAppActor pid=36318)[0m -----------mse_loss mean :  0.0475 std: 0.4424
[36m(ClientAppActor pid=36318)[0m Val: Accuracy: 0.9

[92mINFO [0m:      aggregate_fit: received 4 results and 0 failures


Round 4: Aggregated threshold = 0.5436
[36m(ClientAppActor pid=36318)[0m -----------mse_loss mean :  0.0632 std: 0.4867
[36m(ClientAppActor pid=36318)[0m Val: Accuracy: 0.9911  
Counts of : original binary labels Counter({0: 156058, 1: 65271}) predicted binary labels Counter({0: 193943, 1: 27386})
Counts of  original  labels: {0: 156058, 1: 65233, 2: 1, 3: 1, 4: 1, 5: 2, 6: 32, 7: 1}
Counts of misclassified original labels: {0: 1105, 1: 38982, 6: 8}
{0: 'threshold=0.5436 ,Test : Accuracy: 0.8188 Recall : 0.4026 FDR: 0.0403  F1-score: 0.5673 '}


[92mINFO [0m:      fit progress: (4, 0.5357206494630166, {0: 'threshold=0.5436 ,Test : Accuracy: 0.8188 Recall : 0.4026 FDR: 0.0403  F1-score: 0.5673 '}, 2026.6842474159985)
[92mINFO [0m:      configure_evaluate: no clients selected, skipping evaluation
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 5]
[92mINFO [0m:      configure_fit: strategy sampled 4 clients (out of 4)


[36m(ClientAppActor pid=36318)[0m Train : time 31.99 s Epoch 1
[36m(ClientAppActor pid=36318)[0m Train Loss: 0.1511
[36m(ClientAppActor pid=36318)[0m Train : time 36.03 s Epoch 2
[36m(ClientAppActor pid=36318)[0m Train Loss: 0.1507
[36m(ClientAppActor pid=36318)[0m Train : time 33.83 s Epoch 3
[36m(ClientAppActor pid=36318)[0m Train Loss: 0.1500
[36m(ClientAppActor pid=36318)[0m -----------mse_loss mean :  0.0475 std: 0.3565
[36m(ClientAppActor pid=36318)[0m Val: Accuracy: 0.9910  
[36m(ClientAppActor pid=36318)[0m Train : time 31.52 s Epoch 1
[36m(ClientAppActor pid=36318)[0m Train Loss: 0.1558
[36m(ClientAppActor pid=36318)[0m Train : time 29.75 s Epoch 2
[36m(ClientAppActor pid=36318)[0m Train Loss: 0.1556
[36m(ClientAppActor pid=36318)[0m Train : time 30.41 s Epoch 3
[36m(ClientAppActor pid=36318)[0m Train Loss: 0.1537
[36m(ClientAppActor pid=36318)[0m -----------mse_loss mean :  0.0458 std: 0.3124
[36m(ClientAppActor pid=36318)[0m Val: Accuracy: 0.9

[92mINFO [0m:      aggregate_fit: received 4 results and 0 failures


Round 5: Aggregated threshold = 0.4054
[36m(ClientAppActor pid=36318)[0m -----------mse_loss mean :  0.0498 std: 0.3166
[36m(ClientAppActor pid=36318)[0m Val: Accuracy: 0.9892  
Counts of : original binary labels Counter({0: 156058, 1: 65271}) predicted binary labels Counter({0: 167338, 1: 53991})
Counts of  original  labels: {0: 156058, 1: 65233, 2: 1, 3: 1, 4: 1, 5: 2, 6: 32, 7: 1}
Counts of misclassified original labels: {0: 1235, 1: 12507, 6: 8}
{0: 'threshold=0.4054 ,Test : Accuracy: 0.9379 Recall : 0.8083 FDR: 0.0229  F1-score: 0.8847 '}


[92mINFO [0m:      fit progress: (5, 0.5403309383316239, {0: 'threshold=0.4054 ,Test : Accuracy: 0.9379 Recall : 0.8083 FDR: 0.0229  F1-score: 0.8847 '}, 2531.6515583889995)
[92mINFO [0m:      configure_evaluate: no clients selected, skipping evaluation
[92mINFO [0m:      
[92mINFO [0m:      [SUMMARY]
[92mINFO [0m:      Run finished 5 round(s) in 2531.65s
[92mINFO [0m:      	History (loss, centralized):
[92mINFO [0m:      		round 0: 18.036986793416137
[92mINFO [0m:      		round 1: 0.9953139613426166
[92mINFO [0m:      		round 2: 0.5350337478369306
[92mINFO [0m:      		round 3: 0.5416102837856765
[92mINFO [0m:      		round 4: 0.5357206494630166
[92mINFO [0m:      		round 5: 0.5403309383316239
[92mINFO [0m:      	History (metrics, centralized):
[92mINFO [0m:      	{0: [(0,
[92mINFO [0m:      	      'threshold=0.0000 ,Test : Accuracy: 0.2949 Recall : 1.0000 FDR: 0.7051  '
[92mINFO [0m:      	      'F1-score: 0.4555 '),
[92mINFO [0m:      	     (1,
[92mIN

model VAE is saved in fed_best_models/FED_AVG_VAE_f1_0.88_recall_1.00_.pth


[92mINFO [0m:      	      'F1-score: 0.7947 '),
[92mINFO [0m:      	     (2,
[92mINFO [0m:      	      'threshold=0.5662 ,Test : Accuracy: 0.8177 Recall : 0.4019 FDR: 0.0475  '
[92mINFO [0m:      	      'F1-score: 0.5653 '),
[92mINFO [0m:      	     (3,
[92mINFO [0m:      	      'threshold=0.5655 ,Test : Accuracy: 0.8190 Recall : 0.4032 FDR: 0.0405  '
[92mINFO [0m:      	      'F1-score: 0.5678 '),
[92mINFO [0m:      	     (4,
[92mINFO [0m:      	      'threshold=0.5436 ,Test : Accuracy: 0.8188 Recall : 0.4026 FDR: 0.0403  '
[92mINFO [0m:      	      'F1-score: 0.5673 '),
[92mINFO [0m:      	     (5,
[92mINFO [0m:      	      'threshold=0.4054 ,Test : Accuracy: 0.9379 Recall : 0.8083 FDR: 0.0229  '
[92mINFO [0m:      	      'F1-score: 0.8847 ')]}
[92mINFO [0m:      


Federated learning simulation finished.


In [None]:
cfg.STRATEGY="FED_PROX"
strategy=set_server_strategy()

print("Starting federated learning simulation...")
history = fl.simulation.start_simulation(
    client_fn=client_function,
    num_clients=cfg.NUM_TRAIN_CLIENTS,
    config=fl.server.ServerConfig(num_rounds=cfg.NUM_ROUNDS),
    strategy=strategy,
    client_resources={"num_cpus": 1, "num_gpus": 1} if DEVICE.type == "cuda" else {"num_cpus": 1},
)
print("Federated learning simulation finished.")

	Instead, use the `flwr run` CLI command to start a local simulation in your Flower app, as shown for example below:

		$ flwr new  # Create a new Flower app from a template

		$ flwr run  # Run the Flower app in Simulation Mode

	Using `start_simulation()` is deprecated.

            This is a deprecated feature. It will be removed
            entirely in future versions of Flower.
        
[92mINFO [0m:      Starting Flower simulation, config: num_rounds=5, no round_timeout


Using FedProx strategy with VAE model.
Starting federated learning simulation...


2025-07-27 01:23:28,377	INFO worker.py:1771 -- Started a local Ray instance.
[92mINFO [0m:      Flower VCE: Ray initialized with resources: {'accelerator_type:G': 1.0, 'node:__internal_head__': 1.0, 'CPU': 2.0, 'memory': 3277833831.0, 'object_store_memory': 1638916915.0, 'GPU': 1.0, 'node:172.24.78.91': 1.0}
[92mINFO [0m:      Optimize your simulation with Flower VCE: https://flower.ai/docs/framework/how-to-run-simulations.html
[92mINFO [0m:      Flower VCE: Resources for each Virtual Client: {'num_cpus': 1, 'num_gpus': 1}
[92mINFO [0m:      Flower VCE: Creating VirtualClientEngineActorPool with 1 actors
[92mINFO [0m:      [INIT]
[92mINFO [0m:      Using initial global parameters provided by strategy
[92mINFO [0m:      Starting evaluation of initial global parameters


Counts of : original binary labels Counter({0: 156058, 1: 65271}) predicted binary labels Counter({1: 221329})
Counts of  original  labels: {0: 156058, 1: 65233, 2: 1, 3: 1, 4: 1, 5: 2, 6: 32, 7: 1}
Counts of misclassified original labels: {0: 156058}
{0: 'threshold=0.0000 ,Test : Accuracy: 0.2949 Recall : 1.0000 FDR: 0.7051  F1-score: 0.4555 '}


[92mINFO [0m:      initial parameters (loss, other metrics): 17.708340298831153, {0: 'threshold=0.0000 ,Test : Accuracy: 0.2949 Recall : 1.0000 FDR: 0.7051  F1-score: 0.4555 '}
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 1]
[92mINFO [0m:      configure_fit: strategy sampled 4 clients (out of 4)


[36m(ClientAppActor pid=44427)[0m Train : time 49.82 s Epoch 1
[36m(ClientAppActor pid=44427)[0m Train Loss: 0.8157
[36m(ClientAppActor pid=44427)[0m Train : time 45.44 s Epoch 2
[36m(ClientAppActor pid=44427)[0m Train Loss: 0.4286
[36m(ClientAppActor pid=44427)[0m Train : time 48.59 s Epoch 3
[36m(ClientAppActor pid=44427)[0m Train Loss: 0.4183
[36m(ClientAppActor pid=44427)[0m -----------mse_loss mean :  0.1000 std: 0.4407
[36m(ClientAppActor pid=44427)[0m Val: Accuracy: 0.9910  
[36m(ClientAppActor pid=44427)[0m Train : time 63.35 s Epoch 1
[36m(ClientAppActor pid=44427)[0m Train Loss: 0.7061
[36m(ClientAppActor pid=44427)[0m Train : time 62.99 s Epoch 2
[36m(ClientAppActor pid=44427)[0m Train Loss: 0.4222
[36m(ClientAppActor pid=44427)[0m Train : time 63.36 s Epoch 3
[36m(ClientAppActor pid=44427)[0m Train Loss: 0.4165
[36m(ClientAppActor pid=44427)[0m -----------mse_loss mean :  0.1087 std: 0.4783
[36m(ClientAppActor pid=44427)[0m Val: Accuracy: 0.9

[92mINFO [0m:      aggregate_fit: received 4 results and 0 failures


[36m(ClientAppActor pid=44427)[0m -----------mse_loss mean :  0.1003 std: 0.4691
[36m(ClientAppActor pid=44427)[0m Val: Accuracy: 0.9908  
Round 1: Aggregated threshold = 0.5552
Counts of : original binary labels Counter({0: 156058, 1: 65271}) predicted binary labels Counter({0: 187327, 1: 34002})
Counts of  original  labels: {0: 156058, 1: 65233, 2: 1, 3: 1, 4: 1, 5: 2, 6: 32, 7: 1}
Counts of misclassified original labels: {0: 2843, 1: 34104, 6: 8}
{0: 'threshold=0.5552 ,Test : Accuracy: 0.8330 Recall : 0.4774 FDR: 0.0836  F1-score: 0.6277 '}


[92mINFO [0m:      fit progress: (1, 0.5551878863027438, {0: 'threshold=0.5552 ,Test : Accuracy: 0.8330 Recall : 0.4774 FDR: 0.0836  F1-score: 0.6277 '}, 770.0015450480005)
[92mINFO [0m:      configure_evaluate: no clients selected, skipping evaluation
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 2]
[92mINFO [0m:      configure_fit: strategy sampled 4 clients (out of 4)


model VAE is saved in fed_best_models/FED_PROX_VAE_f1_0.63_recall_1.00_.pth
[36m(ClientAppActor pid=44427)[0m Train : time 46.03 s Epoch 1
[36m(ClientAppActor pid=44427)[0m Train Loss: 0.2092
[36m(ClientAppActor pid=44427)[0m Train : time 47.20 s Epoch 2
[36m(ClientAppActor pid=44427)[0m Train Loss: 0.2073
[36m(ClientAppActor pid=44427)[0m Train : time 46.75 s Epoch 3
[36m(ClientAppActor pid=44427)[0m Train Loss: 0.2073
[36m(ClientAppActor pid=44427)[0m -----------mse_loss mean :  0.0838 std: 0.4483
[36m(ClientAppActor pid=44427)[0m Val: Accuracy: 0.9897  
[36m(ClientAppActor pid=44427)[0m Train : time 63.03 s Epoch 1
[36m(ClientAppActor pid=44427)[0m Train Loss: 0.2078
[36m(ClientAppActor pid=44427)[0m Train : time 62.69 s Epoch 2
[36m(ClientAppActor pid=44427)[0m Train Loss: 0.2060
[36m(ClientAppActor pid=44427)[0m Train : time 60.83 s Epoch 3
[36m(ClientAppActor pid=44427)[0m Train Loss: 0.2060
[36m(ClientAppActor pid=44427)[0m -----------mse_loss mean 

[92mINFO [0m:      aggregate_fit: received 4 results and 0 failures


[36m(ClientAppActor pid=44427)[0m -----------mse_loss mean :  0.0886 std: 0.4716
[36m(ClientAppActor pid=44427)[0m Val: Accuracy: 0.9885  
Round 2: Aggregated threshold = 0.5347
Counts of : original binary labels Counter({0: 156058, 1: 65271}) predicted binary labels Counter({0: 192435, 1: 28894})
Counts of  original  labels: {0: 156058, 1: 65233, 2: 1, 3: 1, 4: 1, 5: 2, 6: 32, 7: 1}
Counts of misclassified original labels: {0: 1411, 1: 37781, 6: 7}
{0: 'threshold=0.5347 ,Test : Accuracy: 0.8229 Recall : 0.4211 FDR: 0.0488  F1-score: 0.5837 '}


[92mINFO [0m:      fit progress: (2, 0.5316846611831256, {0: 'threshold=0.5347 ,Test : Accuracy: 0.8229 Recall : 0.4211 FDR: 0.0488  F1-score: 0.5837 '}, 1529.7249870169999)
[92mINFO [0m:      configure_evaluate: no clients selected, skipping evaluation
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 3]
[92mINFO [0m:      configure_fit: strategy sampled 4 clients (out of 4)


[36m(ClientAppActor pid=44427)[0m Train : time 63.29 s Epoch 1
[36m(ClientAppActor pid=44427)[0m Train Loss: 0.1901
[36m(ClientAppActor pid=44427)[0m Train : time 62.81 s Epoch 2
[36m(ClientAppActor pid=44427)[0m Train Loss: 0.1899
[36m(ClientAppActor pid=44427)[0m Train : time 61.80 s Epoch 3
[36m(ClientAppActor pid=44427)[0m Train Loss: 0.1901
[36m(ClientAppActor pid=44427)[0m -----------mse_loss mean :  0.0751 std: 0.4421
[36m(ClientAppActor pid=44427)[0m Val: Accuracy: 0.9908  
[36m(ClientAppActor pid=44427)[0m Train : time 62.91 s Epoch 1
[36m(ClientAppActor pid=44427)[0m Train Loss: 0.1912
[36m(ClientAppActor pid=44427)[0m Train : time 60.14 s Epoch 2
[36m(ClientAppActor pid=44427)[0m Train Loss: 0.1908
[36m(ClientAppActor pid=44427)[0m Train : time 62.99 s Epoch 3
[36m(ClientAppActor pid=44427)[0m Train Loss: 0.1906
[36m(ClientAppActor pid=44427)[0m -----------mse_loss mean :  0.0799 std: 0.4583
[36m(ClientAppActor pid=44427)[0m Val: Accuracy: 0.9

[92mINFO [0m:      aggregate_fit: received 4 results and 0 failures


Round 3: Aggregated threshold = 0.5038
[36m(ClientAppActor pid=44427)[0m -----------mse_loss mean :  0.0726 std: 0.4277
[36m(ClientAppActor pid=44427)[0m Val: Accuracy: 0.9913  
Counts of : original binary labels Counter({0: 156058, 1: 65271}) predicted binary labels Counter({0: 175875, 1: 45454})
Counts of  original  labels: {0: 156058, 1: 65233, 2: 1, 3: 1, 4: 1, 5: 2, 6: 32, 7: 1}
Counts of misclassified original labels: {0: 1320, 1: 21129, 6: 8}
{0: 'threshold=0.5038 ,Test : Accuracy: 0.8985 Recall : 0.6762 FDR: 0.0290  F1-score: 0.7972 '}


[92mINFO [0m:      fit progress: (3, 0.5308336585467788, {0: 'threshold=0.5038 ,Test : Accuracy: 0.8985 Recall : 0.6762 FDR: 0.0290  F1-score: 0.7972 '}, 2290.9276955780006)
[92mINFO [0m:      configure_evaluate: no clients selected, skipping evaluation
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 4]
[92mINFO [0m:      configure_fit: strategy sampled 4 clients (out of 4)


model VAE is saved in fed_best_models/FED_PROX_VAE_f1_0.80_recall_1.00_.pth
[36m(ClientAppActor pid=44427)[0m Train : time 55.95 s Epoch 1
[36m(ClientAppActor pid=44427)[0m Train Loss: 0.1841
[36m(ClientAppActor pid=44427)[0m Train : time 53.35 s Epoch 2
[36m(ClientAppActor pid=44427)[0m Train Loss: 0.1838
[36m(ClientAppActor pid=44427)[0m Train : time 52.09 s Epoch 3
[36m(ClientAppActor pid=44427)[0m Train Loss: 0.1838
[36m(ClientAppActor pid=44427)[0m -----------mse_loss mean :  0.0655 std: 0.3656
[36m(ClientAppActor pid=44427)[0m Val: Accuracy: 0.9915  
[36m(ClientAppActor pid=44427)[0m Train : time 45.71 s Epoch 1
[36m(ClientAppActor pid=44427)[0m Train Loss: 0.1856
[36m(ClientAppActor pid=44427)[0m Train : time 48.90 s Epoch 2
[36m(ClientAppActor pid=44427)[0m Train Loss: 0.1853
[36m(ClientAppActor pid=44427)[0m Train : time 44.99 s Epoch 3
[36m(ClientAppActor pid=44427)[0m Train Loss: 0.1853
[36m(ClientAppActor pid=44427)[0m -----------mse_loss mean 

[92mINFO [0m:      aggregate_fit: received 4 results and 0 failures


[36m(ClientAppActor pid=44427)[0m -----------mse_loss mean :  0.0796 std: 0.4404
Round 4: Aggregated threshold = 0.4844
[36m(ClientAppActor pid=44427)[0m Val: Accuracy: 0.9884  
Counts of : original binary labels Counter({0: 156058, 1: 65271}) predicted binary labels Counter({0: 171689, 1: 49640})
Counts of  original  labels: {0: 156058, 1: 65233, 2: 1, 3: 1, 4: 1, 5: 2, 6: 32, 7: 1}
Counts of misclassified original labels: {0: 1532, 1: 17155, 6: 8}
{0: 'threshold=0.4844 ,Test : Accuracy: 0.9155 Recall : 0.7371 FDR: 0.0309  F1-score: 0.8373 '}


[92mINFO [0m:      fit progress: (4, 0.532405449014815, {0: 'threshold=0.4844 ,Test : Accuracy: 0.9155 Recall : 0.7371 FDR: 0.0309  F1-score: 0.8373 '}, 3062.090114531)
[92mINFO [0m:      configure_evaluate: no clients selected, skipping evaluation
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 5]
[92mINFO [0m:      configure_fit: strategy sampled 4 clients (out of 4)


model VAE is saved in fed_best_models/FED_PROX_VAE_f1_0.84_recall_1.00_.pth
[36m(ClientAppActor pid=44427)[0m Train : time 60.82 s Epoch 1
[36m(ClientAppActor pid=44427)[0m Train Loss: 0.1813
[36m(ClientAppActor pid=44427)[0m Train : time 62.18 s Epoch 2
[36m(ClientAppActor pid=44427)[0m Train Loss: 0.1813
[36m(ClientAppActor pid=44427)[0m Train : time 62.12 s Epoch 3
[36m(ClientAppActor pid=44427)[0m Train Loss: 0.1814
[36m(ClientAppActor pid=44427)[0m -----------mse_loss mean :  0.0702 std: 0.4032
[36m(ClientAppActor pid=44427)[0m Val: Accuracy: 0.9898  
[36m(ClientAppActor pid=44427)[0m Train : time 46.89 s Epoch 1
[36m(ClientAppActor pid=44427)[0m Train Loss: 0.1817
[36m(ClientAppActor pid=44427)[0m Train : time 45.80 s Epoch 2
[36m(ClientAppActor pid=44427)[0m Train Loss: 0.1821
[36m(ClientAppActor pid=44427)[0m Train : time 45.74 s Epoch 3
[36m(ClientAppActor pid=44427)[0m Train Loss: 0.1817
[36m(ClientAppActor pid=44427)[0m -----------mse_loss mean 

[92mINFO [0m:      aggregate_fit: received 4 results and 0 failures


Round 5: Aggregated threshold = 0.4608
[36m(ClientAppActor pid=44427)[0m -----------mse_loss mean :  0.0694 std: 0.4085
[36m(ClientAppActor pid=44427)[0m Val: Accuracy: 0.9902  
Counts of : original binary labels Counter({0: 156058, 1: 65271}) predicted binary labels Counter({0: 169831, 1: 51498})
Counts of  original  labels: {0: 156058, 1: 65233, 2: 1, 3: 1, 4: 1, 5: 2, 6: 32, 7: 1}
Counts of misclassified original labels: {0: 1457, 1: 15222, 6: 8}
{0: 'threshold=0.4608 ,Test : Accuracy: 0.9246 Recall : 0.7667 FDR: 0.0283  F1-score: 0.8571 '}


[92mINFO [0m:      fit progress: (5, 0.5313830386551243, {0: 'threshold=0.4608 ,Test : Accuracy: 0.9246 Recall : 0.7667 FDR: 0.0283  F1-score: 0.8571 '}, 3821.811365921001)
[92mINFO [0m:      configure_evaluate: no clients selected, skipping evaluation
[92mINFO [0m:      
[92mINFO [0m:      [SUMMARY]
[92mINFO [0m:      Run finished 5 round(s) in 3821.81s
[92mINFO [0m:      	History (loss, centralized):
[92mINFO [0m:      		round 0: 17.708340298831153
[92mINFO [0m:      		round 1: 0.5551878863027438
[92mINFO [0m:      		round 2: 0.5316846611831256
[92mINFO [0m:      		round 3: 0.5308336585467788
[92mINFO [0m:      		round 4: 0.532405449014815
[92mINFO [0m:      		round 5: 0.5313830386551243
[92mINFO [0m:      	History (metrics, centralized):
[92mINFO [0m:      	{0: [(0,
[92mINFO [0m:      	      'threshold=0.0000 ,Test : Accuracy: 0.2949 Recall : 1.0000 FDR: 0.7051  '
[92mINFO [0m:      	      'F1-score: 0.4555 '),
[92mINFO [0m:      	     (1,
[92mINFO

model VAE is saved in fed_best_models/FED_PROX_VAE_f1_0.86_recall_1.00_.pth


[92mINFO [0m:      	      'F1-score: 0.6277 '),
[92mINFO [0m:      	     (2,
[92mINFO [0m:      	      'threshold=0.5347 ,Test : Accuracy: 0.8229 Recall : 0.4211 FDR: 0.0488  '
[92mINFO [0m:      	      'F1-score: 0.5837 '),
[92mINFO [0m:      	     (3,
[92mINFO [0m:      	      'threshold=0.5038 ,Test : Accuracy: 0.8985 Recall : 0.6762 FDR: 0.0290  '
[92mINFO [0m:      	      'F1-score: 0.7972 '),
[92mINFO [0m:      	     (4,
[92mINFO [0m:      	      'threshold=0.4844 ,Test : Accuracy: 0.9155 Recall : 0.7371 FDR: 0.0309  '
[92mINFO [0m:      	      'F1-score: 0.8373 '),
[92mINFO [0m:      	     (5,
[92mINFO [0m:      	      'threshold=0.4608 ,Test : Accuracy: 0.9246 Recall : 0.7667 FDR: 0.0283  '
[92mINFO [0m:      	      'F1-score: 0.8571 ')]}
[92mINFO [0m:      


Federated learning simulation finished.


In [None]:
# Instantiate the configuration
cfg.STRATEGY="FED_AVG"
cfg.MODEL_NAME="AAE"
cfg.LEARNING_RATE=1e-2
cfg.WEIGHT_DECAY=1e-5
strategy=set_server_strategy()

# --- Start the Simulation ---
print("Starting federated learning simulation...")
history = fl.simulation.start_simulation(
    client_fn=client_function,
    num_clients=cfg.NUM_TRAIN_CLIENTS,
    config=fl.server.ServerConfig(num_rounds=cfg.NUM_ROUNDS),
    strategy=strategy,
    client_resources={"num_cpus": 1, "num_gpus": 1} if DEVICE.type == "cuda" else {"num_cpus": 1},
)
print("Federated learning simulation finished.")

	Instead, use the `flwr run` CLI command to start a local simulation in your Flower app, as shown for example below:

		$ flwr new  # Create a new Flower app from a template

		$ flwr run  # Run the Flower app in Simulation Mode

	Using `start_simulation()` is deprecated.

            This is a deprecated feature. It will be removed
            entirely in future versions of Flower.
        
[92mINFO [0m:      Starting Flower simulation, config: num_rounds=5, no round_timeout


Using FedAvg strategy with AAE model.
Starting federated learning simulation...


2025-07-27 02:22:29,737	INFO worker.py:1771 -- Started a local Ray instance.
[92mINFO [0m:      Flower VCE: Ray initialized with resources: {'accelerator_type:G': 1.0, 'node:__internal_head__': 1.0, 'CPU': 2.0, 'memory': 3242832692.0, 'object_store_memory': 1621416345.0, 'GPU': 1.0, 'node:172.24.78.91': 1.0}
[92mINFO [0m:      Optimize your simulation with Flower VCE: https://flower.ai/docs/framework/how-to-run-simulations.html
[92mINFO [0m:      Flower VCE: Resources for each Virtual Client: {'num_cpus': 1, 'num_gpus': 1}
[92mINFO [0m:      Flower VCE: Creating VirtualClientEngineActorPool with 1 actors
[92mINFO [0m:      [INIT]
[92mINFO [0m:      Using initial global parameters provided by strategy
[92mINFO [0m:      Starting evaluation of initial global parameters


Counts of : original binary labels Counter({0: 156058, 1: 65271}) predicted binary labels Counter({1: 221329})
Counts of  original  labels: {0: 156058, 1: 65233, 2: 1, 3: 1, 4: 1, 5: 2, 6: 32, 7: 1}
Counts of misclassified original labels: {0: 156058}
{0: 'threshold=0.0000 ,Test : Accuracy: 0.2949 Recall : 1.0000 FDR: 0.7051  F1-score: 0.4555 '}


[92mINFO [0m:      initial parameters (loss, other metrics): 17.18490911719657, {0: 'threshold=0.0000 ,Test : Accuracy: 0.2949 Recall : 1.0000 FDR: 0.7051  F1-score: 0.4555 '}
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 1]
[92mINFO [0m:      configure_fit: strategy sampled 4 clients (out of 4)


[36m(ClientAppActor pid=56481)[0m Train : time 67.21 s Epoch 1
[36m(ClientAppActor pid=56481)[0m Generator Loss: 120.6971 Discriminator Loss: 8.2306
[36m(ClientAppActor pid=56481)[0m Train : time 67.25 s Epoch 2
[36m(ClientAppActor pid=56481)[0m Generator Loss: 12.8002 Discriminator Loss: 7.7419
[36m(ClientAppActor pid=56481)[0m Train : time 67.29 s Epoch 3
[36m(ClientAppActor pid=56481)[0m Generator Loss: 3.6445 Discriminator Loss: 7.5593
[36m(ClientAppActor pid=56481)[0m -----------mse_loss mean :  0.0356 std: 0.3169
[36m(ClientAppActor pid=56481)[0m Val: Accuracy: 0.9944  
[36m(ClientAppActor pid=56481)[0m Train : time 63.79 s Epoch 1
[36m(ClientAppActor pid=56481)[0m Generator Loss: 120.6778 Discriminator Loss: 8.1995
[36m(ClientAppActor pid=56481)[0m Train : time 64.44 s Epoch 2
[36m(ClientAppActor pid=56481)[0m Generator Loss: 13.0397 Discriminator Loss: 7.7359
[36m(ClientAppActor pid=56481)[0m Train : time 65.80 s Epoch 3
[36m(ClientAppActor pid=56481)

[92mINFO [0m:      aggregate_fit: received 4 results and 0 failures


Round 1: Aggregated threshold = 0.3635
[36m(ClientAppActor pid=56481)[0m -----------mse_loss mean :  0.0515 std: 0.3694
[36m(ClientAppActor pid=56481)[0m Val: Accuracy: 0.9934  
Counts of : original binary labels Counter({0: 156058, 1: 65271}) predicted binary labels Counter({0: 154199, 1: 67130})
Counts of  original  labels: {0: 156058, 1: 65233, 2: 1, 3: 1, 4: 1, 5: 2, 6: 32, 7: 1}
Counts of misclassified original labels: {0: 2027, 1: 168}
{0: 'threshold=0.3635 ,Test : Accuracy: 0.9901 Recall : 0.9974 FDR: 0.0302  F1-score: 0.9834 '}


[92mINFO [0m:      fit progress: (1, 0.5044641193088117, {0: 'threshold=0.3635 ,Test : Accuracy: 0.9901 Recall : 0.9974 FDR: 0.0302  F1-score: 0.9834 '}, 800.0963934020001)
[92mINFO [0m:      configure_evaluate: no clients selected, skipping evaluation
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 2]
[92mINFO [0m:      configure_fit: strategy sampled 4 clients (out of 4)


model AdversarialAutoencoder is saved in fed_best_models/FED_AVG_AdversarialAutoencoder_f1_0.98_recall_1.00_.pth
[36m(ClientAppActor pid=56481)[0m Train : time 55.52 s Epoch 1
[36m(ClientAppActor pid=56481)[0m Generator Loss: 2.8521 Discriminator Loss: 8.3604
[36m(ClientAppActor pid=56481)[0m Train : time 54.61 s Epoch 2
[36m(ClientAppActor pid=56481)[0m Generator Loss: 2.1463 Discriminator Loss: 11.5994
[36m(ClientAppActor pid=56481)[0m Train : time 56.09 s Epoch 3
[36m(ClientAppActor pid=56481)[0m Generator Loss: 1.8089 Discriminator Loss: 16.6232
[36m(ClientAppActor pid=56481)[0m -----------mse_loss mean :  0.0185 std: 0.2092
[36m(ClientAppActor pid=56481)[0m Val: Accuracy: 0.9953  
[36m(ClientAppActor pid=56481)[0m Train : time 47.78 s Epoch 1
[36m(ClientAppActor pid=56481)[0m Generator Loss: 3.0424 Discriminator Loss: 7.9623
[36m(ClientAppActor pid=56481)[0m Train : time 51.28 s Epoch 2
[36m(ClientAppActor pid=56481)[0m Generator Loss: 2.3742 Discriminator 

[92mINFO [0m:      aggregate_fit: received 4 results and 0 failures


[36m(ClientAppActor pid=56481)[0m -----------mse_loss mean :  0.0280 std: 0.2932
Round 2: Aggregated threshold = 0.2718
[36m(ClientAppActor pid=56481)[0m Val: Accuracy: 0.9929  
Counts of : original binary labels Counter({0: 156058, 1: 65271}) predicted binary labels Counter({0: 155408, 1: 65921})
Counts of  original  labels: {0: 156058, 1: 65233, 2: 1, 3: 1, 4: 1, 5: 2, 6: 32, 7: 1}
Counts of misclassified original labels: {0: 952, 1: 302}
{0: 'threshold=0.2718 ,Test : Accuracy: 0.9943 Recall : 0.9954 FDR: 0.0144  F1-score: 0.9904 '}


[92mINFO [0m:      fit progress: (2, 0.5094359669315814, {0: 'threshold=0.2718 ,Test : Accuracy: 0.9943 Recall : 0.9954 FDR: 0.0144  F1-score: 0.9904 '}, 1595.0653152330015)
[92mINFO [0m:      configure_evaluate: no clients selected, skipping evaluation
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 3]


model AdversarialAutoencoder is saved in fed_best_models/FED_AVG_AdversarialAutoencoder_f1_0.99_recall_1.00_.pth


[92mINFO [0m:      configure_fit: strategy sampled 4 clients (out of 4)


[36m(ClientAppActor pid=56481)[0m Train : time 65.73 s Epoch 1
[36m(ClientAppActor pid=56481)[0m Generator Loss: 1.6625 Discriminator Loss: 20.0987
[36m(ClientAppActor pid=56481)[0m Train : time 65.29 s Epoch 2
[36m(ClientAppActor pid=56481)[0m Generator Loss: 1.4741 Discriminator Loss: 20.3186
[36m(ClientAppActor pid=56481)[0m Train : time 66.08 s Epoch 3
[36m(ClientAppActor pid=56481)[0m Generator Loss: 1.3783 Discriminator Loss: 19.4683
[36m(ClientAppActor pid=56481)[0m -----------mse_loss mean :  0.0198 std: 0.2162
[36m(ClientAppActor pid=56481)[0m Val: Accuracy: 0.9928  
[36m(ClientAppActor pid=56481)[0m Train : time 50.30 s Epoch 1
[36m(ClientAppActor pid=56481)[0m Generator Loss: 1.6881 Discriminator Loss: 20.1837
[36m(ClientAppActor pid=56481)[0m Train : time 48.57 s Epoch 2
[36m(ClientAppActor pid=56481)[0m Generator Loss: 1.5112 Discriminator Loss: 20.4733
[36m(ClientAppActor pid=56481)[0m Train : time 48.07 s Epoch 3
[36m(ClientAppActor pid=56481)

[92mINFO [0m:      aggregate_fit: received 4 results and 0 failures


[36m(ClientAppActor pid=56481)[0m -----------mse_loss mean :  0.0153 std: 0.1953
Round 3: Aggregated threshold = 0.2284
[36m(ClientAppActor pid=56481)[0m Val: Accuracy: 0.9953  
Counts of : original binary labels Counter({0: 156058, 1: 65271}) predicted binary labels Counter({0: 155265, 1: 66064})
Counts of  original  labels: {0: 156058, 1: 65233, 2: 1, 3: 1, 4: 1, 5: 2, 6: 32, 7: 1}
Counts of misclassified original labels: {0: 958, 1: 165}
{0: 'threshold=0.2284 ,Test : Accuracy: 0.9949 Recall : 0.9975 FDR: 0.0145  F1-score: 0.9914 '}
model AdversarialAutoencoder is saved in fed_best_models/FED_AVG_AdversarialAutoencoder_f1_0.99_recall_1.00_.pth


[92mINFO [0m:      fit progress: (3, 1.402052346280876, {0: 'threshold=0.2284 ,Test : Accuracy: 0.9949 Recall : 0.9975 FDR: 0.0145  F1-score: 0.9914 '}, 2393.3482871779997)
[92mINFO [0m:      configure_evaluate: no clients selected, skipping evaluation
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 4]
[92mINFO [0m:      configure_fit: strategy sampled 4 clients (out of 4)


[36m(ClientAppActor pid=56481)[0m Train : time 51.26 s Epoch 1
[36m(ClientAppActor pid=56481)[0m Generator Loss: 1.3156 Discriminator Loss: 18.8691
[36m(ClientAppActor pid=56481)[0m Train : time 47.56 s Epoch 2
[36m(ClientAppActor pid=56481)[0m Generator Loss: 1.2424 Discriminator Loss: 16.6868
[36m(ClientAppActor pid=56481)[0m Train : time 48.07 s Epoch 3
[36m(ClientAppActor pid=56481)[0m Generator Loss: 1.1202 Discriminator Loss: 14.9636
[36m(ClientAppActor pid=56481)[0m -----------mse_loss mean :  0.0161 std: 0.2090
[36m(ClientAppActor pid=56481)[0m Val: Accuracy: 0.9947  
[36m(ClientAppActor pid=56481)[0m Train : time 55.63 s Epoch 1
[36m(ClientAppActor pid=56481)[0m Generator Loss: 1.2754 Discriminator Loss: 18.8376
[36m(ClientAppActor pid=56481)[0m Train : time 57.20 s Epoch 2
[36m(ClientAppActor pid=56481)[0m Generator Loss: 1.2100 Discriminator Loss: 19.1915
[36m(ClientAppActor pid=56481)[0m Train : time 55.54 s Epoch 3
[36m(ClientAppActor pid=56481)

[92mINFO [0m:      aggregate_fit: received 4 results and 0 failures


Round 4: Aggregated threshold = 0.2115
[36m(ClientAppActor pid=56481)[0m -----------mse_loss mean :  0.0143 std: 0.1974
[36m(ClientAppActor pid=56481)[0m Val: Accuracy: 0.9957  
Counts of : original binary labels Counter({0: 156058, 1: 65271}) predicted binary labels Counter({0: 155306, 1: 66023})
Counts of  original  labels: {0: 156058, 1: 65233, 2: 1, 3: 1, 4: 1, 5: 2, 6: 32, 7: 1}
Counts of misclassified original labels: {0: 917, 1: 165}
{0: 'threshold=0.2115 ,Test : Accuracy: 0.9951 Recall : 0.9975 FDR: 0.0139  F1-score: 0.9918 '}


[92mINFO [0m:      fit progress: (4, 1.2725523993692647, {0: 'threshold=0.2115 ,Test : Accuracy: 0.9951 Recall : 0.9975 FDR: 0.0139  F1-score: 0.9918 '}, 3210.563227941002)
[92mINFO [0m:      configure_evaluate: no clients selected, skipping evaluation
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 5]
[92mINFO [0m:      configure_fit: strategy sampled 4 clients (out of 4)


model AdversarialAutoencoder is saved in fed_best_models/FED_AVG_AdversarialAutoencoder_f1_0.99_recall_1.00_.pth
[36m(ClientAppActor pid=56481)[0m Train : time 65.79 s Epoch 1
[36m(ClientAppActor pid=56481)[0m Generator Loss: 1.1213 Discriminator Loss: 13.2397
[36m(ClientAppActor pid=56481)[0m Train : time 69.05 s Epoch 2
[36m(ClientAppActor pid=56481)[0m Generator Loss: 1.0951 Discriminator Loss: 15.4672
[36m(ClientAppActor pid=56481)[0m Train : time 65.02 s Epoch 3
[36m(ClientAppActor pid=56481)[0m Generator Loss: 1.1059 Discriminator Loss: 13.5196
[36m(ClientAppActor pid=56481)[0m -----------mse_loss mean :  0.0138 std: 0.2017
[36m(ClientAppActor pid=56481)[0m Val: Accuracy: 0.9954  
[36m(ClientAppActor pid=56481)[0m Train : time 49.37 s Epoch 1
[36m(ClientAppActor pid=56481)[0m Generator Loss: 1.0964 Discriminator Loss: 13.4367
[36m(ClientAppActor pid=56481)[0m Train : time 51.22 s Epoch 2
[36m(ClientAppActor pid=56481)[0m Generator Loss: 1.0596 Discriminato

[92mINFO [0m:      aggregate_fit: received 4 results and 0 failures


Round 5: Aggregated threshold = 0.2121
[36m(ClientAppActor pid=56481)[0m -----------mse_loss mean :  0.0130 std: 0.1916
[36m(ClientAppActor pid=56481)[0m Val: Accuracy: 0.9962  
Counts of : original binary labels Counter({0: 156058, 1: 65271}) predicted binary labels Counter({0: 155294, 1: 66035})
Counts of  original  labels: {0: 156058, 1: 65233, 2: 1, 3: 1, 4: 1, 5: 2, 6: 32, 7: 1}
Counts of misclassified original labels: {0: 929, 1: 165}
{0: 'threshold=0.2121 ,Test : Accuracy: 0.9951 Recall : 0.9975 FDR: 0.0141  F1-score: 0.9917 '}


[92mINFO [0m:      fit progress: (5, 1.2696438333883042, {0: 'threshold=0.2121 ,Test : Accuracy: 0.9951 Recall : 0.9975 FDR: 0.0141  F1-score: 0.9917 '}, 4013.688750272002)
[92mINFO [0m:      configure_evaluate: no clients selected, skipping evaluation
[92mINFO [0m:      
[92mINFO [0m:      [SUMMARY]
[92mINFO [0m:      Run finished 5 round(s) in 4013.69s
[92mINFO [0m:      	History (loss, centralized):
[92mINFO [0m:      		round 0: 17.18490911719657
[92mINFO [0m:      		round 1: 0.5044641193088117
[92mINFO [0m:      		round 2: 0.5094359669315814
[92mINFO [0m:      		round 3: 1.402052346280876
[92mINFO [0m:      		round 4: 1.2725523993692647
[92mINFO [0m:      		round 5: 1.2696438333883042
[92mINFO [0m:      	History (metrics, centralized):
[92mINFO [0m:      	{0: [(0,
[92mINFO [0m:      	      'threshold=0.0000 ,Test : Accuracy: 0.2949 Recall : 1.0000 FDR: 0.7051  '
[92mINFO [0m:      	      'F1-score: 0.4555 '),
[92mINFO [0m:      	     (1,
[92mINFO 

Federated learning simulation finished.


In [None]:
# Instantiate the configuration
cfg.STRATEGY="FED_PROX"
cfg.MODEL_NAME="AAE"
strategy=set_server_strategy()

# --- Start the Simulation ---
print("Starting federated learning simulation...")
history = fl.simulation.start_simulation(
    client_fn=client_function,
    num_clients=cfg.NUM_TRAIN_CLIENTS,
    config=fl.server.ServerConfig(num_rounds=cfg.NUM_ROUNDS),
    strategy=strategy,
    client_resources={"num_cpus": 1, "num_gpus": 1} if DEVICE.type == "cuda" else {"num_cpus": 1},
)
print("Federated learning simulation finished.")

	Instead, use the `flwr run` CLI command to start a local simulation in your Flower app, as shown for example below:

		$ flwr new  # Create a new Flower app from a template

		$ flwr run  # Run the Flower app in Simulation Mode

	Using `start_simulation()` is deprecated.

            This is a deprecated feature. It will be removed
            entirely in future versions of Flower.
        
[92mINFO [0m:      Starting Flower simulation, config: num_rounds=5, no round_timeout


Using FedProx strategy with AAE model.
Starting federated learning simulation...


2025-07-27 09:25:23,522	INFO worker.py:1771 -- Started a local Ray instance.
[92mINFO [0m:      Flower VCE: Ray initialized with resources: {'accelerator_type:G': 1.0, 'node:__internal_head__': 1.0, 'CPU': 2.0, 'memory': 3270986958.0, 'object_store_memory': 1635493478.0, 'GPU': 1.0, 'node:172.24.78.91': 1.0}
[92mINFO [0m:      Optimize your simulation with Flower VCE: https://flower.ai/docs/framework/how-to-run-simulations.html
[92mINFO [0m:      Flower VCE: Resources for each Virtual Client: {'num_cpus': 1, 'num_gpus': 1}
[92mINFO [0m:      Flower VCE: Creating VirtualClientEngineActorPool with 1 actors
[92mINFO [0m:      [INIT]
[92mINFO [0m:      Using initial global parameters provided by strategy
[92mINFO [0m:      Starting evaluation of initial global parameters


Counts of : original binary labels Counter({0: 156058, 1: 65271}) predicted binary labels Counter({1: 221329})
Counts of  original  labels: {0: 156058, 1: 65233, 2: 1, 3: 1, 4: 1, 5: 2, 6: 32, 7: 1}
Counts of misclassified original labels: {0: 156058}
{0: 'threshold=0.0000 ,Test : Accuracy: 0.2949 Recall : 1.0000 FDR: 0.7051  F1-score: 0.4555 '}


[92mINFO [0m:      initial parameters (loss, other metrics): 17.891992237799837, {0: 'threshold=0.0000 ,Test : Accuracy: 0.2949 Recall : 1.0000 FDR: 0.7051  F1-score: 0.4555 '}
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 1]
[92mINFO [0m:      configure_fit: strategy sampled 4 clients (out of 4)


[36m(ClientAppActor pid=139326)[0m Train : time 90.41 s Epoch 1
[36m(ClientAppActor pid=139326)[0m Generator Loss: 140.9080 Discriminator Loss: 7.8880
[36m(ClientAppActor pid=139326)[0m Train : time 90.33 s Epoch 2
[36m(ClientAppActor pid=139326)[0m Generator Loss: 8.0050 Discriminator Loss: 0.3683
[36m(ClientAppActor pid=139326)[0m Train : time 92.05 s Epoch 3
[36m(ClientAppActor pid=139326)[0m Generator Loss: 6.0133 Discriminator Loss: 0.7436
[36m(ClientAppActor pid=139326)[0m -----------mse_loss mean :  0.0345 std: 0.2231
[36m(ClientAppActor pid=139326)[0m Val: Accuracy: 0.9953  
[36m(ClientAppActor pid=139326)[0m Train : time 112.40 s Epoch 1
[36m(ClientAppActor pid=139326)[0m Generator Loss: 123.7736 Discriminator Loss: 6.5872
[36m(ClientAppActor pid=139326)[0m Train : time 106.42 s Epoch 2
[36m(ClientAppActor pid=139326)[0m Generator Loss: 7.3452 Discriminator Loss: 0.5118
[36m(ClientAppActor pid=139326)[0m Train : time 109.92 s Epoch 3
[36m(ClientAppAc

[92mINFO [0m:      aggregate_fit: received 4 results and 0 failures


[36m(ClientAppActor pid=139326)[0m -----------mse_loss mean :  0.0471 std: 0.2836
Round 1: Aggregated threshold = 0.2762
[36m(ClientAppActor pid=139326)[0m Val: Accuracy: 0.9932  
Counts of : original binary labels Counter({0: 156058, 1: 65271}) predicted binary labels Counter({0: 155275, 1: 66054})
Counts of  original  labels: {0: 156058, 1: 65233, 2: 1, 3: 1, 4: 1, 5: 2, 6: 32, 7: 1}
Counts of misclassified original labels: {0: 951, 1: 168}
{0: 'threshold=0.2762 ,Test : Accuracy: 0.9949 Recall : 0.9974 FDR: 0.0144  F1-score: 0.9915 '}


[92mINFO [0m:      fit progress: (1, 0.48734678493328937, {0: 'threshold=0.2762 ,Test : Accuracy: 0.9949 Recall : 0.9974 FDR: 0.0144  F1-score: 0.9915 '}, 1304.0023682550018)
[92mINFO [0m:      configure_evaluate: no clients selected, skipping evaluation
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 2]
[92mINFO [0m:      configure_fit: strategy sampled 4 clients (out of 4)


model AdversarialAutoencoder is saved in fed_best_models/FED_PROX_AdversarialAutoencoder_f1_0.99_recall_1.00_.pth
[36m(ClientAppActor pid=139326)[0m Train : time 84.67 s Epoch 1
[36m(ClientAppActor pid=139326)[0m Generator Loss: 4.1141 Discriminator Loss: 0.9664
[36m(ClientAppActor pid=139326)[0m Train : time 88.04 s Epoch 2
[36m(ClientAppActor pid=139326)[0m Generator Loss: 4.0689 Discriminator Loss: 1.5014
[36m(ClientAppActor pid=139326)[0m Train : time 95.11 s Epoch 3
[36m(ClientAppActor pid=139326)[0m Generator Loss: 3.5488 Discriminator Loss: 2.5702
[36m(ClientAppActor pid=139326)[0m -----------mse_loss mean :  0.0196 std: 0.1664
[36m(ClientAppActor pid=139326)[0m Val: Accuracy: 0.9931  
[36m(ClientAppActor pid=139326)[0m Train : time 119.94 s Epoch 1
[36m(ClientAppActor pid=139326)[0m Generator Loss: 4.1929 Discriminator Loss: 1.0361
[36m(ClientAppActor pid=139326)[0m Train : time 119.88 s Epoch 2
[36m(ClientAppActor pid=139326)[0m Generator Loss: 4.1083 D

KeyboardInterrupt: 

[36m(ClientAppActor pid=139326)[0m Train : time 119.33 s Epoch 3
[36m(ClientAppActor pid=139326)[0m Generator Loss: 3.0342 Discriminator Loss: 2.9670
[36m(ClientAppActor pid=139326)[0m -----------mse_loss mean :  0.0159 std: 0.1333
[36m(ClientAppActor pid=139326)[0m Val: Accuracy: 0.9944  
[36m(ClientAppActor pid=139326)[0m Train : time 119.72 s Epoch 1
[36m(ClientAppActor pid=139326)[0m Generator Loss: 4.1789 Discriminator Loss: 1.0082
[36m(ClientAppActor pid=139326)[0m Train : time 108.03 s Epoch 2
[36m(ClientAppActor pid=139326)[0m Generator Loss: 4.0193 Discriminator Loss: 2.0101
[36m(ClientAppActor pid=139326)[0m Train : time 113.99 s Epoch 3
[36m(ClientAppActor pid=139326)[0m Generator Loss: 2.9522 Discriminator Loss: 2.7669
[36m(ClientAppActor pid=139326)[0m -----------mse_loss mean :  0.0166 std: 0.1413
[36m(ClientAppActor pid=139326)[0m Val: Accuracy: 0.9928  
[36m(ClientAppActor pid=139326)[0m Train : time 106.53 s Epoch 1
[36m(ClientAppActor pid=13

#### FedAVG evaluation

In [62]:
[col for sublist in SERVER_EVALUATION_DATA_MAPPING[:-1] for col in sublist]


['./ModbusDataset/benign/network-wide-pcap-capture/network-wide/ready/network-wide-normal-24-labeled.csv',
 './ModbusDataset/benign/network-wide-pcap-capture/network-wide/ready/network-wide-normal-28-labeled.csv',
 './ModbusDataset/benign/network-wide-pcap-capture/network-wide/ready/network-wide-normal-30-labeled.csv',
 './ModbusDataset/benign/network-wide-pcap-capture/network-wide/ready/network-wide-normal-22-labeled.csv']

In [None]:
dataset_directory = "./ModbusDataset/" 

modbus = ModbusDataset(dataset_directory,"ready")
modbus.summary_print()

Trained_AE_model=AE(input_dim=76)
Trained_AE_model.load_state_dict(torch.load("./fed_best_models/FED_AVG_AE_f1_0.99_recall_1.00_.pth"))
Trained_VAE_model=VAE(input_dim=76)
Trained_VAE_model.load_state_dict(torch.load("./fed_best_models/FED_AVG_VAE_f1_0.88_recall_1.00_.pth"))
Trained_AAE_model=AdversarialAutoencoder()
Trained_AAE_model.load_state_dict(torch.load("./fed_best_models/FED_AVG_AdversarialAutoencoder_f1_0.99_recall_1.00_.pth"))
val_files=[col for sublist in SERVER_EVALUATION_DATA_MAPPING[:-1] for col in sublist]


for scenario in {"compromised-ied","external","compromised-scada"}:
    if scenario=="compromised-scada":
        print("scenario :",scenario)
        dataset_directory = "./ModbusDataset" 
        test_files= [col for col in modbus.dataset["attack_dataset_dir"][scenario] if col.find("ied1b")!=-1]
        ### missed attack logs for the day 21 for ied1b which can reduce the accuracy.
        test_files.remove(dataset_directory+"attack/compromised-scada/ied1b/ied1b-network-captures/ready/vethc76bd3f-6-labeled.csv")    

    elif scenario=="compromised-ied":
        print("scenario :",scenario)
        test_files= [col for col in modbus.dataset["attack_dataset_dir"][scenario] if col.find("trust-scada-hmi")!=-1]
    else:
        print("scenario :",scenario)
        test_files= [col for col in modbus.dataset["attack_dataset_dir"][scenario] if col.find("network-wide")!=-1]        

    print("----------- benign valid files:",len(val_files),val_files)
    print(f"----------{scenario} attack  test files : ",len(test_files),test_files)
    val_dataloader=DataLoader(ModbusFlowStream(
                shuffle=False,
                chunk_size=1,
                batch_size=64,
                csv_files=val_files,
                scalers=loaded_scalers['network-wide']['min_max_scalers'],
            ),batch_size=1,shuffle=False)
    test_dataloader=DataLoader(ModbusFlowStream(
                shuffle=False,
                chunk_size=1,
                batch_size=64,
                csv_files=test_files,
                scalers=loaded_scalers['network-wide']['min_max_scalers'],
            ),batch_size=1,shuffle=False)
    for trained_model in {Trained_AE_model,Trained_VAE_model,Trained_AAE_model}:
        print("*"*10,trained_model._get_name(),10*"*")
        train_eval(trained_model,None,val_dataloader,test_dataloader,shuffle_files=False,num_epochs=1,eval_epoch=1,criterion_method="mse",train_model=False,learning_rates=[0],weight_decays=[0])
        

 The CIC Modbus Dataset contains network (pcap) captures and attack logs from a simulated substation network.
                The dataset is categorized into two groups: an attack dataset and a benign dataset
                The attack dataset includes network traffic captures that simulate various types of Modbus protocol attacks in a substation environment.
                The attacks are reconnaissance, query flooding, loading payloads, delay response, modify length parameters, false data injection, stacking Modbus frames, brute force write and baseline replay.
                These attacks are based of some techniques in the MITRE ICS ATT&CK framework.
                On the other hand, the benign dataset consists of normal network traffic captures representing legitimate Modbus communication within the substation network.
                The purpose of this dataset is to facilitate research, analysis, and development of intrusion detection systems, anomaly detection algorithms and