In [None]:
import copy
import os
import pickle
import random

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import torch
from sklearn.metrics import confusion_matrix, f1_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, average_precision_score, confusion_matrix
from sklearn.model_selection import train_test_split
from torchvision import datasets, transforms
from tqdm import tqdm

# For reproducibility: seed every RNG this notebook draws from. Seeding NumPy
# alone is not enough -- weight initialisation and dropout masks come from
# torch, and per-round client selection comes from the stdlib `random` module.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)
torch.manual_seed(SEED)

# Device configuration (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

learning_rates = [0.005, 0.004, 0.003, 0.002, 0.001]
batch_size = 64

epochs = 5  # Number of local training epochs per client per round
num_clients = 10  # Number of clients in federated learning
federated_rounds = 3  # Number of rounds of federated training
selection_factor = 0.5  # Fraction of clients selected per round (a probability, not a count)

In [None]:
# Every image is converted to a tensor by the same transform.
transform = transforms.ToTensor()

# Build the training split first, then the testing split; both are downloaded
# into ./data when they are not already present there.
train_dataset, test_dataset = (
    datasets.FashionMNIST(root='./data', train=is_train, transform=transform, download=True)
    for is_train in (True, False)
)


Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26.4M/26.4M [00:01<00:00, 18.7MB/s]


Extracting ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29.5k/29.5k [00:00<00:00, 312kB/s]


Extracting ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4.42M/4.42M [00:00<00:00, 5.51MB/s]


Extracting ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5.15k/5.15k [00:00<00:00, 6.47MB/s]

Extracting ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw






In [None]:
# Convert the datasets to numpy arrays for further processing
def convert_to_numpy(dataset):
    """Materialise an iterable of ``(image, label)`` pairs as two NumPy arrays.

    Each image tensor is moved to the CPU and flattened to one dimension
    (a 28x28 image becomes a vector of 784 values).

    Returns:
        ``(X, y)`` where ``X`` stacks the flattened images row by row and
        ``y`` holds the labels in the same order.
    """
    pairs = [(img.cpu().numpy().flatten(), label) for img, label in dataset]
    flat_images = [flat for flat, _ in pairs]
    labels = [label for _, label in pairs]
    return np.array(flat_images), np.array(labels)

In [None]:
def format_data(train_dataset,test_dataset):
    """Turn the two datasets into NumPy arrays and carve out a validation split.

    The training data is split 80/20 into train/validation with a fixed
    ``random_state`` of 42; the test data is converted as-is.

    Returns:
        ``(X_train, X_val, y_train, y_val, X_test, y_test)``.
    """
    all_train_features, all_train_labels = convert_to_numpy(train_dataset)
    X_test, y_test = convert_to_numpy(test_dataset)

    X_train, X_val, y_train, y_val = train_test_split(
        all_train_features, all_train_labels, test_size=0.2, random_state=42
    )
    return X_train, X_val, y_train, y_val, X_test, y_test


In [None]:
def dataset_split(dataset):
    """Return ``(features, labels)`` for ``dataset`` via ``convert_to_numpy``."""
    features, labels = convert_to_numpy(dataset)
    return features, labels

In [None]:
# Materialise the train / validation / test arrays once at notebook level.
X_train, X_val, y_train, y_val,X_test, y_test = format_data(train_dataset,test_dataset)

# Sanity check: report the shape of every split and peek at the first ten test labels.
print(f'Train Data Shape: {X_train.shape}, Train Labels Shape: {y_train.shape}')
print(f'Validation Data Shape: {X_val.shape}, Validation Labels Shape: {y_val.shape}')
print(f'Test Data Shape: {X_test.shape}, Test Labels Shape: {y_test.shape}')
print(y_test[:10])

Train Data Shape: (48000, 784), Train Labels Shape: (48000,)
Validation Data Shape: (12000, 784), Validation Labels Shape: (12000,)
Test Data Shape: (10000, 784), Test Labels Shape: (10000,)
[9 2 1 1 6 1 4 6 5 7]


In [None]:
class Dense:
    """Fully connected layer that carries its own Adam optimiser state.

    ``forward`` computes ``X @ weights + bias``. ``backward`` computes the
    parameter gradients, applies one Adam step to the layer in place, and
    returns the gradient with respect to the layer input.

    Args:
        input_size: Number of input features.
        output_size: Number of output features.
        lr: Adam step size.
        beta1: Decay rate of the first-moment (mean) estimate.
        beta2: Decay rate of the second-moment (uncentred variance) estimate.
        epsilon: Constant added to the denominator for numerical stability.
        device: Device on which the parameters and optimiser state are created.
    """

    def __init__(self, input_size, output_size, lr=0.005, beta1=0.9, beta2=0.999, epsilon=1e-8, device='cpu'):
        self.device = device
        # Standard-normal weights scaled by sqrt(2 / input_size) (He-style initialisation).
        self.weights = torch.randn(input_size, output_size, device=device) * np.sqrt(2.0 / input_size)
        self.bias = torch.zeros(1, output_size, device=device)
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.m_w, self.v_w = torch.zeros_like(self.weights), torch.zeros_like(self.weights)
        self.m_b, self.v_b = torch.zeros_like(self.bias), torch.zeros_like(self.bias)
        self.t = 0  # Adam time step

    def forward(self, X):
        """Cache ``X`` for the backward pass and return ``X @ weights + bias``."""
        self.input = X
        return torch.mm(X, self.weights) + self.bias  # Matrix multiplication (for GPU support)

    def backward(self, d_output):
        """Apply one Adam update and return the gradient w.r.t. the layer input.

        Args:
            d_output: Gradient of the loss w.r.t. this layer's output.

        Returns:
            Gradient of the loss w.r.t. the input cached by ``forward``.
        """
        d_weights = torch.mm(self.input.T, d_output)
        d_bias = torch.sum(d_output, dim=0, keepdim=True)

        # The input gradient must be taken with the weights that produced the
        # forward output. The update below mutates self.weights in place, so a
        # plain reference to it would silently see the *updated* values;
        # compute the result before touching the parameters.
        d_input = torch.mm(d_output, self.weights.T)

        # Update Adam time step
        self.t += 1

        # Moving averages of gradients for weights
        self.m_w = self.beta1 * self.m_w + (1 - self.beta1) * d_weights
        self.v_w = self.beta2 * self.v_w + (1 - self.beta2) * (d_weights ** 2)

        # Moving averages of gradients for biases
        self.m_b = self.beta1 * self.m_b + (1 - self.beta1) * d_bias
        self.v_b = self.beta2 * self.v_b + (1 - self.beta2) * (d_bias ** 2)

        # Bias correction
        first_correction = 1 - self.beta1 ** self.t
        second_correction = 1 - self.beta2 ** self.t
        m_w_hat = self.m_w / first_correction
        v_w_hat = self.v_w / second_correction
        m_b_hat = self.m_b / first_correction
        v_b_hat = self.v_b / second_correction

        self.weights -= self.lr * m_w_hat / (torch.sqrt(v_w_hat) + self.epsilon)
        self.bias -= self.lr * m_b_hat / (torch.sqrt(v_b_hat) + self.epsilon)

        # Return the gradient for the previous layer
        return d_input


class ReLU:
    """Rectified linear unit: passes positive values, zeroes everything else."""

    def forward(self, X):
        """Remember ``X`` for the backward pass and return ``max(0, X)`` element-wise."""
        self.input = X
        zero = torch.tensor(0.0, device=X.device)
        return torch.maximum(zero, X)

    def backward(self, d_output):
        """Let the gradient through only where the cached input was strictly positive."""
        positive = self.input > 0
        return d_output * positive


In [None]:
class Dropout:
    """Inverted dropout.

    During training each activation is zeroed with probability ``rate`` and the
    survivors are scaled by ``1 / (1 - rate)``. In evaluation mode the layer is
    the identity.

    Args:
        rate: Probability of dropping an activation; must satisfy ``0 <= rate < 1``.

    Raises:
        ValueError: If ``rate`` is outside ``[0, 1)`` (``rate == 1`` would
            divide by zero when rescaling).
    """

    def __init__(self, rate=0.2):
        if not 0.0 <= rate < 1.0:
            raise ValueError(f"Dropout rate must be in [0, 1), got {rate}")
        self.rate = rate
        # Mask of the most recent training-mode forward pass; None means the
        # last forward pass was the identity (or none has happened yet).
        self.mask = None

    def forward(self, X, training=True):
        """Apply dropout when ``training`` is true, otherwise return ``X`` unchanged."""
        if not training:
            # Forget any earlier mask so backward() cannot apply a stale one.
            self.mask = None
            return X
        keep = torch.rand(*X.shape, device=X.device) > self.rate
        self.mask = keep / (1 - self.rate)
        return X * self.mask

    def backward(self, d_output):
        """Route the gradient through the mask of the last forward pass.

        If the last forward pass ran in evaluation mode (identity), the
        gradient is passed through unchanged.
        """
        if self.mask is None:
            return d_output
        return d_output * self.mask

In [None]:
class BatchNorm:
    """Batch normalisation over the batch dimension with learnable scale and shift.

    In training mode the input is normalised with the statistics of the current
    batch and the running statistics are updated. In evaluation mode the
    running statistics are used instead. ``backward`` updates ``gamma`` and
    ``beta`` with a plain gradient step of size ``lr``.

    Args:
        input_dim: Number of features (size of the last dimension of the input).
        lr: Step size for the ``gamma`` / ``beta`` updates.
        epsilon: Constant added to the variance for numerical stability.
        momentum: Weight of the previous running statistic in the moving average.
        device: Device on which parameters and running statistics are created.
    """

    def __init__(self, input_dim, lr=0.005, epsilon=1e-5, momentum=0.9, device='cpu'):
        self.device = device
        self.lr = lr
        self.epsilon = epsilon
        self.momentum = momentum
        self.gamma = torch.ones((1, input_dim), device=device)
        self.beta = torch.zeros((1, input_dim), device=device)
        self.running_mean = torch.zeros((1, input_dim), device=device)
        self.running_var = torch.ones((1, input_dim), device=device)

    def forward(self, X, training=True):
        """Normalise ``X`` and apply the learned scale and shift.

        Args:
            X: Input of shape ``(n_samples, input_dim)``.
            training: Use batch statistics (and update the running ones) when
                true; use the running statistics when false.
        """
        if training:
            n_samples = X.shape[0]
            batch_mean = torch.mean(X, dim=0, keepdim=True)
            self.X_centered = X - batch_mean
            # Normalise with the *biased* variance (divide by n). The gradient
            # in backward() is derived for exactly this estimator, and unlike
            # the unbiased one it stays finite for a batch of a single sample.
            batch_var = torch.mean(self.X_centered ** 2, dim=0, keepdim=True)

            self.stddev_inv = 1. / torch.sqrt(batch_var + self.epsilon)
            X_norm = self.X_centered * self.stddev_inv

            out = self.gamma * X_norm + self.beta  # to give the model flexibility to cancel out normalization

            # The running variance tracks the unbiased estimate whenever it is
            # defined (n > 1); with one sample fall back to the biased value.
            if n_samples > 1:
                var_for_running = batch_var * (n_samples / (n_samples - 1))
            else:
                var_for_running = batch_var
            self.running_mean = self.momentum * self.running_mean + (1 - self.momentum) * batch_mean
            self.running_var = self.momentum * self.running_var + (1 - self.momentum) * var_for_running
        else:
            X_norm = (X - self.running_mean) / torch.sqrt(self.running_var + self.epsilon)
            out = self.gamma * X_norm + self.beta

        return out

    def backward(self, d_output):
        """Update ``gamma`` / ``beta`` and return the gradient w.r.t. the input.

        Relies on the values cached by the most recent training-mode ``forward``.
        """
        n_samples = d_output.shape[0]

        d_gamma = torch.sum(d_output * self.X_centered * self.stddev_inv, dim=0, keepdim=True)
        d_beta = torch.sum(d_output, dim=0, keepdim=True)

        # Chain rule through X_norm = X_centered * stddev_inv, using the
        # pre-update gamma.
        d_X_norm = d_output * self.gamma
        d_var = torch.sum(d_X_norm * self.X_centered * -0.5 * self.stddev_inv**3, dim=0)
        d_mean = torch.sum(d_X_norm * -self.stddev_inv, dim=0) + d_var * torch.mean(-2. * self.X_centered, dim=0)

        d_input = d_X_norm * self.stddev_inv + d_var * 2 * self.X_centered / n_samples + d_mean / n_samples
        self.gamma -= self.lr * d_gamma
        self.beta -= self.lr * d_beta

        return d_input



In [None]:
class Softmax:
    """Row-wise softmax plus the combined softmax / cross-entropy gradient."""

    def forward(self, x):
        """Return the softmax of every row of ``x``.

        The row maximum is subtracted before exponentiating, and a small
        constant is added to the normaliser.
        """
        shifted = x - x.max(dim=1, keepdim=True).values
        numerators = shifted.exp()
        normaliser = numerators.sum(dim=1, keepdim=True) + 1e-10
        return numerators / normaliser

    def backward(self, output, y):
        """Return ``(output - one_hot(y)) / n_samples`` without modifying ``output``.

        Args:
            output: Probabilities produced by ``forward``.
            y: Integer class label of every row.
        """
        batch = y.shape[0]
        shifted = output.clone()
        shifted[range(batch), y] -= 1
        return shifted / batch

def cross_entropy_loss(predictions, labels):
    """Mean negative log-probability assigned to the true class.

    Args:
        predictions: Per-row class probabilities.
        labels: Integer class label of every row.

    Returns:
        Scalar tensor; ``1e-15`` is added inside the log to avoid ``log(0)``.
    """
    batch = labels.shape[0]
    true_class_prob = predictions[range(batch), labels]
    negative_log_likelihood = -torch.log(true_class_prob + 1e-15)
    return torch.sum(negative_log_likelihood) / batch


In [None]:
class NeuralNetwork:
    """Feed-forward classifier assembled from the hand-written layers of this notebook.

    Architecture: Dense(input_dim, 128) -> BatchNorm -> ReLU -> Dropout ->
    Dense(128, 64) -> BatchNorm -> ReLU -> Dropout -> Dense(64, num_classes),
    followed by a softmax. Every layer updates its own parameters inside its
    ``backward`` method; there is no separate optimiser object.

    Args:
        input_dim: Number of input features per sample.
        num_classes: Number of output classes.
        lr: Learning rate handed to every trainable layer (Dense and BatchNorm).
        device: Device on which the layers are created and batches are placed.
    """

    def __init__(self, input_dim, num_classes, lr=0.001, device='cpu'):
        self.input_dim = input_dim
        self.num_classes= num_classes
        self.device = device
        self.softmax = Softmax()
        # BatchNorm receives the same lr as the Dense layers; without it the
        # BatchNorm layers would silently fall back to their own default and
        # ignore the value requested by the caller.
        self.layers = [
            Dense(input_dim, 128, lr, device=device),
            BatchNorm(128, lr=lr, device=device),
            ReLU(),
            Dropout(),
            Dense(128, 64, lr, device=device),
            BatchNorm(64, lr=lr, device=device),
            ReLU(),
            Dropout(),
            Dense(64, num_classes, lr, device=device)
        ]

    def forward(self, X, training=True):
        """Run ``X`` through every layer and return the class probabilities.

        ``training`` is forwarded only to the layers whose behaviour depends on
        it (BatchNorm and Dropout).
        """
        X = X.to(self.device)
        for layer in self.layers:
            if isinstance(layer, (BatchNorm, Dropout)):
                X = layer.forward(X, training=training)
            else:
                X = layer.forward(X)

        return self.softmax.forward(X)

    def backward(self, d_output):
        """Back-propagate ``d_output`` through the layers in reverse order."""
        for layer in reversed(self.layers):
            d_output = layer.backward(d_output)

    def train(self, X_train, y_train, epochs, batch_size):
        """Train with sequential mini-batches and report per-epoch metrics.

        Args:
            X_train: Training features, indexable by slice and exposing ``.shape``.
            y_train: Integer class labels aligned with ``X_train``.
            epochs: Number of passes over the training data.
            batch_size: Number of samples per mini-batch.

        Returns:
            ``(training_loss, training_acc)``: one mean batch loss and one
            training accuracy per epoch.
        """
        n_samples = X_train.shape[0]
        training_loss, training_acc = [], []

        for epoch in range(epochs):
            batch_losses = []
            for i in tqdm(range(0, n_samples, batch_size)):
                x_batch = torch.tensor(X_train[i:i+batch_size], dtype=torch.float32).to(self.device)
                y_batch = torch.tensor(y_train[i:i+batch_size], dtype=torch.long).to(self.device)

                # Forward pass
                output = self.forward(x_batch, training=True)

                # Loss calculation; store a plain float so no tensor is kept
                # alive for the rest of the epoch.
                loss = cross_entropy_loss(output, y_batch)
                batch_losses.append(float(loss))

                # Backward pass (each layer applies its own parameter update)
                d_output = self.softmax.backward(output, y_batch)
                self.backward(d_output)

            # Training loss & accuracy; accuracy is measured in evaluation mode
            # on the full training set after the epoch's updates.
            train_preds = self.predict(X_train)
            train_accuracy = accuracy_score(y_train, train_preds)
            training_loss.append(np.mean(batch_losses))
            training_acc.append(train_accuracy)

            print(f"Epoch {epoch+1}/{epochs}, Train Loss: {training_loss[-1]:.4f}, "
                  f"Train Acc: {train_accuracy:.4f}, ")

        return training_loss, training_acc

    def predict(self, X_test):
        """Return the predicted class index of every row of ``X_test`` as a NumPy array."""
        X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(self.device)
        output = self.forward(X_test_tensor, training=False)
        predictions = torch.argmax(output, dim=1).cpu().numpy()  # Move to CPU for numpy conversion
        return predictions

    def evaluate(self, validation_dataset, test_dataset):
        """Print the accuracy on a validation dataset and on a test dataset.

        Both arguments are datasets of ``(image, label)`` pairs; they are
        converted to arrays with ``dataset_split`` on every call.
        """
        X_val, y_val = dataset_split(validation_dataset)
        val_preds = self.predict(X_val)
        val_accuracy = accuracy_score(y_val, val_preds)

        X_test, y_test = dataset_split(test_dataset)
        test_preds = self.predict(X_test)
        test_accuracy = accuracy_score(y_test, test_preds)

        print( f"Val Acc: {val_accuracy:.4f}, Test Acc: {test_accuracy:.4f}")




In [None]:
def split_data(X, y, num_clients):
    """Cut ``X`` and ``y`` into ``num_clients`` contiguous, near-equal shards.

    Shard ``k`` covers indices ``k * len(X) // num_clients`` up to (but not
    including) ``(k + 1) * len(X) // num_clients``; the same bounds, derived
    from ``len(X)``, are applied to ``y``.

    Returns:
        ``(client_data, client_labels)``: two lists with one slice per client.
    """
    total = len(X)
    spans = [
        (k * total // num_clients, (k + 1) * total // num_clients)
        for k in range(num_clients)
    ]
    client_data = [X[lo:hi] for lo, hi in spans]
    client_labels = [y[lo:hi] for lo, hi in spans]
    return client_data, client_labels

In [None]:
# Contiguous, near-equal NumPy shards of the training split, one per client.
# NOTE(review): nothing else in the visible notebook reads client_data or
# client_labels -- federated_learning_simulation builds its own shards with
# random_split. Confirm whether this cell is still needed.
client_data, client_labels = split_data(X_train, y_train, num_clients)

In [None]:
class Krum:
    """Krum selection rule for Byzantine-robust aggregation.

    Each client update is scored by the sum of *squared* Euclidean distances to
    its ``n - f - 2`` nearest other updates; the update with the smallest score
    is selected.

    Args:
        localUpdates: Sequence of per-client dicts mapping parameter name to tensor.
            All dicts are expected to share the keys of the first one.
        n: Number of updates to consider (the first ``n`` entries are used).
        f: Number of Byzantine clients to tolerate.
    """

    def __init__(self, localUpdates, n, f=1):
        self.f = f
        self.n = n
        self.localUpdates = localUpdates
        self.distanceSet = []
        self.krum_scores = []
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def _flatten_updates(self):
        """Return an ``(n, total_params)`` float matrix with one row per client update."""
        if self.n <= 0 or len(self.localUpdates) < self.n:
            raise ValueError(
                f"Krum needs at least one update and n <= len(localUpdates); "
                f"got n={self.n} with {len(self.localUpdates)} update(s)"
            )
        keys = list(self.localUpdates[0].keys())
        # Work on flattened copies so the caller's dictionaries are not modified.
        return torch.stack([
            torch.cat([
                self.localUpdates[i][key].detach().to(self.device).reshape(-1).float()
                for key in keys
            ])
            for i in range(self.n)
        ], dim=0)

    def compute_Euclidian_Distances(self):
        """Fill ``distanceSet`` with the distances from each update to every other one.

        ``distanceSet[i]`` holds the ``n - 1`` Euclidean distances from update
        ``i`` to the other updates, in client order with ``i`` itself skipped.
        The list is rebuilt on every call.

        Raises:
            ValueError: If there are no updates or fewer than ``n`` of them.
        """
        flat = self._flatten_updates()
        self.distanceSet = []
        for i in range(self.n):
            row = torch.sqrt(((flat - flat[i]) ** 2).sum(dim=1))
            self.distanceSet.append([row[j] for j in range(self.n) if j != i])

    def krum_aggregation(self):
        """Return the index of the update with the lowest Krum score.

        Raises:
            ValueError: If there are no updates or fewer than ``n`` of them.
        """
        self.compute_Euclidian_Distances()

        if self.n == 1:
            # A single update has no neighbours; it is trivially the best one.
            self.krum_scores = [torch.zeros((), device=self.device)]
            return 0

        # distanceSet already excludes the update itself, so the neighbourhood
        # is n - f - 2 (not n - f - 1). When there are too few clients for that
        # to be positive, fall back to the single nearest neighbour.
        neighbours = min(max(self.n - self.f - 2, 1), self.n - 1)

        self.krum_scores = []
        for distances in self.distanceSet:
            squared = torch.stack(distances) ** 2
            closest, _ = torch.sort(squared)
            self.krum_scores.append(closest[:neighbours].sum())

        return torch.argmin(torch.stack(self.krum_scores)).item()

In [None]:
class Server:
    """Federated-learning server: tracks clients, aggregates updates, owns the global model.

    Args:
        model: Global model; its ``layers`` are overwritten with aggregated parameters.
        validation_dataset: Dataset passed to the model's ``evaluate`` as validation data.
        test_dataset: Dataset passed to the model's ``evaluate`` as test data.
        learning_rate: Stored on the server; not used by any method of this class.
        aggregation_method: ``"FedAvg"`` (element-wise mean) or ``"Krum"``.
        device: Device on which FedAvg results are placed.
    """

    def __init__(self ,model, validation_dataset, test_dataset, learning_rate = 0.01, aggregation_method = "FedAvg", device= "cuda"):
        self.global_model = model
        self.clientList = [] #List of all clients registered
        self.selectedClientList = [] #List of (client_id, client) pairs selected in the current round
        self.markedClientList = [] #List of clients marked vulnerable( Not yet used )
        self.learning_rate = learning_rate
        self.device = device
        self.aggregation_method = aggregation_method
        self.validation_dataset = validation_dataset
        self.test_dataset = test_dataset

    # Register Clients

    def register(self, clients):
        """Add one client object to the list of registered clients."""
        self.clientList.append(clients)
        print("Registering client")

    # Select Clients in each round

    def select(self, clientMap):
        """Record a ``(client_id, client)`` pair as selected for the current round."""
        client_id, client = clientMap
        self.selectedClientList.append(clientMap)
        print("Selecting client ", client_id)

    # Reset the server in each round

    def reset(self):
        """Forget the clients selected in the previous round."""
        self.selectedClientList = []

    # Aggregate updates

    def aggregate(self, localUpdates):
        """Combine the clients' parameter dictionaries into one.

        Args:
            localUpdates: Non-empty list of dicts mapping parameter name to tensor.

        Returns:
            A new dict of tensors that does not share storage with the inputs.

        Raises:
            ValueError: If ``localUpdates`` is empty or the aggregation method
                is not recognised.
        """
        if not localUpdates:
            raise ValueError("Cannot aggregate an empty list of client updates")

        print("Getting aggregated by server")

        if self.aggregation_method == 'FedAvg':
            return {
                key: torch.stack(
                    [update[key].detach().float().to(self.device) for update in localUpdates],
                    dim=0,
                ).mean(dim=0)
                for key in localUpdates[0].keys()
            }

        if self.aggregation_method == 'Krum':
            krum = Krum(localUpdates, len(localUpdates))
            best_client_index = krum.krum_aggregation()

            # Report the client id when the selection list lines up with the
            # updates; otherwise fall back to the position in localUpdates.
            if best_client_index < len(self.selectedClientList):
                best_client_id = self.selectedClientList[best_client_index][0]
            else:
                best_client_id = best_client_index
            print(f"Best client for aggregation: {best_client_id}")

            # Clone so the global model never aliases a client's tensors.
            return {
                key: value.detach().clone()
                for key, value in localUpdates[best_client_index].items()
            }

        raise ValueError(f"Unknown aggregation method: {self.aggregation_method!r}")

    # Update global model with aggregated parameters

    def update_global_model(self, localUpdates):
        """Aggregate ``localUpdates`` and write the result into the global model.

        When no updates were received the global model is left unchanged.
        """
        if not localUpdates:
            print("No client updates received; global model left unchanged")
            return

        aggregated_params = self.aggregate(localUpdates )

        # Layer by layer update weight to model self.global_model.
        for i, layer in enumerate(self.global_model.layers):
            if isinstance(layer, Dense):
                layer.weights = aggregated_params[f"layer_{i}_weights"]
                layer.bias = aggregated_params[f"layer_{i}_bias"]
            elif isinstance(layer, BatchNorm):
                layer.gamma = aggregated_params[f"layer_{i}_gamma"]
                layer.beta = aggregated_params[f"layer_{i}_beta"]
                layer.running_mean = aggregated_params[f"layer_{i}_running_mean"]
                # Each running statistic goes to its own attribute; writing the
                # variance into running_mean corrupts evaluation-mode outputs.
                layer.running_var = aggregated_params[f"layer_{i}_running_var"]

        print("Done Aggregating")

    def evaluate(self):
        """Evaluate the global model on the stored validation and test datasets."""
        self.global_model.evaluate(self.validation_dataset, self.test_dataset)

    # Distribute updated model to clients

    def distribute_global_model(self):
        """Hand the current global model to every registered client."""
        for client in self.clientList:
            client.receive_model(self.global_model)

In [None]:

# Federated Learning Client Class
class Client:
    """Federated-learning client holding a private model copy and a local data shard.

    Args:
        model: Model to start from; the client trains its own deep copy.
        client_train_data: Dataset of ``(image, label)`` pairs owned by this client.
        epochs: Number of local epochs per call to ``train``.
        batch_size: Mini-batch size used for local training.
        device: Stored on the client; not used by any method of this class.
    """

    def __init__(self, model, client_train_data, epochs, batch_size, device='cpu'):
        self.client_train_data=client_train_data
        # Private copy: local training must not modify the model the server
        # (or any other client) is holding.
        self.model = copy.deepcopy(model)
        # Keep the training configuration passed in instead of relying on
        # notebook-level globals of the same name.
        self.epochs = epochs
        self.batch_size = batch_size
        self.device = device

    def train(self):
        """Train the local model on this client's data.

        Returns:
            ``(train_loss, train_acc)`` as returned by the model's ``train``.
        """
        X_train, y_train = dataset_split(self.client_train_data)
        return self.model.train(X_train, y_train, self.epochs, self.batch_size)

    def get_parameters(self):
        """Return a snapshot of the local model's parameters.

        Keys follow ``layer_{i}_<name>``. Tensors are cloned because the layers
        update their parameters in place; a bare reference would keep changing
        after the snapshot was taken.
        """
        weights = {}
        for i, layer in enumerate(self.model.layers):
            if isinstance(layer, Dense):
                weights[f"layer_{i}_weights"] = layer.weights.detach().clone()
                weights[f"layer_{i}_bias"] = layer.bias.detach().clone()
            elif isinstance(layer, BatchNorm):
                weights[f"layer_{i}_gamma"] = layer.gamma.detach().clone()
                weights[f"layer_{i}_beta"] = layer.beta.detach().clone()
                weights[f"layer_{i}_running_mean"] = layer.running_mean.detach().clone()
                weights[f"layer_{i}_running_var"] = layer.running_var.detach().clone()

        return weights

    def receive_model(self,global_model):
        """Replace the local model with a private copy of ``global_model``."""
        self.model = copy.deepcopy(global_model)




In [None]:



# Main Federated Learning Process
def federated_learning_simulation(epochs, batch_size, learning_rates, num_clients, federated_rounds, selection_factor, train_dataset, test_dataset, aggregation_method="Krum", validation_dataset=None):
    """Simulate federated training of a ``NeuralNetwork`` across several clients.

    The training dataset is randomly partitioned into one shard per client.
    In every round a random subset of clients trains locally, the server
    aggregates their parameters into the global model, evaluates it, and
    redistributes it to all clients.

    Args:
        epochs: Local epochs per selected client per round.
        batch_size: Mini-batch size for local training.
        learning_rates: Sequence of learning rates; only the first is used.
        num_clients: Number of clients to create (at least 1).
        federated_rounds: Number of communication rounds.
        selection_factor: Probability with which each client joins a round.
        train_dataset: Dataset of ``(image, label)`` pairs to partition.
        test_dataset: Dataset used for the test accuracy.
        aggregation_method: Aggregation rule passed to the server.
        validation_dataset: Dataset used for the validation accuracy. When
            omitted, ``train_dataset`` is used, so the reported "Val Acc" is
            measured on data the clients trained on.

    Raises:
        ValueError: If ``num_clients`` is smaller than 1.
    """
    if num_clients < 1:
        raise ValueError(f"num_clients must be at least 1, got {num_clients}")

    # Spread any remainder over the first shards so the sizes always sum to the
    # dataset length, which random_split requires.
    base_size, remainder = divmod(len(train_dataset), num_clients)
    shard_sizes = [base_size + (1 if i < remainder else 0) for i in range(num_clients)]
    client_train_data = torch.utils.data.random_split(train_dataset, shard_sizes)

    if validation_dataset is None:
        validation_dataset = train_dataset

    global_model = NeuralNetwork(input_dim=784, num_classes=10, lr=learning_rates[0], device=device)

    server = Server(model=global_model, learning_rate = 0.01, aggregation_method = aggregation_method, validation_dataset = validation_dataset, test_dataset = test_dataset, device=device)

    for i in range(num_clients):
        client = Client(global_model, client_train_data[i], epochs, batch_size, device)
        server.register(client)

    for round_idx in range(federated_rounds):

        server.reset()

        # Each client joins independently with probability selection_factor.
        selected_ids = [i for i in range(num_clients) if random.random() < selection_factor]
        if not selected_ids:
            # Never run a round with nothing to aggregate.
            selected_ids = [random.randrange(num_clients)]

        for i in selected_ids:
            clientMap = i, server.clientList[i]
            server.select( clientMap )

        client_weights=[]

        for client_id,client in server.selectedClientList:

            print("Training started by client ", client_id )

            client.train()

            client_weights.append(client.get_parameters())

            print("Training completed by client ", client_id)

        server.update_global_model(client_weights)
        server.evaluate()
        server.distribute_global_model()



# Run the simulation with the notebook-level configuration; progress and
# accuracies are reported through the printed log.
federated_learning_simulation(epochs, batch_size, learning_rates, num_clients, federated_rounds, selection_factor, train_dataset, test_dataset )

Registering client
Registering client
Registering client
Registering client
Registering client
Registering client
Registering client
Registering client
Registering client
Registering client
Selecting client  1
Selecting client  2
Selecting client  5
Selecting client  6
Selecting client  8
Training started by client  1


100%|██████████| 94/94 [00:00<00:00, 155.30it/s]


Epoch 1/5, Train Loss: 0.8246, Train Acc: 0.8208, 


100%|██████████| 94/94 [00:00<00:00, 188.11it/s]


Epoch 2/5, Train Loss: 0.5665, Train Acc: 0.8320, 


100%|██████████| 94/94 [00:00<00:00, 205.80it/s]


Epoch 3/5, Train Loss: 0.4910, Train Acc: 0.8618, 


100%|██████████| 94/94 [00:00<00:00, 185.08it/s]


Epoch 4/5, Train Loss: 0.4457, Train Acc: 0.8453, 


100%|██████████| 94/94 [00:00<00:00, 198.44it/s]


Epoch 5/5, Train Loss: 0.4198, Train Acc: 0.8723, 
Training completed by client  1
Training started by client  2


100%|██████████| 94/94 [00:00<00:00, 143.00it/s]


Epoch 1/5, Train Loss: 0.4979, Train Acc: 0.8583, 


100%|██████████| 94/94 [00:00<00:00, 142.93it/s]


Epoch 2/5, Train Loss: 0.4302, Train Acc: 0.8737, 


100%|██████████| 94/94 [00:00<00:00, 131.76it/s]


Epoch 3/5, Train Loss: 0.3891, Train Acc: 0.8847, 


100%|██████████| 94/94 [00:00<00:00, 132.80it/s]


Epoch 4/5, Train Loss: 0.3595, Train Acc: 0.8958, 


100%|██████████| 94/94 [00:00<00:00, 128.69it/s]


Epoch 5/5, Train Loss: 0.3295, Train Acc: 0.9013, 
Training completed by client  2
Training started by client  5


100%|██████████| 94/94 [00:00<00:00, 201.78it/s]


Epoch 1/5, Train Loss: 0.4607, Train Acc: 0.8773, 


100%|██████████| 94/94 [00:00<00:00, 190.95it/s]


Epoch 2/5, Train Loss: 0.4059, Train Acc: 0.8818, 


100%|██████████| 94/94 [00:00<00:00, 199.02it/s]


Epoch 3/5, Train Loss: 0.3638, Train Acc: 0.8970, 


100%|██████████| 94/94 [00:00<00:00, 190.42it/s]


Epoch 4/5, Train Loss: 0.3371, Train Acc: 0.9025, 


100%|██████████| 94/94 [00:00<00:00, 206.15it/s]


Epoch 5/5, Train Loss: 0.3142, Train Acc: 0.9102, 
Training completed by client  5
Training started by client  6


100%|██████████| 94/94 [00:00<00:00, 197.01it/s]


Epoch 1/5, Train Loss: 0.4561, Train Acc: 0.8795, 


100%|██████████| 94/94 [00:00<00:00, 197.04it/s]


Epoch 2/5, Train Loss: 0.3868, Train Acc: 0.8912, 


100%|██████████| 94/94 [00:00<00:00, 186.36it/s]


Epoch 3/5, Train Loss: 0.3354, Train Acc: 0.9010, 


100%|██████████| 94/94 [00:00<00:00, 198.73it/s]


Epoch 4/5, Train Loss: 0.3147, Train Acc: 0.9003, 


100%|██████████| 94/94 [00:00<00:00, 194.88it/s]


Epoch 5/5, Train Loss: 0.3041, Train Acc: 0.9043, 
Training completed by client  6
Training started by client  8


100%|██████████| 94/94 [00:00<00:00, 189.78it/s]


Epoch 1/5, Train Loss: 0.4245, Train Acc: 0.8785, 


100%|██████████| 94/94 [00:00<00:00, 188.92it/s]


Epoch 2/5, Train Loss: 0.3518, Train Acc: 0.8962, 


100%|██████████| 94/94 [00:00<00:00, 187.99it/s]


Epoch 3/5, Train Loss: 0.3316, Train Acc: 0.8892, 


100%|██████████| 94/94 [00:00<00:00, 181.80it/s]


Epoch 4/5, Train Loss: 0.3053, Train Acc: 0.9047, 


100%|██████████| 94/94 [00:00<00:00, 134.82it/s]


Epoch 5/5, Train Loss: 0.2806, Train Acc: 0.9127, 
Training completed by client  8
Getting aggregated by server
Best client for aggregation: 5
Done Aggregating
Val Acc: 0.1000, Test Acc: 0.1000
Selecting client  0
Selecting client  5
Selecting client  8
Selecting client  9
Training started by client  0


100%|██████████| 94/94 [00:00<00:00, 161.36it/s]


Epoch 1/5, Train Loss: 0.4500, Train Acc: 0.8770, 


100%|██████████| 94/94 [00:00<00:00, 140.09it/s]


Epoch 2/5, Train Loss: 0.3599, Train Acc: 0.8932, 


100%|██████████| 94/94 [00:00<00:00, 129.75it/s]


Epoch 3/5, Train Loss: 0.3192, Train Acc: 0.9015, 


100%|██████████| 94/94 [00:00<00:00, 115.09it/s]


Epoch 4/5, Train Loss: 0.3129, Train Acc: 0.9033, 


100%|██████████| 94/94 [00:00<00:00, 110.95it/s]


Epoch 5/5, Train Loss: 0.2947, Train Acc: 0.9173, 
Training completed by client  0
Training started by client  5


100%|██████████| 94/94 [00:00<00:00, 122.41it/s]


Epoch 1/5, Train Loss: 0.3762, Train Acc: 0.9010, 


100%|██████████| 94/94 [00:00<00:00, 108.80it/s]


Epoch 2/5, Train Loss: 0.3110, Train Acc: 0.9123, 


100%|██████████| 94/94 [00:00<00:00, 154.89it/s]


Epoch 3/5, Train Loss: 0.2821, Train Acc: 0.9195, 


100%|██████████| 94/94 [00:00<00:00, 157.77it/s]


Epoch 4/5, Train Loss: 0.2661, Train Acc: 0.9260, 


100%|██████████| 94/94 [00:00<00:00, 167.45it/s]


Epoch 5/5, Train Loss: 0.2489, Train Acc: 0.9253, 
Training completed by client  5
Training started by client  8


100%|██████████| 94/94 [00:00<00:00, 174.57it/s]


Epoch 1/5, Train Loss: 0.3417, Train Acc: 0.9017, 


100%|██████████| 94/94 [00:00<00:00, 156.74it/s]


Epoch 2/5, Train Loss: 0.2945, Train Acc: 0.9143, 


100%|██████████| 94/94 [00:00<00:00, 170.69it/s]


Epoch 3/5, Train Loss: 0.2597, Train Acc: 0.9095, 


100%|██████████| 94/94 [00:00<00:00, 157.49it/s]


Epoch 4/5, Train Loss: 0.2416, Train Acc: 0.9252, 


100%|██████████| 94/94 [00:00<00:00, 157.84it/s]


Epoch 5/5, Train Loss: 0.2207, Train Acc: 0.9145, 
Training completed by client  8
Training started by client  9


100%|██████████| 94/94 [00:00<00:00, 137.58it/s]


Epoch 1/5, Train Loss: 0.4593, Train Acc: 0.8828, 


100%|██████████| 94/94 [00:00<00:00, 97.83it/s]


Epoch 2/5, Train Loss: 0.3691, Train Acc: 0.8875, 


100%|██████████| 94/94 [00:00<00:00, 119.32it/s]


Epoch 3/5, Train Loss: 0.3435, Train Acc: 0.8972, 


100%|██████████| 94/94 [00:00<00:00, 135.17it/s]


Epoch 4/5, Train Loss: 0.3104, Train Acc: 0.9037, 


100%|██████████| 94/94 [00:00<00:00, 140.07it/s]


Epoch 5/5, Train Loss: 0.2864, Train Acc: 0.9083, 
Training completed by client  9
Getting aggregated by server
Best client for aggregation: 8
Done Aggregating
Val Acc: 0.1000, Test Acc: 0.1000
Selecting client  0
Selecting client  1
Selecting client  4
Selecting client  8
Selecting client  9
Training started by client  0


100%|██████████| 94/94 [00:00<00:00, 191.66it/s]


Epoch 1/5, Train Loss: 0.3428, Train Acc: 0.9087, 


100%|██████████| 94/94 [00:00<00:00, 189.54it/s]


Epoch 2/5, Train Loss: 0.2929, Train Acc: 0.9182, 


100%|██████████| 94/94 [00:00<00:00, 190.35it/s]


Epoch 3/5, Train Loss: 0.2693, Train Acc: 0.9268, 


100%|██████████| 94/94 [00:00<00:00, 178.46it/s]


Epoch 4/5, Train Loss: 0.2470, Train Acc: 0.9268, 


100%|██████████| 94/94 [00:00<00:00, 203.94it/s]


Epoch 5/5, Train Loss: 0.2314, Train Acc: 0.9240, 
Training completed by client  0
Training started by client  1


100%|██████████| 94/94 [00:00<00:00, 196.18it/s]


Epoch 1/5, Train Loss: 0.4399, Train Acc: 0.8965, 


100%|██████████| 94/94 [00:00<00:00, 198.60it/s]


Epoch 2/5, Train Loss: 0.3551, Train Acc: 0.9035, 


100%|██████████| 94/94 [00:00<00:00, 191.93it/s]


Epoch 3/5, Train Loss: 0.3279, Train Acc: 0.9113, 


100%|██████████| 94/94 [00:00<00:00, 199.67it/s]


Epoch 4/5, Train Loss: 0.2972, Train Acc: 0.9177, 


100%|██████████| 94/94 [00:00<00:00, 190.03it/s]


Epoch 5/5, Train Loss: 0.2839, Train Acc: 0.9247, 
Training completed by client  1
Training started by client  4


100%|██████████| 94/94 [00:00<00:00, 204.43it/s]


Epoch 1/5, Train Loss: 0.4261, Train Acc: 0.8872, 


100%|██████████| 94/94 [00:00<00:00, 194.42it/s]


Epoch 2/5, Train Loss: 0.3588, Train Acc: 0.9018, 


100%|██████████| 94/94 [00:00<00:00, 203.57it/s]


Epoch 3/5, Train Loss: 0.3247, Train Acc: 0.9053, 


100%|██████████| 94/94 [00:00<00:00, 195.32it/s]


Epoch 4/5, Train Loss: 0.2992, Train Acc: 0.9105, 


100%|██████████| 94/94 [00:00<00:00, 167.45it/s]


Epoch 5/5, Train Loss: 0.2824, Train Acc: 0.9210, 
Training completed by client  4
Training started by client  8


100%|██████████| 94/94 [00:00<00:00, 138.05it/s]


Epoch 1/5, Train Loss: 0.2994, Train Acc: 0.9215, 


100%|██████████| 94/94 [00:00<00:00, 122.61it/s]


Epoch 2/5, Train Loss: 0.2596, Train Acc: 0.9215, 


100%|██████████| 94/94 [00:00<00:00, 138.90it/s]


Epoch 3/5, Train Loss: 0.2301, Train Acc: 0.9283, 


100%|██████████| 94/94 [00:00<00:00, 113.52it/s]


Epoch 4/5, Train Loss: 0.2145, Train Acc: 0.9332, 


100%|██████████| 94/94 [00:01<00:00, 88.12it/s]


Epoch 5/5, Train Loss: 0.2011, Train Acc: 0.9388, 
Training completed by client  8
Training started by client  9


100%|██████████| 94/94 [00:01<00:00, 60.65it/s]


Epoch 1/5, Train Loss: 0.3730, Train Acc: 0.9108, 


100%|██████████| 94/94 [00:01<00:00, 70.25it/s]


Epoch 2/5, Train Loss: 0.3099, Train Acc: 0.9093, 


100%|██████████| 94/94 [00:00<00:00, 123.40it/s]


Epoch 3/5, Train Loss: 0.2828, Train Acc: 0.9200, 


100%|██████████| 94/94 [00:00<00:00, 193.86it/s]


Epoch 4/5, Train Loss: 0.2558, Train Acc: 0.9288, 


100%|██████████| 94/94 [00:00<00:00, 202.51it/s]


Epoch 5/5, Train Loss: 0.2460, Train Acc: 0.9238, 
Training completed by client  9
Getting aggregated by server
Best client for aggregation: 4
Done Aggregating
Val Acc: 0.1000, Test Acc: 0.1000


In [None]:

# best_model = None
# best_f1_score = 0

# # Run experiments
# results = {}
# for lr in learning_rates:
#     print(f"Training with learning rate: {lr}")

#     # Initialize the model
#     model = NeuralNetwork(input_dim=784, num_classes=10, lr=lr, device=device)

#     # Train the model and get training and validation metrics
#     train_loss, val_loss, train_acc, val_acc, val_f1 = model.train(
#         X_train, y_train, X_val, y_val, epochs, batch_size
#     )

#     # Store the results
#     results[lr] = (train_loss, val_loss, train_acc, val_acc, val_f1)

#     # Track the best model based on validation F1 score
#     if val_f1[-1] > best_f1_score:
#         best_f1_score = val_f1[-1]
#         print(f"Best model found with learning rate: {lr} and F1 score: {best_f1_score}")
#         best_model = model


In [None]:


# # Make predictions with the best model
# Y_pred = best_model.predict(X_test)
# accuracy = accuracy_score(y_test, Y_pred)
# precision = precision_score(y_test, Y_pred, average="weighted")
# recall = recall_score(y_test, Y_pred, average="weighted")
# f1 = f1_score(y_test, Y_pred, average="weighted")

# # Print the evaluation metrics for the best model
# print(f"Accuracy: {accuracy}")
# print(f"Precision: {precision}, Recall: {recall}, F1 Score: {f1}")

