In [None]:
# Mount Google Drive at /content/drive (Colab-only API); the dataset paths
# used later in this notebook all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# 1. Custom Dataset Loader


In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image
import time
import os
from torchvision import datasets
import pandas as pd


class CustomDataset(Dataset):
    """Image-folder dataset: one sub-folder of ``root_dir`` per class.

    Class indices are assigned from the alphabetically sorted sub-folder
    names. Files inside each class folder are also visited in sorted order so
    that ``samples`` (and therefore any un-shuffled iteration) is the same on
    every run and every file system.

    Args:
        root_dir: Directory whose immediate sub-directories are the classes.
        transform: Optional callable applied to each PIL image in
            ``__getitem__``.

    Attributes set by ``__init__``:
        class_labels: Sorted list of class (sub-folder) names.
        class_to_idx: Mapping ``class name -> integer label``.
        samples: List of ``(file_path, label)`` tuples.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform

        # Class labels come from sub-folder names. Hidden directories such as
        # ".ipynb_checkpoints" (created by Jupyter/Colab) are not classes and
        # would otherwise shift every label index.
        self.class_labels = sorted(
            entry for entry in os.listdir(root_dir)
            if not entry.startswith(".")
            and os.path.isdir(os.path.join(root_dir, entry))
        )
        self.class_to_idx = {label: idx for idx, label in enumerate(self.class_labels)}

        # Build the (path, label) list. os.listdir returns entries in
        # arbitrary order, so sort them for reproducibility.
        self.samples = []
        for class_label in self.class_labels:
            class_path = os.path.join(root_dir, class_label)
            label_idx = self.class_to_idx[class_label]
            for file_name in sorted(os.listdir(class_path)):
                file_path = os.path.join(class_path, file_name)
                # Skip nested directories and hidden files (e.g. ".DS_Store"):
                # they cannot be opened as images.
                if file_name.startswith(".") or not os.path.isfile(file_path):
                    continue
                self.samples.append((file_path, label_idx))

    def __len__(self):
        """Return the number of (image, label) samples."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Load sample ``idx`` as an RGB image and return ``(image, label)``."""
        img_path, label = self.samples[idx]
        # convert() returns a fully loaded copy, so the underlying file handle
        # can be released immediately instead of waiting for garbage collection.
        with Image.open(img_path) as img:
            image = img.convert("RGB")

        if self.transform is not None:
            image = self.transform(image)

        return image, label

# 2. Custom CNN Model for classification



In [None]:
# basic CNN image classification model
class Net(nn.Module):
    """Small CNN classifier: two conv/ReLU/max-pool stages and two linear layers.

    Both convolutions use 3x3 kernels with padding 1 (spatial size preserved)
    and each is followed by a 2x2 max-pool with stride 2, so the feature map
    entering ``fc1`` has 64 channels and spatial size
    ``(height // 4, width // 4)``.

    Args:
        num_classes: Number of output logits.
        input_size: Spatial size of the input images, either a single int for
            square images or a ``(height, width)`` pair. The default of 256
            yields ``fc1.in_features == 64 * 64 * 64``.
    """

    def __init__(self, num_classes=3, input_size=256):
        super().__init__()
        if isinstance(input_size, int):
            height = width = input_size
        else:
            height, width = input_size

        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.flatten = nn.Flatten()
        # Two stride-2 pools shrink each spatial dimension by a factor of 4
        # (floor division, matching MaxPool2d's default rounding).
        self.fc1 = nn.Linear(64 * (height // 4) * (width // 4), 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Map a batch of images ``(N, 3, H, W)`` to logits ``(N, num_classes)``."""
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.flatten(x)
        x = self.relu(self.fc1(x))
        return self.fc2(x)

# 3. Centralised Learning

In [None]:
# Creating the model and putting it in training mode.
# NOTE: `train` has to be *called*; a bare `model.train` only references the
# bound method and does nothing.
model = Net()
model.train()

# Defining the loss function
criterion = nn.CrossEntropyLoss()

# Defining the optimizer
optimizer = optim.SGD(model.parameters(), lr=0.001)

# Resize to the 256x256 input the network's first linear layer is sized for,
# convert to a tensor and normalise each channel with mean 0.5 / std 0.5.
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Loading the dataset
dataset = CustomDataset("/content/drive/MyDrive/ML_Assignment_2/Centralised_Data", transform=transform)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Training the model: a single pass over the centralised data
for inputs, labels in dataloader:
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

In [None]:
# Score the centrally trained model on the data it was trained on.
dataset = CustomDataset("/content/drive/MyDrive/ML_Assignment_2/Centralised_Data", transform=transform)
dataloader = DataLoader(dataset, batch_size=32, shuffle=False)

model.eval()
correct, total = 0, 0

# Count correct top-1 predictions; no gradients are needed for evaluation.
with torch.no_grad():
    for inputs, labels in dataloader:
        outputs = model(inputs)
        _, predicted = outputs.max(dim=1)
        total += len(labels)
        correct += int((predicted == labels).sum())

In [None]:
training_accuracy = correct / total
# Print the accuracy of the centrally trained model on the Centralised_Data set
print(f'Training Accuracy: {100 * training_accuracy:.2f}%')

Training Accuracy: 53.03%


In [None]:
# Score the centrally trained model on the Test_Data set.
dataset = CustomDataset("/content/drive/MyDrive/ML_Assignment_2/Test_Data", transform=transform)
dataloader_test = DataLoader(dataset, batch_size=32, shuffle=False)

model.eval()
correct_test, total_test = 0, 0

# Calculating the accuracy of the model (top-1 predictions, gradients disabled)
with torch.no_grad():
    for inputs, labels in dataloader_test:
        outputs = model(inputs)
        _, predicted = outputs.max(dim=1)
        total_test += len(labels)
        correct_test += int((predicted == labels).sum())

In [None]:
testing_accuracy = correct_test / total_test
# Print the accuracy of the centrally trained model on the Test_Data set
print(f'Testing Accuracy: {100 * testing_accuracy:.2f}%')

Testing Accuracy: 47.67%


# 4. Federated Learning Model

## 4.1  Function for selecting clients

In [None]:
# function to select a client based on fixed criteria
def select_client(client):
  """Decide whether a client directory is eligible for training.

  Reads ``meta.csv`` inside ``client`` and checks the first four entries of
  its ``response`` column against fixed thresholds: entry 0 must exceed 0.25,
  entry 1 must exceed 1.25, entry 2 must equal 1 and entry 3 must exceed 20.

  Returns:
    True when all four conditions hold, otherwise False.
  """
  responses = pd.read_csv(os.path.join(client, "meta.csv"))['response']
  # The `and` chain short-circuits, so later entries are only read when the
  # earlier checks have passed.
  return bool(
      responses[0] > 0.25
      and responses[1] > 1.25
      and responses[2] == 1
      and responses[3] > 20
  )

## 4.2 Function for aggregating the model weights

In [None]:
# function to aggregate the weights of different models
def average_weights(models):
    """Return the element-wise mean of the models' ``state_dict`` entries.

    Args:
        models: Non-empty iterable of modules that share the same
            ``state_dict`` keys and tensor shapes.

    Returns:
        A plain ``dict`` mapping each parameter/buffer name to the unweighted
        average of that tensor across all models. The input models are not
        modified.

    Raises:
        ValueError: If ``models`` is empty (there is nothing to average).
    """
    state_dicts = [model.state_dict() for model in models]
    if not state_dicts:
        raise ValueError("average_weights() needs at least one model to average")

    num_models = len(state_dicts)
    averaged = {}
    for name, first_tensor in state_dicts[0].items():
        # `+` and `/` allocate new tensors, so the models' own weights are
        # never touched even though state_dict() shares their storage.
        running_sum = first_tensor
        for other in state_dicts[1:]:
            running_sum = running_sum + other[name]
        averaged[name] = running_sum / num_models

    return averaged


## 4.3 Function for Training

In [None]:
def _train_client_model(main_model, root_dir, lr, transform):
    """Train a fresh copy of the global model for one pass over one client's data."""
    model_local = Net()
    model_local.load_state_dict(main_model.state_dict())
    # `train` must be called; a bare `model_local.train` is a no-op.
    model_local.train()

    # define loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model_local.parameters(), lr=lr)

    dataset = CustomDataset(root_dir, transform=transform)
    dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

    for inputs, labels in dataloader:
        optimizer.zero_grad()
        loss = criterion(model_local(inputs), labels)
        loss.backward()
        optimizer.step()

    return model_local


def _evaluate_accuracy(model, data_dir, transform):
    """Put ``model`` in eval mode and return its top-1 accuracy on ``data_dir``."""
    model.eval()
    dataset = CustomDataset(data_dir, transform=transform)
    dataloader = DataLoader(dataset, batch_size=32, shuffle=False)

    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in dataloader:
            _, predicted = torch.max(model(inputs), 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return correct / total


def federated_learning(main_model, epochs, lr, list_of_client_lists,
                       federated_root="/content/drive/MyDrive/ML_Assignment_2/Federated_Data",
                       eval_dir="/content/drive/MyDrive/ML_Assignment_2/Centralised_Data"):
    """Simulate federated averaging over several rounds, updating ``main_model`` in place.

    For every round the clients registered for that round are filtered with
    ``select_client``; each eligible client trains a local copy of the current
    global model for one pass over its own data; the local weights are then
    averaged with ``average_weights`` and loaded back into ``main_model``.
    After each round the global model's accuracy on ``eval_dir`` is printed.

    Args:
        main_model: Global model; its weights are overwritten every round and
            it is left in eval mode.
        epochs: Number of federated rounds to run.
        lr: Learning rate of each client's SGD optimizer.
        list_of_client_lists: ``list_of_client_lists[i]`` holds the client
            paths (relative to ``federated_root``) registered for round ``i``.
        federated_root: Directory containing the per-round client folders.
        eval_dir: Dataset directory used for the per-round accuracy report.

    Raises:
        ValueError: If ``epochs`` exceeds the number of client lists supplied.
    """
    if epochs > len(list_of_client_lists):
        raise ValueError(
            f"epochs={epochs} but only {len(list_of_client_lists)} client lists were provided"
        )

    # The preprocessing is identical for every client and for evaluation, so
    # build it once instead of once per client per round.
    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    for epoch in range(epochs):
        # select the list of clients registered for the current epoch
        client_list = list_of_client_lists[epoch]

        # for each epoch select the eligible clients
        client_paths = (os.path.join(federated_root, client) for client in client_list)
        root_dir_list = [path for path in client_paths if select_client(path)]

        # simulation of training for each client using a for loop
        models = []
        for root_dir in root_dir_list:
            print(root_dir)
            # appending the model to the list represents the trained model
            # being returned by this client
            models.append(_train_client_model(main_model, root_dir, lr, transform))

        if models:
            # averaging the weights of all the trained models received from
            # clients in this epoch
            main_model.load_state_dict(average_weights(models))
        else:
            # Nothing to aggregate: keep the global weights rather than
            # crashing inside average_weights.
            print(f"No eligible clients in epoch {epoch+1}; keeping the previous global weights")

        training_accuracy = _evaluate_accuracy(main_model, eval_dir, transform)
        # printing the training accuracy of this epoch of federated learning
        print(f'Training Accuracy: {100 * training_accuracy:.2f}%')

        print(f"Epoch {epoch+1}/{epochs} completed")

## 4.4 Train the model using Federated learning

In [None]:

# Client locations for each round: "Epoch_<round>/Client_<k>" for rounds 1-4
# and clients 1-10.
client_list_epoch1, client_list_epoch2, client_list_epoch3, client_list_epoch4 = (
    [f"Epoch_{round_no}/Client_{client_no}" for client_no in range(1, 11)]
    for round_no in range(1, 5)
)

list_of_client_lists = [
    client_list_epoch1,
    client_list_epoch2,
    client_list_epoch3,
    client_list_epoch4,
]

# Create the global model
main_model = Net()

# Run four federated rounds, one client list per round
federated_learning(
    main_model,
    epochs=4,
    lr=0.001,
    list_of_client_lists=list_of_client_lists,
)

/content/drive/MyDrive/ML_Assignment_2/Federated_Data/Epoch_1/Client_1
/content/drive/MyDrive/ML_Assignment_2/Federated_Data/Epoch_1/Client_2
/content/drive/MyDrive/ML_Assignment_2/Federated_Data/Epoch_1/Client_3
/content/drive/MyDrive/ML_Assignment_2/Federated_Data/Epoch_1/Client_4
/content/drive/MyDrive/ML_Assignment_2/Federated_Data/Epoch_1/Client_5
/content/drive/MyDrive/ML_Assignment_2/Federated_Data/Epoch_1/Client_6
/content/drive/MyDrive/ML_Assignment_2/Federated_Data/Epoch_1/Client_7
/content/drive/MyDrive/ML_Assignment_2/Federated_Data/Epoch_1/Client_8
/content/drive/MyDrive/ML_Assignment_2/Federated_Data/Epoch_1/Client_9
/content/drive/MyDrive/ML_Assignment_2/Federated_Data/Epoch_1/Client_10
Training Accuracy: 34.83%
Epoch 1/4 completed
/content/drive/MyDrive/ML_Assignment_2/Federated_Data/Epoch_2/Client_1
/content/drive/MyDrive/ML_Assignment_2/Federated_Data/Epoch_2/Client_2
/content/drive/MyDrive/ML_Assignment_2/Federated_Data/Epoch_2/Client_4
/content/drive/MyDrive/ML_Assi

In [None]:
# Score the federated (global) model on the Test_Data set.
dataset = CustomDataset("/content/drive/MyDrive/ML_Assignment_2/Test_Data", transform=transform)
dataloader_test = DataLoader(dataset, batch_size=32, shuffle=False)

main_model.eval()
correct_test, total_test = 0, 0

# Calculating the accuracy of the model (top-1 predictions, gradients disabled)
with torch.no_grad():
    for inputs, labels in dataloader_test:
        outputs = main_model(inputs)
        _, predicted = outputs.max(dim=1)
        total_test += len(labels)
        correct_test += int((predicted == labels).sum())

In [None]:
testing_accuracy = correct_test / total_test
# Print the accuracy of the federated model (main_model) on the Test_Data set
print(f'Testing Accuracy: {100 * testing_accuracy:.2f}%')

Testing Accuracy: 41.33%
