In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
from datasets import load_dataset, concatenate_datasets, Dataset
import copy
from tqdm import tqdm  # For tracking training progress


In [3]:
import torch
import numpy as np
import random

seed = 42
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

In [4]:
# Example list of labels
label_names = ['cat', 'dog', 'bird', 'fish', 'car', 'aircraft', 'flower', 'truck', 'parachute', 'mushroom']

# Create a mapping from label names to indices
label_to_index = {label: idx for idx, label in enumerate(label_names)}


In [5]:
from torchvision import transforms

transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=3), # Convert grayscale to 3 channels (RGB)
    transforms.Resize((256, 256)),  # Resize all images to 256x256
    transforms.ToTensor(),          # Convert images to PyTorch tensors
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize
])

from PIL import Image

def apply_transform(example):
    # Check if 'example['image']' is a list (batch of images)
    transformed_images = [transform(img) for img in example['image']]
    labels = [label_to_index[label] for label in example['label']]
    # Return the transformed images and the unchanged labels
    return {
        'image': transformed_images,  # Stack to create a single tensor
        'label': torch.tensor(labels)  # Convert labels to tensor
    }

# Apply the transformations to the dataset (train + test split for each 


In [6]:
def prepare_custom_dataloader(dataset, batch_size=16):
    # Apply the transformation to each sample in the dataset
    dataset = dataset.with_transform(apply_transform)
    
    # Create dataloaders
    train_loader = DataLoader(dataset['train'], batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(dataset['test'], batch_size=batch_size, shuffle=False)
    return train_loader, test_loader

In [7]:
dataset_1 = load_dataset("AnnantJain/client1_federated_dataset_modified")
dataset_2 = load_dataset("AnnantJain/client2_federated_dataset_modified")
dataset_3 = load_dataset("AnnantJain/client3_federated_dataset_modified")
dataset_4 = load_dataset("AnnantJain/client4_federated_dataset_modified")
dataset_5 = load_dataset("AnnantJain/client5_federated_dataset_modified")

In [8]:
dataset_1

DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 1530
    })
    test: Dataset({
        features: ['image', 'label'],
        num_rows: 270
    })
})

In [9]:
train_loader_1, test_loader_1 = prepare_custom_dataloader(dataset_1)
train_loader_2, test_loader_2 = prepare_custom_dataloader(dataset_2)
train_loader_3, test_loader_3 = prepare_custom_dataloader(dataset_3)
train_loader_4, test_loader_4 = prepare_custom_dataloader(dataset_4)
train_loader_5, test_loader_5 = prepare_custom_dataloader(dataset_5)

In [10]:
# Inspect the output of the DataLoader
batch = next(iter(train_loader_1))
print(type(batch))
print(len(batch))
print(batch)  # Print to inspect the content


<class 'dict'>
2
{'image': tensor([[[[ 1.5810,  1.4783,  1.3242,  ..., -0.8507, -0.7822, -0.7308],
          [ 1.5639,  1.4612,  1.3070,  ..., -0.8678, -0.8335, -0.7993],
          [ 1.5468,  1.4440,  1.2899,  ..., -0.8507, -0.8678, -0.8678],
          ...,
          [-0.2171, -0.2171, -0.2171,  ..., -0.5082, -0.4397, -0.4054],
          [-0.2171, -0.2171, -0.2171,  ..., -0.6623, -0.5596, -0.4911],
          [-0.2171, -0.2171, -0.2171,  ..., -0.7822, -0.6623, -0.5767]],

         [[ 1.7458,  1.6408,  1.4832,  ..., -0.7402, -0.6702, -0.6176],
          [ 1.7283,  1.6232,  1.4657,  ..., -0.7577, -0.7227, -0.6877],
          [ 1.7108,  1.6057,  1.4482,  ..., -0.7402, -0.7577, -0.7577],
          ...,
          [-0.0924, -0.0924, -0.0924,  ..., -0.3901, -0.3200, -0.2850],
          [-0.0924, -0.0924, -0.0924,  ..., -0.5476, -0.4426, -0.3725],
          [-0.0924, -0.0924, -0.0924,  ..., -0.6702, -0.5476, -0.4601]],

         [[ 1.9603,  1.8557,  1.6988,  ..., -0.5147, -0.4450, -0.3927],
   

In [11]:
class CNN(nn.Module):
    def __init__(self, num_classes):
        super(CNN, self).__init__()
        # Define layers
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(32 * 64 * 64, 128)  # Adjust based on output size from conv layers
        self.fc2 = nn.Linear(128, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.pool(nn.ReLU()(self.conv1(x)))  # Conv Layer 1
        x = self.pool((nn.ReLU()(self.conv2(x))))  # Conv Layer 2
        x = x.view(-1, 32 * 64 * 64)  # Flatten for fully connected layer
        x = nn.ReLU()(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x) 
        return x

In [12]:
import copy

# Define distillation loss
def distillation_loss(student_outputs, teacher_outputs, labels, alpha=0.5, T=2):
    soft_loss = nn.KLDivLoss(reduction='batchmean')(nn.functional.log_softmax(student_outputs / T, dim=1),
                               nn.functional.softmax(teacher_outputs / T, dim=1)) * (alpha * T * T)
    hard_loss = nn.CrossEntropyLoss()(student_outputs, labels) * (1. - alpha)
    return soft_loss + hard_loss

# Modify train_local to include the distillation process
def train_local(model, train_loader, criterion, optimizer, global_model, alpha=0.5, T=2, epochs=2):
    model.train()
    global_model.eval()  # Ensure the global model is in evaluation mode
    
    for epoch in range(epochs):
        for batch in train_loader:
            images = batch['image']
            labels = batch['label']
            optimizer.zero_grad()

            # Get outputs from the local model (student) and the global model (teacher)
            student_outputs = model(images)
            with torch.no_grad():  # No gradients needed for the teacher model
                teacher_outputs = global_model(images)
            
            # Compute distillation loss
            loss = distillation_loss(student_outputs, teacher_outputs, labels, alpha=alpha, T=T)
            loss.backward()
            optimizer.step()

def evaluate(model, test_loader):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for batch in test_loader:
            images = batch['image']  # This should be a tensor
            labels = batch['label'] 
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct / total

# Function to average the weights of global layers (FedAvg)
def average_global_weights(global_model, client_models):
    global_state_dict = global_model.state_dict()
    for key in global_state_dict.keys():
        # Average the global layers (shared part)
        global_state_dict[key] = torch.mean(
            torch.stack([client_models[i].state_dict()[key] for i in range(len(client_models))]), dim=0
        )
    global_model.load_state_dict(global_state_dict)



def average_global_weights1(global_model, client_models, client_weights):
    global_state_dict = global_model.state_dict()
    for key in global_state_dict.keys():
        weighted_sum = torch.zeros_like(global_state_dict[key])
        total_weight = 0.0

        # Weighted sum of the model weights from clients based on their performance
        for i, client_model in enumerate(client_models):
            client_weight = client_weights[i]
            weighted_sum += client_weight * client_model.state_dict()[key]
            total_weight += client_weight

        global_state_dict[key] = weighted_sum / total_weight

    global_model.load_state_dict(global_state_dict)


# Average pruned weights across all clients for selective layers
def selective_average_global_weights(global_model, client_models, client_weights):
    global_state_dict = global_model.state_dict()
    for key in global_state_dict.keys():
        weighted_sum = torch.zeros_like(global_state_dict[key])
        total_weight = 0.0
        for i, client_model in enumerate(client_models):
            client_weight = client_weights[i]
            weighted_sum += client_weight * client_model.state_dict().get(key, 0)
            total_weight += client_weight
        global_state_dict[key] = weighted_sum / total_weight
    global_model.load_state_dict(global_state_dict, strict=False)


In [13]:
# 2. Prune parameters by retaining only the top fraction of important weights
def prune_model_weights(model, prune_fraction=0.3):
    pruned_state_dict = {}
    for name, param in model.state_dict().items():
        threshold = torch.quantile(param.abs(), prune_fraction)
        pruned_param = param * (param.abs() > threshold)  # Zero out less important weights
        pruned_state_dict[name] = pruned_param
    return pruned_state_dict

def distill_logits(logits, targets, temperature=2.0):
    return nn.functional.softmax(logits / temperature, dim=1)

# 1. Freeze selective layers after a few rounds
def freeze_layers(model, layers_to_freeze=['conv1', 'conv2']):
    for name, param in model.named_parameters():
        if any(layer in name for layer in layers_to_freeze):
            param.requires_grad = False

def get_adaptive_lr(base_lr, round_num, decay_factor=0.95):
    return base_lr * (decay_factor ** round_num)


In [14]:
clients = [
    (train_loader_1, test_loader_1),
    (train_loader_2, test_loader_2),
    (train_loader_3, test_loader_3),
    (train_loader_4, test_loader_4),
    (train_loader_5, test_loader_5)
]

# Initialize the global model
global_model = CNN(num_classes=10)

In [15]:
def federated_learning(clients, global_model, num_rounds=5, prune_fraction=0.3, freeze_after_round=3):
    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
    
    for round in range(num_rounds):
        print(f"Round {round+1}/{num_rounds}")

        # Step 1: Train personalized models locally
        local_models = []
        client_weights = []
        for i, (train_loader, test_loader) in enumerate(clients):
            print(f"Client {i+1} local training...")
            local_model = copy.deepcopy(global_model)  # Clone the global model
            
            #optimizer = optim.Adam(local_model.parameters(), lr=get_adaptive_lr(0.001, round), weight_decay=1e-4)
            optimizer = optim.Adam(local_model.parameters(), lr=0.001)

            # Train on each client's data
            train_local(local_model, train_loader, criterion, optimizer, global_model)
            local_models.append(local_model)

            prune_model_weights(local_model, prune_fraction=prune_fraction)

            acc = evaluate(local_model, test_loader)
            client_weights.append(acc)

        # Step 2: Average global layers across clients (FedAvg)
        print("Averaging global model...")
        selective_average_global_weights(global_model, local_models, client_weights)

        # Step 3: Evaluate each personalized model after federated update
        for i, (train_loader, test_loader) in enumerate(clients):
            acc = evaluate(local_models[i], test_loader)
            print(f"Client {i+1} Accuracy: {acc * 100:.2f}%")

# Run federated learning with client-specific noise adaptation
federated_learning(clients, global_model)

Round 1/5
Client 1 local training...
Client 2 local training...
Client 3 local training...
Client 4 local training...
Client 5 local training...
Averaging global model...
Client 1 Accuracy: 81.48%
Client 2 Accuracy: 71.78%
Client 3 Accuracy: 47.22%
Client 4 Accuracy: 57.67%
Client 5 Accuracy: 61.25%
Round 2/5
Client 1 local training...
Client 2 local training...
Client 3 local training...
Client 4 local training...
Client 5 local training...
Averaging global model...
Client 1 Accuracy: 80.74%
Client 2 Accuracy: 79.56%
Client 3 Accuracy: 50.56%
Client 4 Accuracy: 56.00%
Client 5 Accuracy: 61.88%
Round 3/5
Client 1 local training...
Client 2 local training...
Client 3 local training...
Client 4 local training...
Client 5 local training...
Averaging global model...
Client 1 Accuracy: 84.07%
Client 2 Accuracy: 77.33%
Client 3 Accuracy: 63.06%
Client 4 Accuracy: 57.67%
Client 5 Accuracy: 67.29%
Round 4/5
Client 1 local training...
Client 2 local training...
Client 3 local training...
Client

In [None]:
from sklearn.cluster import KMeans
import numpy as np

def cluster_clients(client_weights, num_clusters=2):
    # Use KMeans to cluster clients based on their weights
    kmeans = KMeans(n_clusters=num_clusters)
    clusters = kmeans.fit_predict(np.array(client_weights).reshape(-1, 1))
    return clusters

# Cluster the clients after each round based on accuracy
clusters = cluster_clients(client_weights, num_clusters=2)
clustered_clients = {i: [] for i in range(2)}

# Group clients into clusters
for i, cluster_id in enumerate(clusters):
    clustered_clients[cluster_id].append((train_loader, test_loader))

# Perform federated learning for each cluster separately
for cluster_id, cluster_clients in clustered_clients.items():
    print(f"Training for cluster {cluster_id}...")
    federated_learning(cluster_clients, global_model, num_rounds=num_rounds)
