# Seminar 5 - Federated Learning

In [3]:
import pandas as pd
import numpy as np
import os
import torch
import torch.nn as nn

 ## Data preparation

- We load the datasets for **10 clients**.
- Each client has:
    - `client_{i}_features.csv`: Wi-Fi CSI measurements (270 features per sample).
    - `client_{i}_labels.csv`: Corresponding pose labels (integers from 1 to 12).
- **Test Data**:
  - A separate test set:
    - `test_features.csv` and `test_labels.csv` containing **500 samples**.

The CSI data represent Wi-Fi signal reflections when subjects perform different poses, collected for human pose estimation tasks.

In [4]:
data_dir = 'dataset_Seminar5'
num_clients = 10
client_data = {}

# Load each client’s training data
for i in range(1, num_clients + 1):
    X_path = os.path.join(data_dir, f'client_datasets/client_{i}_features.csv')
    y_path = os.path.join(data_dir, f'client_datasets/client_{i}_labels.csv')
    
    X = pd.read_csv(X_path, header=None).values  # shape: (num_samples, 270)
    y = pd.read_csv(y_path, header=None).values.flatten()  # shape: (num_samples,)
    
    client_data[i] = {'X': X, 'y': y}

# Load test data
X_test = pd.read_csv(os.path.join(data_dir, 'test_features.csv'), header=None).values
y_test = pd.read_csv(os.path.join(data_dir, 'test_labels.csv'), header=None).values.flatten()

print(X.shape)  # shape: (num_samples, 270)
print(y.shape)  # shape: (num_samples,)
print(X_test.shape)  # shape: (num_samples, 270)
print(y_test.shape)  # shape: (num_samples,)

(64, 270)
(64,)
(500, 270)
(500,)


We will adjust test labels from 0-11 instead of 1-12

In [5]:
for client_id in client_data:
    client_data[client_id]['y'] -= 1  # Now ranges 0-11

y_test -= 1  # Also adjust test labels

We tried training with different models to see which one fits the most:
### `PoseClassifierFC` (Fully Connected Network)
- **Type**: Simple Feedforward Neural Network (MLP).
- **Architecture**:
  - Input: 270 features (flattened CSI data).
  - Two hidden layers:
    - `Linear(270 → 128)`, `ReLU`, `Dropout(0.3)`.
    - `Linear(128 → 64)`, `ReLU`.
  - Output layer: `Linear(64 → 12 classes)`.
- **Use Case**: 
  - Lightweight.
  - Good for quick testing or clients with very limited compute power.

---

### `ResSim` (Simplified Residual CNN)
- **Type**: Lightweight ResNet-style Convolutional Neural Network.
- **Architecture**:
  - Input reshaped to (3 channels × 30 × 3 matrix).
  - **Two residual blocks**:
    - `Conv → ReLU → Conv` + Skip Connection → `ReLU → MaxPool`.
  - Flatten the output.
  - Fully Connected layer for classification.
- **Use Case**:
  - Leverages **residual connections** for better training stability.
  - Suitable for capturing spatial patterns with limited depth.

---

### `PoseClassifierCNN` (Standard CNN)
- **Type**: Regular Convolutional Neural Network.
- **Architecture**:
  - Input reshaped to (3 × 30 × 3).
  - Two convolutional blocks:
    - `Conv → ReLU → BatchNorm → MaxPool`.
  - Flatten the output.
  - Fully Connected layer → Dropout → Final classification layer.
- **Use Case**:
  - A classic CNN approach.
  - Good balance between expressiveness and simplicity.
  - Well-suited for spatial data like Wi-Fi CSI matrices.

In [None]:
class PoseClassifierFC(nn.Module):
    def __init__(self, num_classes=12):
        super(PoseClassifierFC, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(270, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, num_classes)
        )
    
    def forward(self, x):
        return self.net(x)

In [6]:
class ResSim(nn.Module):
    """
    A simplified ResNet-style network with residual connections and sequential blocks.
    
    Adapted for CSI input (flattened to 3x30x3).

    Architecture:
        - Two residual blocks: Conv → ReLU → Conv + skip
        - Each followed by MaxPool
        - Fully connected classifier

    Args:
        num_classes (int): Number of output classes.
    """
    def __init__(self, num_classes=12): # 12 different pose classes
        super(ResSim, self).__init__()

        # Block 1
        self.block1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, padding=1)
        )
        self.shortcut1 = nn.Conv2d(3, 64, kernel_size=1)  # aligns input channels

        # Block 2
        self.block2 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, padding=1)
        )
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(2, 1)
        self.fc = nn.Linear(64 * 7 * 4, num_classes)

    def forward(self, x):
        x = x.view(-1, 3, 30, 3)  # reshape input vector (270,) → (3, 30, 3)

        # First residual connection
        residual = self.shortcut1(x)
        x = self.block1(x)
        x = self.relu(x + residual)
        x = self.pool(x) # (64, 15, 3)

        # Second residual connection (no need for shortcut: same shape)
        residual = x
        x = self.block2(x)
        x = self.relu(x + residual)
        x = self.pool(x) # (64, 7, 3)

        x = x.view(x.size(0), -1) # flatten
        return self.fc(x)

In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class PoseClassifierCNN(nn.Module):
    def __init__(self, num_classes=12):
        super(PoseClassifierCNN, self).__init__()
        
        self.cnn = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1),  # (3, 30, 3) → (16, 30, 3)
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.MaxPool2d(kernel_size=(2, 1)),  # (16, 15, 3)
            
            nn.Conv2d(16, 32, kernel_size=3, padding=1),  # (32, 15, 3)
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(kernel_size=(2, 1))  # (32, 7, 3)
        )
        
        self.fc = nn.Sequential(
            nn.Flatten(),                    # (32 * 7 * 3)
            nn.Linear(32 * 7 * 3, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, num_classes)     # output logits for 12 classes
        )

    def forward(self, x):
        # Reshape input from (batch_size, 270) → (batch_size, 3, 30, 3)
        x = x.view(-1, 3, 30, 3)  # match channel-first format
        x = self.cnn(x)
        x = self.fc(x)
        return x

### Model Evaluation

`eval_model` Function:

Evaluates a model’s accuracy:

- **Set eval mode**: Disables layers like dropout.
- **Convert** inputs to tensors.
- **Predict** class labels (`argmax` over logits).
- **Compute accuracy**: Percentage of correct predictions.

In [None]:
def eval_model(model, X, y):
    model.eval()  # Set model to evaluation mode (important: disables dropout, batchnorm updates)
    
    # Convert input features and labels to PyTorch tensors
    X_tensor = torch.tensor(X, dtype=torch.float32)  # Features: float32
    y_tensor = torch.tensor(y, dtype=torch.long)     # Labels: int64 (long)
    
    with torch.no_grad():  # Disable gradient computation (saves memory and speeds up evaluation)
        logits = model(X_tensor)     # Forward pass: compute raw scores (logits)
        preds = torch.argmax(logits, dim=1)  # Get class with highest score (predicted label)
        accuracy = ((preds == y_tensor).float().mean().item()) * 100  # Compute accuracy as percentage
    
    return accuracy  # Return accuracy %

`eval_loss`
- **Evaluates loss** for a model on given data.
- **No gradient** computation (`torch.no_grad()`).
- Uses the provided **loss criterion** (e.g., CrossEntropy).

In [None]:
def eval_loss(model, X, y, criterion):
    model.eval()  # Set model to evaluation mode
    X_tensor = torch.tensor(X, dtype=torch.float32)  # Features tensor
    y_tensor = torch.tensor(y, dtype=torch.long)     # Labels tensor
    with torch.no_grad():  # No gradient computation
        logits = model(X_tensor)      # Forward pass
        loss = criterion(logits, y_tensor)  # Compute loss
    return loss.item()  # Return loss value

`weighted_train_loss`
- **Purpose**: Compute the weighted average loss across selected clients.
- For each client:
  - Load global model weights.
  - Evaluate local loss.
- **Weight** each client's loss by its number of samples.

In [21]:
def weighted_train_loss(model_class, global_state, selected_clients, client_data, criterion):
    train_losses = []
    weights = []
    
    for k in selected_clients:
        model = model_class(num_classes=12)          # Initialize model
        model.load_state_dict(global_state)          # Load global model state
        loss = eval_loss(model, client_data[k]['X'], client_data[k]['y'], criterion)  # Client loss
        train_losses.append(loss)
        weights.append(len(client_data[k]['X']))     # Weight: number of samples
    
    alpha = [w / sum(weights) for w in weights]  # Compute sample ratio per client
    weighted_loss = sum(a * l for a, l in zip(alpha, train_losses))  # Weighted average loss
    
    return weighted_loss

## Training

`initialize_global_model`
- Creates a fresh model and saves its initial weights.

`select_clients`
- Randomly picks a subset of clients for the current round.

`local_train`
- Each selected client:
  - Loads the global model.
  - Trains locally using its private data.
  - Returns updated weights and number of samples.

`aggregate_models`
- Aggregates client models using **FedAvg**:
  - Weighted average of parameters based on client data sizes.

`federated_learning`
- Orchestrates the full FL process:
  - For each round:
    - Select clients.
    - Perform local training.
    - Aggregate updates.
  - Every 5 rounds:
    - Report test accuracy and weighted train loss.

In [20]:
import random
from copy import deepcopy

def initialize_global_model(model_class, num_classes=12):
    """Initialize the global model and save its initial state."""
    model = model_class(num_classes=num_classes)
    return model, deepcopy(model.state_dict())

def select_clients(client_ids, num_clients_per_round):
    """Randomly select a subset of clients."""
    return random.sample(client_ids, num_clients_per_round)

def local_train(model_class, global_state, client_dataset, local_epochs, batch_size, lr):
    """Train local model on client's data."""
    model = model_class(num_classes=12)
    model.load_state_dict(global_state)
    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    
    # Prepare DataLoader
    X_local = torch.tensor(client_dataset['X'], dtype=torch.float32)
    y_local = torch.tensor(client_dataset['y'], dtype=torch.long)
    dataset = torch.utils.data.TensorDataset(X_local, y_local)
    loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)
    
    # Local training loop
    for _ in range(local_epochs):
        for xb, yb in loader:
            optimizer.zero_grad()
            logits = model(xb)
            loss = criterion(logits, yb)
            loss.backward()
            optimizer.step()
    
    return deepcopy(model.state_dict()), len(X_local)

def aggregate_models(local_states, local_sizes):
    """Aggregate local models using FedAvg."""
    total_samples = sum(local_sizes)
    new_global_state = deepcopy(local_states[0])
    
    for key in new_global_state:
        new_global_state[key] = sum(
            (local_states[i][key] * (local_sizes[i] / total_samples) for i in range(len(local_states)))
        )
    
    return new_global_state

def federated_learning(model_class, client_data, num_rounds, clients_per_round, local_epochs, batch_size, lr):
    """Main Federated Learning orchestration loop."""
    client_ids = list(client_data.keys())
    global_model, global_state = initialize_global_model(model_class)
    
    for round_idx in range(num_rounds):
        selected = select_clients(client_ids, clients_per_round)
        local_states, local_sizes = [], []
        
        # Local training on selected clients
        for k in selected:
            state, size = local_train(model_class, global_state, client_data[k], local_epochs, batch_size, lr)
            local_states.append(state)
            local_sizes.append(size)
        
        # Update global model
        global_state = aggregate_models(local_states, local_sizes)
        global_model.load_state_dict(global_state)
        
        print(f"Round {round_idx+1}/{num_rounds} complete.")
        
        if (round_idx + 1) % 5 == 0:
            train_accuracy = eval_model(global_model, X_test, y_test)
            print(f"Round {round_idx+1}: Test Accuracy = {train_accuracy:.4f} %")
            loss = weighted_train_loss(model_class, global_state, selected, client_data, nn.CrossEntropyLoss())
            print(f"Round {round_idx+1}: Weighted Train Loss = {loss:.4f}")
    
    return global_model

Defining the parameters:

In [16]:
# Federated Learning Parameters
NUM_CLIENTS = 10
CLIENTS_PER_ROUND = 5
FL_ROUNDS = 30

# Local training parameters
LOCAL_EPOCHS = 5
BATCH_SIZE = 32
LEARNING_RATE = 0.001

Initialize the model and train it. The evaluate the model on the test set

In [None]:
# Train the model using Federated Learning
global_model = federated_learning(
    model_class=PoseClassifierCNN,
    client_data=client_data,
    num_rounds=FL_ROUNDS,
    clients_per_round=CLIENTS_PER_ROUND,
    local_epochs=LOCAL_EPOCHS,
    batch_size=BATCH_SIZE,
    lr=LEARNING_RATE
)

# Evaluate the trained global model on the test set
accuracy = eval_model(global_model, X_test, y_test)
print(f"Final Test Accuracy: {accuracy:.4f}")

Round 1/100 complete.
Round 2/100 complete.
Round 3/100 complete.
Round 4/100 complete.
Round 5/100 complete.
Round 5: Test Accuracy = 41.8000 %
Round 5: Weighted Train Loss = 0.7563
Round 6/100 complete.
Round 7/100 complete.
Round 8/100 complete.
Round 9/100 complete.
Round 10/100 complete.
Round 10: Test Accuracy = 39.8000 %
Round 10: Weighted Train Loss = 1.0997
Round 11/100 complete.
Round 12/100 complete.
Round 13/100 complete.
Round 14/100 complete.
Round 15/100 complete.
Round 15: Test Accuracy = 46.8000 %
Round 15: Weighted Train Loss = 0.6431
Round 16/100 complete.
Round 17/100 complete.
Round 18/100 complete.
Round 19/100 complete.
Round 20/100 complete.
Round 20: Test Accuracy = 49.0000 %
Round 20: Weighted Train Loss = 0.5877
Round 21/100 complete.
Round 22/100 complete.
Round 23/100 complete.
Round 24/100 complete.
Round 25/100 complete.
Round 25: Test Accuracy = 52.8000 %
Round 25: Weighted Train Loss = 0.5408
Round 26/100 complete.
Round 27/100 complete.
Round 28/100 co

In [23]:
# Train the model using Federated Learning
global_model = federated_learning(
    model_class=ResSim(),
    client_data=client_data,
    num_rounds=FL_ROUNDS,
    clients_per_round=CLIENTS_PER_ROUND,
    local_epochs=LOCAL_EPOCHS,
    batch_size=BATCH_SIZE,
    lr=LEARNING_RATE
)

# Evaluate the trained global model on the test set
accuracy = eval_model(global_model, X_test, y_test)
print(f"Final Test Accuracy: {accuracy:.4f}")

TypeError: ResSim.forward() got an unexpected keyword argument 'num_classes'

In [None]:
# Train the model using Federated Learning
global_model = federated_learning(
    model_class=PoseClassifierFC,
    client_data=client_data,
    num_rounds=FL_ROUNDS,
    clients_per_round=CLIENTS_PER_ROUND,
    local_epochs=LOCAL_EPOCHS,
    batch_size=BATCH_SIZE,
    lr=LEARNING_RATE
)

# Evaluate the trained global model on the test set
accuracy = eval_model(global_model, X_test, y_test)
print(f"Final Test Accuracy: {accuracy:.4f}")

The model which reached the best value accuracy is the CNN architecture, that reached a test accuracy 0f 58% and then dropped down due to overfitting

In [17]:
import random
from copy import deepcopy

# 1. Initialize a global ML model, ω(t = 0)
global_model = PoseClassifierCNN(num_classes=12)
global_model_state = deepcopy(global_model.state_dict())

# Helper: get number of samples for each client
client_num_samples = {k: len(client_data[k]['X']) for k in client_data}

# Federated Learning Loop
for round_idx in range(100):
    # 2. Select a subset of clients S ⊆ K
    selected_clients = random.sample(list(client_data.keys()), CLIENTS_PER_ROUND)
    
    local_states = []
    local_sizes = []
    
    # 3. Send the global model to the clients, retrain locally
    for k in selected_clients:
        local_model = PoseClassifierCNN(num_classes=12)
        local_model.load_state_dict(global_model_state)
        local_model.train()
        
        optimizer = torch.optim.Adam(local_model.parameters(), lr=LEARNING_RATE)
        criterion = nn.CrossEntropyLoss()
        
        X_local = torch.tensor(client_data[k]['X'], dtype=torch.float32)
        y_local = torch.tensor(client_data[k]['y'], dtype=torch.long)
        
        dataset = torch.utils.data.TensorDataset(X_local, y_local)
        loader = torch.utils.data.DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
        
        for epoch in range(LOCAL_EPOCHS):
            for xb, yb in loader:
                optimizer.zero_grad()
                logits = local_model(xb)
                loss = criterion(logits, yb)
                loss.backward()
                optimizer.step()
        
        # 4. Retrieve the individual models ωk(t)
        local_states.append(deepcopy(local_model.state_dict()))
        local_sizes.append(len(X_local))
    
    # 5. Aggregate the individual contributions (FedAvg)
    total_samples = sum(local_sizes)
    new_global_state = deepcopy(global_model_state)
    for key in new_global_state:
        new_global_state[key] = sum(
            (local_states[i][key] * (local_sizes[i] / total_samples) for i in range(len(local_states)))
        )
    global_model_state = new_global_state
    global_model.load_state_dict(global_model_state)
    
    print(f"Round {round_idx+1}/{FL_ROUNDS} complete.")

# The trained global_model now contains the aggregated weights

Round 1/30 complete.
Round 2/30 complete.
Round 3/30 complete.
Round 4/30 complete.
Round 5/30 complete.
Round 6/30 complete.
Round 7/30 complete.
Round 8/30 complete.
Round 9/30 complete.
Round 10/30 complete.
Round 11/30 complete.
Round 12/30 complete.
Round 13/30 complete.
Round 14/30 complete.
Round 15/30 complete.
Round 16/30 complete.
Round 17/30 complete.
Round 18/30 complete.
Round 19/30 complete.
Round 20/30 complete.
Round 21/30 complete.
Round 22/30 complete.
Round 23/30 complete.
Round 24/30 complete.
Round 25/30 complete.
Round 26/30 complete.
Round 27/30 complete.
Round 28/30 complete.
Round 29/30 complete.
Round 30/30 complete.
Round 31/30 complete.
Round 32/30 complete.
Round 33/30 complete.
Round 34/30 complete.
Round 35/30 complete.
Round 36/30 complete.
Round 37/30 complete.
Round 38/30 complete.
Round 39/30 complete.
Round 40/30 complete.
Round 41/30 complete.
Round 42/30 complete.
Round 43/30 complete.
Round 44/30 complete.
Round 45/30 complete.
Round 46/30 complet

In [18]:
# Evaluate the global_model on the test set
global_model.eval()
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

with torch.no_grad():
    logits = global_model(X_test_tensor)
    preds = torch.argmax(logits, dim=1)
    accuracy = (preds == y_test_tensor).float().mean().item()

print(f"Test Accuracy: {accuracy:.4f}")

Test Accuracy: 0.5860
