In [1]:
!pip install -q transformers datasets


In [2]:
import torch
from torch.utils.data import DataLoader
from transformers import AutoTokenizer, AutoModelForSequenceClassification, DataCollatorWithPadding
from datasets import load_dataset

import numpy as np
import random

# Reproducibility
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)


Using device: cuda


In [3]:
# Load IMDB dataset: labels are 0 (neg) / 1 (pos)
dataset = load_dataset("imdb")

print(dataset)
print("Train size:", len(dataset["train"]))
print("Test size:", len(dataset["test"]))

# OPTIONAL: use a smaller subset for speed (e.g., 6000 train, 2000 test)
USE_SUBSET = True

if USE_SUBSET:
    small_train = dataset["train"].shuffle(seed=SEED).select(range(6000))
    small_test = dataset["test"].shuffle(seed=SEED).select(range(2000))
else:
    small_train = dataset["train"]
    small_test = dataset["test"]

dataset_small = {
    "train": small_train,
    "test": small_test
}
print("Subset train size:", len(dataset_small["train"]))
print("Subset test size:", len(dataset_small["test"]))


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

plain_text/train-00000-of-00001.parquet:   0%|          | 0.00/21.0M [00:00<?, ?B/s]

plain_text/test-00000-of-00001.parquet:   0%|          | 0.00/20.5M [00:00<?, ?B/s]

plain_text/unsupervised-00000-of-00001.p(…):   0%|          | 0.00/42.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating unsupervised split:   0%|          | 0/50000 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 25000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 25000
    })
    unsupervised: Dataset({
        features: ['text', 'label'],
        num_rows: 50000
    })
})
Train size: 25000
Test size: 25000
Subset train size: 6000
Subset test size: 2000


In [4]:
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize_function(examples):
    return tokenizer(
        examples["text"],
        truncation=True,
        padding=False,   # padding will be done dynamically in batches
        max_length=256
    )

tokenized_dataset = {}
for split in ["train", "test"]:
    tokenized_dataset[split] = dataset_small[split].map(
        tokenize_function,
        batched=True
    )

# Rename 'label' to 'labels' for Transformers
for split in ["train", "test"]:
    tokenized_dataset[split] = tokenized_dataset[split].rename_column("label", "labels")
    tokenized_dataset[split].set_format(
        type="torch",
        columns=["input_ids", "attention_mask", "labels"]
    )

print(tokenized_dataset["train"])
print(tokenized_dataset["test"])


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Map:   0%|          | 0/6000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Dataset({
    features: ['text', 'labels', 'input_ids', 'attention_mask'],
    num_rows: 6000
})
Dataset({
    features: ['text', 'labels', 'input_ids', 'attention_mask'],
    num_rows: 2000
})


In [5]:
NUM_CLIENTS = 3  # you can change this to 5 if you like

full_train = tokenized_dataset["train"].shuffle(seed=SEED)

clients_datasets = []

for cid in range(NUM_CLIENTS):
    client_data = full_train.shard(num_shards=NUM_CLIENTS, index=cid)
    # Each client keeps 90% for local train, 10% for local validation
    client_split = client_data.train_test_split(test_size=0.1, seed=SEED)
    clients_datasets.append(client_split)

print(f"Created {NUM_CLIENTS} clients.")
for i, cd in enumerate(clients_datasets):
    print(f"Client {i}: train={len(cd['train'])}, val={len(cd['test'])}")

global_test_dataset = tokenized_dataset["test"]


Created 3 clients.
Client 0: train=1800, val=200
Client 1: train=1800, val=200
Client 2: train=1800, val=200


In [6]:
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

def make_dataloader(ds, batch_size=16, shuffle=True):
    return DataLoader(
        ds,
        batch_size=batch_size,
        shuffle=shuffle,
        collate_fn=data_collator
    )

# Example: DataLoaders for each client
BATCH_SIZE = 16

client_loaders = []
for cid in range(NUM_CLIENTS):
    train_loader = make_dataloader(clients_datasets[cid]["train"], batch_size=BATCH_SIZE, shuffle=True)
    val_loader = make_dataloader(clients_datasets[cid]["test"], batch_size=BATCH_SIZE, shuffle=False)
    client_loaders.append((train_loader, val_loader))

test_loader = make_dataloader(global_test_dataset, batch_size=32, shuffle=False)

print("DataLoaders ready.")


DataLoaders ready.


In [7]:
from copy import deepcopy

def create_model():
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=2
    )
    return model

# Utility: extract model parameters as a dict of tensors on CPU
def get_model_params(model):
    return {name: param.cpu().detach().clone() for name, param in model.state_dict().items()}

# Utility: load parameters into a model
def set_model_params(model, params_dict):
    model.load_state_dict(params_dict)
    return model

# Federated Averaging: average params from all clients
def average_params(param_list):
    avg_params = {}
    for key in param_list[0].keys():
        stacked = torch.stack([p[key] for p in param_list], dim=0)
        avg_params[key] = torch.mean(stacked, dim=0)
    return avg_params

# Local training on one client
def train_one_client(model, train_loader, epochs, lr, device):
    model.to(device)
    model.train()
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr)

    total_loss = 0.0
    num_steps = 0

    for epoch in range(epochs):
        for batch in train_loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**batch)
            loss = outputs.loss

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            total_loss += loss.item()
            num_steps += 1

    avg_loss = total_loss / max(1, num_steps)
    return avg_loss

# Evaluation on a given dataloader
def evaluate(model, data_loader, device):
    model.to(device)
    model.eval()
    correct = 0
    total = 0
    total_loss = 0.0
    num_steps = 0

    with torch.no_grad():
        for batch in data_loader:
            labels = batch["labels"].to(device)
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**batch)
            loss = outputs.loss
            logits = outputs.logits
            preds = torch.argmax(logits, dim=-1)

            correct += (preds == labels).sum().item()
            total += labels.size(0)
            total_loss += loss.item()
            num_steps += 1

    accuracy = correct / total
    avg_loss = total_loss / max(1, num_steps)
    return avg_loss, accuracy


In [8]:
NUM_ROUNDS = 3        # you can increase to 5+ if you have time
LOCAL_EPOCHS = 1      # small to keep it fast
LR = 2e-5

# Initialize global model
global_model = create_model()
global_params = get_model_params(global_model)

# Evaluate before training
init_test_loss, init_test_acc = evaluate(global_model, test_loader, device)
print(f"Initial global model -> Test loss: {init_test_loss:.4f}, Test acc: {init_test_acc:.4f}")

for rnd in range(1, NUM_ROUNDS + 1):
    print(f"\n***** Federated Round {rnd} *****")
    client_params_list = []
    client_losses = []

    for cid in range(NUM_CLIENTS):
        print(f"\nClient {cid} local training...")
        # Create a fresh model and load current global parameters
        client_model = create_model()
        set_model_params(client_model, global_params)

        train_loader, val_loader = client_loaders[cid]

        avg_train_loss = train_one_client(
            client_model,
            train_loader,
            epochs=LOCAL_EPOCHS,
            lr=LR,
            device=device
        )

        val_loss, val_acc = evaluate(client_model, val_loader, device)
        print(f"Client {cid} -> train_loss: {avg_train_loss:.4f}, val_loss: {val_loss:.4f}, val_acc: {val_acc:.4f}")

        client_losses.append(avg_train_loss)
        client_params_list.append(get_model_params(client_model))

        # Free GPU memory
        del client_model
        torch.cuda.empty_cache()

    # FedAvg: update global_params by averaging all client params
    global_params = average_params(client_params_list)
    set_model_params(global_model, global_params)

    # Evaluate updated global model on global test set
    test_loss, test_acc = evaluate(global_model, test_loader, device)
    print(f"\nAfter Round {rnd} -> Global Test loss: {test_loss:.4f}, Test acc: {test_acc:.4f}")


model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Initial global model -> Test loss: 0.6967, Test acc: 0.4110

***** Federated Round 1 *****

Client 0 local training...


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Client 0 -> train_loss: 0.4852, val_loss: 0.3360, val_acc: 0.8450

Client 1 local training...


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Client 1 -> train_loss: 0.5075, val_loss: 0.3774, val_acc: 0.8300

Client 2 local training...


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Client 2 -> train_loss: 0.5056, val_loss: 0.3807, val_acc: 0.8350

After Round 1 -> Global Test loss: 0.2962, Test acc: 0.8780

***** Federated Round 2 *****

Client 0 local training...


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Client 0 -> train_loss: 0.2976, val_loss: 0.2620, val_acc: 0.8850

Client 1 local training...


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Client 1 -> train_loss: 0.3212, val_loss: 0.3570, val_acc: 0.8600

Client 2 local training...


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Client 2 -> train_loss: 0.2861, val_loss: 0.3952, val_acc: 0.8300

After Round 2 -> Global Test loss: 0.2560, Test acc: 0.8900

***** Federated Round 3 *****

Client 0 local training...


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Client 0 -> train_loss: 0.2324, val_loss: 0.4337, val_acc: 0.8600

Client 1 local training...


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Client 1 -> train_loss: 0.2419, val_loss: 0.3313, val_acc: 0.8750

Client 2 local training...


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Client 2 -> train_loss: 0.2344, val_loss: 0.4035, val_acc: 0.8600

After Round 3 -> Global Test loss: 0.2909, Test acc: 0.8815


In [9]:
final_test_loss, final_test_acc = evaluate(global_model, test_loader, device)
print(f"\nFinal Global Model -> Test loss: {final_test_loss:.4f}, Test accuracy: {final_test_acc:.4f}")



Final Global Model -> Test loss: 0.2909, Test accuracy: 0.8815


In [10]:
label_names = {0: "negative", 1: "positive"}

def predict_sentiment(text, model, tokenizer, device):
    model.to(device)
    model.eval()
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=256,
        padding=True
    ).to(device)

    with torch.no_grad():
        outputs = model(**inputs)
        probs = torch.softmax(outputs.logits, dim=-1)
        pred = torch.argmax(probs, dim=-1).item()
        confidence = probs[0, pred].item()

    return label_names[pred], confidence

examples = [
    "This movie was absolutely fantastic, I loved every minute of it!",
    "The plot was boring and the acting was terrible.",
    "It was okay, not the best but watchable."
]

for text in examples:
    label, conf = predict_sentiment(text, global_model, tokenizer, device)
    print(f"\nReview: {text}")
    print(f"Predicted sentiment: {label} (confidence: {conf:.3f})")



Review: This movie was absolutely fantastic, I loved every minute of it!
Predicted sentiment: positive (confidence: 0.989)

Review: The plot was boring and the acting was terrible.
Predicted sentiment: negative (confidence: 0.988)

Review: It was okay, not the best but watchable.
Predicted sentiment: positive (confidence: 0.891)
