In [40]:
from scipy.io import loadmat
from utils import *
# Read the EMNIST-digits MATLAB file from the working directory; the result is
# indexed below by the 'dataset' key.
data = loadmat('emnist-digits.mat')

In [41]:
import matplotlib.pyplot as plt
import numpy as np


In [42]:
# Unpack the nested record stored under 'dataset': fields 0, 1 and 2 are used
# in this notebook as the train split, the test split and the label mapping.
dataset = data['dataset'][0, 0]
train, test = (dataset[split][0, 0] for split in (0, 1))
mapping = dataset[2]

# Per-sample arrays of the training split.
# (presumably images are (N, 28*28) and labels are (N, 1) -- TODO confirm)
train_images, train_labels, train_writers = (
    train[field] for field in ('images', 'labels', 'writers')
)

In [None]:
# Cast pixels to float32 and divide by 255 (normalizes 8-bit intensities to [0, 1]);
# labels become a flat int64 vector.
X = np.asarray(train_images, dtype=np.float32) / 255.0
y = np.ravel(train_labels).astype(np.int64)

# Baseline "federation": one client that holds every training sample.
datalist = [(X, y)]

In [43]:

# Hyperparameters
#   T     -> number of global rounds
#   K     -> number of client GD steps
#   gamma -> learning rate
T, K, gamma = 5, 10, 0.1

# Baseline run: FedAvg over the single-client datalist built above.
print("now training the baseline, i.e. fedAvg with one client holding all the data")
model = fedavg(datalist, T, K, gamma)


now training the baseline, i.e. fedAvg with one client holding all the data
round :  1
round :  2
round :  3
round :  4
round :  5


In [23]:
# Prepare the held-out split with the same scaling / label layout as the training data.
test_images = np.asarray(test['images'], dtype=np.float32) / 255.0
test_labels = np.ravel(test['labels']).astype(np.int64)

# Score the most recently trained model and report it as a percentage.
test_accuracy = evaluate(model, test_images, test_labels)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

Test Accuracy: 84.37%


In [25]:
# Split (X, y) across n_clients with create_dirichlet_clients, using parameter beta.
n_clients, beta = 5, 0.5
datalist = create_dirichlet_clients(X, y, n_clients, beta)

# Hyperparameters
#   T     -> number of global rounds
#   K     -> number of client GD steps
#   gamma -> learning rate
T, K, gamma = 5, 10, 0.1

print("case with 5 clients, beta=0.5 skewed distribution!")
model = fedavg(datalist, T, K, gamma)



case with 5 clients, beta=0.5 skewed distribution!
round :  1
round :  2
round :  3
round :  4
round :  5


In [26]:
# Evaluate the current `model` on the test arrays and report the accuracy as a
# percentage, tagged with the n_clients / beta values currently in scope.
test_accuracy = evaluate(model, test_images, test_labels)
print(f"Test Accuracy with {n_clients} clients and Dir({beta}): {test_accuracy * 100:.2f}%")

Test Accuracy with 5 clients and Dir(0.5): 80.88%


In [27]:
# Same client split routine as before, now with a very large beta.
n_clients, beta = 5, 1e5
datalist = create_dirichlet_clients(X, y, n_clients, beta)

# Hyperparameters
#   T     -> number of global rounds
#   K     -> number of client GD steps
#   gamma -> learning rate
T, K, gamma = 5, 10, 0.1

print("case with 5 clients, beta=10^5, which means close to IID clients (baseline 2)!")
model = fedavg(datalist, T, K, gamma)


case with 5 clients, beta=10^5, which means close to IID clients (baseline 2)!
round :  1
round :  2
round :  3
round :  4
round :  5


In [28]:
# Evaluate the current `model` on the test arrays and report the accuracy as a
# percentage, tagged with the n_clients / beta values currently in scope.
test_accuracy = evaluate(model, test_images, test_labels)
print(f"Test Accuracy with {n_clients} clients and Dir({beta}): {test_accuracy * 100:.2f}%")

Test Accuracy with 5 clients and Dir(100000.0): 84.56%


In [29]:
import random
from collections import defaultdict

def make_femnist_datasets(X, y, K=10, seed=42, writers=None):
    """Partition (X, y) into K clients so that each writer's samples stay together.

    Writers are shuffled with a seeded RNG and cut into K consecutive groups of
    ``len(writers) // K`` writers each; the last group additionally receives the
    writers left over when the count is not divisible by K.

    Args:
        X: array of samples, indexable by a list of row indices.
        y: array of labels aligned with ``X``.
        K: number of clients to build.
        seed: seed for the writer shuffle (same permutation as seeding the
            global ``random`` module, but without touching its state).
        writers: per-sample writer ids, aligned with ``X``/``y``; may be shaped
            (N,) or (N, 1). Defaults to the notebook-level ``train['writers']``
            so existing three-argument calls keep working.

    Returns:
        List of K ``(X_i, y_i)`` tuples, one per client.
    """
    if writers is None:
        writers = train['writers']

    # Flatten so each id is a plain scalar: calling int() on a 1-element array
    # row is deprecated in recent NumPy and emitted a warning on every sample.
    writer_per_sample = np.asarray(writers).ravel().tolist()

    # 1) Group example indices by writer (keys keep first-appearance order)
    by_writer = defaultdict(list)
    for idx, writer_id in enumerate(writer_per_sample):
        by_writer[int(writer_id)].append(idx)

    # 2) Shuffle writer ids with a private RNG and split into K groups
    writer_ids = list(by_writer.keys())
    random.Random(seed).shuffle(writer_ids)
    per = len(writer_ids) // K
    groups = [writer_ids[i * per:(i + 1) * per] for i in range(K - 1)]
    groups.append(writer_ids[(K - 1) * per:])  # last client absorbs the remainder

    # 3) For each group, gather the rows belonging to its writers
    datalist = []
    for group in groups:
        idxs = [i for w in group for i in by_writer[w]]
        datalist.append((X[idxs], y[idxs]))

    return datalist

In [30]:
# Build one client per group of writers.
n_clients = 30
datalist = make_femnist_datasets(X, y, n_clients)

# Hyperparameters
#   T     -> number of global rounds
#   K     -> number of client GD steps
#   gamma -> learning rate
T, K, gamma = 5, 10, 0.1

print(f"case with {n_clients} clients, with feature distribution shift")
model = fedavg(datalist, T, K, gamma)


  by_writer[int(writer)].append(idx)


case with 30 clients, with feature distribution shift
round :  1
round :  2
round :  3
round :  4
round :  5


In [31]:
# Evaluate the current `model` on the test arrays and report the accuracy as a
# percentage together with the number of clients.
# NOTE(review): the message contains a stray ")" after {n_clients}; it looks
# like a leftover from the "clients and Dir(beta)" variant -- confirm and
# clean up on the next re-run.
test_accuracy = evaluate(model, test_images, test_labels)
print(f"Test Accuracy with {n_clients} ): {test_accuracy * 100:.2f}%")

Test Accuracy with 30 ): 84.19%


In [32]:
class MADE(nn.Module):
    """Two-layer fully connected network that maps an input back to its own size.

    Layout: Linear(input_dim -> hidden_dim), ReLU, Linear(hidden_dim -> input_dim),
    sigmoid. The forward pass therefore returns per-dimension values in [0, 1]
    (probabilities, not logits). No connectivity masks are applied to the
    linear layers.
    """

    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, input_dim)

    def forward(self, x):
        """Return the sigmoid-activated reconstruction of ``x``."""
        hidden = F.relu(self.fc1(x))
        reconstruction = self.fc2(hidden)
        return torch.sigmoid(reconstruction)



class WeightEstimator(nn.Module):
    """
    Small binary classifier over per-dimension log-likelihood vectors u.

    The forward pass returns P(l=1 | u) as a sigmoid output; the weight
    α = P(l=1 | u) / (1 - P(l=1 | u)) is formed from that probability by
    the caller.
    """

    def __init__(self, input_dim, hidden_dim=100):
        super(WeightEstimator, self).__init__()
        # Layers are created in the same order as they are applied.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map a batch of vectors to one probability per row (last dim squeezed)."""
        hidden = self.fc1(x)
        hidden = self.relu(hidden)
        score = self.fc2(hidden)
        prob = self.sigmoid(score)
        return prob.squeeze(-1)

In [33]:
import torch.nn.functional as F
import torch.optim as optim


def train_local_made(model, loader, epochs=5, lr=1e-3):
    """Fit ``model`` to reconstruct its own inputs and return its state dict.

    Args:
        model: module whose output is compared to the input with binary
            cross-entropy, so outputs must be probabilities in [0, 1].
        loader: iterable of ``(x, _)`` pairs; the second element is ignored.
        epochs: number of passes over ``loader``.
        lr: Adam learning rate.

    Returns:
        ``model.state_dict()`` after training (the model is updated in place).
    """
    optimizer = optim.Adam(model.parameters(), lr=lr)
    for _epoch in range(epochs):
        for inputs, _unused in loader:
            reconstruction = model(inputs)
            recon_loss = F.binary_cross_entropy(reconstruction, inputs)
            optimizer.zero_grad()
            recon_loss.backward()
            optimizer.step()
    return model.state_dict()

def aggregate_models(states, weights):
    """Combine state dicts into one by a weighted sum, key by key.

    Args:
        states: non-empty sequence of mappings that all contain the keys of
            ``states[0]``.
        weights: sequence with one multiplier per entry of ``states``.

    Returns:
        Dict with the keys of ``states[0]``; each value is
        ``sum_i weights[i] * states[i][key]``.
    """
    merged = {}
    for name in states[0]:
        merged[name] = sum(
            weights[idx] * state[name] for idx, state in enumerate(states)
        )
    return merged

def train_global_made(loaders, dim, hid, rounds=10, local_epochs=1):
    """Train one shared MADE across clients by size-weighted parameter averaging.

    Each round, every client starts from a fresh ``MADE(dim, hid)`` loaded with
    the current global parameters, trains it with ``train_local_made`` for
    ``local_epochs`` epochs, and the resulting state dicts are averaged with
    weights proportional to ``len(loader.dataset)``.

    Args:
        loaders: one data loader per client; each must expose ``.dataset``.
        dim: input dimension passed to ``MADE``.
        hid: hidden dimension passed to ``MADE``.
        rounds: number of averaging rounds.
        local_epochs: local epochs per client per round.

    Returns:
        The global ``MADE`` instance after the last round.
    """
    global_model = MADE(dim, hid)
    for _round in range(rounds):
        client_states = []
        client_sizes = []
        for loader in loaders:
            # Every client starts the round from the current global parameters.
            client_model = MADE(dim, hid)
            client_model.load_state_dict(global_model.state_dict())
            client_states.append(
                train_local_made(client_model, loader, epochs=local_epochs)
            )
            client_sizes.append(len(loader.dataset))
        n_total = sum(client_sizes)
        mixing = [size / n_total for size in client_sizes]
        global_model.load_state_dict(aggregate_models(client_states, mixing))
    return global_model


In [34]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions.bernoulli import Bernoulli


def compute_sample_weights(global_made, local_made, loader,
                           device='cpu', num_epochs=1, lr=1e-3):
    """
    Trains a WeightEstimator to distinguish global vs local MADE log-likelihoods
    and computes sample weights α for all samples in loader.

    For every batch, the per-dimension Bernoulli log-likelihood of the binarized
    input (threshold 0.5) is computed under both models. Vectors from the global
    model are labelled 0 and vectors from the local model are labelled 1; the
    estimator's output p on the local vectors gives α = p / (1 - p).

    Args:
        global_made, local_made: MADE models; their forward(x) returns
            per-dimension Bernoulli probabilities (sigmoid outputs), not logits.
        loader: DataLoader yielding (X, _) batches (X in [0,1]). It may shuffle:
            it is used as-is to train the estimator, while the weights are
            computed in a separate ordered pass over ``loader.dataset``.
        device: 'cpu' or 'cuda'.
        num_epochs: number of training epochs for estimator.
        lr: learning rate.

    Returns:
        1-D tensor of α weights, one per sample, in dataset order.
    """
    # Local import keeps the function usable regardless of cell execution order.
    from torch.utils.data import DataLoader

    def _log_likelihood(made, batch, target):
        # MADE.forward already applies a sigmoid, so its output must be passed
        # as `probs`; passing it as `logits` would squash it a second time.
        return Bernoulli(probs=made(batch)).log_prob(target)

    # Determine input dimension from one forward pass
    sample_batch = next(iter(loader))[0].to(device)
    with torch.no_grad():
        input_dim = global_made(sample_batch).size(1)

    estimator = WeightEstimator(input_dim).to(device)
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(estimator.parameters(), lr=lr)

    # Train the estimator: label 0 = global log-likelihoods, 1 = local ones
    estimator.train()
    for _epoch in range(num_epochs):
        for X, _unused in loader:
            X = X.to(device)
            X_bin = (X >= 0.5).float()
            with torch.no_grad():
                ug = _log_likelihood(global_made, X, X_bin)
                ul = _log_likelihood(local_made, X, X_bin)
            U = torch.cat([ug, ul], dim=0)
            labels = torch.cat([
                torch.zeros(ug.size(0), device=device),
                torch.ones(ul.size(0), device=device),
            ])
            preds = estimator(U)
            loss = criterion(preds, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # The weights must line up with the dataset rows, so never reuse a shuffling
    # loader here. Plain iterables without `.dataset` are consumed as given.
    dataset = getattr(loader, 'dataset', None)
    if dataset is not None:
        batch_size = getattr(loader, 'batch_size', None) or 256
        ordered_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
    else:
        ordered_loader = loader

    # Compute α for each sample
    estimator.eval()
    max_prob = 1.0 - 1e-6  # keeps p / (1 - p) finite if the sigmoid saturates
    alphas = []
    with torch.no_grad():
        for X, _unused in ordered_loader:
            X = X.to(device)
            X_bin = (X >= 0.5).float()
            ul = _log_likelihood(local_made, X, X_bin)
            p = estimator(ul).clamp(max=max_prob)
            alphas.append((p / (1 - p)).cpu())
    return torch.cat(alphas)




In [35]:
from torch.utils.data import DataLoader, TensorDataset
import torch
import numpy as np

n_clients = 30
datalist = make_femnist_datasets(X, y, n_clients)

# Hyperparameters
T = 5       # number of global rounds
K = 10      # number of client GD steps
gamma = 0.1 # learning rate

# Model / loader settings shared by every step below
INPUT_DIM = 28 * 28
MADE_HIDDEN = 100
MADE_BATCH_SIZE = 64

# 1) Create MADE data loaders (targets are dummy zeros; only the inputs are used).
#    The loop variable is named Xi so it cannot be confused with the global X.
made_datasets = [
    TensorDataset(
        torch.tensor(Xi, dtype=torch.float32),
        torch.zeros(len(Xi), dtype=torch.float32)
    )
    for Xi, _ in datalist
]
made_loaders = [
    DataLoader(ds, batch_size=MADE_BATCH_SIZE, shuffle=True)
    for ds in made_datasets
]

# 2) Train global MADE
global_made = train_global_made(
    made_loaders,
    dim=INPUT_DIM,
    hid=MADE_HIDDEN,
    rounds=T,
    local_epochs=1
)

# 3) Compute sample-weights α for each client
sample_weights = []
for ld in made_loaders:
    # train_local_made updates the model in place, so no reload is needed
    local_made = MADE(INPUT_DIM, MADE_HIDDEN)
    train_local_made(local_made, ld, epochs=1)

    # α[i] must refer to row i of this client's (Xi, yi), so the weights are
    # computed through an unshuffled loader over the same dataset.
    ordered_ld = DataLoader(ld.dataset, batch_size=MADE_BATCH_SIZE, shuffle=False)
    alpha = compute_sample_weights(global_made, local_made, ordered_ld)
    sample_weights.append(alpha)

# 4) Build weighted datasets by oversampling
weighted_datalist = []
for (Xi, yi), a in zip(datalist, sample_weights):
    # Normalize so the expected counts sum to len(yi), then round to the
    # nearest integer: truncation would drop every sample whose expected count
    # is just below 1 (e.g. 0.9999 from float error under near-uniform weights).
    expected_counts = (a / a.sum() * len(yi)).cpu().numpy()
    counts = np.rint(expected_counts).astype(int)
    idxs = np.repeat(np.arange(len(yi)), counts)
    if len(idxs) == 0:
        # Never hand an empty client to fedavg; fall back to the unweighted data
        idxs = np.arange(len(yi))
    weighted_datalist.append((Xi[idxs], yi[idxs]))

# 5) Federated training on weighted data
print(f"case with {n_clients} clients, with feature distribution shift")
model = fedavg(weighted_datalist, T, K, gamma)


  by_writer[int(writer)].append(idx)


KeyboardInterrupt: 

In [None]:

# Evaluate the current `model` (intended to be the one trained on
# weighted_datalist in the previous cell) and report accuracy as a percentage.
test_accuracy = evaluate(model, test_images, test_labels)
print(f"Test Accuracy with {n_clients} clients: {test_accuracy * 100:.2f}%")