In [1]:
import numpy as np
import copy
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


## Load the MNIST dataset (or any other dataset like HAM 10000)

In [2]:
mnist_train = datasets.MNIST(root='./data', train=True, download=True, transform=transforms.ToTensor())
mnist_test = datasets.MNIST(root='./data', train=False, download=True, transform=transforms.ToTensor())

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:00<00:00, 54.3MB/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 1.95MB/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz





Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:00<00:00, 15.0MB/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<00:00, 5.08MB/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






## Extract two subsets of 600 data points each (without intersection)

In [3]:
def get_subsets(dataset_train, dataset_test, subset_size):
  ind = np.arange(len(dataset_train))
  np.random.shuffle(ind)
  #ind = ind[: 2 * subset_size]



  sub1 = ind[: subset_size]
  sub2 = ind[subset_size : 2 * subset_size]
  true_sub1 = Subset(dataset_train, sub1)
  true_sub2 = Subset(dataset_train, sub2)

  sub1_load = DataLoader(true_sub1, batch_size=50, shuffle=True)
  sub2_load = DataLoader(true_sub2, batch_size=50, shuffle=True)
  dl_train = DataLoader(dataset_train, batch_size=50, shuffle=True)
  dl_test = DataLoader(dataset_test, batch_size=50, shuffle=False)

  return sub1_load, sub2_load, dl_train, dl_test

#get_subsets(mnist_train, mnist_test, 600)

## Create a simple Convolutional Neural Network (2 convolutional layers and 2 dense layers, for example)

In [4]:
class CNN(nn.Module):
  def __init__(self, num_classes=10):
    super(CNN, self).__init__()
    self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
    self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
    self.relu = nn.ReLU()
    self.fc1 = nn.Linear(64 * 28 * 28, 128)
    self.fc2 = nn.Linear(128, num_classes)

  def forward(self, x):
    out = self.relu(self.conv2(self.relu(self.conv1(x))))
    out = out.view(out.size(0), -1)
    out = self.fc2(self.relu(self.fc1(out)))
    return out

## Create a function average_model_parameters(models: iterable, average_weight): iterable that takes a list of models as an argument and returns the weighted average of the parameters of each model.

In [5]:
def average_model_parameters(models, average_weight):
  with torch.no_grad():

    mp = [list(m.parameters()) for m in models]
    #print(mp)
    res_amp = []
    for i in range(len(mp[0])):
      sum = 0
      for j, w in enumerate(average_weight):
        sum += w * mp[j][i].data
      #print(sum)
      res_amp.append(sum) #.clone() deleted
  return res_amp

## Create a function that updates the parameters of a model from a list of values

In [6]:
def update_parameters(model, param):
  with torch.no_grad():
    for i, p in zip(model.parameters(), param):
      i.data = p.clone()





def train_one_epoch(model, dataloader, optimizer, criterion):
    model.train()
    rl = 0.0
    truee = 0
    total = 0
    #other = 0
    for i, t in dataloader:
        i = i.to(device)
        t = t.to(device)
        #stop ici, continuer demain, doit faire evaluate (same) + update loss

        optimizer.zero_grad()
        outputs = model(i)
        loss = criterion(outputs, t)
        loss.backward()
        optimizer.step()

        #faire modif
        rl += loss.item() * i.size(0)
        _, predicted = outputs.max(1)
        truee += predicted.eq(t).sum().item()
        total += t.size(0)
    epoch_l = rl / total
    epoch_acc = 100.0 * truee / total
    return epoch_l, epoch_acc


def evaluate(model, dataloader, criterion):
    model.eval()
    rl = 0.0
    truee = 0
    total = 0
    with torch.no_grad():
        for i, t in dataloader:
            i = i.to(device)
            t = t.to(device)
            outputs = model(i)
            loss = criterion(outputs, t)
            rl += loss.item() * i.size(0)
            _, predicted = outputs.max(1)
            truee += predicted.eq(t).sum().item()
            total += t.size(0)
    #ajouter conditions
    epoch_l = rl / total if total > 0 else 0
    epoch_acc = 100.0 * truee / total if total > 0 else 0
    return epoch_l, epoch_acc

## Create a script/code/function that reproduces Algorithm 1, considering that both models are on your machine. Use an average_weight=[1/2, 1/2]. Reuse the same setup as in the article (50 examples per local batch)

In [7]:
def without_common(nb_subset=600, local_batch=50, local_epoch=1, average_weight=[0.5, 0.5], lr=0.001):
  sub1_load, sub2_load, dl_train, dl_test = get_subsets(mnist_train, mnist_test, nb_subset)

  #Cas without initializing the common parameters donc pas de modèle partagé
  model1 = CNN().to(device)
  model2 = CNN().to(device)
  opti1 = optim.Adam(model1.parameters(), lr=lr)
  opti2 = optim.Adam(model2.parameters(), lr=lr)
  criterion = nn.CrossEntropyLoss()
  for epoch in range(local_epoch):
    train_one_epoch(model1, sub1_load, opti1, criterion)
    train_one_epoch(model2, sub2_load, opti2, criterion)
  moy = average_model_parameters([model1, model2], average_weight)
  moy_model = CNN().to(device)
  update_parameters(moy_model, moy)
  train_l, train_acc = evaluate(moy_model, dl_train, criterion)
  test_l, test_acc = evaluate(moy_model, dl_test, criterion)
  print("For no common init :")
  print(f"Average Model on Train | Loss : {train_l} | Accuracy : {train_acc}")
  print(f"Average Model on Test | Loss : {test_l} | Accuracy : {test_acc}")
  return moy_model


def with_common(nb_subset=600, local_batch=50, local_epoch=1, average_weight=[0.5, 0.5], lr=0.001):
  sub1_load, sub2_load, dl_train, dl_test = get_subsets(mnist_train, mnist_test, nb_subset)

  #Cas with initializing the common parameters
  model = CNN().to(device)
  model1 = copy.deepcopy(model)
  model2 = copy.deepcopy(model)
  opti1 = optim.Adam(model1.parameters(), lr=lr)
  opti2 = optim.Adam(model2.parameters(), lr=lr)
  criterion = nn.CrossEntropyLoss()
  for epoch in range(local_epoch):
    train_one_epoch(model1, sub1_load, opti1, criterion)
    train_one_epoch(model2, sub2_load, opti2, criterion)
  moy = average_model_parameters([model1, model2], average_weight)
  moy_model = CNN().to(device)
  update_parameters(moy_model, moy)
  train_l, train_acc = evaluate(moy_model, dl_train, criterion)
  test_l, test_acc = evaluate(moy_model, dl_test, criterion)
  print("For common init :")
  print(f"Average Model on Train | Loss : {train_l} | Accuracy : {train_acc}")
  print(f"Average Model on Test | Loss : {test_l} | Accuracy : {test_acc}")
  return moy_model

## Train your models without initializing the common parameters and measure the performance on the entire dataset.

In [8]:
model_no_init = without_common(nb_subset=600, local_batch=50, local_epoch=1, average_weight=[0.5, 0.5], lr=0.001)

For no common init :
Average Model on Train | Loss : 1.9949518220623335 | Accuracy : 58.803333333333335
Average Model on Test | Loss : 1.9894275826215744 | Accuracy : 59.59


## Train your models with the initialization of common parameters and verify that the performance is better.

In [9]:
model_init = with_common(nb_subset=600, local_batch=50, local_epoch=1, average_weight=[0.5, 0.5], lr=0.001)

For common init :
Average Model on Train | Loss : 1.1920624007284641 | Accuracy : 73.65666666666667
Average Model on Test | Loss : 1.1699778446555138 | Accuracy : 75.06


## Reduce the number of data points in each sub-batch. What is the minimum number of data points necessary for the final model to have acceptable performance?

In [10]:
for b in [50, 25, 10, 5, 3, 2, 1]:
  print(f"Local batch size : {b}")
  model_no_init = without_common(nb_subset=600, local_batch=b, local_epoch=1, average_weight=[0.5, 0.5], lr=0.001)
  model_init = with_common(nb_subset=600, local_batch=b, local_epoch=1, average_weight=[0.5, 0.5], lr=0.001)
  print("============================================")

Local batch size : 50
For no common init :
Average Model on Train | Loss : 2.0872479326526325 | Accuracy : 54.23166666666667
Average Model on Test | Loss : 2.0835875594615936 | Accuracy : 54.57
For common init :
Average Model on Train | Loss : 1.6214909456173578 | Accuracy : 65.68
Average Model on Test | Loss : 1.6233508038520812 | Accuracy : 66.28
Local batch size : 25
For no common init :
Average Model on Train | Loss : 2.0293833129604657 | Accuracy : 40.67666666666667
Average Model on Test | Loss : 2.0210876554250716 | Accuracy : 41.62
For common init :
Average Model on Train | Loss : 1.1849098410705725 | Accuracy : 67.66166666666666
Average Model on Test | Loss : 1.1752170172333718 | Accuracy : 68.36
Local batch size : 10
For no common init :
Average Model on Train | Loss : 2.0186011198163034 | Accuracy : 43.725
Average Model on Test | Loss : 2.015729944705963 | Accuracy : 43.16
For common init :
Average Model on Train | Loss : 1.0375525864462058 | Accuracy : 67.23833333333333
Aver

## Repeat the study on CIFAR-1

In [11]:
cifar_train = datasets.CIFAR100(root="./data_cifar100", train=True, download=True, transform=transforms.ToTensor())
cifar_test = datasets.CIFAR100(root="./data_cifar100", train=False, download=True, transform=transforms.ToTensor())

class CNN2(nn.Module):
  def __init__(self, num_classes=100):
    super(CNN2, self).__init__()
    self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
    self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
    self.relu = nn.ReLU()
    self.fc1 = nn.Linear(64 * 28 * 28, 256)
    self.fc2 = nn.Linear(256, num_classes)
    self.max_pool2d = nn.MaxPool2d(2)

  def forward(self, x):
    out = self.relu(self.conv2(self.relu(self.conv1(x))))
    out = out.view(out.size(0), -1)
    out = self.fc2(self.relu(self.fc1(out)))
    return out

def without_common_cifar(nb_subset=600, local_batch=50, local_epoch=1, average_weight=[0.5, 0.5], lr=0.001):
  sub1_load, sub2_load, dl_train, dl_test = get_subsets(mnist_train, mnist_test, nb_subset)

  #Cas without initializing the common parameters donc pas de modèle partagé
  model1 = CNN2().to(device)
  model2 = CNN2().to(device)
  opti1 = optim.Adam(model1.parameters(), lr=lr)
  opti2 = optim.Adam(model2.parameters(), lr=lr)
  criterion = nn.CrossEntropyLoss()
  for epoch in range(local_epoch):
    train_one_epoch(model1, sub1_load, opti1, criterion)
    train_one_epoch(model2, sub2_load, opti2, criterion)
  moy = average_model_parameters([model1, model2], average_weight)
  moy_model = CNN2().to(device)
  update_parameters(moy_model, moy)
  train_l, train_acc = evaluate(moy_model, dl_train, criterion)
  test_l, test_acc = evaluate(moy_model, dl_test, criterion)
  print("For no common init :")
  print(f"Average Model on Train | Loss : {train_l} | Accuracy : {train_acc}")
  print(f"Average Model on Test | Loss : {test_l} | Accuracy : {test_acc}")
  return moy_model

def with_common_cifar(nb_subset=600, local_batch=50, local_epoch=1, average_weight=[0.5, 0.5], lr=0.001):
  sub1_load, sub2_load, dl_train, dl_test = get_subsets(mnist_train, mnist_test, nb_subset)

  #Cas with initializing the common parameters
  model = CNN2().to(device)
  model1 = copy.deepcopy(model)
  model2 = copy.deepcopy(model)
  opti1 = optim.Adam(model1.parameters(), lr=lr)
  opti2 = optim.Adam(model2.parameters(), lr=lr)
  criterion = nn.CrossEntropyLoss()
  for epoch in range(local_epoch):
    train_one_epoch(model1, sub1_load, opti1, criterion)
    train_one_epoch(model2, sub2_load, opti2, criterion)
  moy = average_model_parameters([model1, model2], average_weight)
  moy_model = CNN2().to(device)
  update_parameters(moy_model, moy)
  train_l, train_acc = evaluate(moy_model, dl_train, criterion)
  test_l, test_acc = evaluate(moy_model, dl_test, criterion)
  print("For common init :")
  print(f"Average Model on Train | Loss : {train_l} | Accuracy : {train_acc}")
  print(f"Average Model on Test | Loss : {test_l} | Accuracy : {test_acc}")
  return moy_model

model_no_init_cifar = without_common_cifar(nb_subset=600, local_batch=50, local_epoch=1, average_weight=[0.5, 0.5], lr=0.001)
model_init_cifar = with_common_cifar(nb_subset=600, local_batch=50, local_epoch=1, average_weight=[0.5, 0.5], lr=0.001)

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data_cifar100/cifar-100-python.tar.gz


100%|██████████| 169M/169M [00:02<00:00, 74.9MB/s]


Extracting ./data_cifar100/cifar-100-python.tar.gz to ./data_cifar100
Files already downloaded and verified
For no common init :
Average Model on Train | Loss : 2.59198892335097 | Accuracy : 43.38666666666666
Average Model on Test | Loss : 2.5748534286022187 | Accuracy : 43.6
For common init :
Average Model on Train | Loss : 0.7847658570607503 | Accuracy : 77.87333333333333
Average Model on Test | Loss : 0.7622268906235695 | Accuracy : 78.25


In [12]:
for b in [50, 25, 10, 5, 3, 2, 1]:
  print(f"Local batch size : {b}")
  model_no_init_cifar = without_common_cifar(nb_subset=600, local_batch=b, local_epoch=1, average_weight=[0.5, 0.5], lr=0.001)
  model_init_cifar = with_common_cifar(nb_subset=600, local_batch=b, local_epoch=1, average_weight=[0.5, 0.5], lr=0.001)
  print("============================================")

Local batch size : 50
For no common init :
Average Model on Train | Loss : 2.9354674555857976 | Accuracy : 14.881666666666666
Average Model on Test | Loss : 2.92389852643013 | Accuracy : 15.12
For common init :
Average Model on Train | Loss : 0.890872001449267 | Accuracy : 72.49
Average Model on Test | Loss : 0.8610889618098736 | Accuracy : 73.98
Local batch size : 25
For no common init :
Average Model on Train | Loss : 2.7401226967573167 | Accuracy : 12.203333333333333
Average Model on Test | Loss : 2.7304976081848142 | Accuracy : 12.18
For common init :
Average Model on Train | Loss : 1.0755546415348847 | Accuracy : 71.755
Average Model on Test | Loss : 1.0394791653752327 | Accuracy : 74.15
Local batch size : 10
For no common init :
Average Model on Train | Loss : 2.396014761328697 | Accuracy : 37.97666666666667
Average Model on Test | Loss : 2.387625995874405 | Accuracy : 37.98
For common init :
Average Model on Train | Loss : 0.9724938650429249 | Accuracy : 69.765
Average Model on 