In [None]:
!pip install --upgrade progressbar2

Collecting progressbar2
  Downloading progressbar2-3.53.1-py2.py3-none-any.whl (25 kB)
Installing collected packages: progressbar2
  Attempting uninstall: progressbar2
    Found existing installation: progressbar2 3.38.0
    Uninstalling progressbar2-3.38.0:
      Successfully uninstalled progressbar2-3.38.0
Successfully installed progressbar2-3.53.1


In [None]:
from torch import nn
from collections import OrderedDict
import torch.nn.functional as F
import torch
from torch.utils.data import DataLoader
import torchvision
import random
from torch.utils.data import Subset
from matplotlib import pyplot as plt
from torchsummary import summary
from torchvision import transforms
import progressbar as pb
import numpy as np

In [None]:
# Colab-only upload helper. The recorded output shows it saving
# 'federated_svhn_best_test.pth' and 'federated_mnist_best_test.pth' into the
# working directory; the torch.load cells below read exactly those two files.
from google.colab import files
uploaded= files.upload()

Saving federated_svhn_best_test.pth to federated_svhn_best_test.pth
Saving federated_mnist_best_test.pth to federated_mnist_best_test.pth


In [None]:
# Checkpoint of the MNIST run; the cells below read its 'classifier' and 'vgg_a' entries.
# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
# map_location='cpu' lets a checkpoint that was saved from a GPU load on a CPU-only
# runtime; load_state_dict() later copies the values into the target model.
state_dict = torch.load('federated_mnist_best_test.pth', map_location='cpu')

In [None]:
# Checkpoint of the SVHN run; the cells below read its 'vgg_a' entry.
# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
# map_location='cpu' lets a checkpoint that was saved from a GPU load on a CPU-only
# runtime; load_state_dict() later copies the values into the target model.
state_dict_svhn = torch.load('federated_svhn_best_test.pth', map_location='cpu')

In [None]:
def SUM(x, y):
    """Return ``x + y``; used below as the function that merges two weight tensors."""
    return x + y

In [None]:
def check_equity(property,a,b):
    """Assert that ``a`` and ``b`` agree on one attribute and return its value.

    Args:
        property: name of the attribute to read from both objects.
        a, b: the two objects to compare.

    Returns:
        The attribute value read from ``a``.

    Raises:
        AssertionError: if the two attribute values compare unequal.
    """
    value_a = getattr(a, property)
    value_b = getattr(b, property)
    assert value_a == value_b, "Different {}: {}!={}".format(property, value_a, value_b)
    return value_a

In [None]:
def module_unwrap(mod:nn.Module,recursive=False):
    """Collect the child modules of ``mod`` into an ordered name -> module mapping.

    Args:
        mod: module whose children are listed.
        recursive: when true, descend into every child; a leaf is stored under
            the names along its path joined with ``"_"``. When false only the
            direct children are returned.

    Returns:
        An ``OrderedDict`` of children. It is empty for a module without
        children, and also for an object that has no ``named_children``
        (the resulting ``AttributeError`` is deliberately swallowed).
    """
    flat = OrderedDict()
    try:
        for child_name, child in mod.named_children():
            nested = module_unwrap(child, recursive=True) if recursive else None
            if nested:
                # The child has children of its own: keep only its leaves.
                for leaf_name, leaf in nested.items():
                    flat[child_name + "_" + leaf_name] = leaf
            else:
                flat[child_name] = child
    except AttributeError:
        pass

    return flat

In [None]:
class VGGBlock(nn.Module):
    """Two 3x3 convolutions, each followed by optional normalisation and ReLU, then 2x2 max-pooling.

    Args:
        in_channels: number of channels of the input feature map.
        out_channels: number of channels produced by both convolutions.
        batch_norm: when true, a ``GroupNorm`` layer with 32 groups follows each
            convolution (the flag keeps its original name although the layer is
            not ``BatchNorm2d``), so ``out_channels`` must be divisible by 32.
            When false the normalisation slots are ``nn.Identity`` modules.
    """

    def __init__(self, in_channels, out_channels,batch_norm=False):

        super().__init__()

        # 3x3 kernels with stride 1 and padding 1 keep the spatial size unchanged.
        conv2_params = {'kernel_size': (3, 3),
                        'stride'     : (1, 1),
                        'padding'   : 1
                        }

        self._batch_norm = batch_norm

        def make_norm():
            # nn.Identity (rather than a bare lambda) keeps every slot a real
            # nn.Module; it has no parameters, so state-dict keys are unchanged.
            return nn.GroupNorm(32, out_channels) if batch_norm else nn.Identity()

        self.conv1 = nn.Conv2d(in_channels=in_channels,out_channels=out_channels , **conv2_params)
        self.bn1 = make_norm()

        self.conv2 = nn.Conv2d(in_channels=out_channels,out_channels=out_channels, **conv2_params)
        self.bn2 = make_norm()

        self.max_pooling = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))

    @property
    def batch_norm(self):
        """The ``batch_norm`` flag this block was built with."""
        return self._batch_norm

    def forward(self,x):
        """Apply conv -> norm -> ReLU twice, then halve height and width by max-pooling."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = F.relu(x)

        x = self.conv2(x)
        x = self.bn2(x)
        x = F.relu(x)

        x = self.max_pooling(x)

        return x

In [None]:
class Classifier(nn.Module):
    """Fully connected head: 2048 -> 2048 -> 512 -> ``num_classes``.

    Each hidden layer is followed by an in-place ReLU and Dropout(p=0.5); the
    last linear layer returns raw scores.
    """

    def __init__(self,num_classes=10):
        super().__init__()

        widths = (2048, 2048, 512)
        layers = []
        # Hidden stages, appended in order so the Sequential indices stay 0..5.
        for fan_in, fan_out in zip(widths, widths[1:]):
            layers += [nn.Linear(fan_in, fan_out), nn.ReLU(True), nn.Dropout(p=0.5)]
        # Output layer (index 6).
        layers.append(nn.Linear(widths[-1], num_classes))

        self.classifier = nn.Sequential(*layers)

    def forward(self,x):
        """Return the class scores for a batch of 2048-dimensional feature vectors."""
        scores = self.classifier(x)
        return scores

In [None]:
class VGG16(nn.Module):
    """Convolutional feature extractor made of four ``VGGBlock`` stages.

    Args:
        input_size: ``(channels, width, height)`` triple. Only the channel count
            is used to build the layers; the other two values are just stored.
        batch_norm: forwarded unchanged to every ``VGGBlock``.
    """

    def __init__(self, input_size, batch_norm=False):
        super().__init__()

        self.in_channels, self.in_width, self.in_height = input_size

        # Stage widths: input channels -> 64 -> 128 -> 256 -> 512, registered
        # as block_1 ... block_4 in that order.
        widths = (self.in_channels, 64, 128, 256, 512)
        for stage, (c_in, c_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f"block_{stage}", VGGBlock(c_in, c_out, batch_norm=batch_norm))

    @property
    def input_size(self):
        """The ``(channels, width, height)`` triple given at construction."""
        return self.in_channels, self.in_width, self.in_height

    def forward(self, x):
        """Run the four stages and flatten everything but the batch dimension."""
        for stage in (self.block_1, self.block_2, self.block_3, self.block_4):
            x = stage(x)
        return torch.flatten(x, start_dim=1)

In [None]:
class CombinedLoss(nn.Module):
    """Loss of branch a + loss of branch b + ``_lambda`` * loss of the combined branch.

    Args:
        loss_a: callable applied to ``(y_hat[0], y[0])``.
        loss_b: callable applied to ``(y_hat[1], y[1])``.
        loss_combo: callable applied to ``y_hat[2]`` and the concatenation of
            all targets in ``y`` along dimension 0.
        _lambda: weight of the combined term; stored as a float32 buffer.
    """

    def __init__(self, loss_a, loss_b, loss_combo, _lambda=1.0):
        super().__init__()
        self.loss_a = loss_a
        self.loss_b = loss_b
        self.loss_combo = loss_combo

        weight = torch.tensor(float(_lambda), dtype=torch.float32)
        self.register_buffer('_lambda', weight)

    def forward(self,y_hat,y):
        """Return the total loss for predictions ``y_hat`` and targets ``y``."""
        term_a = self.loss_a(y_hat[0], y[0])
        term_b = self.loss_b(y_hat[1], y[1])
        all_targets = torch.cat(y, dim=0)
        term_combo = self.loss_combo(y_hat[2], all_targets)

        return term_a + term_b + self._lambda * term_combo


In [None]:
# Run-mode flag (not read by any cell visible in this part of the notebook).
DO='TRAIN'

# Seed every random number generator the notebook draws from: `random` (SVHN
# index shuffle), NumPy (MNIST index permutation) and PyTorch (random
# initialisation of the networks). Seeding `random` alone leaves the splits
# and the VGG* initialisation different on every run.
SEED = 47
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Function used by final_test to merge the convolutional weights of two networks.
combo_fn = SUM

# Presumably the weight of a regularisation term; not read by the visible cells.
lambda_reg = 1

In [None]:
def test(net,classifier, loader):

      net.to(dev)
      classifier.to(dev)

      net.eval()

      sum_accuracy = 0

      # Process each batch
      for j, (input, labels) in enumerate(loader):

        input = input.to(dev)
        labels = labels.float().to(dev)

        features = net(input)

        pred = torch.squeeze(classifier(features))

        # https://discuss.pytorch.org/t/bcewithlogitsloss-and-model-accuracy-calculation/59293/ 2
        #pred_labels = (pred >= 0.0).long()  # Binarize predictions to 0 and 1
        _,pred_label = torch.max(pred, dim = 1)
        pred_labels = (pred_label == labels).float()

        batch_accuracy = pred_labels.sum().item() / len(labels)

        # Update accuracy
        sum_accuracy += batch_accuracy

      epoch_accuracy = sum_accuracy / len(loader)
      return epoch_accuracy
      #print(f"Accuracy test: {epoch_accuracy:0.5}")

MNIST: la prima metà del test set va a VGGA (allenata su MNIST), la seconda metà a VGGB (allenata su SVHN) e l'intero test set a VGG* (inizializzata random); VGG* diventa poi VGGSTAR caricando come pesi convoluzionali la somma di quelli di VGGA e VGGB.

In [None]:
# Directory passed as `root` to the MNIST dataset constructors below.
root_dir = './'

In [None]:
# Divides every value by 255.
# NOTE(review): this runs after ToTensor, which per the torchvision docs already
# scales 8-bit images to [0, 1], so the values are divided a second time.
# Presumably the checkpoints were trained with this same pipeline - confirm
# before changing it, otherwise the loaded weights will see different inputs.
rescale_data = transforms.Lambda(lambda x : x/255)

# Training pipeline: resize to 32, random horizontal flip, tensor conversion, rescale.
data_transform = transforms.Compose([
  transforms.Resize(32),
  transforms.RandomHorizontalFlip(),
  transforms.ToTensor(),
  rescale_data,
  #transforms.Normalize((-0.7376), (0.5795))
])

# Evaluation pipeline: same as above without the random flip.
test_transform = transforms.Compose([
  transforms.Resize(32),
  transforms.ToTensor(),
  rescale_data,
  #transforms.Normalize((0.1327), (0.2919))
])

In [None]:
# Load MNIST dataset with transforms
train_set = torchvision.datasets.MNIST(root=root_dir, train=True, download=True, transform=data_transform)
test_set = torchvision.datasets.MNIST(root=root_dir, train=False, download=True, transform=test_transform)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST/raw



  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [None]:
# Shuffle the training indices (global NumPy RNG); the test indices keep their order.
train_idx = np.random.permutation(np.arange(len(train_set)))
test_idx = np.arange(len(test_set))

# The first `val_frac` of the shuffled indices becomes the validation set,
# the remainder stays as training set.
val_frac = 0.1
n_val = int(len(train_idx) * val_frac)
val_idx, train_idx = train_idx[:n_val], train_idx[n_val:]

# Every split is cut at its midpoint: the first half goes to side "a",
# the second half to side "b".
h = len(train_idx)//2
train_set_a, train_set_b = Subset(train_set, train_idx[:h]), Subset(train_set, train_idx[h:])

# Validation subsets are views of train_set, so they share its transform.
h = len(val_idx)//2
val_set_a, val_set_b = Subset(train_set, val_idx[:h]), Subset(train_set, val_idx[h:])

h = len(test_idx)//2
test_set_a, test_set_b = Subset(test_set, test_idx[:h]), Subset(test_set, test_idx[h:])

In [None]:
# Loader options: training loaders shuffle and drop the last incomplete batch,
# evaluation loaders keep the dataset order and every sample.
train_opts = dict(batch_size=64, num_workers=0, shuffle=True, drop_last=True)
eval_opts = dict(batch_size=64, num_workers=0, shuffle=False, drop_last=False)

# Side "a"
train_loader_a = DataLoader(train_set_a, **train_opts)
val_loader_a = DataLoader(val_set_a, **eval_opts)
test_loader_a = DataLoader(test_set_a, **eval_opts)

# Side "b"
train_loader_b = DataLoader(train_set_b, **train_opts)
val_loader_b = DataLoader(val_set_b, **eval_opts)
test_loader_b = DataLoader(test_set_b, **eval_opts)

# Whole test set, used for the networks that see both halves.
test_loader_all = DataLoader(test_set, **eval_opts)

# Loaders grouped per phase as [side a, side b].
loaders = dict(
    train=[train_loader_a, train_loader_b],
    val=[val_loader_a, val_loader_b],
    test=[test_loader_a, test_loader_b],
)

In [None]:
# Three identically shaped feature extractors for 1-channel 32x32 inputs plus a
# shared 10-class head. model1 and model2 get checkpoint weights in the cells
# below; model3 keeps its random initialisation until final_test overwrites it.
model1 = VGG16((1,32,32),batch_norm=True)
model2 = VGG16((1,32,32),batch_norm=True)
model3 = VGG16((1,32,32),batch_norm=True)
classifier = Classifier(num_classes=10)

In [None]:
# The shared head comes from the MNIST checkpoint.
classifier.load_state_dict(state_dict['classifier'])

<All keys matched successfully>

In [None]:
# VGGA: the 'vgg_a' network of the MNIST checkpoint.
model1.load_state_dict(state_dict['vgg_a'])

<All keys matched successfully>

In [None]:
# VGGB: the 'vgg_a' network of the SVHN checkpoint.
# NOTE(review): presumably the SVHN-trained network is stored under 'vgg_a' in
# that file - confirm that 'vgg_a' (not another key) is the intended entry.
model2.load_state_dict(state_dict_svhn['vgg_a'])

<All keys matched successfully>

In [None]:
# Order matters: final_test reads nets as [VGGA, VGGB, VGG*, classifier].
nets = [model1,model2,model3,classifier]

# Use the GPU when one is visible, otherwise fall back to the CPU so the
# notebook still runs.
dev = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Collect every parameter exactly once and in a fixed order. Gathering them in
# a set (as before) leaves the optimizer's parameter order undefined.
parameters = []
seen_ids = set()
for n in nets:
  for p in n.parameters():
    if id(p) not in seen_ids:
      seen_ids.add(id(p))
      parameters.append(p)

optimizer = torch.optim.SGD(parameters, lr = 0.01)
# Loss for integer class labels
criterion = nn.CrossEntropyLoss()
n_params = 0

In [None]:
# Freeze the shared head: switch off gradient tracking on each of its parameters.
for frozen_param in classifier.parameters():
    frozen_param.requires_grad = False

In [None]:
# Freeze VGGA (model1): switch off gradient tracking on each of its parameters.
for frozen_param in model1.parameters():
    frozen_param.requires_grad = False

In [None]:
# Freeze VGGB (model2): switch off gradient tracking on each of its parameters.
for frozen_param in model2.parameters():
    frozen_param.requires_grad = False

In [None]:
def final_test(nets, loaders, optimizer, criterion, dev=None, save_param=False, model_name="federated_svhn"):
    """Print the test accuracy of VGGA, VGGB, VGG* and of the merged network VGGSTAR.

    Args:
        nets: ``[vgg_a, vgg_b, vgg_star, classifier]``.
        loaders, optimizer, save_param, model_name: kept so existing calls keep
            working; not used by this function.
        criterion: loss module; it is only moved to ``dev``.
        dev: device the networks and the criterion are moved to.

    Notebook-level names read: ``test``, ``combo_fn`` and the loaders
    ``test_loader_a``, ``test_loader_b``, ``test_loader_all``.

    Side effects:
        Prints four accuracy lines. ``nets[2]`` is modified in place: every
        state-dict entry whose key contains ``'conv'`` is replaced by
        ``combo_fn`` of the matching entries of ``nets[0]`` and ``nets[1]``;
        all its other entries are kept as they were.
    """
    nets = [n.to(dev) for n in nets]
    vgg_a, vgg_b, vgg_star, head = nets[0], nets[1], nets[2], nets[3]

    criterion.to(dev)

    print(f"Accuracy test VGGA: {test(vgg_a, head, test_loader_a):0.5}")
    print(f"Accuracy test VGGB: {test(vgg_b, head, test_loader_b):0.5}")
    print(f"Accuracy test VGG*: {test(vgg_star, head, test_loader_all):0.5}")

    # Build each state dict once instead of on every loop iteration.
    state_a = vgg_a.state_dict()
    state_b = vgg_b.state_dict()
    state_star = vgg_star.state_dict()

    summed_state_dict = OrderedDict()
    for key, own_value in state_star.items():
        if 'conv' in key:
            # Convolution weights and biases: merge the two source networks.
            summed_state_dict[key] = combo_fn(state_a[key], state_b[key])
        else:
            # Everything else (e.g. normalisation layers) stays as in VGG*.
            summed_state_dict[key] = own_value

    vgg_star.load_state_dict(summed_state_dict)
    accuracy_star = test(vgg_star, head, test_loader_all)
    print(f"Accuracy test VGGSTAR: {accuracy_star:0.5}")

Lo riscrivo per ricordare: qui abbiamo MNIST. La prima metà va a VGGA (allenata su MNIST), la seconda metà a VGGB (allenata su SVHN) e l'intero test set a VGG* (inizializzata random), che diventa poi VGGSTAR caricando come pesi convoluzionali la somma di quelli di VGGA e VGGB.

In [None]:
# Prints the four accuracies and overwrites model3's convolution weights in
# place. save_param is accepted by final_test but has no effect there.
final_test(nets, loaders, optimizer, criterion, dev=dev,save_param=True)

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Accuracy test VGGA: 0.97666
Accuracy test VGGB: 0.39359
Accuracy test VGG*: 0.096636
Accuracy test VGGSTAR: 0.74691


Adesso do MNIST a VGGA e SVHN a VGGB (quindi mi aspetto buone performance su entrambe); a VGG* do entrambi i dataset, mentre per VGGSTAR sommo i pesi e valuto su entrambi i dataset.

In [None]:
# Directory passed as `root` to the MNIST dataset constructors below.
root_dir = './'

# Divides every value by 255.
# NOTE(review): this runs after ToTensor, which per the torchvision docs already
# scales 8-bit images to [0, 1]; presumably it matches how the MNIST checkpoint
# was trained - confirm before changing it.
rescale_data = transforms.Lambda(lambda x : x/255)

# MNIST training pipeline: resize, random horizontal flip, tensor conversion, rescale.
data_transform = transforms.Compose([
  transforms.Resize(32),
  transforms.RandomHorizontalFlip(),
  transforms.ToTensor(),
  rescale_data,
  #transforms.Normalize((-0.7376), (0.5795))
])

# MNIST evaluation pipeline: same as above without the random flip.
test_transform = transforms.Compose([
  transforms.Resize(32),
  transforms.ToTensor(),
  rescale_data,
  #transforms.Normalize((0.1327), (0.2919))
])

# Load MNIST dataset with transforms
train_set = torchvision.datasets.MNIST(root=root_dir, train=True, download=True, transform=data_transform)
test_set = torchvision.datasets.MNIST(root=root_dir, train=False, download=True, transform=test_transform)

# Dataset len
num_train = len(train_set)
num_test = len(test_set)
print(f"Num. training samples: {num_train}")
print(f"Num. test samples:     {num_test}")

# Shuffled training indices (global NumPy RNG); test indices keep their order.
train_idx = np.random.permutation(np.arange(len(train_set)))
test_idx = np.arange(len(test_set))

# Fraction of the original train set that we want to use as validation set
val_frac = 0.1
# Number of samples of the validation set
num_val = int(num_train * val_frac) 
# From here on num_train counts only the samples that remain for training.
num_train = num_train - num_val

# Split training set: the last num_val shuffled indices form the validation set.
val_idx = train_idx[num_train:]
train_idx = train_idx[:num_train]

print(f"{num_train} samples used as train set")
print(f"{num_val}  samples used as val set")
print(f"{len(test_set)} samples used as test set")

# Side "a" is MNIST only; the whole MNIST test set is used (no halving here).
val_set_a = Subset(train_set, val_idx)
train_set_a = Subset(train_set, train_idx)
test_set_a = test_set

# The two names are rebound here for SVHN. The MNIST datasets built above keep
# the pipelines they were constructed with.
# SVHN training pipeline: resize to 32x32, convert to one grey channel, random
# flip, tensor conversion. Unlike the MNIST pipelines there is no rescale step.
data_transform = transforms.Compose([
  transforms.Resize((32,32)),
  transforms.Grayscale(num_output_channels=1),
  transforms.RandomHorizontalFlip(),
  transforms.ToTensor(),
])

# SVHN evaluation pipeline: same as above without the random flip.
test_transform = transforms.Compose([
  transforms.Resize((32,32)),
  transforms.Grayscale(num_output_channels=1),
  transforms.ToTensor(),
])

Num. training samples: 60000
Num. test samples:     10000
54000 samples used as train set
6000  samples used as val set
10000 samples used as test set


In [None]:
# Local directory the SVHN files are stored in. `root` is a filesystem path, not
# the download address: torchvision knows the SVHN URLs itself. Passing the URL
# here made the files land in a local folder named after the URL.
base_dir = './SVHN'
train_set_svhn = torchvision.datasets.SVHN(root=base_dir, split='train', download=True, transform=data_transform, target_transform=None)
test_set_svhn = torchvision.datasets.SVHN(root=base_dir, split='test', download=True, transform=test_transform)

num_train_svhn = len(train_set_svhn)
num_test_svhn = len(test_set_svhn)
print(f"Num. training samples: {num_train_svhn}")
print(f"Num. test samples:     {num_test_svhn}")

# List of indexes on the training set
train_idx_svhn = list(range(num_train_svhn))

# List of indexes of the test set
test_idx_svhn = list(range(num_test_svhn))

# Shuffle the training set (uses the `random` module imported at the top of the
# notebook) and show the first ten shuffled indices.
random.shuffle(train_idx_svhn)
for i in range(10):
  print(train_idx_svhn[i])

# Fraction of the original train set that we want to use as validation set
val_frac = 0.1
# Number of samples of the validation set
num_val = int(num_train_svhn * val_frac)
# num_train / num_val now describe SVHN and replace the MNIST values of the same name.
num_train = num_train_svhn - num_val

# Split training set: the last num_val shuffled indices form the validation set.
val_idx_svhn = train_idx_svhn[num_train:]
train_idx_svhn = train_idx_svhn[:num_train]

print(f"{num_train} samples used as train set")
print(f"{num_val}  samples used as val set")

# Side "b" is SVHN only.
val_set_b = Subset(train_set_svhn, val_idx_svhn)
train_set_b = Subset(train_set_svhn, train_idx_svhn)
test_set_b = test_set_svhn

# Joint test set: all MNIST test samples followed by all SVHN test samples.
test_set = torch.utils.data.ConcatDataset([test_set_a, test_set_b])

# Define loaders

train_loader_a = DataLoader(train_set_a, batch_size=128, num_workers=0, shuffle=True, drop_last=True)
val_loader_a   = DataLoader(val_set_a,   batch_size=128, num_workers=0, shuffle=False, drop_last=False)
test_loader_a  = DataLoader(test_set_a,  batch_size=128, num_workers=0, shuffle=False, drop_last=False)

train_loader_b = DataLoader(train_set_b, batch_size=128, num_workers=0, shuffle=True, drop_last=True)
val_loader_b   = DataLoader(val_set_b,   batch_size=128, num_workers=0, shuffle=False, drop_last=False)
test_loader_b  = DataLoader(test_set_b,  batch_size=128, num_workers=0, shuffle=False, drop_last=False)

test_loader_all = DataLoader(test_set,batch_size=128, num_workers=0,shuffle=False,drop_last=False)


# Define dictionary of loaders, grouped per phase as [side a, side b]
loaders = {"train": [train_loader_a,train_loader_b],
           "val":   [val_loader_a,val_loader_b],
           "test":  [test_loader_a,test_loader_b]}

Downloading http://ufldl.stanford.edu/housenumbers/train_32x32.mat to http://ufldl.stanford.edu/housenumbers/train_32x32.mat


  0%|          | 0/182040794 [00:00<?, ?it/s]

Downloading http://ufldl.stanford.edu/housenumbers/test_32x32.mat to http://ufldl.stanford.edu/housenumbers/test_32x32.mat


  0%|          | 0/64275384 [00:00<?, ?it/s]

Num. training samples: 73257
Num. test samples:     26032
32553
61316
44598
62771
49643
55735
33610
29224
47794
11023
65932 samples used as train set
7325  samples used as val set


In [None]:
# Fresh instances for the second experiment, so model3 starts again from a
# random initialisation instead of the merged weights of the previous run.
model1 = VGG16((1,32,32),batch_norm=True)
model2 = VGG16((1,32,32),batch_norm=True)
model3 = VGG16((1,32,32),batch_norm=True)
classifier = Classifier(num_classes=10)

In [None]:
# The shared head comes from the MNIST checkpoint.
classifier.load_state_dict(state_dict['classifier'])

<All keys matched successfully>

In [None]:
# VGGA: the 'vgg_a' network of the MNIST checkpoint.
model1.load_state_dict(state_dict['vgg_a'])

<All keys matched successfully>

In [None]:
# VGGB: the 'vgg_a' network of the SVHN checkpoint.
# NOTE(review): presumably the SVHN-trained network is stored under 'vgg_a' in
# that file - confirm that 'vgg_a' (not another key) is the intended entry.
model2.load_state_dict(state_dict_svhn['vgg_a'])

<All keys matched successfully>

In [None]:
# Order matters: final_test reads nets as [VGGA, VGGB, VGG*, classifier].
nets = [model1,model2,model3,classifier]

# Use the GPU when one is visible, otherwise fall back to the CPU so the
# notebook still runs.
dev = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Collect every parameter exactly once and in a fixed order. Gathering them in
# a set (as before) leaves the optimizer's parameter order undefined.
parameters = []
seen_ids = set()
for n in nets:
  for p in n.parameters():
    if id(p) not in seen_ids:
      seen_ids.add(id(p))
      parameters.append(p)

optimizer = torch.optim.SGD(parameters, lr = 0.01)
# Loss for integer class labels
criterion = nn.CrossEntropyLoss()
n_params = 0

In [None]:
# Same evaluation as before, now with MNIST on side "a", SVHN on side "b" and
# their concatenation as the joint test set. Overwrites model3's convolution
# weights in place; save_param is accepted by final_test but has no effect there.
final_test(nets, loaders, optimizer, criterion, dev=dev,save_param=True)

Accuracy test VGGA: 0.98497
Accuracy test VGGB: 0.93347
Accuracy test VGG*: 0.099402
Accuracy test VGGSTAR: 0.34536
