In [69]:
%load_ext autoreload
%autoreload 2

import torch
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from dlc_practical_prologue import load_data
from models import CNN
import numpy as np

from torch.utils.data import DataLoader, TensorDataset

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [123]:
#Global constants shared among all clients that we vary in testing
EPOCHS = 10
BATCH_SIZE = 50
NUM_CLIENTS = 5
ACCURACY_THRESHOLD = 99

In [124]:
def no_sparsification():
    return 0

In [125]:
class Client:
    def __init__(self,train_data_loader):
        self.train_data_loader = train_data_loader
        self.batch_size = BATCH_SIZE
        self.epochs = EPOCHS
        self.lr = 0.001
        self.log_interval = 5
        self.seed = 42
        self.save_model = False
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = CNN().to(self.device)
        self.optimizer = optim.SGD(self.model.parameters(), lr=self.lr)
        self.gradient_compression = None
        self.criterion = torch.nn.CrossEntropyLoss()
        self.gradient_compression = no_sparsification
        self.model_name = "mnist_cnn"


In [126]:
def train(client, epoch, logging=True):
    
    # put model in train mode, we need gradients
    client.model.train()
    train_loader = client.train_data_loader
    train_loss = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        client.optimizer.zero_grad()
        output = client.model(data)
        # get the basic loss for our main task
        total_loss = client.criterion(output, target)
        total_loss.backward()
        train_loss += total_loss.item()
        client.optimizer.step()
    _, train_accuracy = test(client, train_loader, logging=False)
    if logging:
        print(f'Train Epoch: {epoch} Loss: {total_loss.item():.6f}, Train accuracy: {train_accuracy}')
    return train_loss, train_accuracy


def test(config, test_loader, logging=True):
    # put model in eval mode, disable dropout etc.
    config.model.eval()
    test_loss = 0
    correct = 0
    # disable grad to perform testing quicker
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(config.device), target.to(config.device)
            output = config.model(data)
            test_loss += config.criterion(output, target).item()
            # prediction is an output with maximal probability
            pred = output.argmax(1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    test_accuracy = 100. * correct / len(test_loader.dataset)
    if logging:
        print(f'Test set: Average loss: {test_loss:.4f}, '
              f'Test accuracy: {correct} / {len(test_loader.dataset)} '
              f'({test_accuracy:.0f}%)\n')
    return test_loss, test_accuracy


def get_data_loaders(batch_size, data_split_type='iid', percentage_val=0.2):
    val_loader = None
    train_input, train_target, test_input, test_target = load_data(flatten = False)
    train_dataset = TensorDataset(train_input, train_target)
 
    # if validation set is needed randomly split training set
    if percentage_val:
        val_dataset, train_dataset = torch.utils.data.random_split(train_dataset,
                                                               (int(percentage_val*len(train_dataset)),
                                                                int((1-percentage_val)*len(train_dataset)))
                                                               )
        val_loader = DataLoader(dataset=val_dataset,
                                batch_size=batch_size,
                                shuffle=True)
    #Split data for each client
    if data_split_type=='iid':
        # Random IID data split
        client_datasets = torch.utils.data.random_split(train_dataset,np.tile(int(len(train_dataset)/NUM_CLIENTS),NUM_CLIENTS).tolist())
    #else:
        # TODO: code non-iid data split
        
    train_loaders = []
    for train_dataset in worker_datasets:
        train_loader = DataLoader(dataset=train_dataset,
                                  batch_size=batch_size,
                                  shuffle=True)
        train_loaders.append(train_loader)
        
    test_loader = DataLoader(dataset=TensorDataset(test_input, test_target),
                             batch_size=batch_size)
    return train_loaders, val_loader, test_loader

In [127]:
#Load data
train_loaders, val_loader, test_loader = get_data_loaders(config.batch_size) #!!!make this function return variable number of vectors

#Initialize all clients
clients = []
for train_loader in train_loaders:
    client = Client(train_loader)
    clients.append(client)
torch.manual_seed(clients[0].seed)

* Using MNIST
** Reduce the data-set (use --full for the full thing)
** Use 5000 train and 5000 test samples


<torch._C.Generator at 0x19f6ae0d250>

In [128]:
testing_accuracy = 0
bits_transferred = 0
num_rounds = 0
bits_conserved = 0

#while testing_accuracy < ACCURACY_THRESHOLD:

#Perform E local training steps for each client
for client_idx, client in enumerate(clients):
    print("Training client {0}".format(client_idx))
    for epoch in range(1, client.epochs + 1):
        train(client, epoch)

# if config.save_model:
#     torch.save(config.model.state_dict(), f"{config.model_name}.pt")

Training client 0
Train Epoch: 1 Loss: 1.247042, Train accuracy: 69.875
Train Epoch: 2 Loss: 0.811745, Train accuracy: 85.125
Train Epoch: 3 Loss: 0.468260, Train accuracy: 93.5
Train Epoch: 4 Loss: 0.274624, Train accuracy: 94.75
Train Epoch: 5 Loss: 0.249253, Train accuracy: 97.125
Train Epoch: 6 Loss: 0.173937, Train accuracy: 97.0
Train Epoch: 7 Loss: 0.111779, Train accuracy: 98.625
Train Epoch: 8 Loss: 0.066257, Train accuracy: 99.5
Train Epoch: 9 Loss: 0.090676, Train accuracy: 99.75
Train Epoch: 10 Loss: 0.082974, Train accuracy: 99.25
Training client 1
Train Epoch: 1 Loss: 1.157964, Train accuracy: 70.75
Train Epoch: 2 Loss: 0.992068, Train accuracy: 78.625
Train Epoch: 3 Loss: 0.349507, Train accuracy: 89.75
Train Epoch: 4 Loss: 0.167553, Train accuracy: 95.125
Train Epoch: 5 Loss: 0.167555, Train accuracy: 97.0
Train Epoch: 6 Loss: 0.280127, Train accuracy: 97.875
Train Epoch: 7 Loss: 0.123644, Train accuracy: 98.875
Train Epoch: 8 Loss: 0.068722, Train accuracy: 99.375
Trai

In [130]:
dicts = []
for client in clients:
    client_dict = dict(client.model.named_parameters())
    dicts.append(client_dict)

conv1.weight
conv1.bias
conv2.weight
conv2.bias
fc1.weight
fc1.bias
fc2.weight
fc2.bias
/n
conv1.weight
conv1.bias
conv2.weight
conv2.bias
fc1.weight
fc1.bias
fc2.weight
fc2.bias
/n
conv1.weight
conv1.bias
conv2.weight
conv2.bias
fc1.weight
fc1.bias
fc2.weight
fc2.bias
/n
conv1.weight
conv1.bias
conv2.weight
conv2.bias
fc1.weight
fc1.bias
fc2.weight
fc2.bias
/n
conv1.weight
conv1.bias
conv2.weight
conv2.bias
fc1.weight
fc1.bias
fc2.weight
fc2.bias
/n


In [110]:
dict(config.model.named_parameters())

{'conv1.weight': Parameter containing:
 tensor([[[[ 1.9773e-03,  1.1044e-01, -1.6238e-01, -1.4470e-01, -7.4847e-02],
           [ 5.8908e-02,  2.2796e-03,  1.6114e-01, -1.6700e-02,  5.4160e-02],
           [-5.5401e-02, -3.2503e-02, -1.8861e-01, -1.3303e-01, -8.4351e-02],
           [ 1.2069e-02,  8.5325e-02,  1.2217e-01, -1.3671e-01, -9.0038e-02],
           [ 7.7324e-02,  1.7722e-01, -3.2954e-02,  1.5331e-01, -3.2883e-02]]],
 
 
         [[[ 2.5443e-02,  1.8507e-01, -1.8346e-01, -1.2522e-01, -5.1196e-02],
           [-7.5703e-02,  1.7448e-01, -1.2820e-01, -9.2946e-02, -1.4053e-01],
           [-1.8706e-01, -1.1534e-01,  1.7365e-01,  8.8473e-02,  9.6891e-02],
           [ 1.0899e-02, -1.0048e-01,  3.5661e-02, -1.8693e-01, -1.4493e-01],
           [-1.0128e-01,  1.2877e-01,  1.1980e-01, -8.7872e-02, -6.6731e-03]]],
 
 
         [[[ 1.2108e-01,  1.9511e-01,  7.9918e-02,  3.1734e-02,  1.3358e-01],
           [-1.2329e-01,  3.3243e-02, -1.5186e-01, -1.3595e-01, -1.0449e-01],
           [ 

In [104]:
#Load data
train_loaders, val_loader, test_loader = get_data_loaders(config.batch_size) #!!!make this function return variable number of vectors


* Using MNIST
** Reduce the data-set (use --full for the full thing)
** Use 5000 train and 5000 test samples


In [105]:
print(len(train_loaders[0]))

16
