In [1]:
import json
import os
import random
from tqdm import tqdm
import numpy as np
import torch, torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data.dataset import Dataset, TensorDataset  
torch.backends.cudnn.benchmark=True
from pyhessian import hessian # Hessian computation
import scipy.io
import matplotlib.pyplot as plt


In [2]:
class Network(nn.Module):
    """Fully connected classifier with four ReLU hidden layers and 26 outputs.

    The input is flattened to 784 features per sample (e.g. a 28x28 image)
    and passed through 784 -> 784 -> 600 -> 400 -> 200 -> 26 linear layers.

    ``forward`` returns per-class *log*-probabilities. The training and
    evaluation code feeds the output to ``nn.functional.nll_loss``, which
    expects log-probabilities; returning plain softmax probabilities would make
    that loss ``-p`` instead of ``-log(p)``. Use ``output.exp()`` to recover
    probabilities. ``argmax`` over the output is unaffected.
    """

    def __init__(self):
        super().__init__()

        # Hidden layers: linear transformations of the flattened input
        self.hidden = nn.Linear(784, 784)
        self.hidden2 = nn.Linear(784, 600)
        self.hidden3 = nn.Linear(600, 400)
        self.hidden4 = nn.Linear(400, 200)
        # Output layer, 26 units (one per class)
        self.output = nn.Linear(200, 26)

        # ReLU activation shared by all hidden layers
        self.ReLu = nn.ReLU()
        # Log-softmax over the class dimension. The attribute keeps its
        # original name so code that accesses `model.softmax` still works;
        # the module has no parameters, so state_dicts are unaffected.
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Return log-probabilities of shape (batch, 26) for input ``x``."""
        # Flatten every sample to a 784-dimensional vector
        x = torch.reshape(x, (-1, 784))
        x = self.hidden(x)
        x = self.ReLu(x)
        x = self.hidden2(x)
        x = self.ReLu(x)
        x = self.hidden3(x)
        x = self.ReLu(x)
        x = self.hidden4(x)
        x = self.ReLu(x)
        x = self.output(x)
        x = self.softmax(x)

        return x

In [3]:
def client_update(client_model, optimizer, train_loader, mode, epoch=5):
    """Train a client model on its local data and report the training loss.

    The model output is fed to ``nn.functional.nll_loss``, so the model is
    expected to emit log-probabilities. Batches are moved to whatever device
    the model's parameters live on, so the function works on CPU and GPU.

    Args:
        client_model: the ``nn.Module`` to train in place.
        optimizer: optimizer bound to ``client_model``'s parameters.
        train_loader: iterable yielding ``(inputs, target)`` batches.
        mode: ``'Average'`` or ``'HessFuse'``.
        epoch: number of passes over ``train_loader``.

    Returns:
        ``'Average'``: the loss of the last mini-batch, as a float.
        ``'HessFuse'``: a tuple ``(loss, top_eigenvalues)`` where
        ``top_eigenvalues`` is what ``hessian(...).eigenvalues()`` returns for
        the first batch of ``train_loader``.

    Raises:
        ValueError: if ``mode`` is not recognised, or if no training step was
            run (empty ``train_loader`` or ``epoch <= 0``).
    """
    if mode not in ('Average', 'HessFuse'):
        raise ValueError("mode must be 'Average' or 'HessFuse', got %r" % (mode,))

    # Follow the model's device instead of assuming CUDA.
    first_param = next(client_model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    client_model.train()
    loss = None
    for _ in range(epoch):
        for inputs, target in train_loader:
            inputs, target = inputs.to(device), target.to(device)
            optimizer.zero_grad()
            output = client_model(inputs)
            loss = nn.functional.nll_loss(output, target)
            loss.backward()
            optimizer.step()

    if loss is None:
        raise ValueError('no training step was run: train_loader is empty or epoch <= 0')

    if mode == 'Average':
        return loss.item()

    # mode == 'HessFuse': estimate the top Hessian eigenvalue on the first batch.
    client_model.eval()
    inputs, target = next(iter(train_loader))
    inputs, target = inputs.to(device), target.to(device)
    # Criterion kept from the original code; note that CrossEntropyLoss applies
    # log-softmax to the model output itself.
    criterion = torch.nn.CrossEntropyLoss()
    hessian_comp = hessian(client_model, criterion, data=(inputs, target),
                           cuda=(device.type == 'cuda'))
    top_eigenvalues, _ = hessian_comp.eigenvalues()

    return loss.item(), top_eigenvalues

In [4]:
def server_aggregate(global_model, client_models, weights):
    """Aggregate client models into the global model and re-synchronise clients.

    For every entry ``k`` of the state dict the new global value is

        mean_i( weights[i] * client_models[i].state_dict()[k] )

    so passing all-ones weights gives the plain mean, and passing weights that
    sum to ``len(client_models)`` gives a weighted average. After aggregation
    every client model is overwritten with the new global state.

    Args:
        global_model: model that receives the aggregated state.
        client_models: sequence of models with the same architecture.
        weights: one scalar per client, in the same order as ``client_models``.

    Raises:
        ValueError: if ``client_models`` is empty or ``weights`` has a
            different length.
    """
    if len(client_models) == 0:
        raise ValueError('client_models must not be empty')
    if len(weights) != len(client_models):
        raise ValueError('expected %d weights, got %d' % (len(client_models), len(weights)))

    # Fetch each client's state once rather than once per key.
    client_states = [model.state_dict() for model in client_models]

    global_dict = global_model.state_dict()
    for k in global_dict.keys():
        # Pair each client with its own weight (the previous version used
        # client 0 and weights[0] for every term).
        global_dict[k] = torch.stack(
            [float(w) * state[k].float() for w, state in zip(weights, client_states)], 0
        ).mean(0)

    global_model.load_state_dict(global_dict)
    for model in client_models:
        model.load_state_dict(global_model.state_dict())

In [5]:
def test(global_model, test_loader):
    """This function test the global model on test data and returns test loss and test accuracy """
    global_model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        count = 0
        for data, target in test_loader:
            data, target = data.cuda(), target.cuda()
#             target = torch.nn.functional.one_hot(target)
            output = global_model(data)
            test_loss += nn.functional.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
            count += 1
        
    test_loss /= len(test_loader.dataset)
    acc = correct / len(test_loader.dataset)

    return test_loss, acc

In [6]:
# ---- Experiment configuration ----
num_clients = 100     # number of client shards the training writers are bucketed into
num_selected = 100    # clients taking part in every round
num_rounds = 20       # communication rounds
epochs = 5            # local epochs per client per round
batch_size = 100

# Seed every RNG in use so client selection and batch shuffling are repeatable.
random.seed(13)
np.random.seed(13)
torch.manual_seed(13)

# ---- Load the dataset from the MATLAB file ----
mat = scipy.io.loadmat('./dataset/emnist-letters.mat')
data = mat["dataset"]

# ---- Training split ----
train_struct = data['train'][0,0]
writer_ids_train = np.squeeze(train_struct['writers'][0,0])
X_train = train_struct['images'][0,0]
# Each row becomes a 28x28 image, read in column-major (Fortran) order.
X_train = X_train.reshape((X_train.shape[0], 28, 28), order = "F")
y_train = np.squeeze(train_struct['labels'][0,0])
y_train -= 1 #y_train is zero-based

# Bucket samples by writer id modulo num_clients: indtemp[c] lists the row
# indices that belong to client c.
vec = writer_ids_train%num_clients
indtemp = [list(np.where(vec == client)[0]) for client in range(num_clients)]

# One shuffled DataLoader per client.
train_loader = []
for client_rows in indtemp:
    client_set = TensorDataset(torch.FloatTensor(X_train[client_rows]),
                               torch.LongTensor(y_train[client_rows]))
    train_loader.append(
        torch.utils.data.DataLoader(client_set, batch_size=batch_size, shuffle=True)
    )

# ---- Test split ----
test_struct = data['test'][0,0]
writer_ids_test = np.squeeze(test_struct['writers'][0,0])
X_test = test_struct['images'][0,0]
X_test= X_test.reshape((X_test.shape[0], 28, 28), order = "F")
y_test = np.squeeze(test_struct['labels'][0,0])
y_test -= 1 #y_test is zero-based

test_set = TensorDataset(torch.FloatTensor(X_test), torch.LongTensor(y_test))
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=True)
 


In [7]:
############################################
#### Initializing models and optimizer  ####
############################################

# Global model that receives the aggregated weights each round.
global_model =  Network().cuda()
# One local model per participation slot (num_selected of them); slot i is
# trained on a different client's data each round.
client_models = [ Network().cuda() for _ in range(num_selected)]
for model in client_models:
    model.load_state_dict(global_model.state_dict()) ### initial synchronizing with global model 

############### optimizers ################
# One optimizer per slot, bound to that slot's model parameters.
opt = [optim.Adam(model.parameters(), lr=0.00001) for model in client_models]


###### List containing info about learning #########
losses_train = []
losses_test = []
acc_train = []
acc_test = []
# Running FL
mode = 'Average'
for r in range(num_rounds):
    # select random clients: client_idx[i] is the data shard used by slot i
    client_idx = np.random.permutation(num_clients)[:num_selected]
    # client update
    losstot = 0
    # eigs[i] belongs to client_models[i]; it stays 1 in 'Average' mode so all
    # slots are weighted equally.
    eigs = np.ones(num_selected)
    for i in tqdm(range(num_selected)):
        # Models and optimizers are indexed by slot (0..num_selected-1); only
        # the data loader is indexed by the sampled client id. Indexing models
        # by client id breaks when num_selected < num_clients and misaligns the
        # weights with the model order used by the aggregator.
        if mode == 'HessFuse':
            loss, eigss = client_update(client_models[i], opt[i], train_loader[client_idx[i]], mode, epoch=epochs)
            eigs[i] = eigss[0]
        elif mode == 'Average':
            loss = client_update(client_models[i], opt[i], train_loader[client_idx[i]], mode, epoch=epochs)
        else:
            raise ValueError("mode must be 'Average' or 'HessFuse', got %r" % (mode,))
        losstot += loss

    weights = eigs/(np.sum(eigs))
    # Record the mean over clients, not just the last client's loss.
    avg_train_loss = losstot / num_selected
    losses_train.append(avg_train_loss)
    # server aggregate (weights are rescaled to sum to num_selected)
    server_aggregate(global_model, client_models, weights*num_selected)
    
    test_loss, acc = test(global_model.eval(), test_loader)
    losses_test.append(test_loss)
    acc_test.append(acc)
    print('%d-th round' % r)
    print('average train loss %0.3g | test loss %0.3g | test acc: %0.3f' % (avg_train_loss, test_loss, acc))

 27%|██▋       | 27/100 [00:04<00:13,  5.41it/s]


RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 10.73 GiB total capacity; 993.02 MiB already allocated; 16.06 MiB free; 1.04 GiB reserved in total by PyTorch)

In [None]:
# import random
# choose = (random.randrange(len(datareally['y'])))

# datareally = (dd['f1816_24'])
# x1 = datareally['x'][choose]
# print(datareally['y'][choose])

# import numpy as np
# x1 = np.zeros((784,63))
# counters = np.zeros((63))
# print(x1.shape)
# for i in range(len(datareally['y'])):
#     print(np.array(datareally['x'][i]).shape)
#     label = datareally['y'][i]
#     counters[label] += 1 
#     x1[:, label] += np.array(datareally['x'][i])

# import matplotlib.pyplot as plt
# fig, axes = plt.subplots(8,8, figsize=(8,8))
# for i,ax in enumerate(axes.flat):
#     tempol = x1[:, i]/counters[i]
#     xplot = np.reshape(np.ravel(tempol), (28, 28))
#     ax.imshow(xplot)

