In [1]:
import matplotlib
# matplotlib.use('Agg')
import matplotlib.pyplot as plt
import copy
import numpy as np
from torchvision import datasets, transforms
import torch

from utils.sampling import mnist_iid, mnist_noniid, cifar_iid
from utils.options import args_parser
from models.Update import LocalUpdate
from models.Nets import MLP, CNNMnist, CNNCifar, LeNet, CNNMnist2
from models.Fed import FedAvg
from models.Fed import FedQAvg, Quantization, Quantization_Finite, my_score, my_score_Finite
from models.test import test_img

%load_ext autoreload
%autoreload 2

# 0. Define the Loss function

As we encode the labels as well, cross entropy function should take the one-hot vector with softed value as an input.
However, cross entorpy function supported by pytorch only takes one dimensional label (e.g [1,0,9,...] where entries presents class labels).
Hence, now I define my cross entropy function here.

In [106]:
def my_cross_entropy(input, target, size_average=True):
    """ Cross entropy that accepts soft targets
    Args:
         pred: predictions for neural network
         targets: targets, can be soft
         size_average: if false, sum is returned instead of mean

    Examples::

        input = torch.FloatTensor([[1.1, 2.8, 1.3], [1.1, 2.1, 4.8]])
        input = torch.autograd.Variable(out, requires_grad=True)

        target = torch.FloatTensor([[0.05, 0.9, 0.05], [0.05, 0.05, 0.9]])
        target = torch.autograd.Variable(y1)
        loss = cross_entropy(input, target)
        loss.backward()
    """
    logsoftmax = nn.LogSoftmax(dim=1)
    if size_average:
        return torch.mean(torch.sum(-target * logsoftmax(input) , dim=1))
    else:
        return torch.sum(torch.sum(-target * logsoftmax(input), dim=1))

## 0.1. Test my_cross_entropy

In [107]:
from torch import nn, autograd

input = torch.randn(3, 5, requires_grad=True)
# target = torch.empty(3, dtype=torch.long).random_(5)
target = torch.LongTensor([2,4,0])

one_hot = torch.nn.functional.one_hot(target,num_classes=5)

print(input.dim())
print(target)
print(one_hot)



2
tensor([2, 4, 0])
tensor([[0, 0, 1, 0, 0],
        [0, 0, 0, 0, 1],
        [1, 0, 0, 0, 0]])


In [117]:
model = nn.Linear(2, 2)
model.weight = torch.FloatTensor([[1,0],[0,1]])
print(model.weight)
print(model.bias)
x = torch.randn(1, 2)
# target = torch.randn(1, 2)
output = model(x)
print(x)
print(output)
# loss = my_loss(output, target)
# loss.backward()
# print(model.weight.grad)

TypeError: cannot assign 'torch.FloatTensor' as parameter 'weight' (torch.nn.Parameter or None expected)

In [108]:
print("\nfollowing outputs should be same.")

loss = nn.CrossEntropyLoss()
loss_defalut = loss(input, target)
print(loss_defalut)
loss_defalut.backward()
print(loss_defalut)


print(loss(input, target))
print(my_cross_entropy(input, one_hot))


following outputs should be same.
tensor(2.6336, grad_fn=<NllLossBackward>)
tensor(2.6336, grad_fn=<NllLossBackward>)
tensor(2.6336, grad_fn=<NllLossBackward>)
tensor(2.6336, grad_fn=<MeanBackward0>)


In [122]:
out = torch.FloatTensor([[0.05, 0.9, 0.05], [0.05, 0.05, 0.9], [0.9, 0.05, 0.05]])
out = torch.autograd.Variable(out)

# Categorical targets
y = torch.LongTensor([1, 2, 0])
y = torch.autograd.Variable(y)

# One-hot encoded targets
y1 = torch.FloatTensor([[0, 1, 0], [0, 0, 1], [1, 0, 0]])
y1 = torch.autograd.Variable(y1)

print(y1)

# Calculating the loss
loss_val = nn.CrossEntropyLoss()(out, y)
loss_val1 = nn.BCEWithLogitsLoss()(out, y1)

print(loss_val)
print(loss_val1)

tensor([[0., 1., 0.],
        [0., 0., 1.],
        [1., 0., 0.]])
tensor(0.6178)
tensor(0.5927)


### Check whether my_cross_entropy function works properly with soft-valued one-hot vector

In [128]:
target = torch.LongTensor([2,4,0])
one_hot = torch.nn.functional.one_hot(target,num_classes=5)
print(one_hot)
print(my_cross_entropy(input, one_hot))
print()

print("check the soft valued one-hot vector")
one_hot = torch.FloatTensor([[0,0.1, 0.9, 0.1, 0],[0, 0, 0, 0.1, 0.9],[1.1, -0.1, 0,0,0]])
print(one_hot)
print(my_cross_entropy(input, one_hot))

tensor([[0, 0, 1, 0, 0],
        [0, 0, 0, 0, 1],
        [1, 0, 0, 0, 0]])
tensor(2.6336, grad_fn=<MeanBackward0>)

check the soft valued one-hot vector
tensor([[ 0.0000,  0.1000,  0.9000,  0.1000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.1000,  0.9000],
        [ 1.1000, -0.1000,  0.0000,  0.0000,  0.0000]])
tensor(2.6009, grad_fn=<MeanBackward0>)


# 1. Load MNIST dataset

In [123]:
class my_argument:    
    epochs = 200    #"rounds of training"
    num_users = 15  # "number of users: K"
    frac = 0.5 #"the fraction of clients: C"
    local_ep = 1 #"the number of local epochs: E"
    local_bs = 200 #"local batch size: B"
    bs=128 #"test batch size"
    lr=0.01 #"learning rate"
    momentum=0.5 # "SGD momentum (default: 0.5)"
    split='user' # "train-test split type, user or sample"
    opt='ADAM'
    loss='Default' # 'Custom' or 'Default'

    # model arguments
    model = 'cnn'
    kernel_num=9 #, help='number of each kind of kernel')
    kernel_sizes='3,4,5' #  help='comma-separated kernel size to use for convolution')
    norm='None' #, help="batch_norm, layer_norm, or None")
    num_filters=32 #, help="number of filters for conv nets")
    max_pool='True' #help="Whether use max pooling rather than strided convolutions")

    # other arguments
    dataset='mnist' #, help="name of dataset")
    iid=1
    num_classes=10#, help="number of classes")
    num_channels=1#, help="number of channels of imges")
    gpu=1#, help="GPU ID, -1 for CPU")
    stopping_rounds=10#, help='rounds of early stopping')
    verbose='False'#, help='verbose print')
    seed=1#, help='random seed (default: 1)')
    
args = my_argument()

In [124]:
args.device = torch.device('cuda:{}'.format(args.gpu) if torch.cuda.is_available() and args.gpu != -1 else 'cpu')

# load dataset and split users
if args.dataset == 'mnist':
    trans_mnist = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
    dataset_train = datasets.MNIST('../data/mnist/', train=True, download=True, transform=trans_mnist)
    dataset_test = datasets.MNIST('../data/mnist/', train=False, download=True, transform=trans_mnist)
    # sample users
    if args.iid:
        dict_users = mnist_iid(dataset_train, args.num_users)
    else:
        dict_users = mnist_noniid(dataset_train, args.num_users)
elif args.dataset == 'cifar':
    trans_cifar = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    dataset_train = datasets.CIFAR10('../data/cifar', train=True, download=True, transform=trans_cifar)
    dataset_test = datasets.CIFAR10('../data/cifar', train=False, download=True, transform=trans_cifar)
    if args.iid:
        dict_users = cifar_iid(dataset_train, args.num_users)
    else:
        exit('Error: only consider IID setting in CIFAR10')
else:
    exit('Error: unrecognized dataset')
img_size = dataset_train[0][0].shape

# print(dataset_train[1])

# 2. Train CNN with Torch's CrossEntropy function

In [74]:
net_glob = CNNMnist2(args=args)
net_glob.cuda()

CNNMnist2(
  (conv1): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
  (conv2_drop): Dropout2d(p=0.5, inplace=False)
  (fc1): Linear(in_features=1024, out_features=10, bias=True)
)

In [125]:
print(net_glob)
net_glob.train()

# copy weights
w_glob = net_glob.state_dict()

# training
loss_train = []
loss_test_arr = []
acc_test_arr = []
cv_loss, cv_acc = [], []
val_loss_pre, counter = 0, 0
net_best = None
best_loss = None
val_acc_list, net_list = [], []

for iter in range(10): #args.epochs
    w_locals, loss_locals = [], []
    m = 15
    idxs_users = np.random.choice(range(args.num_users), m, replace=False)
    for idx in idxs_users:
#         print(idx)
        local = LocalUpdate(args=args, dataset=dataset_train, idxs=dict_users[idx])
        w, loss = local.train(net=copy.deepcopy(net_glob).cuda())
        w_locals.append(copy.deepcopy(w))
        loss_locals.append(copy.deepcopy(loss))
    # update global weights
    w_glob = FedAvg(w_locals)

    # copy weight to net_glob
    net_glob.load_state_dict(w_glob)

    # print loss
    loss_avg = sum(loss_locals) / len(loss_locals)
    
    loss_train.append(loss_avg)
    
    acc_test, loss_test = test_img(net_glob, dataset_test, args)
    acc_test_arr.append(acc_test)
    loss_test_arr.append(loss_test)
    if iter % 1 ==0:
        print('Round {:3d}, Average loss {:.3f} Test accuracy {:.3f}'.format(iter, loss_avg,acc_test))
    #print(loss_train)

CNNMnist2(
  (conv1): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
  (conv2_drop): Dropout2d(p=0.5, inplace=False)
  (fc1): Linear(in_features=1024, out_features=10, bias=True)
)

Test set: Average loss: 2.3026 
Accuracy: 980/10000 (9.80%)

Round   0, Average loss 2.343 Test accuracy 9.800

Test set: Average loss: 2.3013 
Accuracy: 1823/10000 (18.23%)

Round   1, Average loss 2.098 Test accuracy 18.230

Test set: Average loss: 0.7712 
Accuracy: 8988/10000 (89.88%)

Round   2, Average loss 1.264 Test accuracy 89.880

Test set: Average loss: 0.1878 
Accuracy: 9523/10000 (95.23%)

Round   3, Average loss 0.661 Test accuracy 95.230

Test set: Average loss: 0.1428 
Accuracy: 9633/10000 (96.33%)

Round   4, Average loss 0.478 Test accuracy 96.330

Test set: Average loss: 0.1150 
Accuracy: 9677/10000 (96.77%)

Round   5, Average loss 0.403 Test accuracy 96.770

Test set: Average loss: 0.1033 
Accuracy: 9703/10000 (97.03%)

Round

# 3. Train CNN with Customized Loss function

In [126]:
args.loss='Custom' # 'Custom' or 'Default'

In [127]:
net_glob = CNNMnist2(args=args)
net_glob.cuda()
net_glob.train()

# copy weights
w_glob = net_glob.state_dict()

# training
loss_train = []
loss_test_arr = []
acc_test_arr = []
cv_loss, cv_acc = [], []
val_loss_pre, counter = 0, 0
net_best = None
best_loss = None
val_acc_list, net_list = [], []

for iter in range(10): #args.epochs
    w_locals, loss_locals = [], []
    m = 15
    idxs_users = np.random.choice(range(args.num_users), m, replace=False)
    for idx in range(args.num_users):
#         print(idx)
        local = LocalUpdate(args=args, dataset=dataset_train, idxs=dict_users[idx])
        w, loss = local.train(net=copy.deepcopy(net_glob).cuda())
        w_locals.append(copy.deepcopy(w))
        loss_locals.append(copy.deepcopy(loss))
    # update global weights
    w_glob = FedAvg(w_locals)

    # copy weight to net_glob
    net_glob.load_state_dict(w_glob)

    # print loss
    loss_avg = sum(loss_locals) / len(loss_locals)
    
    loss_train.append(loss_avg)
    
    acc_test, loss_test = test_img(net_glob, dataset_test, args)
    acc_test_arr.append(acc_test)
    loss_test_arr.append(loss_test)
    if iter % 1 ==0:
        print('Round {:3d}, Average loss {:.3f} Test accuracy {:.3f}'.format(iter, loss_avg,acc_test))
    #print(loss_train)


Test set: Average loss: 2.3026 
Accuracy: 980/10000 (9.80%)

Round   0, Average loss 2.309 Test accuracy 9.800

Test set: Average loss: 1.9062 
Accuracy: 8132/10000 (81.32%)

Round   1, Average loss 1.646 Test accuracy 81.320

Test set: Average loss: 0.1939 
Accuracy: 9448/10000 (94.48%)

Round   2, Average loss 0.732 Test accuracy 94.480

Test set: Average loss: 0.1276 
Accuracy: 9642/10000 (96.42%)

Round   3, Average loss 0.396 Test accuracy 96.420

Test set: Average loss: 0.1027 
Accuracy: 9705/10000 (97.05%)

Round   4, Average loss 0.320 Test accuracy 97.050

Test set: Average loss: 0.0857 
Accuracy: 9746/10000 (97.46%)

Round   5, Average loss 0.277 Test accuracy 97.460

Test set: Average loss: 0.0766 
Accuracy: 9767/10000 (97.67%)

Round   6, Average loss 0.249 Test accuracy 97.670

Test set: Average loss: 0.0694 
Accuracy: 9793/10000 (97.93%)

Round   7, Average loss 0.235 Test accuracy 97.930

Test set: Average loss: 0.0643 
Accuracy: 9805/10000 (98.05%)

Round   8, Average 

# 4. Train CNN by utilizing BACC

## 4.1. BACC encoding for MNIST dataset

In [212]:
class my_argument:    
    epochs = 200    #"rounds of training"
    num_users = 6  # "number of users: K"
    frac = 0.5 #"the fraction of clients: C"
    local_ep = 1 #"the number of local epochs: E"
    local_bs = 200 #"local batch size: B"
    bs=200 #"test batch size"
    lr=0.01 #"learning rate"
    momentum=0.5 # "SGD momentum (default: 0.5)"
    split='user' # "train-test split type, user or sample"
    opt='ADAM'
    loss='Custom' # 'Custom' or 'Default'

    # model arguments
    model = 'cnn'
    kernel_num=9 #, help='number of each kind of kernel')
    kernel_sizes='3,4,5' #  help='comma-separated kernel size to use for convolution')
    norm='None' #, help="batch_norm, layer_norm, or None")
    num_filters=32 #, help="number of filters for conv nets")
    max_pool='True' #help="Whether use max pooling rather than strided convolutions")

    # other arguments
    dataset='mnist' #, help="name of dataset")
    iid=1
    num_classes=10#, help="number of classes")
    num_channels=1#, help="number of channels of imges")
    gpu=1#, help="GPU ID, -1 for CPU")
    stopping_rounds=10#, help='rounds of early stopping')
    verbose='False'#, help='verbose print')
    seed=1#, help='random seed (default: 1)')
    
args = my_argument()

args.device = torch.device('cuda:{}'.format(args.gpu) if torch.cuda.is_available() and args.gpu != -1 else 'cpu')

# load dataset and split users
trans_mnist = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
dataset_train = datasets.MNIST('../data/mnist/', train=True, download=True, transform=trans_mnist)
dataset_test = datasets.MNIST('../data/mnist/', train=False, download=True, transform=trans_mnist)

dict_users = mnist_iid(dataset_train, args.num_users)

In [213]:
encoding_input_array_np = np.empty((len(dataset_train),28*28))
encoding_label_array_np = np.empty((len(dataset_train),args.num_classes))
print("size of X:" ,encoding_input_array_np.shape)
print("size of Y:" ,encoding_label_array_np.shape)

Size_submatrices = int(60000/args.num_users)

for i in range(args.num_users):
    
    stt_pos = i*Size_submatrices
    end_pos = (i+1)*Size_submatrices
#     print(i,stt_pos,end_pos)
    Temp_train = DataLoader(DatasetSplit(dataset_train, dict_users[i]), batch_size=Size_submatrices, shuffle=True)
    
    for batch_idx, (images, labels) in enumerate(Temp_train):
        
        images_np = images.detach().cpu().numpy()
        encoding_input_array_np[stt_pos:end_pos,:] = np.reshape(images_np, (Size_submatrices,28*28))
#         print(encoding_input_array_np[stt_pos:end_pos,:].shape)

        onehot_labels = torch.nn.functional.one_hot(labels,num_classes=args.num_classes)
        labels_np = onehot_labels.detach().cpu().numpy()
#         print(labels_np.shape)
        encoding_label_array_np[stt_pos:end_pos,:] = labels_np


# print(labels_np[0:10,:])

size of X: (60000, 784)
size of Y: (60000, 10)


In [214]:
from utils.functions import *
import math

N= 15
K= args.num_users


j_array = np.array(range(K))
print("j: ",(2*j_array+1)*math.pi/2/K,'\n')

alpha_array = np.cos((2*j_array+1)*math.pi/(2*K)) #np.cos((2*j_array+1)*math.pi/(2*K))
print("alpha_array: ",alpha_array,'\n')

i_array = np.array(range(N))
z_array = np.cos(i_array*2*math.pi/N/2) # np.cos(i_array*2*math.pi/N/2)
print("z_array: ",z_array,'\n')

X_tilde = BACC_Enc(encoding_input_array_np, alpha_array, z_array)
y_tilde = BACC_Enc(encoding_label_array_np, alpha_array, z_array)

j:  [0.26179939 0.78539816 1.30899694 1.83259571 2.35619449 2.87979327] 

alpha_array:  [ 0.96592583  0.70710678  0.25881905 -0.25881905 -0.70710678 -0.96592583] 

z_array:  [ 1.          0.9781476   0.91354546  0.80901699  0.66913061  0.5
  0.30901699  0.10452846 -0.10452846 -0.30901699 -0.5        -0.66913061
 -0.80901699 -0.91354546 -0.9781476 ] 

@BACC_Enc: N,K, m_i= 15 6 10000 

@BACC_Enc: N,K, m_i= 15 6 10000 



In [215]:
print(X_tilde.shape)
print(y_tilde.shape)

(15, 10000, 784)
(15, 10000, 10)


In [222]:
from models.Update import LocalUpdate_with_BACC
from models.Fed import FedAvg_with_BACC_Dec

net_glob = CNNMnist2(args=args)
net_glob.cuda()
net_glob.train()

# copy weights
w_glob = net_glob.state_dict()

# training
loss_train_arr = []
loss_test_arr = []
acc_test_arr = []
cv_loss, cv_acc = [], []
val_loss_pre, counter = 0, 0
net_best = None
best_loss = None
val_acc_list, net_list = [], []

for iter in range(10): #args.epochs
    w_locals, loss_locals = [], []
    m = 15
    idxs_users = np.random.choice(range(N), m, replace=False)
    dec_z_array = []
    for idx in idxs_users: #for idx in range(N):
#         print(idx)
        local = LocalUpdate_with_BACC(args=args, dataset=X_tilde[idx,:,:], label=y_tilde[idx,:,:])
        w, loss = local.train(net=copy.deepcopy(net_glob).cuda())
        w_locals.append(copy.deepcopy(w))
        loss_locals.append(copy.deepcopy(loss))
        
        dec_z_array.append(z_array[idx])
    
    
    # update global weights
    #w_glob = FedAvg(w_locals)
    w_glob = FedAvg_with_BACC_Dec(w_locals, alpha_array, dec_z_array)

    # copy weight to net_glob
    net_glob.load_state_dict(w_glob)

    # print loss
#     acc_train, loss_train = test_img(net_glob, dataset_train, args)
    
#     loss_train_arr.append(loss_train)
    
    acc_test, loss_test = test_img(net_glob, dataset_test, args)
    acc_test_arr.append(acc_test)
    loss_test_arr.append(loss_test)
    if iter % 1 ==0:
        print('Round {:3d}, Average loss {:.3f} Test accuracy {:.3f}'.format(iter, loss_test, acc_test))
    #print(loss_train)


Test set: Average loss: 2.2958 
Accuracy: 1855/10000 (18.55%)

Round   0, Average loss 2.296 Test accuracy 18.550

Test set: Average loss: 57.1782 
Accuracy: 3785/10000 (37.85%)

Round   1, Average loss 57.178 Test accuracy 37.850

Test set: Average loss: 2.2479 
Accuracy: 980/10000 (9.80%)

Round   2, Average loss 2.248 Test accuracy 9.800

Test set: Average loss: 1.3262 
Accuracy: 8581/10000 (85.81%)

Round   3, Average loss 1.326 Test accuracy 85.810

Test set: Average loss: 0.1843 
Accuracy: 9470/10000 (94.70%)

Round   4, Average loss 0.184 Test accuracy 94.700

Test set: Average loss: 0.3227 
Accuracy: 9526/10000 (95.26%)

Round   5, Average loss 0.323 Test accuracy 95.260

Test set: Average loss: 2.4643 
Accuracy: 9387/10000 (93.87%)

Round   6, Average loss 2.464 Test accuracy 93.870

Test set: Average loss: 1.5965 
Accuracy: 9532/10000 (95.32%)

Round   7, Average loss 1.597 Test accuracy 95.320

Test set: Average loss: 19.5999 
Accuracy: 9297/10000 (92.97%)

Round   8, Avera