In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Subset, DataLoader
from torchvision import datasets, transforms
import random
import time
import sys

sys.path.append("..")
from utils.tool import GPU_info, write_log, print_label_stat
from models.resnet import ResNet8, ResNet18, ResNet50
from models.mobilenet import MobileNet_S, MobileNet_M, MobileNet_L
from framework.ours import Device
from framework.DSGD import DSGD_Device
from framework.SISA import SISA_Device
from framework.FedAvgUnl import FedAvgServer, FedAvgClient


# one in ['ours', 'DSGD', 'SISA', 'Fed']
Framework = 'ours'

# one in ['MNIST', 'FMNIST', 'CIFAR10']
DatasetName = 'FMNIST'

# one in ['resnet', 'mobilenet']
ModelType = 'resnet'

# one in [1,0]. 1 - Heterogeneous;  0 - Homogeneous
Heterogeneous = 1

In [2]:
train_set_o = None
test_set_o = None
num_classes = 10
device_num = 6
num_channel = 1
ref_size = 10000
train_test_total_size = int(60000/device_num)
test_ratio = 0.2
CIFAR10_segmentation = 0

train_batch_size = 256 if ModelType == 'resnet' else 32
save_path = './checkpoint'
data_path = '../data'
log_path = '../log/{}_{}_{}.txt'.format(Framework, ModelType, DatasetName)

my_seed = 1
torch.cuda.manual_seed(my_seed)

if DatasetName == 'CIFAR10':
    train_set_o = datasets.CIFAR10(data_path, train=True, download=True)
    test_set_o = datasets.CIFAR10(data_path, train=False, download=True)
    device_num = 5
    num_channel = 3
    CIFAR10_segmentation = 1
    train_test_total_size = int(50000/device_num)
    num_iter = 800 if ModelType == 'resnet' else 500
    

elif DatasetName == 'MNIST':    
    train_set_o = datasets.MNIST(data_path, train=True, download=True)
    test_set_o = datasets.MNIST(data_path, train=False, download=True)
    num_iter = 800 if ModelType == 'resnet' else 500

elif DatasetName == 'FMNIST':    
    train_set_o = datasets.FashionMNIST(data_path, train=True, download=True)
    test_set_o = datasets.FashionMNIST(data_path, train=False, download=True)
    num_iter = 500 if ModelType == 'resnet' else 500

train_set_o.transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

test_set_o.transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

ref_set = Subset(test_set_o, range(0,int(ref_size)))
ref_loader = torch.utils.data.DataLoader(ref_set, batch_size=train_batch_size,
                                         shuffle=False, num_workers=0)

In [3]:
# data manipulation
device_dict = {}
loader_dict = {}
print('Device', end='\t')
for class_id in range(num_classes):
    print('Class'+str(class_id), end='\t')
print('SUM')

for device_id in range(device_num):
    range_start = train_test_total_size * device_id
    range_end = range_start + train_test_total_size
    
    # remove one class from each local dataset
    class_to_remove = torch.tensor(device_id%10)
    indices = (torch.tensor(train_set_o.targets[range_start:range_end])[..., None] !=
               class_to_remove).any(-1).nonzero(as_tuple=True)[0]
    
    # split train&test
    train_test_border = int((1-test_ratio)*len(indices))
    train_set = Subset(train_set_o, indices[:train_test_border]+range_start)
    test_set = Subset(train_set_o, indices[train_test_border:]+range_start)
    train_loader = DataLoader(train_set, batch_size=train_batch_size,
                              shuffle=True, num_workers=0)
    test_loader = DataLoader(test_set, batch_size=train_batch_size,
                             shuffle=True, num_workers=0)
    
    loader_dict[device_id] = [train_loader, test_loader]
    print_label_stat(device_id, train_set, num_classes)
    
# initialize devices
for device_id in range(device_num):
    gpu_id = 0
    device_dict[device_id] = Device(device_id, gpu_id, num_classes, num_channel)
    
    
# assign neighbors
for k, v in device_dict.items():
    # full neighbor list
    neighbor_list = [x for x in device_dict.keys() if x != k]  
    v.neighbor_list = neighbor_list

Device	Class0	Class1	Class2	Class3	Class4	Class5	Class6	Class7	Class8	Class9	SUM
D-0	0	858	809	806	762	794	806	818	792	801	7246
D-1	800	0	778	793	791	817	823	794	779	826	7201
D-2	827	799	0	787	813	806	793	825	796	748	7194
D-3	821	768	764	0	797	803	790	795	831	827	7196
D-4	785	798	839	779	0	793	787	784	824	816	7205
D-5	829	801	798	828	826	0	781	776	759	805	7203


In [4]:
# heterogenerous scenario 
write_log(log_path, DatasetName+' heterogenerous')
if Heterogeneous == 1:
    # heterogeneous scenario
    write_log(log_path, 'heterogeneous')
    for k, v in device_dict.items():
        if k < 1:
            v.main_model = ResNet8(num_channel) if ModelType == 'resnet' else MobileNet_S(num_channel)
        elif k < 2:
            v.main_model = ResNet18(num_channel) if ModelType == 'resnet' else MobileNet_M(num_channel)
        else:
            v.main_model = ResNet50(num_channel) if ModelType == 'resnet' else MobileNet_L(num_channel)
        v.main_model.cuda(v.gpu_id)
        v.optimizer = optim.Adam(v.main_model.parameters(), lr=0.01)
else:
    # homogeneous scenario
    write_log(log_path, 'homogeneous')
    for k, v in device_dict.items():
        v.main_model = ResNet50(num_channel) if ModelType == 'resnet' else MobileNet_L(num_channel)
        v.main_model.cuda(v.gpu_id)
        v.optimizer = optim.Adam(v.main_model.parameters(), lr=0.01)

In [5]:
# train main models
metric = []
for k, v in device_dict.items():
    v.train_main_model(num_iter, loader_dict[k][0])
    metric.append(v.validate_main_model(loader_dict[k][1]))
    v.update_soft_label(ref_loader)
    GPU_info([v.gpu_id])
metric_arr = np.array(metric)
log_txt = 'Avg_acc: {:.4f}'.format(np.mean(metric_arr, axis=0)[0])
print(log_txt)


Device: 0 main model training
Epoch:  0		Loss: 1.21746373
Epoch: 10		Loss: 0.53577191
Epoch: 20		Loss: 0.32850245
Epoch: 30		Loss: 0.20228770
Epoch: 40		Loss: 0.13213044
Epoch: 50		Loss: 0.13277917
Epoch: 60		Loss: 0.14531061
Epoch: 70		Loss: 0.16610773
Epoch: 80		Loss: 0.07380203
Epoch: 90		Loss: 0.14383794
Epoch:100		Loss: 0.05892838
Epoch:110		Loss: 0.06962246
Epoch:120		Loss: 0.06628524
Epoch:130		Loss: 0.11647382
Epoch:140		Loss: 0.07558455
Epoch:150		Loss: 0.06281471
Epoch:160		Loss: 0.02802017
Epoch:170		Loss: 0.01629605
Epoch:180		Loss: 0.01393493
Epoch:190		Loss: 0.10623108
Epoch:200		Loss: 0.01650992
Epoch:210		Loss: 0.02659196
Epoch:220		Loss: 0.00240825
Epoch:230		Loss: 0.00559117
Epoch:240		Loss: 0.00283579
Epoch:250		Loss: 0.01416112
Epoch:260		Loss: 0.04713580
Epoch:270		Loss: 0.03538360
Epoch:280		Loss: 0.02858193
Epoch:290		Loss: 0.00392096
Epoch:300		Loss: 0.01569435
Epoch:310		Loss: 0.00914931
Epoch:320		Loss: 0.00770910
Epoch:330		Loss: 0.00236099
Epoch:340		Loss: 

Epoch:220		Loss: 0.02011359
Epoch:230		Loss: 0.00008360
Epoch:240		Loss: 0.03064401
Epoch:250		Loss: 0.00043667
Epoch:260		Loss: 0.00313426
Epoch:270		Loss: 0.01532616
Epoch:280		Loss: 0.00051865
Epoch:290		Loss: 0.06287570
Epoch:300		Loss: 0.00009575
Epoch:310		Loss: 0.00001067
Epoch:320		Loss: 0.03924227
Epoch:330		Loss: 0.00253400
Epoch:340		Loss: 0.04380498
Epoch:350		Loss: 0.00088102
Epoch:360		Loss: 0.00018745
Epoch:370		Loss: 0.00058371
Epoch:380		Loss: 0.06780052
Epoch:390		Loss: 0.00136377
Epoch:400		Loss: 0.01064054
Epoch:410		Loss: 0.06547683
Epoch:420		Loss: 0.00017595
Epoch:430		Loss: 0.00076400
Epoch:440		Loss: 0.00001085
Epoch:450		Loss: 0.03512583
Epoch:460		Loss: 0.00001868
Epoch:470		Loss: 0.00096993
Epoch:480		Loss: 0.00003526
Epoch:490		Loss: 0.00002178
Device: 5 Val_main - Avg_loss: 0.8095, Acc: 1602.0/1801 (0.8895)
GPU0-0.0566G  
Avg_acc: 0.8938


In [6]:
# train seed models
T_ = 1
num_iter = 800 if ModelType == 'resnet' else 3000
for k, v in device_dict.items():
    if Heterogeneous == 1:
        v.seed_model = ResNet8(num_channel) if ModelType == 'resnet' else MobileNet_S(num_channel)
    else:
        v.seed_model = ResNet18(num_channel) if ModelType == 'resnet' else MobileNet_M(num_channel)
    v.seed_model.cuda(v.gpu_id) 
    v.train_seed_model(ref_set, num_iter=num_iter, batch_size=int(ref_size*0.2), T=T_)
    print('\n')
    
# overall performance
write_log(log_path, time.ctime(time.time())+' T='+str(T_))
write_log(log_path, 'AvgAcc\tAvgPre\tAvgRec\tAvgf1')
for rho in range(0,11):
    metric = []
    for k, v in device_dict.items():
        metric.append(v.validate_ensamble(test_loader=loader_dict[k][1], device_dict=device_dict, rho=rho/10))
    metric_arr=np.array(metric)
    log_txt = 'rho = {:.2f}   Avg_acc: {:.4f}'.format(rho/10, np.mean(metric_arr, axis=0)[0])
    print(log_txt)
    write_log(log_path, '{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}'.format(np.mean(metric_arr, axis=0)[0], 
                                                                np.mean(metric_arr, axis=0)[1],
                                                                np.mean(metric_arr, axis=0)[2],
                                                                np.mean(metric_arr, axis=0)[3]))

Device:0 Trn_Seed - Epoch:   0 	Loss: 2.723957
Device:0 Trn_Seed - Epoch:  50 	Loss: 0.809132
Device:0 Trn_Seed - Epoch: 100 	Loss: 0.446049
Device:0 Trn_Seed - Epoch: 150 	Loss: 0.345833
Device:0 Trn_Seed - Epoch: 200 	Loss: 0.297020
Device:0 Trn_Seed - Epoch: 250 	Loss: 0.278978
Device:0 Trn_Seed - Epoch: 300 	Loss: 0.247915
Device:0 Trn_Seed - Epoch: 350 	Loss: 0.243090
Device:0 Trn_Seed - Epoch: 400 	Loss: 0.223334
Device:0 Trn_Seed - Epoch: 450 	Loss: 0.202852
Device:0 Trn_Seed - Epoch: 500 	Loss: 0.193118
Device:0 Trn_Seed - Epoch: 550 	Loss: 0.176410
Device:0 Trn_Seed - Epoch: 600 	Loss: 0.163427
Device:0 Trn_Seed - Epoch: 650 	Loss: 0.160858
Device:0 Trn_Seed - Epoch: 700 	Loss: 0.158687
Device:0 Trn_Seed - Epoch: 750 	Loss: 0.140783


Device:1 Trn_Seed - Epoch:   0 	Loss: 2.398191
Device:1 Trn_Seed - Epoch:  50 	Loss: 0.783617
Device:1 Trn_Seed - Epoch: 100 	Loss: 0.448096
Device:1 Trn_Seed - Epoch: 150 	Loss: 0.363692
Device:1 Trn_Seed - Epoch: 200 	Loss: 0.347044
Device:1 Tr

In [7]:
# train seed models
T_ = 3
num_iter = 800 if ModelType == 'resnet' else 3000
for k, v in device_dict.items():
    if Heterogeneous == 1:
        v.seed_model = ResNet8(num_channel) if ModelType == 'resnet' else MobileNet_S(num_channel)
    else:
        v.seed_model = ResNet18(num_channel) if ModelType == 'resnet' else MobileNet_M(num_channel)
    v.seed_model.cuda(v.gpu_id) 
    v.train_seed_model(ref_set, num_iter=num_iter, batch_size=int(ref_size*0.2), T=T_)
    print('\n')
    
# overall performance
write_log(log_path, time.ctime(time.time())+' T='+str(T_))
write_log(log_path, 'AvgAcc\tAvgPre\tAvgRec\tAvgf1')
for rho in range(0,11):
    metric = []
    for k, v in device_dict.items():
        metric.append(v.validate_ensamble(test_loader=loader_dict[k][1], device_dict=device_dict, rho=rho/10))
    metric_arr=np.array(metric)
    log_txt = 'rho = {:.2f}   Avg_acc: {:.4f}'.format(rho/10, np.mean(metric_arr, axis=0)[0])
    print(log_txt)
    write_log(log_path, '{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}'.format(np.mean(metric_arr, axis=0)[0], 
                                                                np.mean(metric_arr, axis=0)[1],
                                                                np.mean(metric_arr, axis=0)[2],
                                                                np.mean(metric_arr, axis=0)[3]))

Device:0 Trn_Seed - Epoch:   0 	Loss: 2.007414
Device:0 Trn_Seed - Epoch:  50 	Loss: 0.946596
Device:0 Trn_Seed - Epoch: 100 	Loss: 0.474521
Device:0 Trn_Seed - Epoch: 150 	Loss: 0.341199
Device:0 Trn_Seed - Epoch: 200 	Loss: 0.284803
Device:0 Trn_Seed - Epoch: 250 	Loss: 0.231432
Device:0 Trn_Seed - Epoch: 300 	Loss: 0.226968
Device:0 Trn_Seed - Epoch: 350 	Loss: 0.202605
Device:0 Trn_Seed - Epoch: 400 	Loss: 0.207311
Device:0 Trn_Seed - Epoch: 450 	Loss: 0.194402
Device:0 Trn_Seed - Epoch: 500 	Loss: 0.179443
Device:0 Trn_Seed - Epoch: 550 	Loss: 0.174899
Device:0 Trn_Seed - Epoch: 600 	Loss: 0.175990
Device:0 Trn_Seed - Epoch: 650 	Loss: 0.152079
Device:0 Trn_Seed - Epoch: 700 	Loss: 0.160150
Device:0 Trn_Seed - Epoch: 750 	Loss: 0.148931


Device:1 Trn_Seed - Epoch:   0 	Loss: 2.043421
Device:1 Trn_Seed - Epoch:  50 	Loss: 0.932607
Device:1 Trn_Seed - Epoch: 100 	Loss: 0.434382
Device:1 Trn_Seed - Epoch: 150 	Loss: 0.309748
Device:1 Trn_Seed - Epoch: 200 	Loss: 0.252910
Device:1 Tr

Device:  5 Val_ensamble - Acc: 1446.0/1801 (0.8029)
rho = 1.00   Avg_acc: 0.8263


In [8]:
# train seed models
T_ = 5
num_iter = 800 if ModelType == 'resnet' else 3000
for k, v in device_dict.items():
    if Heterogeneous == 1:
        v.seed_model = ResNet8(num_channel) if ModelType == 'resnet' else MobileNet_S(num_channel)
    else:
        v.seed_model = ResNet18(num_channel) if ModelType == 'resnet' else MobileNet_M(num_channel)
    v.seed_model.cuda(v.gpu_id) 
    v.train_seed_model(ref_set, num_iter=num_iter, batch_size=int(ref_size*0.2), T=T_)
    print('\n')
    
# overall performance
write_log(log_path, time.ctime(time.time())+' T='+str(T_))
write_log(log_path, 'AvgAcc\tAvgPre\tAvgRec\tAvgf1')
for rho in range(0,11):
    metric = []
    for k, v in device_dict.items():
        metric.append(v.validate_ensamble(test_loader=loader_dict[k][1], device_dict=device_dict, rho=rho/10))
    metric_arr=np.array(metric)
    log_txt = 'rho = {:.2f}   Avg_acc: {:.4f}'.format(rho/10, np.mean(metric_arr, axis=0)[0])
    print(log_txt)
    write_log(log_path, '{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}'.format(np.mean(metric_arr, axis=0)[0], 
                                                                np.mean(metric_arr, axis=0)[1],
                                                                np.mean(metric_arr, axis=0)[2],
                                                                np.mean(metric_arr, axis=0)[3]))

Device:0 Trn_Seed - Epoch:   0 	Loss: 1.637794
Device:0 Trn_Seed - Epoch:  50 	Loss: 0.810172
Device:0 Trn_Seed - Epoch: 100 	Loss: 0.443119
Device:0 Trn_Seed - Epoch: 150 	Loss: 0.309232
Device:0 Trn_Seed - Epoch: 200 	Loss: 0.236598
Device:0 Trn_Seed - Epoch: 250 	Loss: 0.199003
Device:0 Trn_Seed - Epoch: 300 	Loss: 0.186141
Device:0 Trn_Seed - Epoch: 350 	Loss: 0.174267
Device:0 Trn_Seed - Epoch: 400 	Loss: 0.148768
Device:0 Trn_Seed - Epoch: 450 	Loss: 0.152653
Device:0 Trn_Seed - Epoch: 500 	Loss: 0.148197
Device:0 Trn_Seed - Epoch: 550 	Loss: 0.135275
Device:0 Trn_Seed - Epoch: 600 	Loss: 0.131324
Device:0 Trn_Seed - Epoch: 650 	Loss: 0.135792
Device:0 Trn_Seed - Epoch: 700 	Loss: 0.133298
Device:0 Trn_Seed - Epoch: 750 	Loss: 0.125644


Device:1 Trn_Seed - Epoch:   0 	Loss: 1.658257
Device:1 Trn_Seed - Epoch:  50 	Loss: 0.805497
Device:1 Trn_Seed - Epoch: 100 	Loss: 0.486686
Device:1 Trn_Seed - Epoch: 150 	Loss: 0.343654
Device:1 Trn_Seed - Epoch: 200 	Loss: 0.270912
Device:1 Tr

In [9]:
# train seed models
T_ = 7
num_iter = 800 if ModelType == 'resnet' else 3000
for k, v in device_dict.items():
#     if Heterogeneous == 1:
#         v.seed_model = ResNet8(num_channel) if ModelType == 'resnet' else MobileNet_S(num_channel)
#     else:
#         v.seed_model = ResNet18(num_channel) if ModelType == 'resnet' else MobileNet_M(num_channel)
#     v.seed_model.cuda(v.gpu_id) 
    v.train_seed_model(ref_set, num_iter=num_iter, batch_size=int(ref_size*0.2), T=T_)
    print('\n')
    
# overall performance
write_log(log_path, time.ctime(time.time())+' T='+str(T_))
write_log(log_path, 'AvgAcc\tAvgPre\tAvgRec\tAvgf1')
for rho in range(0,11):
    metric = []
    for k, v in device_dict.items():
        metric.append(v.validate_ensamble(test_loader=loader_dict[k][1], device_dict=device_dict, rho=rho/10))
    metric_arr=np.array(metric)
    log_txt = 'rho = {:.2f}   Avg_acc: {:.4f}'.format(rho/10, np.mean(metric_arr, axis=0)[0])
    print(log_txt)
    write_log(log_path, '{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}'.format(np.mean(metric_arr, axis=0)[0], 
                                                                np.mean(metric_arr, axis=0)[1],
                                                                np.mean(metric_arr, axis=0)[2],
                                                                np.mean(metric_arr, axis=0)[3]))

Device:0 Trn_Seed - Epoch:   0 	Loss: 0.113685
Device:0 Trn_Seed - Epoch:  50 	Loss: 0.105949
Device:0 Trn_Seed - Epoch: 100 	Loss: 0.095891
Device:0 Trn_Seed - Epoch: 150 	Loss: 0.102157
Device:0 Trn_Seed - Epoch: 200 	Loss: 0.095500
Device:0 Trn_Seed - Epoch: 250 	Loss: 0.094774
Device:0 Trn_Seed - Epoch: 300 	Loss: 0.097985
Device:0 Trn_Seed - Epoch: 350 	Loss: 0.093088
Device:0 Trn_Seed - Epoch: 400 	Loss: 0.087803
Device:0 Trn_Seed - Epoch: 450 	Loss: 0.093174
Device:0 Trn_Seed - Epoch: 500 	Loss: 0.087349
Device:0 Trn_Seed - Epoch: 550 	Loss: 0.088441
Device:0 Trn_Seed - Epoch: 600 	Loss: 0.083604
Device:0 Trn_Seed - Epoch: 650 	Loss: 0.090219
Device:0 Trn_Seed - Epoch: 700 	Loss: 0.083697
Device:0 Trn_Seed - Epoch: 750 	Loss: 0.089488


Device:1 Trn_Seed - Epoch:   0 	Loss: 0.108530
Device:1 Trn_Seed - Epoch:  50 	Loss: 0.107777
Device:1 Trn_Seed - Epoch: 100 	Loss: 0.101080
Device:1 Trn_Seed - Epoch: 150 	Loss: 0.101764
Device:1 Trn_Seed - Epoch: 200 	Loss: 0.096844
Device:1 Tr

Device:  5 Val_ensamble - Acc: 1478.0/1801 (0.8207)
rho = 1.00   Avg_acc: 0.8405


In [10]:
# train seed models
T_ = 9
num_iter = 800 if ModelType == 'resnet' else 3000
for k, v in device_dict.items():
    if Heterogeneous == 1:
        v.seed_model = ResNet8(num_channel) if ModelType == 'resnet' else MobileNet_S(num_channel)
    else:
        v.seed_model = ResNet18(num_channel) if ModelType == 'resnet' else MobileNet_M(num_channel)
    v.seed_model.cuda(v.gpu_id) 
    v.train_seed_model(ref_set, num_iter=num_iter, batch_size=int(ref_size*0.2), T=T_)
    print('\n')
    
# overall performance
write_log(log_path, time.ctime(time.time())+' T='+str(T_))
write_log(log_path, 'AvgAcc\tAvgPre\tAvgRec\tAvgf1')
for rho in range(0,11):
    metric = []
    for k, v in device_dict.items():
        metric.append(v.validate_ensamble(test_loader=loader_dict[k][1], device_dict=device_dict, rho=rho/10))
    metric_arr=np.array(metric)
    log_txt = 'rho = {:.2f}   Avg_acc: {:.4f}'.format(rho/10, np.mean(metric_arr, axis=0)[0])
    print(log_txt)
    write_log(log_path, '{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}'.format(np.mean(metric_arr, axis=0)[0], 
                                                                np.mean(metric_arr, axis=0)[1],
                                                                np.mean(metric_arr, axis=0)[2],
                                                                np.mean(metric_arr, axis=0)[3]))

Device:0 Trn_Seed - Epoch:   0 	Loss: 1.000654
Device:0 Trn_Seed - Epoch:  50 	Loss: 0.531220
Device:0 Trn_Seed - Epoch: 100 	Loss: 0.278890
Device:0 Trn_Seed - Epoch: 150 	Loss: 0.202052
Device:0 Trn_Seed - Epoch: 200 	Loss: 0.151447
Device:0 Trn_Seed - Epoch: 250 	Loss: 0.120321
Device:0 Trn_Seed - Epoch: 300 	Loss: 0.108766
Device:0 Trn_Seed - Epoch: 350 	Loss: 0.099239
Device:0 Trn_Seed - Epoch: 400 	Loss: 0.094221
Device:0 Trn_Seed - Epoch: 450 	Loss: 0.086930
Device:0 Trn_Seed - Epoch: 500 	Loss: 0.083525
Device:0 Trn_Seed - Epoch: 550 	Loss: 0.082583
Device:0 Trn_Seed - Epoch: 600 	Loss: 0.079531
Device:0 Trn_Seed - Epoch: 650 	Loss: 0.078484
Device:0 Trn_Seed - Epoch: 700 	Loss: 0.083402
Device:0 Trn_Seed - Epoch: 750 	Loss: 0.076700


Device:1 Trn_Seed - Epoch:   0 	Loss: 0.895746
Device:1 Trn_Seed - Epoch:  50 	Loss: 0.489677
Device:1 Trn_Seed - Epoch: 100 	Loss: 0.256171
Device:1 Trn_Seed - Epoch: 150 	Loss: 0.185313
Device:1 Trn_Seed - Epoch: 200 	Loss: 0.153047
Device:1 Tr

In [11]:
# train seed models
T_ = 11
num_iter = 800 if ModelType == 'resnet' else 3000
for k, v in device_dict.items():
    if Heterogeneous == 1:
        v.seed_model = ResNet8(num_channel) if ModelType == 'resnet' else MobileNet_S(num_channel)
    else:
        v.seed_model = ResNet18(num_channel) if ModelType == 'resnet' else MobileNet_M(num_channel)
    v.seed_model.cuda(v.gpu_id) 
    v.train_seed_model(ref_set, num_iter=num_iter, batch_size=int(ref_size*0.2), T=T_)
    print('\n')
    
# overall performance
write_log(log_path, time.ctime(time.time())+' T='+str(T_))
write_log(log_path, 'AvgAcc\tAvgPre\tAvgRec\tAvgf1')
for rho in range(0,11):
    metric = []
    for k, v in device_dict.items():
        metric.append(v.validate_ensamble(test_loader=loader_dict[k][1], device_dict=device_dict, rho=rho/10))
    metric_arr=np.array(metric)
    log_txt = 'rho = {:.2f}   Avg_acc: {:.4f}'.format(rho/10, np.mean(metric_arr, axis=0)[0])
    print(log_txt)
    write_log(log_path, '{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}'.format(np.mean(metric_arr, axis=0)[0], 
                                                                np.mean(metric_arr, axis=0)[1],
                                                                np.mean(metric_arr, axis=0)[2],
                                                                np.mean(metric_arr, axis=0)[3]))

Device:0 Trn_Seed - Epoch:   0 	Loss: 0.731346
Device:0 Trn_Seed - Epoch:  50 	Loss: 0.368169
Device:0 Trn_Seed - Epoch: 100 	Loss: 0.225446
Device:0 Trn_Seed - Epoch: 150 	Loss: 0.166315
Device:0 Trn_Seed - Epoch: 200 	Loss: 0.114366
Device:0 Trn_Seed - Epoch: 250 	Loss: 0.089754
Device:0 Trn_Seed - Epoch: 300 	Loss: 0.083515
Device:0 Trn_Seed - Epoch: 350 	Loss: 0.076977
Device:0 Trn_Seed - Epoch: 400 	Loss: 0.071791
Device:0 Trn_Seed - Epoch: 450 	Loss: 0.070968
Device:0 Trn_Seed - Epoch: 500 	Loss: 0.070249
Device:0 Trn_Seed - Epoch: 550 	Loss: 0.065918
Device:0 Trn_Seed - Epoch: 600 	Loss: 0.067225
Device:0 Trn_Seed - Epoch: 650 	Loss: 0.062564
Device:0 Trn_Seed - Epoch: 700 	Loss: 0.063221
Device:0 Trn_Seed - Epoch: 750 	Loss: 0.063340


Device:1 Trn_Seed - Epoch:   0 	Loss: 0.654133
Device:1 Trn_Seed - Epoch:  50 	Loss: 0.408241
Device:1 Trn_Seed - Epoch: 100 	Loss: 0.200281
Device:1 Trn_Seed - Epoch: 150 	Loss: 0.147528
Device:1 Trn_Seed - Epoch: 200 	Loss: 0.115821
Device:1 Tr

Device:  5 Val_ensamble - Acc: 1505.0/1801 (0.8356)
rho = 1.00   Avg_acc: 0.8408
