In [12]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Import library & Setup

In [9]:
"""
Simple logging setup for a Jupyter notebook: records go both to the console
and to a timestamped file under ./log.
"""
import logging
import os
import sys
import datetime

# Separate handlers so the file and the console can use different log levels.
# A timezone-aware "now" in UTC yields the same timestamp string as the
# deprecated datetime.utcnow().
timestamp = datetime.datetime.now(datetime.timezone.utc).strftime('%Y%m%d_%H-%M-%S')
filename=f'./log/tmp5a_{timestamp}.log'
# FileHandler does not create missing parent directories, so make sure the
# log directory exists before opening the file.
os.makedirs(os.path.dirname(filename), exist_ok=True)
formatter = logging.Formatter('[%(asctime)s] %(name)s {%(filename)s:%(lineno)d} %(levelname)s - %(message)s')

# File output: INFO and above, using the detailed formatter defined above.
file_handler = logging.FileHandler(filename=filename)
file_handler.setLevel(logging.INFO)
file_handler.setFormatter(formatter)

# Console output: INFO and above. No formatter is set here, so basicConfig
# assigns it the `format` string passed below.
stream_handler = logging.StreamHandler(sys.stdout)
stream_handler.setLevel(logging.INFO)

# The handlers have to be at a root level since they are the final output
logging.basicConfig(
    level=logging.DEBUG, 
    format='[{%(filename)s:%(lineno)d} %(levelname)s - %(message)s',
    handlers=[
        file_handler,
        stream_handler
    ]
)

# Test: the DEBUG record is filtered out by both handlers, the INFO record is emitted.
logger = logging.getLogger("simple_log")
logger.debug('This is hidden')
logger.info('So this is shown on the console')

[{<ipython-input-9-d5bc9e1b63a9>:33} INFO - So this is shown on the console


In [10]:
import tensorflow as tf
import copy
import importlib
import os
import pickle
import matplotlib.pyplot as plt
import numpy as np
import setup_clients

from tqdm import trange
from dataset import DATASET_ATTRIBUTES
from client import Client
from server import Server
from utils_misc import get_all_L_next

physical_devices = tf.config.experimental.get_visible_devices('GPU')
logger.info(physical_devices)
# Turn on memory growth for every visible GPU; with no GPU the loop simply
# does nothing.
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)


[{<ipython-input-10-f96dad092cfb>:17} INFO - [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


# Load data and all associated parameters for our experiment

In [11]:
%%writefile main.py
# NOTE(review): with the %%writefile magic above, this cell's text is saved to
# main.py. Later cells read names defined here (exp_dataset, attributes, SEED,
# ClientModel, ...) -- confirm they are also made available to the kernel.
DATASETS = ['mnist', 'femnist', 'celeba', 'cifar10']
exp_dataset = DATASETS[2]  # currently 'celeba'

# The module named after the dataset provides the ClientModel class.
mod = importlib.import_module(exp_dataset)
ClientModel = mod.ClientModel

attributes = DATASET_ATTRIBUTES[exp_dataset]
SEED = 4151971
input_shape = attributes['input_shape']
dimension = attributes['dimension']

# Original author's note: move this out if you are using tf 2
tf.random.set_seed(SEED)
np.random.seed(SEED)

# Index (0..3) of the quarter of `clients` that the next get_global_data()
# call will use; it wraps back to 0 after the fourth call.
eval_counter = 0
def get_global_data(set_to_use='test'):
    """Stack the test data of the next quarter of clients into one set.

    Each call takes the next ``round(len(clients) / 4)`` clients (selected by
    the module-level ``eval_counter``), concatenates their ``test_data``
    entries along axis 0 and advances the counter, cycling over four slices.

    Args:
        set_to_use: Currently ignored; the data always comes from ``test_data``.

    Returns:
        dict with keys ``'x'`` and ``'y'`` holding the concatenated arrays.

    Raises:
        ValueError: If the selected slice contains no clients.
    """
    global eval_counter
    num_stacking = round(len(clients) / 4)
    id_start = num_stacking * eval_counter
    id_end = id_start + num_stacking
    eval_counter += 1
    if eval_counter >= 4:
        eval_counter = 0
    stack_list = [c.id for c in clients[id_start:id_end]]

    # The final slice may hold fewer than num_stacking clients (or none at
    # all), so iterate over the clients actually selected instead of
    # indexing up to num_stacking.
    if not stack_list:
        raise ValueError(
            "no clients available for slice [{}:{}] of {} clients".format(
                id_start, id_end, len(clients)))

    # A single concatenate per field avoids re-copying the growing array on
    # every loop iteration.
    datax = np.concatenate([test_data[cid]['x'] for cid in stack_list], axis=0)
    datay = np.concatenate([test_data[cid]['y'] for cid in stack_list], axis=0)

    return {'x': datax, 'y': datay}

def restore_set():
    """Load the pickled (x, y) pair from ./glob_testset_femnist.

    Returns:
        A tf.data.Dataset built from the two loaded objects, in batches of 32.
    """
    # NOTE: pickle.load can execute arbitrary code; only use a trusted file.
    with open("./glob_testset_femnist", "rb") as handle:
        features, targets = pickle.load(handle)

    return tf.data.Dataset.from_tensor_slices((features, targets)).batch(32)

def restore_mnist_test():
    """Wrap the global ``test_data`` in a tf.data.Dataset in batches of 32."""
    return tf.data.Dataset.from_tensor_slices(test_data).batch(32)

def get_history(d):
    """Return an array holding the mean of each entry of ``d['accuracy']``."""
    return np.array(list(map(np.mean, d['accuracy'])))
    
def get_loss(d):
    """Return an array holding the mean of each entry of ``d['loss']``."""
    return np.array(list(map(np.mean, d['loss'])))

def cust_evaluate(batch_w):
    """Build a model sized from ``batch_w`` and evaluate it on ``test_set``.

    The entry under the first key of ``batch_w`` is passed to
    ``get_all_L_next``; the result is handed to
    ``current_model.create_CNNmodel``. The new model then receives
    ``server.batch_weights[0]`` and is evaluated on the global ``test_set``.

    Args:
        batch_w: Mapping whose first entry is used to derive the layer sizes.

    Returns:
        Whatever ``nn.evaluate`` returns. Previously the result was computed
        and then discarded, so the function always returned None.
    """
    first_key = list(batch_w)[0]
    L_n = get_all_L_next(batch_w[first_key])
    weight = server.batch_weights[0]
    nn = current_model.create_CNNmodel(L_n)
    # NOTE(review): `ins_c_model` is not defined anywhere in the visible
    # notebook, and `get_input_shape` is passed without being called --
    # confirm both are intended.
    nn.build(ins_c_model.get_input_shape)
    nn.set_weights(weight)
    metrics = nn.evaluate(test_set, verbose=1)
    return metrics
    
def IMCK(server, local_epochs=10, sem=False, bbp_map=False):
    """Run one round on ``server``: select, train, update, evaluate.

    Steps, in order: clear the Keras session, select ``num_worker_per_round``
    of the global ``clients``, optionally initialise the SEM data, train,
    push the averaged weights back into the server, evaluate on the global
    ``test_set`` and log how many clients each cluster received.

    Args:
        server: Object providing select_clients / init_sem_data / train_model /
            update_weights / evaluate_global_models.
        local_epochs: Forwarded to ``server.train_model``.
        sem: When true, call ``server.init_sem_data(dimension)`` and print the
            shape of ``server._sem_dataset`` before training.
        bbp_map: Accepted but not used by this function.

    Returns:
        The tuple ``(clients2key, key2clients, avg_w)`` from ``train_model``.
    """
    tf.keras.backend.clear_session()
    server.select_clients(clients, num_worker_per_round)

    if sem:
        server.init_sem_data(dimension)
        sem_shape = np.array(server._sem_dataset).shape
        print("before sem running, data x looks like {}".format(sem_shape))

    clients2key, key2clients, avg_w = server.train_model(local_epochs)
    server.update_weights(avg_w, key2clients, clients2key)
    server.evaluate_global_models(test_set)

    for key in key2clients:
        cluster_size = len(key2clients[key])
        logger.info("cluster_{} assigned {} clients".format(key, cluster_size))

    return clients2key, key2clients, avg_w


def IMCK_MA(server, local_epochs=10, sem=False):
    """Run one round on ``server`` using ``train_model_with_ma``.

    Clears the Keras session, selects ``num_worker_per_round`` of the global
    ``clients``, optionally initialises the SEM data, trains, and logs how
    many clients each cluster received. Nothing is returned.

    Args:
        server: Object providing select_clients / init_sem_data /
            train_model_with_ma.
        local_epochs: Forwarded to ``server.train_model_with_ma``.
        sem: When true, call ``server.init_sem_data(dimension)`` first.
    """
    tf.keras.backend.clear_session()
    server.select_clients(clients, num_worker_per_round)

    if sem:
        server.init_sem_data(dimension)

    clients2key, key2clients = server.train_model_with_ma(local_epochs)

    for key in key2clients:
        cluster_size = len(key2clients[key])
        logger.info("cluster_{} assigned {} clients".format(key, cluster_size))

# Default experiment settings; several are rebound by later cells.
iterations = 10
# NOTE(review): `clients` is not assigned in this cell -- in the visible
# notebook it is first assigned in the "Prepare data for training" cell, so
# this line depends on execution order. Confirm.
num_workers = len(clients)
# Passed to server.select_clients() by IMCK / IMCK_MA.
num_worker_per_round = 15
num_clusters = 1
local_epochs = 5

Overwriting main.py


# Prepare data for training

In [None]:
# Look up the dataset-specific setup function, e.g. setup_clients_celeba.
mod = setup_clients
_setup_func = getattr(mod, 'setup_clients_{}'.format(exp_dataset))

lr = attributes['lr']
avg_batch_size = 10

op = tf.keras.optimizers.SGD(learning_rate=lr, momentum=0.9, decay=1e-6)
current_model = ClientModel(
    SEED,
    lr,
    train_bs=avg_batch_size,
    optimizer=op,
    input_shape=input_shape,
)

# femnist / celeba: the setup function takes only the model, and the test set
# is built from stacked client data. Every other dataset: the setup function
# also receives 100 as its first argument and test_data is batched directly.
if exp_dataset not in ('femnist', 'celeba'):
    clients, train_data, test_data = _setup_func(100, current_model)
    test_set = restore_mnist_test()
else:
    clients, train_data, test_data = _setup_func(current_model)
    test_set = current_model.create_dataset(get_global_data(), 'test')

# Train Model

# fedavg (baseline)

# fedsgd (epoch 1, batch all)

# feddist

# fedsem (cluster = 2, sem=True, no bbp_map)

In [6]:
# Settings for this run; they rebind values assigned in earlier cells.
# NOTE(review): the heading above says "cluster = 2" and the saved output
# below shows 4 iterations and 2 clusters, which does not match the values
# here -- presumably the output is from an earlier run. Confirm and re-run.
num_clusters = 6
local_epochs = 4
current_model.SGD = False
# This separator goes through the root logger, the next message through `logger`.
logging.info("=="*15)
logger.info("Start training on <{}>".format(exp_dataset))
server = Server(current_model, num_clusters = num_clusters)

# One IMCK round (with sem=True) per iteration; the results of the last round
# stay bound to clients2key / key2clients / avg_w.
for t in trange(iterations):
    clients2key, key2clients, avg_w = IMCK(server, local_epochs, sem=True)
    # Cleared after every round, so only the first round sees whatever value
    # Server() initialised this flag with.
    server.firstCommunicationRound = False

[{<ipython-input-6-c7f740eadbfb>:5} INFO - Start training on <celeba>


  0%|          | 0/4 [00:00<?, ?it/s]

[{server.py:73} INFO - using gaus init to set client sem vectors and dimensions 1152, vec shape (2306,)
[{server.py:73} INFO - using gaus init to set client sem vectors and dimensions 1152, vec shape (2306,)
[{server.py:73} INFO - using gaus init to set client sem vectors and dimensions 1152, vec shape (2306,)
[{server.py:73} INFO - using gaus init to set client sem vectors and dimensions 1152, vec shape (2306,)
[{server.py:73} INFO - using gaus init to set client sem vectors and dimensions 1152, vec shape (2306,)
before sem running, data x looks like (5, 2306)
cluster  0 test accuracy 0.51
cluster  1 test accuracy 0.49
[{<ipython-input-4-2f8fd22cff75>:83} INFO - cluster_0 assigned 2 clients
[{<ipython-input-4-2f8fd22cff75>:83} INFO - cluster_1 assigned 3 clients


 25%|██▌       | 1/4 [01:31<04:35, 91.95s/it]

[{server.py:73} INFO - using gaus init to set client sem vectors and dimensions 1152, vec shape (2306,)
[{server.py:73} INFO - using gaus init to set client sem vectors and dimensions 1152, vec shape (2306,)
[{server.py:73} INFO - using gaus init to set client sem vectors and dimensions 1152, vec shape (2306,)
[{server.py:73} INFO - using gaus init to set client sem vectors and dimensions 1152, vec shape (2306,)
[{server.py:73} INFO - using gaus init to set client sem vectors and dimensions 1152, vec shape (2306,)
before sem running, data x looks like (5, 2306)
cluster  0 test accuracy 0.49
cluster  1 test accuracy 0.49
[{<ipython-input-4-2f8fd22cff75>:83} INFO - cluster_0 assigned 5 clients


 50%|█████     | 2/4 [02:35<02:46, 83.37s/it]

[{server.py:73} INFO - using gaus init to set client sem vectors and dimensions 1152, vec shape (2306,)
[{server.py:73} INFO - using gaus init to set client sem vectors and dimensions 1152, vec shape (2306,)
[{server.py:73} INFO - using gaus init to set client sem vectors and dimensions 1152, vec shape (2306,)
[{server.py:73} INFO - using gaus init to set client sem vectors and dimensions 1152, vec shape (2306,)
[{server.py:73} INFO - using gaus init to set client sem vectors and dimensions 1152, vec shape (2306,)
before sem running, data x looks like (5, 2306)
cluster  0 test accuracy 0.49
cluster  1 test accuracy 0.49
[{<ipython-input-4-2f8fd22cff75>:83} INFO - cluster_0 assigned 3 clients
[{<ipython-input-4-2f8fd22cff75>:83} INFO - cluster_1 assigned 2 clients


 75%|███████▌  | 3/4 [02:57<01:04, 64.98s/it]

[{server.py:73} INFO - using gaus init to set client sem vectors and dimensions 1152, vec shape (2306,)
[{server.py:73} INFO - using gaus init to set client sem vectors and dimensions 1152, vec shape (2306,)
[{server.py:73} INFO - using gaus init to set client sem vectors and dimensions 1152, vec shape (2306,)
[{server.py:73} INFO - using gaus init to set client sem vectors and dimensions 1152, vec shape (2306,)
[{server.py:73} INFO - using gaus init to set client sem vectors and dimensions 1152, vec shape (2306,)
before sem running, data x looks like (5, 2306)
cluster  0 test accuracy 0.49
cluster  1 test accuracy 0.49
[{<ipython-input-4-2f8fd22cff75>:83} INFO - cluster_0 assigned 3 clients
[{<ipython-input-4-2f8fd22cff75>:83} INFO - cluster_1 assigned 2 clients


100%|██████████| 4/4 [03:43<00:00, 55.90s/it]


# fedsem (cluster = 2, sem=True and used bbp_map)

In [None]:
# from utils_misc import get_flatten_vec, get_all_L_next

# d = get_flatten_vec(server.shared_nn.get_weights(), server.fc_idx)
# print(d.shape)
# print(get_all_L_next(server.shared_nn.get_weights()))

In [None]:
# current_model.SGD = False
# current_model.optimizer.learning_rate = lr


# logging.info("=="*15)
# logger.info("Start training on <{}>".format(exp_dataset))

# server = Server(current_model, num_clusters = num_clusters)
# server.firstCommunicationRound = True
# for t in trange(iterations):
#     IMCK_MA(server, local_epochs, sem=True)
#     server.evaluate_global_models_with_ma(test_set)
#     server.firstCommunicationRound = False

# for _ in range(4):
#     ts_data = current_model.create_dataset(get_global_data(), 'test')
#     eval_model(server.c_model, ts_data)


# logger.info("Finished section")

In [None]:
def eval_model(model, data):
    """Evaluate ``model`` on ``data`` and print the resulting loss and accuracy.

    ``model.evaluate`` must return exactly two values (loss, accuracy).
    """
    metrics = model.evaluate(data, verbose=1)
    loss, acc = metrics
    report = "loss {}, acc {}".format(loss, acc)
    print(report)

# hypcluster (cluster = 2, assign cluster label by model loss)

In [None]:
#@title Parameter config
#@markdown Forms support many types of fields.

# Running this cell rebinds names that earlier cells also assign (iterations,
# num_clusters, local_epochs, num_worker_per_round, lr), so results depend on
# whether it ran before training.
iterations = 44 #@param
num_clusters = 2 #@param
local_epochs = 20 #@param
num_worker_per_round = 45 #@param
# Unlike the values above, lr carries no #@param marker.
lr = 0.006
#@markdown ---


In [None]:
# current_model.SGD = False
# current_model.optimizer.learning_rate = lr


# logging.info("=="*15)
# logger.info("Start training on <{}>".format(exp_dataset))

# server = Server(current_model, num_clusters = num_clusters)
# server.firstCommunicationRound = True
# for t in trange(iterations):
#     clients2key, key2clients, avg_w = IMCK(server, local_epochs)
#     server.firstCommunicationRound = False

# for _ in range(4):
#     ts_data = current_model.create_dataset(get_global_data(), 'test')
#     eval_model(server.c_model, ts_data)


# logger.info("Finished section")

# Visualize

In [None]:

# One figure per metric kept in server.history: accuracy first, then loss.
for _series_fn, _title, _ylabel in (
    (get_history, 'model accuracy', 'accuracy'),
    (get_loss, 'model loss', 'loss'),
):
    plt.plot(_series_fn(server.history))
    plt.title(_title)
    plt.ylabel(_ylabel)
    plt.xlabel('epoch')
    plt.legend(['train'], loc='upper left')
    plt.show()

In [None]:
'''
Plot all baselines to a bar chart
'''
import matplotlib
import matplotlib.pyplot as plt
import numpy as np


# NOTE(review): the tick labels are dataset names, but the two series are
# labelled 'Men'/'Women' and the title reads 'Scores by group and gender' --
# presumably placeholder values rather than experiment results. Confirm and
# replace before using this figure.
labels = ['MNIST', 'FEMNIST']
men_means = [20, 34]
women_means = [25, 32]

x = np.arange(len(labels))  # the label locations
width = 0.35  # the width of the bars

# Two bars per label, shifted half a bar width to either side of the tick.
fig, ax = plt.subplots()
rects1 = ax.bar(x - width/2, men_means, width, label='Men')
rects2 = ax.bar(x + width/2, women_means, width, label='Women')

# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_ylabel('Scores')
ax.set_title('Scores by group and gender')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()


def autolabel(rects):
    """Write each bar's height just above it, horizontally centred.

    Draws on the module-level ``ax``.
    """
    for bar in rects:
        bar_height = bar.get_height()
        centre_x = bar.get_x() + bar.get_width() / 2
        ax.annotate(
            '{}'.format(bar_height),
            xy=(centre_x, bar_height),
            xytext=(0, 3),  # shift the text up by 3 points
            textcoords="offset points",
            ha='center',
            va='bottom',
        )


# Annotate both bar groups with their heights, then lay out and render the figure.
autolabel(rects1)
autolabel(rects2)

fig.tight_layout()

plt.show()

In [None]:
# import pickle

# with open("./glob_testset_femnist", "wb+") as f:
#     glob_x, glob_y = get_global_data()
#     pickle.dump((glob_x, glob_y), f)


# model = celeba_model.create_model()
# ds = celeba_model.create_dataset(clients[0].train_data)
# history = model.fit(ds, epochs=3, verbose=2)

# Everything under this heading is code for the Colab VM

In [None]:
# import os
# os.chdir('/content/drive/My Drive/fed_data')