## Imports

In [1]:
import argparse
import ast
import errno
import os
import pickle
import sys

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import load_model
from tensorflow.keras.models import clone_model, load_model

from data_utils import load_CIFAR_data, generate_partial_data, generate_bal_private_data
from data_utils import generate_alignment_data
from FedMD import FedMD
from Neural_Networks import train_models, cnn_2layer_fc_model, cnn_3layer_fc_model
from Neural_Networks import remove_last_layer
from utility import *

## Configuration, data loading and FedMD training

In [2]:


# Registry mapping the "model_type" strings found in the config to the
# model-building functions imported from Neural_Networks.
CANDIDATE_MODELS = {
    "2_layer_CNN": cnn_2layer_fc_model,
    "3_layer_CNN": cnn_3layer_fc_model,
}

conf_file = os.path.abspath("conf/CIFAR_balance_conf.json")

# NOTE(review): despite the .json extension the file is presumably written with
# Python literals (the code below tests `model_saved_dir is None`), so it is
# parsed as a Python literal rather than with the json module.
# ast.literal_eval only accepts literals (dict/list/str/numbers/True/False/None);
# unlike eval() it cannot execute arbitrary code embedded in the config file.
with open(conf_file, "r") as f:
    conf_dict = ast.literal_eval(f.read())

# --- model definitions and pre-training options ---
model_config = conf_dict["models"]
pre_train_params = conf_dict["pre_train_params"]
model_saved_dir = conf_dict["model_saved_dir"]
model_saved_names = conf_dict["model_saved_names"]
is_early_stopping = conf_dict["early_stopping"]

# --- class layout ---
# n_classes is derived from the two class lists instead of being read from
# an "n_classes" config entry.
public_classes = conf_dict["public_classes"]
private_classes = conf_dict["private_classes"]
n_classes = len(public_classes) + len(private_classes)

# --- data / party layout ---
emnist_data_dir = conf_dict["EMNIST_dir"]
N_parties = conf_dict["N_parties"]
N_samples_per_class = conf_dict["N_samples_per_class"]

# --- collaborative-training hyper-parameters (passed to FedMD below) ---
N_rounds = conf_dict["N_rounds"]
N_alignment = conf_dict["N_alignment"]
N_private_training_round = conf_dict["N_private_training_round"]
private_training_batchsize = conf_dict["private_training_batchsize"]
N_logits_matching_round = conf_dict["N_logits_matching_round"]
logits_matching_batchsize = conf_dict["logits_matching_batchsize"]
aug = conf_dict["aug"]
compress = conf_dict["compress"]

result_save_dir = conf_dict["result_save_dir"]

# The raw dict and path are no longer needed once every entry is unpacked.
del conf_dict, conf_file

In [3]:


# CIFAR10 training split is used as the public dataset.
(X_train_CIFAR10, y_train_CIFAR10,
 X_test_CIFAR10, y_test_CIFAR10) = load_CIFAR_data(
    data_type="CIFAR10", standarized=True, verbose=True)

public_dataset = dict(X=X_train_CIFAR10, y=y_train_CIFAR10)

# CIFAR100 is the source of the private data.
(X_train_CIFAR100, y_train_CIFAR100,
 X_test_CIFAR100, y_test_CIFAR100) = load_CIFAR_data(
    data_type="CIFAR100", standarized=True, verbose=True)

# Restrict both CIFAR100 splits to the samples whose label is listed in
# private_classes.
X_train_CIFAR100, y_train_CIFAR100 = generate_partial_data(
    X=X_train_CIFAR100,
    y=y_train_CIFAR100,
    class_in_use=private_classes,
    verbose=True,
)

X_test_CIFAR100, y_test_CIFAR100 = generate_partial_data(
    X=X_test_CIFAR100,
    y=y_test_CIFAR100,
    class_in_use=private_classes,
    verbose=True,
)

# Relabel the selected CIFAR100 data for future convenience: the k-th entry of
# private_classes becomes label k + len(public_classes).
# The masks are computed on untouched copies of the labels. Comparing against
# the arrays being rewritten would merge two classes whenever a private class
# id equals a new label that was already assigned earlier in the loop.
y_train_original = y_train_CIFAR100.copy()
y_test_original = y_test_CIFAR100.copy()
for index, cls_ in enumerate(private_classes):
    y_train_CIFAR100[y_train_original == cls_] = index + len(public_classes)
    y_test_CIFAR100[y_test_original == cls_] = index + len(public_classes)
del index, cls_, y_train_original, y_test_original

# Per-label sample counts of the relabelled CIFAR100 training labels.
print(pd.Series(y_train_CIFAR100).value_counts())

# Labels of the private classes after relabelling: they start right after the
# public class ids.
mod_private_classes = len(public_classes) + np.arange(len(private_classes))

print("=" * 60)
# generate private data
private_data, total_private_data = generate_bal_private_data(
    X_train_CIFAR100,
    y_train_CIFAR100,
    N_parties=N_parties,
    classes_in_use=mod_private_classes,
    N_samples_per_class=N_samples_per_class,
    data_overlap=False,
)

print("=" * 60)
# Test set shared by all parties, restricted to the relabelled private classes.
X_private_test, y_private_test = generate_partial_data(
    X=X_test_CIFAR100,
    y=y_test_CIFAR100,
    class_in_use=mod_private_classes,
    verbose=True,
)
private_test_data = dict(X=X_private_test, y=y_private_test)
del X_private_test, y_private_test

# Build the list of party models handed to FedMD.
parties = []
if model_saved_dir is None:
    # No saved models configured: instantiate one fresh model per config entry.
    for i, item in enumerate(model_config):
        model_name = item["model_type"]
        model_params = item["params"]
        tmp = CANDIDATE_MODELS[model_name](n_classes=n_classes, 
                                            input_shape=(32,32,3),
                                            **model_params)
        print("size of model_{}: {}".format(i, size_of(tmp)))
        parties.append(tmp)
else:
    # Previously this case left `parties` empty, so FedMD was built with no
    # models at all. Load the saved models instead.
    # NOTE(review): assumes every non-hidden entry of model_saved_dir is a
    # model loadable by keras load_model -- confirm against how the models
    # were saved. Entries are sorted so the party order is deterministic.
    saved_dir = os.path.abspath(model_saved_dir)
    for fname in sorted(os.listdir(saved_dir)):
        if fname.startswith("."):
            # skip hidden files such as .DS_Store
            continue
        parties.append(load_model(os.path.join(saved_dir, fname)))


X_train shape : (50000, 32, 32, 3)
X_test shape : (10000, 32, 32, 3)
y_train shape : (50000,)
y_test shape : (10000,)
X_train shape : (50000, 32, 32, 3)
X_test shape : (10000, 32, 32, 3)
y_train shape : (50000,)
y_test shape : (10000,)
X shape : (3000, 32, 32, 3)
y shape : (3000,)
X shape : (600, 32, 32, 3)
y shape : (600,)
10    500
15    500
14    500
12    500
13    500
11    500
dtype: int64
X shape : (600, 32, 32, 3)
y shape : (600,)
Metal device set to: Apple M1

systemMemory: 8.00 GB
maxCacheSize: 2.67 GB



2023-05-15 18:09:31.804952: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-05-15 18:09:31.806026: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
  super(Adam, self).__init__(name, **kwargs)


size of model_0: 1000.704
size of model_1: 2148.864
size of model_2: 2148.864
size of model_3: 1000.704
size of model_4: 1000.704
size of model_5: 1474.048
size of model_6: 1140.992
size of model_7: 400.644
size of model_8: 446.208
size of model_9: 409.344
size of model_10: 4887.552
size of model_11: 2148.864
size of model_12: 2148.864
size of model_13: 1000.704
size of model_14: 1000.704
size of model_15: 1474.048
size of model_16: 1140.992
size of model_17: 400.644
size of model_18: 446.208
size of model_19: 409.344


In [4]:
# Inspect the public label array: the value reported by size_of() next to the
# number of labels.
size_of(public_dataset['y']), len(public_dataset['y'])

(50.0, 50000)

In [5]:
# Assemble the FedMD experiment from the party models, the public/private
# datasets and the hyper-parameters read from the config.
fedmd = FedMD(
    parties,
    public_dataset=public_dataset,
    private_data=private_data,
    total_private_data=total_private_data,
    private_test_data=private_test_data,
    N_rounds=N_rounds,
    N_alignment=N_alignment,
    N_logits_matching_round=N_logits_matching_round,
    logits_matching_batchsize=logits_matching_batchsize,
    N_private_training_round=N_private_training_round,
    private_training_batchsize=private_training_batchsize,
    aug=aug,
    compress=compress,
)

# Not collected in this notebook (left disabled):
# initialization_result = fedmd.init_result
# pooled_train_result = fedmd.pooled_train_result

# Run the collaborative training and keep whatever it returns.
collaboration_performance = fedmd.collaborative_training()

model  0
model  1
model  2
model  3
model  4
model  5
model  6
model  7
model  8
model  9
model  10
model  11
model  12
model  13
model  14
model  15
model  16
model  17
model  18
model  19
augmenting public dataset ... 
round  0
update logits ... 
aug:True, compress:False, N_alignment:1000
collaborative parties 20
size of alignment data 8.0, length: 1000


2023-05-15 18:09:42.980971: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2023-05-15 18:09:43.141330: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2023-05-15 18:09:43.537894: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2023-05-15 18:09:43.965784: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2023-05-15 18:09:44.338528: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2023-05-15 18:09:44.649701: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2023-05-15 18:09:44.962498: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113

size of local soft labels:64.0, size of global soft labels:64.0
length of local soft labels:1000, length of global soft labels:1000
type of local soft labels:float32, type of global soft labels:float32


## Play: local-only training baseline

In [None]:

# Local-only baseline: for every model configuration, train a fresh model on
# that party's private data alone (3 independent trials) and record the mean
# accuracy on the shared private test set.
local_trials = []
for i, item in enumerate(model_config):
    model_name = item["model_type"]
    model_params = item["params"]
    trials = []
    for trial in range(3):
        # Drop the previous trial's graph/state so trials do not accumulate.
        tf.keras.backend.clear_session()
        model_l = CANDIDATE_MODELS[model_name](n_classes=n_classes,
                                               input_shape=(32, 32, 3),
                                               **model_params)
        # `learning_rate` replaces the deprecated `lr` keyword.
        model_l.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
                        loss="sparse_categorical_crossentropy",
                        metrics=["accuracy"])

        # validation_data is passed as the documented (x, y) tuple.
        ub_history = model_l.fit(
            private_data[i]["X"], private_data[i]["y"],
            batch_size=32, epochs=50, shuffle=True, verbose=False,
            validation_data=(private_test_data["X"], private_test_data["y"]),
            callbacks=[EarlyStopping(monitor="val_accuracy", min_delta=0.0001,
                                     patience=10, restore_best_weights=True)])

        # Score the model that is actually kept after training. With
        # restore_best_weights=True the last entry of the history belongs to
        # the final epoch, not to the weights the model ends up with.
        _, trial_accuracy = model_l.evaluate(private_test_data["X"],
                                             private_test_data["y"],
                                             verbose=0)
        trials.append(trial_accuracy)
        print("trail {0} : {1}".format(trial, trials[-1]))
    avg_trial = np.mean(trials)
    local_trials.append(avg_trial)
    print("trials for model {0}: {1}".format(i, trials))
    print("Average of model {0} trials: {1}".format(i, avg_trial))
    print()



In [None]:
# Show the per-model mean accuracies collected by the local-training loop.
print(local_trials)