In [1]:
import os
from os.path import join
import errno
import argparse
import sys
import pickle
import json 

import numpy as np
from tensorflow.keras.models import load_model
import tensorflow as tf

from data_utils import load_MNIST_data, load_FEMNIST_data, load_EMNIST_data, generate_bal_private_data
from data_utils import generate_partial_data, load_ready_data
from FedMD import FedMD, FedAvg
from FedSKD import FedSKD 
from Neural_Networks import train_models, cnn_2layer_fc_model, cnn_3layer_fc_model
from utility import * 

import pandas as pd            # For data manipulation
import seaborn as sns          # For plotting heatmap
import matplotlib.pyplot as plt  # For visualization and saving the plot
import logging

ModuleNotFoundError: No module named 'tensorflow'

## Load config file

In [None]:
# Private (per-party) dataset for this run. The public dataset is chosen as the
# other member of the same family (CIFAR10 <-> CIFAR100, MNIST <-> FEMNIST).
# In this cell the two names drive only the printout and the config-file choice.
private_dataset_name = 'MNIST' # 'CIFAR10', 'CIFAR100', 'FEMNIST', 'MNIST'

if private_dataset_name in ["CIFAR10", "CIFAR100"]:
    public_dataset_name = 'CIFAR10' if private_dataset_name == 'CIFAR100' else 'CIFAR100'
else:
    public_dataset_name = 'MNIST' if private_dataset_name == 'FEMNIST' else 'FEMNIST'

print("private dataset: {0}".format(private_dataset_name))
print("public dataset: {0}".format(public_dataset_name))

# Lookup table from an architecture name to the function that builds it.
CANDIDATE_MODELS = {"2_layer_CNN": cnn_2layer_fc_model,
                    "3_layer_CNN": cnn_3layer_fc_model}

# One JSON config per dataset family.
if private_dataset_name in ["CIFAR10", "CIFAR100"]:
    conf_file = os.path.abspath("../conf/CIFAR_balance_conf.json")
else:
    conf_file = os.path.abspath("../conf/MNIST_balance_conf.json")

# Only the JSON parsing needs the file handle; everything below works on the
# in-memory dict, so the file is closed as early as possible.
with open(conf_file, "r") as f:
    conf_dict = json.load(f)

# Model / pre-training settings.
model_config = conf_dict["models"]
pre_train_params = conf_dict["pre_train_params"]
model_saved_dir = conf_dict["model_saved_dir"]
model_saved_names = conf_dict["model_saved_names"]
is_early_stopping = conf_dict["early_stopping"]
public_classes = conf_dict["public_classes"]
private_classes = conf_dict["private_classes"]
# The class count is derived from the public class list, not read from the file.
n_classes = len(public_classes)

# Federation layout.
N_parties = conf_dict["N_parties"]
N_samples_per_class = conf_dict["N_samples_per_class"]

# Collaborative-training schedule and feature switches.
N_rounds = conf_dict["N_rounds"]
N_alignment = conf_dict["N_alignment"]
N_private_training_round = conf_dict["N_private_training_round"]
private_training_batchsize = conf_dict["private_training_batchsize"]
N_logits_matching_round = conf_dict["N_logits_matching_round"]
logits_matching_batchsize = conf_dict["logits_matching_batchsize"]
aug = conf_dict["aug"]
compress = conf_dict["compress"]
select = conf_dict["select"]
algorithm = conf_dict["algorithm"]

# Input / output locations.
dataset_dir = conf_dict["dataset_dir"]
result_save_dir = conf_dict["result_save_dir"]

# Encode the experiment variant in the output directory name.
# An algorithm value other than the three below leaves the name unchanged.
if algorithm == 'fedavg':
    result_save_dir = result_save_dir + "_fedavg"

elif algorithm == 'fedmd':
    result_save_dir = result_save_dir + "_fedmd"

elif algorithm == 'fedskd':
    result_save_dir = result_save_dir + "_fedskd"

    if aug:
        print("adding aug")
        result_save_dir = result_save_dir + "_aug"
    if compress:
        print("adding compress")
        result_save_dir = result_save_dir + "_compress"
    if select:
        print("adding select")
        result_save_dir = result_save_dir + "_select"

    print("Using {} alignment".format(N_alignment))
    result_save_dir = result_save_dir + "_exp{}".format(N_alignment)

# Never reuse a previous run's directory. A sequential suffix is probed until a
# free name is found: unlike a single random draw it cannot collide with an
# existing directory (which would make os.makedirs raise FileExistsError) and
# it does not consume values from the global NumPy random stream.
unique_save_dir = result_save_dir
attempt = 0
while os.path.exists(unique_save_dir):
    attempt += 1
    unique_save_dir = "{}_{}".format(result_save_dir, attempt)
result_save_dir = unique_save_dir
os.makedirs(result_save_dir)


# Drop the temporaries so they do not linger in the notebook namespace.
del conf_dict, conf_file, unique_save_dir, attempt


## Helper functions

In [None]:

from PIL import Image

def all_digit(x) : 
    """Return True when every element of ``x`` reports ``isdigit()``.

    ``x`` is any iterable of strings (typically a single string, iterated
    character by character). Empty input yields True.
    """
    only_digits = True
    for ch in x:
        # Every element is inspected; there is deliberately no early exit.
        if not ch.isdigit():
            only_digits = False
    return only_digits

def resize_this_image(x, shape, denormalize = True, normalize_back = True) : 
    """Resize a single image with PIL and return it as a float32 array.

    Args:
        x: image as a NumPy array. With ``denormalize=True`` the values are
            treated as lying in [-0.5, 0.5]; anything outside is clipped.
        shape: target size, forwarded unchanged to ``Image.resize``
            (PIL interprets it as ``(width, height)``).
        denormalize: when true, map ``x`` from [-0.5, 0.5] to uint8 [0, 255]
            before handing it to PIL.
        normalize_back: when true, map the resized pixels from [0, 255] back
            to [-0.5, 0.5].

    Returns:
        The resized image as a ``float32`` NumPy array.
    """
    if denormalize:
        # Round to the nearest grey level and clip before the uint8 cast.
        # A bare cast truncates (a normalised pixel such as 100/255 - 0.5 can
        # come back as 99) and wraps around for out-of-range values.
        x = np.clip(np.rint((x + 0.5) * 255.0), 0, 255).astype(np.uint8)
    y = np.array(Image.fromarray(x).resize(shape), dtype = np.float32)
    if normalize_back:
        y = y / 255.0 - 0.5
    return y


def resize_dataset(x, new_shape) : 
    """Resize every image of a dataset and stack the results.

    Args:
        x: indexable collection of images supporting ``len(x)`` and
            ``x[i, ...]`` (e.g. a NumPy array whose first axis is the sample axis).
        new_shape: target size passed through to ``resize_this_image``.

    Returns:
        ``np.array`` built from the individually resized images.
    """
    resized = [resize_this_image(x[idx, ...], new_shape) for idx in range(len(x))]
    return np.array(resized)


# Prepare data and models

In [None]:
# Train/test splits. The loaders are hard-coded in this cell: FEMNIST supplies
# the private data and MNIST the public data.
(private_X_train, private_y_train,
 private_X_test, private_y_test) = load_FEMNIST_data(standarized = False, verbose = False)
(public_X_train, public_y_train,
 public_X_test, public_y_test) = load_MNIST_data(standarized = False, verbose = False)

# Labels actually present in the private training set. This overwrites the
# `private_classes` value that was read from the config file.
private_classes = np.unique(private_y_train)
n_private_classes = len(private_classes)


# Presumably one balanced subset per party plus the combined private data;
# confirm the exact return structure in data_utils.generate_bal_private_data.
private_data, total_private_data = generate_bal_private_data(
    private_X_train, private_y_train, N_parties, private_classes, N_samples_per_class)

In [None]:
# NOTE(review): this replaces the `algorithm` value loaded from the config
# file, after `result_save_dir` was already named from that earlier value.
algorithm = 'fedskd'
# Shape of one sample: everything after the leading axis of the training array.
input_shape = private_X_train.shape[1:]
parties = []

for i in range(N_parties):
    # With 'fedavg' every party is built from model_config[0];
    # otherwise party i uses the i-th config entry.
    model_idx = 0 if algorithm == 'fedavg' else i
    item = model_config[model_idx]
    model_name, model_params = item['model_type'], item['params']
    model = CANDIDATE_MODELS[model_name](input_shape = input_shape,
                                         n_classes = n_private_classes,
                                         **model_params)
    parties += [model]


# Shown as the cell output: number of parties that were built.
len(parties)

# Pretrain the models


In [None]:

# Train all models on the public dataset
results = train_models(parties, public_X_train, public_y_train, public_X_test, public_y_test, verbose = 0)

print("public training results")
for res in results :
    print(res)
print()

# Train each model on its private dataset
for i, party in enumerate(parties) :
    X_train, y_train = private_data[i]
    # NOTE(review): X_test / y_test are not used below, and whether
    # total_private_data can be indexed per party depends on
    # generate_bal_private_data -- confirm, then drop or use them.
    X_test, y_test = total_private_data[i]
    # The held-out private split must be passed as `validation_data`.
    # Passed positionally it lands in fit()'s `batch_size` / `epochs` slots and
    # clashes with the keyword arguments below (TypeError: multiple values).
    party.fit(X_train, y_train,
              validation_data = (private_X_test, private_y_test),
              epochs = pre_train_params['epochs'],
              batch_size = pre_train_params['batch_size'],
              verbose = 1)


# Evaluate each model on the private test split
private_scores = []
for i, party in enumerate(parties) :
    score = party.evaluate(private_X_test, private_y_test, verbose = 0)
    private_scores.append(score)
    print("party {} : {}".format(i, score))

print("private training results")
for score in private_scores :
    print(score)
print()


# Run FedSKD

In [None]:
# Assemble the FedSKD run from the party models, the private data and the
# private test split (passed as an (X, y) tuple), plus the schedule settings
# loaded from the config file.
fedskd = FedSKD(
    parties,
    private_data,
    (private_X_test, private_y_test),
    private_training_batchsize = private_training_batchsize,
    N_private_training_round = N_private_training_round,
    N_rounds = N_rounds,
)