In [6]:
import json
import os
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "1"

import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn import utils
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from time import time
from tqdm import tqdm

In [7]:
# Generate data based on chosen setting
def generate_data(data, use_val, setting, n_clients):
    """Load a dataset and partition its training split across clients.

    Parameters
    ----------
    data : str
        Name of the dataset to load. Only "MNIST" is supported.
    use_val : bool
        When truthy, a validation split with as many samples as the test set
        is held out of the training data (``random_state=42``). Otherwise
        ``x_val`` and ``y_val`` are returned as ``None``.
    setting : str
        "IID"    - shuffle the training data (``random_state=21``) and split
                   it into ``n_clients`` near-equal parts.
        "NONIID" - sort the training data by label and split it into
                   ``n_clients`` contiguous shards, one per client, so each
                   client only sees a narrow range of labels.
    n_clients : int
        Number of clients to create; must be at least 1.

    Returns
    -------
    tuple
        ``(clients_data, clients_label, x_train, y_train, x_val, y_val,
        x_test, y_test)``. ``clients_data`` / ``clients_label`` are dicts
        keyed ``"client_1"`` .. ``"client_<n_clients>"``. Note that in the
        "NONIID" setting the returned ``x_train`` / ``y_train`` are the
        label-sorted arrays, while in "IID" they are left unshuffled.

    Raises
    ------
    ValueError
        If ``data`` or ``setting`` is not a supported value, or if
        ``n_clients`` is smaller than 1.
    """
    # Validate every argument up front so a typo fails fast with a clear
    # message instead of an UnboundLocalError later in the function.
    if data != "MNIST":
        raise ValueError("Unsupported dataset %r; expected 'MNIST'" % (data,))
    if setting not in ("IID", "NONIID"):
        raise ValueError(
            "Unsupported setting %r; expected 'IID' or 'NONIID'" % (setting,)
        )
    if n_clients < 1:
        raise ValueError("n_clients must be at least 1, got %r" % (n_clients,))

    # Load data from Tensorflow Library
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

    # Scale pixel values to [0, 1], then append a trailing channel axis
    # (axis=-1) so each image has an explicit single channel.
    x_train = np.expand_dims(x_train / 255.0, axis=-1)
    x_test = np.expand_dims(x_test / 255.0, axis=-1)

    # Optionally hold out a validation split the same size as the test set
    if use_val:
        x_train, x_val, y_train, y_val = train_test_split(
            x_train, y_train, test_size=len(x_test), random_state=42
        )
    else:
        x_val, y_val = None, None

    # Arrange the training samples in the order they will be handed out
    if setting == "IID":
        # Shuffle so every client receives a random mix of labels
        x_source, y_source = utils.shuffle(x_train, y_train, random_state=21)
    else:
        # NONIID: sort by digit label so contiguous shards are label-skewed
        sorted_index = np.argsort(y_train)
        y_train = y_train[sorted_index]
        x_train = x_train[sorted_index]
        x_source, y_source = x_train, y_train

    # One near-equal shard per client (np.array_split tolerates remainders)
    data_shards = np.array_split(x_source, n_clients)
    label_shards = np.array_split(y_source, n_clients)

    # Distribute the shards to all clients, numbering clients from 1
    clients_data = {
        "client_%s" % i: shard for i, shard in enumerate(data_shards, start=1)
    }
    clients_label = {
        "client_%s" % i: shard for i, shard in enumerate(label_shards, start=1)
    }

    return clients_data, clients_label, x_train, y_train, x_val, y_val, x_test, y_test

# Initiate global model
def initiate_model(model_fam, num_class):
    """Build the global model for the requested model family.

    Parameters
    ----------
    model_fam : str
        "2NN" selects ``NN2Layers``; "CNN" selects ``CNN``. Both classes are
        expected to be defined elsewhere in the notebook.
    num_class : int
        Passed straight through to the builder's ``initiate`` method.

    Returns
    -------
    Whatever ``initiate(num_class)`` returns for the selected builder.

    Raises
    ------
    ValueError
        If ``model_fam`` is not one of the supported families.
    """
    if model_fam == "2NN":
        builder = NN2Layers()
    elif model_fam == "CNN":
        builder = CNN()
    else:
        # Previously an unknown family fell through to the return statement
        # and surfaced as an UnboundLocalError.
        raise ValueError(
            "Unsupported model family %r; expected '2NN' or 'CNN'" % (model_fam,)
        )

    return builder.initiate(num_class)

# Scaling weights to the dataset proportion
def scale_weights(num_local_samples, weights, chosen_clients):
    """Scale one client's weights by its share of the round's samples.

    The total sample count is taken as
    ``len(chosen_clients) * num_local_samples``, so the factor works out to
    ``1 / len(chosen_clients)``: every chosen client is treated as holding
    the same number of samples.

    Parameters
    ----------
    num_local_samples : number
        Sample count of this client.
    weights : iterable
        Per-layer weight and bias components of the client model.
    chosen_clients : sized collection
        Clients taking part in the current round; only its length is used.

    Returns
    -------
    list
        Each component multiplied by the scaling factor, in input order.
    """
    round_total = len(chosen_clients) * num_local_samples
    factor = num_local_samples / round_total

    # Apply the same factor to every layer's weights & biases
    return [factor * layer for layer in weights]

# Sum all the scaled weights from all clients
def sum_scaled_weights(scaled_weights):
    """Add up the clients' scaled weights component by component.

    Parameters
    ----------
    scaled_weights : iterable
        One entry per client, each an iterable of that client's scaled
        components (e.g. the output of ``scale_weights``).

    Returns
    -------
    list
        One summed value per component position, produced by
        ``tf.math.reduce_sum(..., axis=0)`` over the clients.
    """
    # zip(*...) regroups the input so each tuple holds the same component
    # position from every client.
    return [
        tf.math.reduce_sum(per_client_layer, axis=0)
        for per_client_layer in zip(*scaled_weights)
    ]

# Custom global model evaluation
def evaluate_model(model, test_data, test_label):
    """Compute loss and accuracy of ``model`` on a labelled dataset.

    Parameters
    ----------
    model : object exposing ``predict``
        Model to evaluate; ``model.predict(test_data)`` must return one row
        of class scores per sample.
    test_data : array-like
        Inputs passed to ``model.predict``.
    test_label : array-like
        Integer class labels matching ``test_data``.

    Returns
    -------
    tuple
        ``(loss, accuracy)``, both rounded to 4 decimal places.
    """
    # Class scores for every sample
    scores = model.predict(test_data)

    # Sparse categorical cross-entropy with default settings
    # NOTE(review): the default from_logits=False assumes the model emits
    # probabilities (e.g. a softmax output) - confirm against the models.
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
    loss_value = loss_fn(test_label, scores).numpy()

    # Accuracy from the highest-scoring class of each sample
    predicted_classes = np.argmax(scores, axis=1)
    accuracy_value = accuracy_score(test_label, predicted_classes)

    return round(loss_value, 4), round(accuracy_value, 4)

In [8]:
# Build the IID MNIST partition for 100 clients, holding out a validation split
(
    clients_data,
    clients_label,
    x_train,
    y_train,
    x_val,
    y_val,
    x_test,
    y_test,
) = generate_data(data="MNIST", use_val=True, setting="IID", n_clients=100)

In [9]:
# Shape of the training split left after the validation hold-out
x_train.shape

(50000, 28, 28, 1)

In [10]:
# Shape of the validation split carved out of the training data
x_val.shape

(10000, 28, 28, 1)

In [11]:
# Shape of the MNIST test split
x_test.shape

(10000, 28, 28, 1)