**MNIST DATASET | IID | CNN**

In [None]:
!pip install -q flwr["simulation"]
import flwr as fl
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import numpy as np

# create clients and assign data to them
def create_clients(image_list, label_list, num_clients, initial):
    """Split the (image, label) pairs into equally sized random shards, one per client.

    Args:
        image_list: sequence of images.
        label_list: sequence of labels, aligned with ``image_list``.
        num_clients: number of shards / clients to create.
        initial: prefix of the client names (names are ``<initial>_<index>``).

    Returns:
        dict mapping each client name to its list of (image, label) tuples.
        Samples that do not fit into ``num_clients`` equal shards are dropped.
    """
    # names first: client index runs from 0 to num_clients - 1
    names = [f'{initial}_{idx}' for idx in range(num_clients)]

    # pair every image with its label and shuffle the pairs in place
    samples = list(zip(image_list, label_list))
    np.random.shuffle(samples)

    # cut the shuffled pairs into contiguous shards of identical length
    shard_len = len(samples) // num_clients
    shards = []
    for offset in range(0, shard_len * num_clients, shard_len):
        shards.append(samples[offset:offset + shard_len])

    # number of clients must equal the number of shards
    assert len(shards) == len(names)
    return dict(zip(names, shards))

# batch assigned data of clients
def batch_data(data_shard, batch_size):
    """Turn a client's shard into a shuffled, batched ``tf.data.Dataset``.

    Args:
        data_shard: iterable of (image, label) pairs.
        batch_size: number of samples per batch.

    Returns:
        A dataset yielding (images, labels) batches; the shuffle buffer
        spans the whole shard.
    """
    # separate the shard into an image tuple and a label tuple
    features, targets = zip(*data_shard)
    tensors = (list(features), list(targets))
    return (
        tf.data.Dataset.from_tensor_slices(tensors)
        .shuffle(len(targets))
        .batch(batch_size)
    )

# define initial CNN model for clients in Flower framework
def client_fn(cid):
    """Create the Flower client for client id ``cid``.

    Builds a fresh CNN (two conv/max-pool stages, dropout, softmax output),
    compiles it with Adam using the module-level ``learning_rate``,
    ``comms_round`` and ``momentum`` settings, and pairs it with the shard
    stored under ``client_<cid>`` in the module-level ``clients`` dict.
    """
    cnn_layers = [
        layers.Conv2D(32, kernel_size=(3, 3), activation="relu", input_shape=input_shape),
        layers.MaxPool2D(pool_size=(2, 2)),
        layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
        layers.MaxPool2D(pool_size=(2, 2)),
        layers.Flatten(),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation="softmax"),
    ]
    model = keras.Sequential(cnn_layers)

    # `momentum` is used as Adam's beta_1; the decay is spread over the rounds
    optimizer = Adam(
        learning_rate=learning_rate,
        decay=learning_rate / comms_round,
        beta_1=momentum,
    )
    model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])

    shard = clients[f"client_{cid}"]
    return FlowerClient(model, shard)

# configure FlowerClient parameters and functions
class FlowerClient(fl.client.NumPyClient):
    """Flower client that trains a Keras model on one client's data shard.

    Relies on the module-level ``batch_size``, ``x_test`` and ``y_test``.
    """

    def __init__(self, model, data):
        """Store the model and batch the client's data.

        Args:
            model: compiled Keras model.
            data: list of (image, label) pairs belonging to this client.
        """
        self.model = model
        # remember the real shard size: the server weights each client's
        # update by the number of examples it reports from fit()
        self.num_examples = len(data)
        self.data = batch_data(data, batch_size)

    def get_parameters(self, config):
        """Return the current model weights as a list of NumPy arrays."""
        return self.model.get_weights()

    def fit(self, parameters, config):
        """Train for one local epoch starting from the given global weights.

        Returns:
            (updated weights, number of training examples, empty metrics dict).
        """
        self.model.set_weights(parameters)
        self.model.fit(self.data, epochs=1, verbose=0, validation_data=(x_test, y_test))
        # report the whole shard size, not the size of a single batch
        return self.model.get_weights(), self.num_examples, {}

    def evaluate(self, parameters, config):
        """Evaluate the given global weights on the shared test set.

        Returns:
            (loss, number of test examples, {"accuracy": accuracy}).
        """
        self.model.set_weights(parameters)
        test_loss, test_accuracy = self.model.evaluate(x_test, y_test, verbose=0)
        return test_loss, len(x_test), {"accuracy": test_accuracy}

# define save strategy for showing aggregated accuracy and aggregated loss at the end of each round
class SaveModelStrategy(fl.server.strategy.FedAvg):
    """FedAvg strategy that prints and records the global accuracy and loss of every round."""

    def aggregate_evaluate(self, rnd, results, failures):
        """Aggregate client evaluation results for round ``rnd``.

        The example-weighted accuracy and the aggregated loss are printed and
        appended to the module-level ``accuracy`` and ``loss`` lists.

        Returns:
            (aggregated loss, {"accuracy": aggregated accuracy}), or
            (None, {}) when no client returned a result.
        """
        # nothing to aggregate when no client reported back
        if not results:
            return None, {}

        aggregated_loss, _ = super().aggregate_evaluate(rnd, results, failures)

        # accuracy of every client weighted by its number of examples
        weighted_hits = 0
        total_examples = 0
        for _, res in results:
            weighted_hits += res.metrics["accuracy"] * res.num_examples
            total_examples += res.num_examples
        aggregated_accuracy = weighted_hits / total_examples

        print(f"comm_round: {rnd} | global_accuracy: {aggregated_accuracy} | global_loss: {aggregated_loss}")
        accuracy.append(aggregated_accuracy)
        loss.append(aggregated_loss)
        return aggregated_loss, {"accuracy": aggregated_accuracy}

# define initial parameters
num_classes = 10
num_clients = 10
input_shape = (28, 28, 1)
batch_size = 128
learning_rate = 0.01
momentum = 0.9
comms_round = 10
# filled by SaveModelStrategy, one entry per communication round
accuracy = []
loss = []

# load dataset
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# scale pixels from [0,255] to [0,1] and append the channel axis
x_train = np.expand_dims(x_train.astype("float32") / 255, -1)
x_test = np.expand_dims(x_test.astype("float32") / 255, -1)

# one-hot encode the labels
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# create clients for simulation
clients = create_clients(x_train, y_train, num_clients, 'client')

# start Flower simulation
fl.simulation.start_simulation(
    client_fn=client_fn,
    num_clients=num_clients,
    client_resources={"num_cpus": 2},
    config=fl.server.ServerConfig(num_rounds=comms_round),
    strategy=SaveModelStrategy(),
)

# plot accuracy and loss of each round, one figure per metric
rounds = range(1, comms_round + 1)
for title, series in (('Accuracy Comparison', accuracy), ('Loss Comparison', loss)):
    plt.suptitle(title)
    plt.scatter(rounds, series, c=["red"])
    plt.show()

**MNIST DATASET | NON-IID | CNN**

In [None]:
!pip install -q flwr["simulation"]
import flwr as fl
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import numpy as np

# create clients and assign data to them
def create_clients(image_list, label_list, num_clients, initial, start):
    """Split the (image, label) pairs into equally sized random shards, one per client.

    Args:
        image_list: sequence of images.
        label_list: sequence of labels, aligned with ``image_list``.
        num_clients: number of shards / clients to create.
        initial: prefix of the client names (names are ``<initial>_<index>``).
        start: index given to the first client; the others follow consecutively.

    Returns:
        dict mapping each client name to its list of (image, label) tuples.
        Samples that do not fit into ``num_clients`` equal shards are dropped.
    """
    # client indices run from start to start + num_clients - 1
    names = [f'{initial}_{idx}' for idx in range(start, start + num_clients)]

    # pair every image with its label and shuffle the pairs in place
    samples = list(zip(image_list, label_list))
    np.random.shuffle(samples)

    # cut the shuffled pairs into contiguous shards of identical length
    shard_len = len(samples) // num_clients
    shards = []
    for offset in range(0, shard_len * num_clients, shard_len):
        shards.append(samples[offset:offset + shard_len])

    # number of clients must equal the number of shards
    assert len(shards) == len(names)
    return dict(zip(names, shards))

# make data of each client non-iid
def non_iid_x(image_list, label_list, severity, num_intraclass_clients):
    """Build non-IID clients: every client only sees the labels of one label group.

    The distinct labels are shuffled and cut into groups of ``severity``
    labels (the last group may be smaller). All samples whose label belongs
    to a group are split among ``num_intraclass_clients`` clients via
    ``create_clients``; clients are named ``client_<n>`` with consecutive ``n``.

    Args:
        image_list: sequence of images.
        label_list: sequence of labels aligned with ``image_list``
            (one row per sample, e.g. one-hot vectors).
        severity: number of distinct labels per client group.
        num_intraclass_clients: number of clients created for each group.

    Returns:
        dict mapping client names to lists of (image, label) tuples.
    """
    non_iid_x_clients = {}
    # create unique label list and shuffle
    unique_labels = np.unique(label_list, axis=0)
    np.random.shuffle(unique_labels)
    # create sub label lists of `severity` labels each
    label_groups = [unique_labels[i:i + severity] for i in range(0, len(unique_labels), severity)]

    # flatten every label to one row once, so each group is matched with a
    # single vectorised comparison instead of a Python loop over all samples
    label_rows = np.asarray(label_list).reshape(len(label_list), -1)
    count = 0

    for group in label_groups:
        group_rows = np.reshape(group, (len(group), -1))
        # a sample belongs to the group when its label equals ANY label of the
        # group; requiring equality with all of them never matches once the
        # group holds more than one label
        in_group = (label_rows[:, None, :] == group_rows[None, :, :]).all(axis=2).any(axis=1)
        selected = np.flatnonzero(in_group)
        images = [image_list[i] for i in selected]
        labels = [label_list[i] for i in selected]
        # create num_intraclass_clients clients from the group
        intraclass_clients = create_clients(images, labels, num_intraclass_clients, 'client', count)
        # append intraclass clients to the main clients' dict
        non_iid_x_clients.update(intraclass_clients)
        count += num_intraclass_clients

    return non_iid_x_clients

# batch assigned data of clients
def batch_data(data_shard, batch_size):
    """Turn a client's shard into a shuffled, batched ``tf.data.Dataset``.

    Args:
        data_shard: iterable of (image, label) pairs.
        batch_size: number of samples per batch.

    Returns:
        A dataset yielding (images, labels) batches; the shuffle buffer
        spans the whole shard.
    """
    # separate the shard into an image tuple and a label tuple
    features, targets = zip(*data_shard)
    tensors = (list(features), list(targets))
    return (
        tf.data.Dataset.from_tensor_slices(tensors)
        .shuffle(len(targets))
        .batch(batch_size)
    )

# define initial CNN model for clients in Flower framework
def client_fn(cid):
    """Create the Flower client for client id ``cid``.

    Builds a fresh CNN (two conv/max-pool stages, dropout, softmax output),
    compiles it with Adam using the module-level ``learning_rate``,
    ``comms_round`` and ``momentum`` settings, and pairs it with the shard
    stored under ``client_<cid>`` in the module-level ``clients`` dict.
    """
    cnn_layers = [
        layers.Conv2D(32, kernel_size=(3, 3), activation="relu", input_shape=input_shape),
        layers.MaxPool2D(pool_size=(2, 2)),
        layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
        layers.MaxPool2D(pool_size=(2, 2)),
        layers.Flatten(),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation="softmax"),
    ]
    model = keras.Sequential(cnn_layers)

    # `momentum` is used as Adam's beta_1; the decay is spread over the rounds
    optimizer = Adam(
        learning_rate=learning_rate,
        decay=learning_rate / comms_round,
        beta_1=momentum,
    )
    model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])

    shard = clients[f"client_{cid}"]
    return FlowerClient(model, shard)

# configure FlowerClient parameters and functions
class FlowerClient(fl.client.NumPyClient):
    """Flower client that trains a Keras model on one client's data shard.

    Relies on the module-level ``batch_size``, ``x_test`` and ``y_test``.
    """

    def __init__(self, model, data):
        """Store the model and batch the client's data.

        Args:
            model: compiled Keras model.
            data: list of (image, label) pairs belonging to this client.
        """
        self.model = model
        # remember the real shard size: the server weights each client's
        # update by the number of examples it reports from fit(), and the
        # non-IID shards differ in size
        self.num_examples = len(data)
        self.data = batch_data(data, batch_size)

    def get_parameters(self, config):
        """Return the current model weights as a list of NumPy arrays."""
        return self.model.get_weights()

    def fit(self, parameters, config):
        """Train for one local epoch starting from the given global weights.

        Returns:
            (updated weights, number of training examples, empty metrics dict).
        """
        self.model.set_weights(parameters)
        self.model.fit(self.data, epochs=1, verbose=0, validation_data=(x_test, y_test))
        # report the whole shard size, not the size of a single batch
        return self.model.get_weights(), self.num_examples, {}

    def evaluate(self, parameters, config):
        """Evaluate the given global weights on the shared test set.

        Returns:
            (loss, number of test examples, {"accuracy": accuracy}).
        """
        self.model.set_weights(parameters)
        test_loss, test_accuracy = self.model.evaluate(x_test, y_test, verbose=0)
        return test_loss, len(x_test), {"accuracy": test_accuracy}

# define save strategy for showing aggregated accuracy and aggregated loss at the end of each round
class SaveModelStrategy(fl.server.strategy.FedAvg):
    """FedAvg strategy that prints and records the global accuracy and loss of every round."""

    def aggregate_evaluate(self, rnd, results, failures):
        """Aggregate client evaluation results for round ``rnd``.

        The example-weighted accuracy and the aggregated loss are printed and
        appended to the module-level ``accuracy`` and ``loss`` lists.

        Returns:
            (aggregated loss, {"accuracy": aggregated accuracy}), or
            (None, {}) when no client returned a result.
        """
        # nothing to aggregate when no client reported back
        if not results:
            return None, {}

        aggregated_loss, _ = super().aggregate_evaluate(rnd, results, failures)

        # accuracy of every client weighted by its number of examples
        weighted_hits = 0
        total_examples = 0
        for _, res in results:
            weighted_hits += res.metrics["accuracy"] * res.num_examples
            total_examples += res.num_examples
        aggregated_accuracy = weighted_hits / total_examples

        print(f"comm_round: {rnd} | global_accuracy: {aggregated_accuracy} | global_loss: {aggregated_loss}")
        accuracy.append(aggregated_accuracy)
        loss.append(aggregated_loss)
        return aggregated_loss, {"accuracy": aggregated_accuracy}

# define initial parameters
num_classes = 10
# number of sub clients to be created from each non-iid class
num_intraclass_clients = 1
num_clients = num_classes * num_intraclass_clients
input_shape = (28, 28, 1)
batch_size = 128
# non-iid severity, 1 means each client will only have one class of data
severity = 1
learning_rate = 0.01
momentum = 0.9
comms_round = 10
# filled by SaveModelStrategy, one entry per communication round
accuracy = []
loss = []

# load dataset
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# scale pixels from [0,255] to [0,1] and append the channel axis
x_train = np.expand_dims(x_train.astype("float32") / 255, -1)
x_test = np.expand_dims(x_test.astype("float32") / 255, -1)

# one-hot encode the labels
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# create non-iid clients for simulation
clients = non_iid_x(x_train, y_train, severity, num_intraclass_clients)

# start Flower simulation
fl.simulation.start_simulation(
    client_fn=client_fn,
    num_clients=num_clients,
    client_resources={"num_cpus": 2},
    config=fl.server.ServerConfig(num_rounds=comms_round),
    strategy=SaveModelStrategy(),
)

# plot accuracy and loss of each round, one figure per metric
rounds = range(1, comms_round + 1)
for title, series in (('Accuracy Comparison', accuracy), ('Loss Comparison', loss)):
    plt.suptitle(title)
    plt.scatter(rounds, series, c=["red"])
    plt.show()

**MNIST DATASET | NON-IID | SGFL**

In [None]:
!pip install -q flwr["simulation"]
import flwr as fl
import tensorflow as tf
from tensorflow import keras
from keras.models import Model
from keras.models import load_model
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend
from keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import numpy as np

# create clients and assign data to them
def create_clients(image_list, label_list, num_clients, initial, start):
    """Shuffle the (image, label) pairs and deal them into one equal shard per client.

    Args:
        image_list: sequence of images.
        label_list: sequence of labels, aligned with ``image_list``.
        num_clients: number of shards / clients to create.
        initial: prefix of the client names (names are ``<initial>_<index>``).
        start: index given to the first client; the others follow consecutively.

    Returns:
        dict mapping each client name to its list of (image, label) tuples.
        Samples that do not fit into ``num_clients`` equal shards are dropped.
    """
    # client indices run from start to start + num_clients - 1
    names = [f'{initial}_{idx}' for idx in range(start, start + num_clients)]

    # randomize the order of the paired samples
    pairs = list(zip(image_list, label_list))
    np.random.shuffle(pairs)

    # contiguous shards of identical length
    per_client = len(pairs) // num_clients
    shards = []
    for begin in range(0, per_client * num_clients, per_client):
        shards.append(pairs[begin:begin + per_client])

    # number of clients must equal the number of shards
    assert len(shards) == len(names)
    return dict(zip(names, shards))

# make data of each client non-iid
def non_iid_x(image_list, label_list, severity, num_intraclass_clients):
    """Build non-IID clients: every client only sees the labels of one label group.

    The distinct labels are shuffled and cut into groups of ``severity``
    labels (the last group may be smaller). All samples whose label belongs
    to a group are split among ``num_intraclass_clients`` clients via
    ``create_clients``; clients are named ``client_<n>`` with consecutive ``n``.

    Args:
        image_list: sequence of images.
        label_list: sequence of labels aligned with ``image_list``
            (one row per sample, e.g. one-hot vectors).
        severity: number of distinct labels per client group.
        num_intraclass_clients: number of clients created for each group.

    Returns:
        dict mapping client names to lists of (image, label) tuples.
    """
    non_iid_x_clients = {}
    # create unique label list and shuffle
    unique_labels = np.unique(label_list, axis=0)
    np.random.shuffle(unique_labels)
    # create sub label lists of `severity` labels each
    label_groups = [unique_labels[i:i + severity] for i in range(0, len(unique_labels), severity)]

    # flatten every label to one row once, so each group is matched with a
    # single vectorised comparison instead of a Python loop over all samples
    label_rows = np.asarray(label_list).reshape(len(label_list), -1)
    count = 0

    for group in label_groups:
        group_rows = np.reshape(group, (len(group), -1))
        # a sample belongs to the group when its label equals ANY label of the
        # group; requiring equality with all of them never matches once the
        # group holds more than one label
        in_group = (label_rows[:, None, :] == group_rows[None, :, :]).all(axis=2).any(axis=1)
        selected = np.flatnonzero(in_group)
        images = [image_list[i] for i in selected]
        labels = [label_list[i] for i in selected]
        # create num_intraclass_clients clients from the group
        intraclass_clients = create_clients(images, labels, num_intraclass_clients, 'client', count)
        # append intraclass clients to the main clients' dict
        non_iid_x_clients.update(intraclass_clients)
        count += num_intraclass_clients

    return non_iid_x_clients

# batch assigned data of clients
def batch_data(data_shard, batch_size):
    """Convert a shard of (image, label) pairs into a shuffled, batched dataset.

    Args:
        data_shard: iterable of (image, label) pairs.
        batch_size: number of samples per batch.

    Returns:
        A ``tf.data.Dataset`` of (images, labels) batches whose shuffle
        buffer covers the whole shard.
    """
    # separate the shard into an image tuple and a label tuple
    shard_images, shard_labels = zip(*data_shard)
    slices = tf.data.Dataset.from_tensor_slices((list(shard_images), list(shard_labels)))
    shuffled = slices.shuffle(len(shard_labels))
    return shuffled.batch(batch_size)

# define initial SDCGAN model for clients in Flower framework
def client_fn(cid):
    """Create the Flower client for client id ``cid`` from a saved classifier.

    The model is loaded from 'mnist_5990.h5', made trainable and recompiled
    with Adam, then paired with the shard stored under ``client_<cid>`` in
    the module-level ``clients`` dict.
    """
    # NOTE(review): the file name matches the 'mnist_%04d.h5' pattern written
    # by summarize_performance; confirm that this checkpoint exists
    classifier = load_model('mnist_5990.h5')
    classifier.trainable = True

    optimizer = Adam(learning_rate=0.0002, beta_1=0.5)
    classifier.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    shard = clients[f"client_{cid}"]
    return FlowerClient(classifier, shard)

# configure FlowerClient parameters and functions
class FlowerClient(fl.client.NumPyClient):
    """Flower client that trains a Keras model on one client's data shard.

    Relies on the module-level ``batch_size``, ``x_test`` and ``y_test``.
    """

    def __init__(self, model, data):
        """Store the model and batch the client's data.

        Args:
            model: compiled Keras model.
            data: list of (image, label) pairs belonging to this client.
        """
        self.model = model
        # remember the real shard size: the server weights each client's
        # update by the number of examples it reports from fit(), and the
        # non-IID shards differ in size
        self.num_examples = len(data)
        self.data = batch_data(data, batch_size)

    def get_parameters(self, config):
        """Return the current model weights as a list of NumPy arrays."""
        return self.model.get_weights()

    def fit(self, parameters, config):
        """Train for one local epoch starting from the given global weights.

        Returns:
            (updated weights, number of training examples, empty metrics dict).
        """
        self.model.set_weights(parameters)
        self.model.fit(self.data, epochs=1, verbose=0, validation_data=(x_test, y_test))
        # report the whole shard size, not the size of a single batch
        return self.model.get_weights(), self.num_examples, {}

    def evaluate(self, parameters, config):
        """Evaluate the given global weights on the shared test set.

        Returns:
            (loss, number of test examples, {"accuracy": accuracy}).
        """
        self.model.set_weights(parameters)
        test_loss, test_accuracy = self.model.evaluate(x_test, y_test, verbose=0)
        return test_loss, len(x_test), {"accuracy": test_accuracy}

# define save strategy for showing aggregated accuracy and aggregated loss at the end of each round
class SaveModelStrategy(fl.server.strategy.FedAvg):
    """FedAvg strategy that prints and records the global accuracy and loss of every round."""

    def aggregate_evaluate(self, rnd, results, failures):
        """Aggregate client evaluation results for round ``rnd``.

        The example-weighted accuracy and the aggregated loss are printed and
        appended to the module-level ``accuracy`` and ``loss`` lists.

        Returns:
            (aggregated loss, {"accuracy": aggregated accuracy}), or
            (None, {}) when no client returned a result.
        """
        # nothing to aggregate when no client reported back
        if not results:
            return None, {}

        aggregated_loss, _ = super().aggregate_evaluate(rnd, results, failures)

        # accuracy of every client weighted by its number of examples
        weighted_hits = 0
        total_examples = 0
        for _, res in results:
            weighted_hits += res.metrics["accuracy"] * res.num_examples
            total_examples += res.num_examples
        aggregated_accuracy = weighted_hits / total_examples

        print(f"comm_round: {rnd} | global_accuracy: {aggregated_accuracy} | global_loss: {aggregated_loss}")
        accuracy.append(aggregated_accuracy)
        loss.append(aggregated_loss)
        return aggregated_loss, {"accuracy": aggregated_accuracy}

# custom activation function
def custom_activation(output):
    """Map class logits to a single score ``Z / (Z + 1)``.

    ``Z`` is the sum of ``exp(output)`` over the last axis; the reduced axis
    is kept, so the result has one value per sample.
    """
    exp_total = backend.sum(backend.exp(output), axis=-1, keepdims=True)
    return exp_total / (exp_total + 1.0)

# define the standalone supervised and unsupervised discriminator models
def define_discriminator(input_shape, num_classes):
    """Build the unsupervised and supervised discriminators on a shared trunk.

    Both models share three strided convolution stages, dropout and a dense
    layer with ``num_classes`` outputs. The supervised model applies softmax
    to that layer; the unsupervised model applies ``custom_activation``.

    Args:
        input_shape: shape of one input image.
        num_classes: number of output classes.

    Returns:
        (d_model, c_model): the compiled unsupervised and supervised models.
    """
    # image input
    inputs = layers.Input(shape=input_shape)

    # three downsampling stages: strided convolution followed by LeakyReLU
    features = inputs
    for _ in range(3):
        features = layers.Conv2D(128, (3, 3), strides=(2, 2), padding='same')(features)
        features = layers.LeakyReLU(alpha=0.2)(features)

    features = layers.Flatten()(features)
    features = layers.Dropout(0.4)(features)
    # output layer nodes, shared by both heads
    logits = layers.Dense(num_classes)(features)

    # supervised head: class probabilities
    class_probs = layers.Activation('softmax')(logits)
    c_model = Model(inputs, class_probs)
    c_model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(learning_rate=0.0002, beta_1=0.5),
        metrics=['accuracy'],
    )

    # unsupervised head: single real/fake score
    real_score = layers.Lambda(custom_activation)(logits)
    d_model = Model(inputs, real_score)
    d_model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.0002, beta_1=0.5))

    return d_model, c_model

# define the standalone generator model
def define_generator(latent_dim):
    """Build the (uncompiled) generator mapping latent vectors to images.

    A dense layer is reshaped to 7x7x128, upsampled twice with stride-2
    transposed convolutions and reduced to one tanh-activated channel.

    Args:
        latent_dim: length of the latent input vector.

    Returns:
        The Keras generator model.
    """
    # latent vector input
    latent = layers.Input(shape=(latent_dim,))

    # foundation for 7x7 image
    x = layers.Dense(128 * 7 * 7)(latent)
    x = layers.LeakyReLU(alpha=0.2)(x)
    x = layers.Reshape((7, 7, 128))(x)

    # two stride-2 upsampling stages: 7x7 -> 14x14 -> 28x28
    for _ in range(2):
        x = layers.Conv2DTranspose(128, (4, 4), strides=(2, 2), padding='same')(x)
        x = layers.LeakyReLU(alpha=0.2)(x)

    # single-channel output in [-1, 1]
    image = layers.Conv2D(1, (7, 7), activation='tanh', padding='same')(x)
    return Model(latent, image)

# define the combined generator and discriminator model, for updating the generator
def define_gan(g_model, d_model):
    """Chain generator and discriminator into the model used to update the generator.

    Args:
        g_model: generator model.
        d_model: unsupervised discriminator; its ``trainable`` flag is set to
            False here.

    Returns:
        The compiled combined model (latent input -> discriminator output).
    """
    # freeze the discriminator inside the combined model
    d_model.trainable = False
    # feed the generated image straight into the discriminator
    verdict = d_model(g_model.output)
    combined = Model(g_model.input, verdict)
    combined.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.0002, beta_1=0.5))
    return combined

# select a supervised subset of the dataset, ensures classes are balanced
def select_supervised_samples(dataset, num_samples, num_classes):
    """Pick a class-balanced labelled subset of the dataset.

    For every distinct label, ``int(num_samples / num_classes)`` images of
    that class are drawn at random with replacement.

    Args:
        dataset: pair ``(X, y)`` of images and labels (one label row per image).
        num_samples: total number of labelled samples wanted.
        num_classes: number of classes used to derive the per-class count.

    Returns:
        (images, labels) as NumPy arrays, grouped by class.
    """
    X, y = dataset
    unique_labels = np.unique(y, axis=0)
    n_per_class = int(num_samples / num_classes)
    # pair images and labels once instead of once per class
    pairs = list(zip(X, y))
    X_list, y_list = [], []

    for class_label in unique_labels:
        # get all images for this class
        X_with_class = [image for image, label in pairs if (class_label == label).all()]
        # choose random instances (with replacement)
        ix = np.random.randint(0, len(X_with_class), n_per_class)
        # add the picked images and one copy of the label per picked image
        X_list.extend(X_with_class[j] for j in ix)
        y_list.extend(class_label for _ in ix)

    return np.asarray(X_list), np.asarray(y_list)

# select real samples
def generate_real_samples(dataset, num_samples):
    """Draw ``num_samples`` random real samples (with replacement).

    Args:
        dataset: pair ``(images, labels)`` of NumPy arrays.
        num_samples: number of samples to draw.

    Returns:
        ``([X, labels], y)``: the drawn images and their labels, plus a
        ``(num_samples, 1)`` array of ones.
    """
    images, labels = dataset
    # random row indices into the dataset
    picks = np.random.randint(0, images.shape[0], num_samples)
    # target value 1 for every drawn sample
    ones = np.ones((num_samples, 1))
    return [images[picks], labels[picks]], ones

# generate points in latent space as input for the generator
def generate_latent_points(latent_dim, num_samples):
    """Sample standard-normal latent vectors for the generator.

    Returns:
        Array of shape ``(num_samples, latent_dim)``.
    """
    # draw all values as one flat vector, then fold it into a batch of inputs
    flat_noise = np.random.randn(latent_dim * num_samples)
    return flat_noise.reshape(num_samples, latent_dim)

# use the generator to generate fake examples, with class labels
def generate_fake_samples(generator, latent_dim, num_samples):
    """Generate ``num_samples`` fake images with the generator.

    Returns:
        (images, y): the generator's predictions and a ``(num_samples, 1)``
        array of zeros.
    """
    # latent points -> generated images
    noise = generate_latent_points(latent_dim, num_samples)
    fakes = generator.predict(noise)
    # target value 0 for every generated sample
    return fakes, np.zeros((num_samples, 1))

# generate samples and save as a plot and save the model
def summarize_performance(step, g_model, c_model, latent_dim, dataset, num_samples):
    """Show generated images, save the classifier and print its accuracy.

    Args:
        step: zero-based training step; the saved file name uses ``step + 1``.
        g_model: generator used to produce the preview images.
        c_model: supervised classifier that is saved and evaluated.
        latent_dim: size of the generator's latent input.
        dataset: pair ``(X, y)`` the classifier is evaluated on.
        num_samples: number of fake images to generate for the preview.
    """
    # prepare fake examples
    X, _ = generate_fake_samples(g_model, latent_dim, num_samples)
    # scale from [-1,1] to [0,1]
    X = (X + 1) / 2.0

    # plot images on a 10x10 grid; never index past the images actually
    # generated, so fewer than 100 samples no longer raises an IndexError
    for i in range(min(100, len(X))):
        plt.subplot(10, 10, 1 + i)
        plt.axis('off')
        plt.imshow(X[i, :, :, 0], cmap='gray_r')

    plt.show()

    # save the classifier model
    filename = 'mnist_%04d.h5' % (step+1)
    c_model.save(filename)

    # evaluate the classifier model
    X, y = dataset
    _, acc = c_model.evaluate(X, y, verbose=0)
    print('Classifier Accuracy: %.3f%%' % (acc * 100))

# train the generator and discriminator
def train(g_model, d_model, c_model, gan_model, dataset, latent_dim, num_samples, num_classes, num_epochs, num_batch):
    """Train the classifier, the discriminator and the generator together.

    Each step updates the supervised classifier on half a batch of labelled
    samples, the unsupervised discriminator on half a batch of real and half
    a batch of fake images, and the generator (through ``gan_model``) on a
    full batch of latent points. At the end of every epoch
    ``summarize_performance`` is called.

    Args:
        g_model, d_model, c_model, gan_model: the models to train.
        dataset: pair ``(X, y)`` of training images and labels.
        latent_dim: size of the generator's latent input.
        num_samples: size of the labelled subset (also passed on to
            ``summarize_performance``).
        num_classes: number of classes.
        num_epochs: number of passes over the dataset.
        num_batch: batch size.
    """
    # select supervised dataset
    X_sup, y_sup = select_supervised_samples(dataset, num_samples, num_classes)

    # batches per epoch, total training steps and half-batch size
    steps_per_epoch = int(dataset[0].shape[0] / num_batch)
    total_steps = steps_per_epoch * num_epochs
    half = int(num_batch / 2)
    print('num_epochs=%d, num_batch=%d, 1/2=%d, b/e=%d, steps=%d' % (num_epochs, num_batch, half, steps_per_epoch, total_steps))

    for step in range(total_steps):
        # update supervised discriminator (c)
        (sup_images, sup_labels), _ = generate_real_samples([X_sup, y_sup], half)
        c_loss, c_acc = c_model.train_on_batch(sup_images, sup_labels)

        # update unsupervised discriminator (d) on real, then on fake images
        (real_images, _), real_targets = generate_real_samples(dataset, half)
        d_loss_real = d_model.train_on_batch(real_images, real_targets)
        fake_images, fake_targets = generate_fake_samples(g_model, latent_dim, half)
        d_loss_fake = d_model.train_on_batch(fake_images, fake_targets)

        # update generator (g): latent points with target value 1
        noise = generate_latent_points(latent_dim, num_batch)
        wanted = np.ones((num_batch, 1))
        g_loss = gan_model.train_on_batch(noise, wanted)

        # summarize loss on this batch
        print('>%d, c[%.3f,%.0f], d[%.3f,%.3f], g[%.3f]' % (step+1, c_loss, c_acc*100, d_loss_real, d_loss_fake, g_loss))

        # evaluate the model performance at the end of every epoch
        if (step + 1) % steps_per_epoch != 0:
            continue
        summarize_performance(step, g_model, c_model, latent_dim, dataset, num_samples)

# define initial parameters
# Script flow: build non-IID clients, collect and augment a small share of
# every client's data, train the GAN/classifier on it, then run the Flower
# simulation with clients created by client_fn.
num_classes            = 10
# number of sub clients to be created from each non-iid class
num_intraclass_clients = 1
num_clients            = num_classes * num_intraclass_clients
input_shape            = (28, 28, 1)
batch_size             = 128
# non-iid severity, 1 means each client will only have one class of data
severity               = 1
comms_round            = 10
# percentage of every client's shard that is collected for the shared training set
clientDataPercent      = 5
# size of the latent space
latent_dim             = 128
# passed to train() as num_samples: size of the labelled subset and number of preview images
num_samples            = 100
num_epochs             = 20
num_batch              = 100
# filled by SaveModelStrategy, one entry per communication round
accuracy               = list()
loss                   = list()
# (image, label) pairs collected from all clients
clientsData            = list()

# load dataset
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# scale dataset from [0,255] to [-1,1] (the generator's tanh output uses the same range)
x_train = x_train.astype("float32")
x_test  = x_test.astype("float32")
x_train = (x_train - 127.5) / 127.5
x_test  = (x_test - 127.5) / 127.5
# expand to 3d, add channels
x_train = np.expand_dims(x_train, -1)
x_test  = np.expand_dims(x_test, -1)
# change labels to categorical format
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test  = keras.utils.to_categorical(y_test, num_classes)

# create non-iid clients for simulation
clients = non_iid_x(x_train, y_train, severity, num_intraclass_clients)

# collect a percent of clients data
for (client_name, data) in clients.items():

    # shuffles the client's shard in place, then keeps the first clientDataPercent percent
    np.random.shuffle(data)
    data_temp                = data[:int(len(data) * clientDataPercent / 100)]
    x_client, y_client       = list(zip(*data_temp))
    x_client                 = np.asarray(x_client)
    y_client                 = np.asarray(y_client)
    # lightly augment the collected samples; one batch the size of the subset is drawn
    clientgen                = ImageDataGenerator(rotation_range=5, width_shift_range=0.02, height_shift_range=0.02, shear_range=0.1, zoom_range=0.1, fill_mode='nearest')
    clientgen.fit(x_client)
    g                        = clientgen.flow(x_client, y_client, batch_size=len(x_client), shuffle=True)
    x_redundant, y_redundant = next(g)
    clientsData.extend(list(zip(x_redundant, y_redundant)))
    np.random.shuffle(clientsData)

# final shuffle of the pooled samples, then split back into image and label arrays
np.random.shuffle(clientsData)
x_clients, y_clients = list(zip(*clientsData))
x_clients            = np.asarray(x_clients)
y_clients            = np.asarray(y_clients)

# augment the collected data from clients
datagen              = ImageDataGenerator(rotation_range=10, width_shift_range=0.05, height_shift_range=0.05, shear_range=0.2, zoom_range=0.2, fill_mode='nearest')
datagen.fit(x_clients)
g                    = datagen.flow(x_clients, y_clients, batch_size=len(x_clients), shuffle=True)
# create a new dataset based on augmented data
x_created, y_created = next(g)
# number of further augmented batches to draw; simplifies to int(100 / clientDataPercent) - 1
last_step            = int(((len(x_clients) * 100) / clientDataPercent) / len(x_clients)) - 1
current_step         = 1

# append one augmented batch of len(x_clients) samples per step
while current_step <= last_step:

    x_temp, y_temp = next(g)
    x_created = np.concatenate((x_created, x_temp))
    y_created = np.concatenate((y_created, y_temp))
    current_step += 1

# create the discriminator models
d_model, c_model = define_discriminator(input_shape, num_classes)
# create the generator
g_model          = define_generator(latent_dim)
# create the gan
gan_model        = define_gan(g_model, d_model)
# train model with the created dataset
train(g_model, d_model, c_model, gan_model, [x_created, y_created], latent_dim, num_samples, num_classes, num_epochs, num_batch)

# evaluate the model
_, train_acc = c_model.evaluate(x_train, y_train, verbose=0)
print('Train Accuracy: %.3f%%' % (train_acc * 100))
_, test_acc  = c_model.evaluate(x_test, y_test, verbose=0)
print('Test Accuracy: %.3f%%' % (test_acc * 100))

# start Flower simulation
fl.simulation.start_simulation(
    client_fn=client_fn,
    num_clients=num_clients,
    client_resources={"num_cpus": 2},
    config=fl.server.ServerConfig(num_rounds=comms_round),
    strategy=SaveModelStrategy()
)

# plot accuracy and loss of each round
plt.suptitle('Accuracy Comparison')
plt.scatter(range(1, comms_round+1), accuracy, c=["red"])
plt.show()
plt.suptitle('Loss Comparison')
plt.scatter(range(1, comms_round+1), loss, c=["red"])
plt.show()

**EMNIST DATASET | IID | CNN**

In [None]:
!pip install -q flwr["simulation"]
import flwr as fl
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# create clients and assign data to them
def create_clients(image_list, label_list, num_clients, initial):
    """Shuffle the (image, label) pairs and deal them into one equal shard per client.

    Args:
        image_list: sequence of images.
        label_list: sequence of labels, aligned with ``image_list``.
        num_clients: number of shards / clients to create.
        initial: prefix of the client names (names are ``<initial>_<index>``).

    Returns:
        dict mapping each client name to its list of (image, label) tuples.
        Samples that do not fit into ``num_clients`` equal shards are dropped.
    """
    # client indices run from 0 to num_clients - 1
    names = [f'{initial}_{idx}' for idx in range(num_clients)]

    # randomize the order of the paired samples
    pairs = list(zip(image_list, label_list))
    np.random.shuffle(pairs)

    # contiguous shards of identical length
    per_client = len(pairs) // num_clients
    shards = []
    for begin in range(0, per_client * num_clients, per_client):
        shards.append(pairs[begin:begin + per_client])

    # number of clients must equal the number of shards
    assert len(shards) == len(names)
    return dict(zip(names, shards))

# batch assigned data of clients
def batch_data(data_shard, batch_size):
    """Convert a shard of (image, label) pairs into a shuffled, batched dataset.

    Args:
        data_shard: iterable of (image, label) pairs.
        batch_size: number of samples per batch.

    Returns:
        A ``tf.data.Dataset`` of (images, labels) batches whose shuffle
        buffer covers the whole shard.
    """
    # separate the shard into an image tuple and a label tuple
    shard_images, shard_labels = zip(*data_shard)
    slices = tf.data.Dataset.from_tensor_slices((list(shard_images), list(shard_labels)))
    shuffled = slices.shuffle(len(shard_labels))
    return shuffled.batch(batch_size)

# define initial CNN model for clients in Flower framework
def client_fn(cid):
    """Create the Flower client for client id ``cid``.

    Builds a fresh CNN (two conv/max-pool stages, dropout, softmax output),
    compiles it with Adam using the module-level ``learning_rate``,
    ``comms_round`` and ``momentum`` settings, and pairs it with the shard
    stored under ``client_<cid>`` in the module-level ``clients`` dict.
    """
    cnn_layers = [
        layers.Conv2D(32, kernel_size=(3, 3), activation="relu", input_shape=input_shape),
        layers.MaxPool2D(pool_size=(2, 2)),
        layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
        layers.MaxPool2D(pool_size=(2, 2)),
        layers.Flatten(),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation="softmax"),
    ]
    model = keras.Sequential(cnn_layers)

    # `momentum` is used as Adam's beta_1; the decay is spread over the rounds
    optimizer = Adam(
        learning_rate=learning_rate,
        decay=learning_rate / comms_round,
        beta_1=momentum,
    )
    model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])

    shard = clients[f"client_{cid}"]
    return FlowerClient(model, shard)

# configure FlowerClient parameters and functions
class FlowerClient(fl.client.NumPyClient):
    """Flower client that trains a Keras model on one client's data shard.

    Relies on the module-level ``batch_size``, ``x_test`` and ``y_test``.
    """

    def __init__(self, model, data):
        """Store the model and batch the client's data.

        Args:
            model: compiled Keras model.
            data: list of (image, label) pairs belonging to this client.
        """
        self.model = model
        # remember the real shard size: the server weights each client's
        # update by the number of examples it reports from fit()
        self.num_examples = len(data)
        self.data = batch_data(data, batch_size)

    def get_parameters(self, config):
        """Return the current model weights as a list of NumPy arrays."""
        return self.model.get_weights()

    def fit(self, parameters, config):
        """Train for one local epoch starting from the given global weights.

        Returns:
            (updated weights, number of training examples, empty metrics dict).
        """
        self.model.set_weights(parameters)
        self.model.fit(self.data, epochs=1, verbose=0, validation_data=(x_test, y_test))
        # report the whole shard size, not the size of a single batch
        return self.model.get_weights(), self.num_examples, {}

    def evaluate(self, parameters, config):
        """Evaluate the given global weights on the shared test set.

        Returns:
            (loss, number of test examples, {"accuracy": accuracy}).
        """
        self.model.set_weights(parameters)
        test_loss, test_accuracy = self.model.evaluate(x_test, y_test, verbose=0)
        return test_loss, len(x_test), {"accuracy": test_accuracy}

# define save strategy for showing aggregated accuracy and aggregated loss at the end of each round
class SaveModelStrategy(fl.server.strategy.FedAvg):
    """FedAvg variant that prints and records per-round evaluation results."""

    def aggregate_evaluate(self, rnd, results, failures):
        """Aggregate client evaluation results for round ``rnd``.

        The loss comes from the parent strategy; the accuracy is the
        example-weighted mean of the clients' ``"accuracy"`` metric. Both
        values are printed and appended to the module-level ``accuracy`` and
        ``loss`` lists.

        Returns:
            ``(None, {})`` when there are no results, otherwise
            ``(aggregated_loss, {"accuracy": aggregated_accuracy})``.
        """
        if not results:
            return None, {}

        aggregated_loss, _ = super().aggregate_evaluate(rnd, results, failures)
        weighted_hits = sum(r.metrics["accuracy"] * r.num_examples for _, r in results)
        total_examples = sum(r.num_examples for _, r in results)
        aggregated_accuracy = weighted_hits / total_examples
        print(f"comm_round: {rnd} | global_accuracy: {aggregated_accuracy} | global_loss: {aggregated_loss}")
        # keep the per-round history for the plots drawn after the simulation
        accuracy.append(aggregated_accuracy)
        loss.append(aggregated_loss)
        return aggregated_loss, {"accuracy": aggregated_accuracy}

# initial parameters of the experiment
num_classes = 47
num_clients = 47
input_shape = (28, 28, 1)
batch_size = 128
learning_rate = 0.01
momentum = 0.9
comms_round = 10
# per-round histories, filled by SaveModelStrategy.aggregate_evaluate
accuracy = []
loss = []

# read dataset from csv files and reshape it to the input_shape
# (column 0 is used as the label, columns 1..784 as the flattened image)
dateset_train = pd.read_csv('emnist-balanced-train.csv', header=None)
dateset_train = dateset_train.to_numpy()
x_train_temp  = dateset_train[:,1:785]
# -1 lets the image count follow the file instead of a hard-coded row total
x_train       = x_train_temp.reshape(-1, 28, 28).astype(np.uint8)
y_train       = dateset_train[:,0]

dateset_test  = pd.read_csv('emnist-balanced-test.csv', header=None)
dateset_test  = dateset_test.to_numpy()
x_test_temp   = dateset_test[:,1:785]
x_test        = x_test_temp.reshape(-1, 28, 28).astype(np.uint8)
y_test        = dateset_test[:,0]

# scale dataset from [0,255] to [0,1]
x_train = x_train.astype("float32") / 255
x_test  = x_test.astype("float32") / 255
# expand to 3d, add channels
x_train = np.expand_dims(x_train, -1)
x_test  = np.expand_dims(x_test, -1)
# change labels to categorical format
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test  = keras.utils.to_categorical(y_test, num_classes)

# create clients for simulation
clients = create_clients(x_train, y_train, num_clients, 'client')

# start Flower simulation
fl.simulation.start_simulation(
    client_fn=client_fn,
    num_clients=num_clients,
    client_resources={"num_cpus": 2},
    config=fl.server.ServerConfig(num_rounds=comms_round),
    strategy=SaveModelStrategy()
)

# plot accuracy and loss of each round; the x axis follows the number of
# rounds actually recorded, because a round without evaluation results
# appends nothing to the histories and a fixed comms_round axis would then
# not match their length
recorded_rounds = range(1, len(accuracy) + 1)
plt.suptitle('Accuracy Comparison')
plt.scatter(recorded_rounds, accuracy, c=["red"])
plt.show()
plt.suptitle('Loss Comparison')
plt.scatter(recorded_rounds, loss, c=["red"])
plt.show()

**EMNIST DATASET | NON-IID | CNN**

In [None]:
!pip install -q flwr["simulation"]
import flwr as fl
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# create clients and assign data to them
def create_clients(image_list, label_list, num_clients, initial, start):
    """Shuffle the samples and split them into equally sized client shards.

    Args:
        image_list: Sequence of samples.
        label_list: Sequence of labels, parallel to ``image_list``.
        num_clients: Number of shards/clients to create (must be positive).
        initial: Prefix of every client name.
        start: Index of the first client; names run from
            ``{initial}_{start}`` to ``{initial}_{start + num_clients - 1}``.

    Returns:
        Dict mapping each client name to its list of ``(sample, label)``
        pairs. Every shard holds ``len(data) // num_clients`` pairs; the
        remainder of the shuffled data is left out.

    Raises:
        ValueError: If ``num_clients`` is not positive or there are fewer
            samples than clients.
    """
    if num_clients <= 0:
        raise ValueError(f"num_clients must be positive, got {num_clients}")

    data = list(zip(image_list, label_list))
    size = len(data) // num_clients
    if size == 0:
        # a zero shard size would otherwise surface as an opaque range() error
        raise ValueError(
            f"cannot split {len(data)} samples across {num_clients} clients"
        )

    # randomize the data before sharding
    np.random.shuffle(data)
    client_names = ['{}_{}'.format(initial, i) for i in range(start, start + num_clients)]
    # range() yields exactly num_clients offsets, so names and shards line up
    shards = [data[i:i + size] for i in range(0, size * num_clients, size)]
    return dict(zip(client_names, shards))

# make data of each client non-iid
def non_iid_x(image_list, label_list, severity, num_intraclass_clients):
    """Create clients whose data covers only a few labels each.

    The unique labels are shuffled and cut into groups of ``severity``
    labels (the last group may be smaller). All samples whose label belongs
    to a group are handed to ``create_clients``, which splits them across
    ``num_intraclass_clients`` clients.

    Args:
        image_list: Sequence of samples.
        label_list: Sequence of labels (scalars or vectors such as one-hot
            rows), parallel to ``image_list``.
        severity: Number of distinct labels per group.
        num_intraclass_clients: Number of clients created from each group.

    Returns:
        Dict mapping ``client_{n}`` names to lists of ``(sample, label)``
        pairs; client numbering is consecutive across groups.
    """

    def label_in_group(label, group):
        # compare the label against every row of the group and accept it if
        # any row matches completely; a plain ``(group == label).all()`` only
        # works for groups holding a single label
        hits = np.asarray(group == label)
        if hits.ndim > 1:
            hits = hits.reshape(len(group), -1).all(axis=1)
        return bool(hits.any())

    non_iid_x_clients = dict()
    # create unique label list and shuffle
    unique_labels = np.unique(label_list, axis=0)
    np.random.shuffle(unique_labels)
    # create sub label lists of `severity` labels each
    sub_lab_list = [unique_labels[i:i + severity] for i in range(0, len(unique_labels), severity)]
    # pair samples with labels once instead of once per group
    samples = list(zip(image_list, label_list))
    count = 0

    for item in sub_lab_list:
        # get all samples whose label is part of this group
        class_data = [(image, label) for (image, label) in samples if label_in_group(label, item)]
        # decouple tuple list into separate image and label lists
        images, labels = zip(*class_data)
        # create num_intraclass_clients clients from the group
        intraclass_clients = create_clients(list(images), list(labels), num_intraclass_clients, 'client', count)
        # append intraclass clients to the main clients dict
        non_iid_x_clients.update(intraclass_clients)
        count += num_intraclass_clients

    return non_iid_x_clients

# batch assigned data of clients
def batch_data(data_shard, batch_size):
    """Turn one client's shard into a shuffled, batched ``tf.data.Dataset``.

    Args:
        data_shard: Sequence of ``(sample, label)`` pairs.
        batch_size: Number of pairs per emitted batch.

    Returns:
        A dataset yielding ``(samples, labels)`` batches; the shuffle buffer
        is as large as the shard itself.
    """
    # split the (sample, label) pairs into two parallel sequences
    samples, targets = zip(*data_shard)
    shard_size = len(targets)
    pairs = tf.data.Dataset.from_tensor_slices((list(samples), list(targets)))
    return pairs.shuffle(shard_size).batch(batch_size)

# define initial CNN model for clients in Flower framework
def client_fn(cid):
    """Build the Flower client identified by ``cid``.

    A fresh CNN is created and compiled from the module-level settings
    (``input_shape``, ``num_classes``, ``learning_rate``, ``comms_round``,
    ``momentum``) and paired with the shard stored under ``client_{cid}``
    in the module-level ``clients`` dict.
    """
    # two conv/pool stages, then dropout and a softmax classifier
    stack = [
        layers.Conv2D(32, kernel_size=(3, 3), activation="relu", input_shape=input_shape),
        layers.MaxPool2D(pool_size=(2, 2)),
        layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
        layers.MaxPool2D(pool_size=(2, 2)),
        layers.Flatten(),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation="softmax"),
    ]
    model = keras.Sequential(stack)
    optimizer = Adam(
        learning_rate=learning_rate,
        decay=learning_rate / comms_round,
        beta_1=momentum,
    )
    model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])
    shard = clients[f"client_{cid}"]
    return FlowerClient(model, shard)

# configure FlowerClient parameters and functions
class FlowerClient(fl.client.NumPyClient):
    """Flower NumPy client wrapping one Keras model and one client's shard.

    Training uses the client's own batched data; evaluation uses the
    module-level ``x_test`` / ``y_test`` arrays.
    """

    def __init__(self, model, data):
        """Store the model and batch the shard.

        Args:
            model: Compiled Keras model.
            data: Iterable of ``(sample, label)`` pairs owned by this client.
        """
        shard = list(data)
        self.model = model
        # size of the whole shard; reported from fit() so the server can
        # weight this client's update by its real amount of data
        self.num_examples = len(shard)
        self.data = batch_data(shard, batch_size)

    def get_parameters(self, config):
        """Return the current model weights."""
        return self.model.get_weights()

    def fit(self, parameters, config):
        """Train one epoch from ``parameters``.

        Returns:
            ``(weights, num_examples, metrics)`` where ``num_examples`` is the
            number of samples in the client's shard (previously the size of
            the first batch only was reported).
        """
        self.model.set_weights(parameters)
        # NOTE: the validation pass result is not used anywhere in this method
        self.model.fit(self.data, epochs=1, verbose=0, validation_data=(x_test, y_test))
        return self.model.get_weights(), self.num_examples, {}

    def evaluate(self, parameters, config):
        """Evaluate ``parameters`` on the test arrays.

        Returns:
            ``(loss, number of test samples, {"accuracy": accuracy})``.
        """
        self.model.set_weights(parameters)
        loss, accuracy = self.model.evaluate(x_test, y_test, verbose=0)
        return loss, len(x_test), {"accuracy": accuracy}

# define save strategy for showing aggregated accuracy and aggregated loss at the end of each round
class SaveModelStrategy(fl.server.strategy.FedAvg):
    """FedAvg variant that prints and records per-round evaluation results."""

    def aggregate_evaluate(self, rnd, results, failures):
        """Aggregate client evaluation results for round ``rnd``.

        The loss comes from the parent strategy; the accuracy is the
        example-weighted mean of the clients' ``"accuracy"`` metric. Both
        values are printed and appended to the module-level ``accuracy`` and
        ``loss`` lists.

        Returns:
            ``(None, {})`` when there are no results, otherwise
            ``(aggregated_loss, {"accuracy": aggregated_accuracy})``.
        """
        if not results:
            return None, {}

        aggregated_loss, _ = super().aggregate_evaluate(rnd, results, failures)
        weighted_hits = sum(r.metrics["accuracy"] * r.num_examples for _, r in results)
        total_examples = sum(r.num_examples for _, r in results)
        aggregated_accuracy = weighted_hits / total_examples
        print(f"comm_round: {rnd} | global_accuracy: {aggregated_accuracy} | global_loss: {aggregated_loss}")
        # keep the per-round history for the plots drawn after the simulation
        accuracy.append(aggregated_accuracy)
        loss.append(aggregated_loss)
        return aggregated_loss, {"accuracy": aggregated_accuracy}

# initial parameters of the experiment
num_classes = 47
# number of sub clients to be created from each non-iid class
num_intraclass_clients = 1
num_clients = num_classes * num_intraclass_clients
input_shape = (28, 28, 1)
batch_size = 128
# non-iid severity, 1 means each client will only have one class of data
severity = 1
learning_rate = 0.001
momentum = 0.5
comms_round = 10
# per-round histories, filled by SaveModelStrategy.aggregate_evaluate
accuracy = []
loss = []

# read dataset from csv files and reshape it to the input_shape
# (column 0 is used as the label, columns 1..784 as the flattened image)
dateset_train = pd.read_csv('emnist-balanced-train.csv', header=None)
dateset_train = dateset_train.to_numpy()
x_train_temp  = dateset_train[:,1:785]
# -1 lets the image count follow the file instead of a hard-coded row total
x_train       = x_train_temp.reshape(-1, 28, 28).astype(np.uint8)
y_train       = dateset_train[:,0]

dateset_test  = pd.read_csv('emnist-balanced-test.csv', header=None)
dateset_test  = dateset_test.to_numpy()
x_test_temp   = dateset_test[:,1:785]
x_test        = x_test_temp.reshape(-1, 28, 28).astype(np.uint8)
y_test        = dateset_test[:,0]

# scale dataset from [0,255] to [0,1]
x_train = x_train.astype("float32") / 255
x_test  = x_test.astype("float32") / 255
# expand to 3d, add channels
x_train = np.expand_dims(x_train, -1)
x_test  = np.expand_dims(x_test, -1)
# change labels to categorical format
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test  = keras.utils.to_categorical(y_test, num_classes)

# create non-iid clients for simulation
clients = non_iid_x(x_train, y_train, severity, num_intraclass_clients)

# start Flower simulation
fl.simulation.start_simulation(
    client_fn=client_fn,
    num_clients=num_clients,
    client_resources={"num_cpus": 2},
    config=fl.server.ServerConfig(num_rounds=comms_round),
    strategy=SaveModelStrategy()
)

# plot accuracy and loss of each round; the x axis follows the number of
# rounds actually recorded, because a round without evaluation results
# appends nothing to the histories and a fixed comms_round axis would then
# not match their length
recorded_rounds = range(1, len(accuracy) + 1)
plt.suptitle('Accuracy Comparison')
plt.scatter(recorded_rounds, accuracy, c=["red"])
plt.show()
plt.suptitle('Loss Comparison')
plt.scatter(recorded_rounds, loss, c=["red"])
plt.show()

**EMNIST DATASET | NON-IID | SGFL**

In [None]:
!pip install -q flwr["simulation"]
import flwr as fl
import tensorflow as tf
from tensorflow import keras
from keras.models import Model
from keras.models import load_model
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend
from keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# create clients and assign data to them
def create_clients(image_list, label_list, num_clients, initial, start):
    """Shuffle the samples and split them into equally sized client shards.

    Args:
        image_list: Sequence of samples.
        label_list: Sequence of labels, parallel to ``image_list``.
        num_clients: Number of shards/clients to create (must be positive).
        initial: Prefix of every client name.
        start: Index of the first client; names run from
            ``{initial}_{start}`` to ``{initial}_{start + num_clients - 1}``.

    Returns:
        Dict mapping each client name to its list of ``(sample, label)``
        pairs. Every shard holds ``len(data) // num_clients`` pairs; the
        remainder of the shuffled data is left out.

    Raises:
        ValueError: If ``num_clients`` is not positive or there are fewer
            samples than clients.
    """
    if num_clients <= 0:
        raise ValueError(f"num_clients must be positive, got {num_clients}")

    data = list(zip(image_list, label_list))
    size = len(data) // num_clients
    if size == 0:
        # a zero shard size would otherwise surface as an opaque range() error
        raise ValueError(
            f"cannot split {len(data)} samples across {num_clients} clients"
        )

    # randomize the data before sharding
    np.random.shuffle(data)
    client_names = ['{}_{}'.format(initial, i) for i in range(start, start + num_clients)]
    # range() yields exactly num_clients offsets, so names and shards line up
    shards = [data[i:i + size] for i in range(0, size * num_clients, size)]
    return dict(zip(client_names, shards))

# make data of each client non-iid
def non_iid_x(image_list, label_list, severity, num_intraclass_clients):
    """Create clients whose data covers only a few labels each.

    The unique labels are shuffled and cut into groups of ``severity``
    labels (the last group may be smaller). All samples whose label belongs
    to a group are handed to ``create_clients``, which splits them across
    ``num_intraclass_clients`` clients.

    Args:
        image_list: Sequence of samples.
        label_list: Sequence of labels (scalars or vectors such as one-hot
            rows), parallel to ``image_list``.
        severity: Number of distinct labels per group.
        num_intraclass_clients: Number of clients created from each group.

    Returns:
        Dict mapping ``client_{n}`` names to lists of ``(sample, label)``
        pairs; client numbering is consecutive across groups.
    """

    def label_in_group(label, group):
        # compare the label against every row of the group and accept it if
        # any row matches completely; a plain ``(group == label).all()`` only
        # works for groups holding a single label
        hits = np.asarray(group == label)
        if hits.ndim > 1:
            hits = hits.reshape(len(group), -1).all(axis=1)
        return bool(hits.any())

    non_iid_x_clients = dict()
    # create unique label list and shuffle
    unique_labels = np.unique(label_list, axis=0)
    np.random.shuffle(unique_labels)
    # create sub label lists of `severity` labels each
    sub_lab_list = [unique_labels[i:i + severity] for i in range(0, len(unique_labels), severity)]
    # pair samples with labels once instead of once per group
    samples = list(zip(image_list, label_list))
    count = 0

    for item in sub_lab_list:
        # get all samples whose label is part of this group
        class_data = [(image, label) for (image, label) in samples if label_in_group(label, item)]
        # decouple tuple list into separate image and label lists
        images, labels = zip(*class_data)
        # create num_intraclass_clients clients from the group
        intraclass_clients = create_clients(list(images), list(labels), num_intraclass_clients, 'client', count)
        # append intraclass clients to the main clients dict
        non_iid_x_clients.update(intraclass_clients)
        count += num_intraclass_clients

    return non_iid_x_clients

# batch assigned data of clients
def batch_data(data_shard, batch_size):
    """Turn one client's shard into a shuffled, batched ``tf.data.Dataset``.

    Args:
        data_shard: Sequence of ``(sample, label)`` pairs.
        batch_size: Number of pairs per emitted batch.

    Returns:
        A dataset yielding ``(samples, labels)`` batches; the shuffle buffer
        is as large as the shard itself.
    """
    # split the (sample, label) pairs into two parallel sequences
    samples, targets = zip(*data_shard)
    shard_size = len(targets)
    pairs = tf.data.Dataset.from_tensor_slices((list(samples), list(targets)))
    return pairs.shuffle(shard_size).batch(batch_size)

# define initial SDCGAN model for clients in Flower framework
def client_fn(cid):
    """Build the Flower client identified by ``cid``.

    The classifier is loaded from the ``emnist_16920.h5`` file, marked
    trainable, re-compiled, and paired with the shard stored under
    ``client_{cid}`` in the module-level ``clients`` dict.
    """
    classifier = load_model('emnist_16920.h5')
    classifier.trainable = True
    classifier.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(learning_rate=0.001, beta_1=0.5),
        metrics=['accuracy'],
    )
    shard = clients[f"client_{cid}"]
    return FlowerClient(classifier, shard)

# configure FlowerClient parameters and functions
class FlowerClient(fl.client.NumPyClient):
    """Flower NumPy client wrapping one Keras model and one client's shard.

    Training uses the client's own batched data; evaluation uses the
    module-level ``x_test`` / ``y_test`` arrays.
    """

    def __init__(self, model, data):
        """Store the model and batch the shard.

        Args:
            model: Compiled Keras model.
            data: Iterable of ``(sample, label)`` pairs owned by this client.
        """
        shard = list(data)
        self.model = model
        # size of the whole shard; reported from fit() so the server can
        # weight this client's update by its real amount of data
        self.num_examples = len(shard)
        self.data = batch_data(shard, batch_size)

    def get_parameters(self, config):
        """Return the current model weights."""
        return self.model.get_weights()

    def fit(self, parameters, config):
        """Train one epoch from ``parameters``.

        Returns:
            ``(weights, num_examples, metrics)`` where ``num_examples`` is the
            number of samples in the client's shard (previously the size of
            the first batch only was reported).
        """
        self.model.set_weights(parameters)
        # NOTE: the validation pass result is not used anywhere in this method
        self.model.fit(self.data, epochs=1, verbose=0, validation_data=(x_test, y_test))
        return self.model.get_weights(), self.num_examples, {}

    def evaluate(self, parameters, config):
        """Evaluate ``parameters`` on the test arrays.

        Returns:
            ``(loss, number of test samples, {"accuracy": accuracy})``.
        """
        self.model.set_weights(parameters)
        loss, accuracy = self.model.evaluate(x_test, y_test, verbose=0)
        return loss, len(x_test), {"accuracy": accuracy}

# define save strategy for showing aggregated accuracy and aggregated loss at the end of each round
class SaveModelStrategy(fl.server.strategy.FedAvg):
    """FedAvg variant that prints and records per-round evaluation results."""

    def aggregate_evaluate(self, rnd, results, failures):
        """Aggregate client evaluation results for round ``rnd``.

        The loss comes from the parent strategy; the accuracy is the
        example-weighted mean of the clients' ``"accuracy"`` metric. Both
        values are printed and appended to the module-level ``accuracy`` and
        ``loss`` lists.

        Returns:
            ``(None, {})`` when there are no results, otherwise
            ``(aggregated_loss, {"accuracy": aggregated_accuracy})``.
        """
        if not results:
            return None, {}

        aggregated_loss, _ = super().aggregate_evaluate(rnd, results, failures)
        weighted_hits = sum(r.metrics["accuracy"] * r.num_examples for _, r in results)
        total_examples = sum(r.num_examples for _, r in results)
        aggregated_accuracy = weighted_hits / total_examples
        print(f"comm_round: {rnd} | global_accuracy: {aggregated_accuracy} | global_loss: {aggregated_loss}")
        # keep the per-round history for the plots drawn after the simulation
        accuracy.append(aggregated_accuracy)
        loss.append(aggregated_loss)
        return aggregated_loss, {"accuracy": aggregated_accuracy}

# custom activation function
def custom_activation(output):
    """Return ``Z / (Z + 1)`` where ``Z`` sums ``exp(output)`` over the last axis.

    The last axis is kept with size one (``keepdims=True``).
    """
    exp_total = backend.sum(backend.exp(output), axis=-1, keepdims=True)
    return exp_total / (exp_total + 1.0)

# define the standalone supervised and unsupervised discriminator models
def define_discriminator(input_shape, num_classes):
    """Build the two discriminator models that share one feature stack.

    Both models share the input, the three strided convolutions and the final
    ``Dense(num_classes)`` layer. The supervised model adds a softmax on top;
    the unsupervised model adds ``custom_activation`` through a Lambda layer.

    Returns:
        ``(d_model, c_model)``: the unsupervised model (binary cross-entropy)
        and the supervised classifier (categorical cross-entropy, accuracy
        metric), both compiled.
    """
    input_image = layers.Input(shape=input_shape)

    # three stride-2 convolutions, each followed by a leaky ReLU
    features = input_image
    for _ in range(3):
        features = layers.Conv2D(128, (3, 3), strides=(2, 2), padding='same')(features)
        features = layers.LeakyReLU(alpha=0.2)(features)
    features = layers.Flatten()(features)
    features = layers.Dropout(0.4)(features)
    # one output node per class, shared by both heads
    logits = layers.Dense(num_classes)(features)

    # supervised head
    c_model = Model(input_image, layers.Activation('softmax')(logits))
    c_model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(learning_rate=0.0002, beta_1=0.5),
        metrics=['accuracy'],
    )

    # unsupervised head
    d_model = Model(input_image, layers.Lambda(custom_activation)(logits))
    d_model.compile(
        loss='binary_crossentropy',
        optimizer=Adam(learning_rate=0.0002, beta_1=0.5),
    )
    return d_model, c_model

# define the standalone generator model
def define_generator(latent_dim):
    """Build the (uncompiled) generator model.

    A latent vector of length ``latent_dim`` is projected to a 7x7x128
    tensor, upsampled twice with stride-2 transposed convolutions, and
    reduced to a single ``tanh`` channel.
    """
    input_latent = layers.Input(shape=(latent_dim,))

    # foundation for the 7x7 feature map
    gen = layers.Dense(128 * 7 * 7)(input_latent)
    gen = layers.LeakyReLU(alpha=0.2)(gen)
    gen = layers.Reshape((7, 7, 128))(gen)

    # two stride-2 upsampling stages: 7x7 to 14x14 to 28x28
    for _ in range(2):
        gen = layers.Conv2DTranspose(128, (4, 4), strides=(2, 2), padding='same')(gen)
        gen = layers.LeakyReLU(alpha=0.2)(gen)

    out_layer = layers.Conv2D(1, (7, 7), activation='tanh', padding='same')(gen)
    return Model(input_latent, out_layer)

# define the combined generator and discriminator model, for updating the generator
def define_gan(g_model, d_model):
    """Chain the generator into the discriminator and compile the result.

    ``d_model.trainable`` is set to ``False`` before the combined model is
    built. The combined model maps the generator's input to the
    discriminator's output and is compiled with binary cross-entropy.
    """
    d_model.trainable = False
    combined = Model(g_model.input, d_model(g_model.output))
    combined.compile(
        loss='binary_crossentropy',
        optimizer=Adam(learning_rate=0.0002, beta_1=0.5),
    )
    return combined

# select a supervised subset of the dataset, ensures classes are balanced
def select_supervised_samples(dataset, num_samples, num_classes):
    """Draw a class-balanced random subset of ``dataset``.

    Args:
        dataset: Pair ``(X, y)`` of samples and labels.
        num_samples: Total number of samples wanted.
        num_classes: Divisor used to get the per-class quota,
            ``int(num_samples / num_classes)``.

    Returns:
        ``(X_subset, y_subset)`` as arrays. For each unique label (in
        ``np.unique`` order) the quota is drawn with replacement from the
        samples carrying that label.
    """
    X, y = dataset
    n_per_class = int(num_samples / num_classes)
    picked_images = []
    picked_labels = []

    for cls in np.unique(y, axis=0):
        # every sample carrying this label
        members = [image for image, label in zip(X, y) if (cls == label).all()]
        # random positions inside this class, repeats allowed
        chosen = np.random.randint(0, len(members), n_per_class)
        picked_images.extend(members[j] for j in chosen)
        picked_labels.extend(cls for _ in chosen)

    return np.asarray(picked_images), np.asarray(picked_labels)

# select real samples
def generate_real_samples(dataset, num_samples):
    """Pick ``num_samples`` random entries (with replacement) from ``dataset``.

    Args:
        dataset: Pair ``(images, labels)`` of index-aligned arrays.
        num_samples: Number of entries to draw.

    Returns:
        ``([X, labels], y)`` where ``y`` is a ``(num_samples, 1)`` array of ones.
    """
    images, labels = dataset
    picks = np.random.randint(0, images.shape[0], num_samples)
    ones = np.ones((num_samples, 1))
    return [images[picks], labels[picks]], ones

# generate points in latent space as input for the generator
def generate_latent_points(latent_dim, num_samples):
    """Return a ``(num_samples, latent_dim)`` array of standard-normal draws."""
    flat = np.random.randn(latent_dim * num_samples)
    return np.reshape(flat, (num_samples, latent_dim))

# use the generator to generate fake examples, with class labels
def generate_fake_samples(generator, latent_dim, num_samples):
    """Run the generator on fresh latent points.

    Returns:
        ``(images, y)`` where ``images`` is ``generator.predict`` applied to
        ``num_samples`` latent points and ``y`` is a ``(num_samples, 1)``
        array of zeros.
    """
    latent_batch = generate_latent_points(latent_dim, num_samples)
    fakes = generator.predict(latent_batch)
    return fakes, np.zeros((num_samples, 1))

# generate samples and save as a plot and save the model
def summarize_performance(step, g_model, c_model, latent_dim, dataset, num_samples):
    """Plot generated samples, save the classifier and print its accuracy.

    Args:
        step: Zero-based training step; the checkpoint is written to
            ``emnist_{step + 1:04d}.h5``.
        g_model: Generator used to produce the plotted images.
        c_model: Classifier that is saved and evaluated.
        latent_dim: Size of the generator's latent input.
        dataset: Pair ``(X, y)`` the classifier is evaluated on.
        num_samples: Number of fake images to generate; at most 100 of them
            are plotted.
    """
    # prepare fake examples
    X, _ = generate_fake_samples(g_model, latent_dim, num_samples)
    # scale from [-1,1] to [0,1]
    X = (X + 1) / 2.0

    # fill a 10x10 grid, capped at the number of generated images so that
    # num_samples below 100 does not index past the end of X
    for i in range(min(100, len(X))):
        plt.subplot(10, 10, 1 + i)
        plt.axis('off')
        plt.imshow(X[i, :, :, 0], cmap='gray_r')

    plt.show()
    # save the classifier model
    filename = 'emnist_%04d.h5' % (step+1)
    c_model.save(filename)

    # evaluate the classifier model
    X, y = dataset
    _, acc = c_model.evaluate(X, y, verbose=0)
    print('Classifier Accuracy: %.3f%%' % (acc * 100))

# train the generator and discriminator
def train(g_model, d_model, c_model, gan_model, dataset, latent_dim, num_samples, num_classes, num_epochs, num_batch):
    """Run the alternating training loop over classifier, discriminator and generator.

    Args:
        g_model: Generator, used to produce fake samples.
        d_model: Unsupervised discriminator, trained on real and fake batches.
        c_model: Supervised classifier, trained on the supervised subset.
        gan_model: Combined model through which the generator is updated.
        dataset: Pair ``[X, y]`` of training arrays.
        latent_dim: Size of the generator's latent input.
        num_samples: Size of the supervised subset; also forwarded to
            ``summarize_performance``.
        num_classes: Forwarded to ``select_supervised_samples``.
        num_epochs: Number of passes; one pass is ``bat_per_epo`` steps.
        num_batch: Batch size for the generator update; the classifier and
            discriminator updates use half of it.
    """

    # the supervised subset is drawn once, before the loop
    X_sup, y_sup = select_supervised_samples(dataset, num_samples, num_classes)
    # calculate the number of batches per training epoch
    bat_per_epo  = int(dataset[0].shape[0] / num_batch)
    # calculate the number of training iterations
    n_steps      = bat_per_epo * num_epochs
    # calculate the size of half a batch of samples
    half_batch   = int(num_batch / 2)
    print('num_epochs=%d, num_batch=%d, 1/2=%d, b/e=%d, steps=%d' % (num_epochs, num_batch, half_batch, bat_per_epo, n_steps))

    for i in range(n_steps):

        # update supervised discriminator (c) on half a batch of the supervised subset
        [Xsup_real, ysup_real], _ = generate_real_samples([X_sup, y_sup], half_batch)
        c_loss, c_acc             = c_model.train_on_batch(Xsup_real, ysup_real)
        # update unsupervised discriminator (d): half a batch of real samples
        # (targets of ones), then half a batch of generated samples (targets of zeros)
        [X_real, _], y_real       = generate_real_samples(dataset, half_batch)
        d_loss1                   = d_model.train_on_batch(X_real, y_real)
        X_fake, y_fake            = generate_fake_samples(g_model, latent_dim, half_batch)
        d_loss2                   = d_model.train_on_batch(X_fake, y_fake)
        # update generator (g) through the combined model: latent points with targets of ones
        X_gan, y_gan              = generate_latent_points(latent_dim, num_batch), np.ones((num_batch, 1))
        g_loss                    = gan_model.train_on_batch(X_gan, y_gan)
        # summarize loss on this batch
        print('>%d, c[%.3f,%.0f], d[%.3f,%.3f], g[%.3f]' % (i+1, c_loss, c_acc*100, d_loss1, d_loss2, g_loss))

        # once every bat_per_epo steps: plot samples, save and evaluate the classifier
        if (i+1) % bat_per_epo == 0:

            summarize_performance(i, g_model, c_model, latent_dim, dataset, num_samples)

# initial parameters of the experiment
num_classes = 47
# number of sub clients to be created from each non-iid class
num_intraclass_clients = 1
num_clients = num_classes * num_intraclass_clients
input_shape = (28, 28, 1)
batch_size = 128
# non-iid severity, 1 means each client will only have one class of data
severity = 1
comms_round = 10
# percentage of every client's shard that is collected
clientDataPercent = 5
# size of the latent space
latent_dim = 128
num_samples = 1000
num_epochs = 20
num_batch = 100
# per-round histories and the pool of collected client samples
accuracy = []
loss = []
clientsData = []

# read dataset from csv files and reshape it to the input_shape
# (column 0 is used as the label, columns 1..784 as the flattened image)
dateset_train = pd.read_csv('emnist-balanced-train.csv', header=None)
dateset_train = dateset_train.to_numpy()
x_train_temp  = dateset_train[:,1:785]
# -1 lets the image count follow the file instead of a hard-coded row total
x_train       = x_train_temp.reshape(-1, 28, 28).astype(np.uint8)
y_train       = dateset_train[:,0]

dateset_test  = pd.read_csv('emnist-balanced-test.csv', header=None)
dateset_test  = dateset_test.to_numpy()
x_test_temp   = dateset_test[:,1:785]
x_test        = x_test_temp.reshape(-1, 28, 28).astype(np.uint8)
y_test        = dateset_test[:,0]

# scale dataset from [0,255] to [-1,1]
x_train = x_train.astype("float32")
x_test  = x_test.astype("float32")
x_train = (x_train - 127.5) / 127.5
x_test  = (x_test - 127.5) / 127.5
# expand to 3d, add channels
x_train = np.expand_dims(x_train, -1)
x_test  = np.expand_dims(x_test, -1)
# change labels to categorical format
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test  = keras.utils.to_categorical(y_test, num_classes)

# create non-iid clients for simulation
clients = non_iid_x(x_train, y_train, severity, num_intraclass_clients)

# collect a percent of clients data
for (client_name, data) in clients.items():

    # shuffle the shard in place so the retained slice is a random sample of it
    np.random.shuffle(data)
    data_temp                = data[:int(len(data) * clientDataPercent / 100)]
    x_client, y_client       = list(zip(*data_temp))
    x_client                 = np.asarray(x_client)
    y_client                 = np.asarray(y_client)
    # lightly augment the retained samples; one batch of size len(x_client)
    # returns one augmented copy of each of them
    clientgen                = ImageDataGenerator(rotation_range=5, width_shift_range=0.02, height_shift_range=0.02, shear_range=0.1, zoom_range=0.1, fill_mode='nearest')
    clientgen.fit(x_client)
    g                        = clientgen.flow(x_client, y_client, batch_size=len(x_client), shuffle=True)
    x_redundant, y_redundant = next(g)
    clientsData.extend(zip(x_redundant, y_redundant))

# the pool is shuffled once here; shuffling it again after every client
# only repeated work on an ever-growing list
np.random.shuffle(clientsData)
x_clients, y_clients = list(zip(*clientsData))
x_clients            = np.asarray(x_clients)
y_clients            = np.asarray(y_clients)

# augment the collected data from clients
datagen              = ImageDataGenerator(rotation_range=10, width_shift_range=0.05, height_shift_range=0.05, shear_range=0.2, zoom_range=0.2, fill_mode='nearest')
datagen.fit(x_clients)
# batch_size=len(x_clients): every next(g) returns one augmented batch as
# large as the whole collected pool
g                    = datagen.flow(x_clients, y_clients, batch_size=len(x_clients), shuffle=True)
# number of additional batches drawn after the first one
last_step            = int(((len(x_clients) * 100) / clientDataPercent) / len(x_clients)) - 1
# draw every batch first and concatenate once; concatenating inside the
# loop re-copied the accumulated arrays on every iteration
augmented_passes     = [next(g) for _ in range(max(last_step, 0) + 1)]
# create a new dataset based on augmented data
x_created            = np.concatenate([x_pass for x_pass, _ in augmented_passes])
y_created            = np.concatenate([y_pass for _, y_pass in augmented_passes])

# build the two discriminator heads, the generator and the combined gan
d_model, c_model = define_discriminator(input_shape, num_classes)
g_model = define_generator(latent_dim)
gan_model = define_gan(g_model, d_model)

# train all models on the dataset created from the augmented client samples
train(
    g_model,
    d_model,
    c_model,
    gan_model,
    [x_created, y_created],
    latent_dim,
    num_samples,
    num_classes,
    num_epochs,
    num_batch,
)

# report the classifier accuracy on the full train and test sets
_, train_acc = c_model.evaluate(x_train, y_train, verbose=0)
print('Train Accuracy: %.3f%%' % (train_acc * 100))
_, test_acc = c_model.evaluate(x_test, y_test, verbose=0)
print('Test Accuracy: %.3f%%' % (test_acc * 100))

# start Flower simulation
fl.simulation.start_simulation(
    client_fn=client_fn,
    num_clients=num_clients,
    client_resources={"num_cpus": 2},
    config=fl.server.ServerConfig(num_rounds=comms_round),
    strategy=SaveModelStrategy()
)

# plot accuracy and loss of each round; the x axis follows the number of
# rounds actually recorded, because a round without evaluation results
# appends nothing to the histories and a fixed comms_round axis would then
# not match their length
recorded_rounds = range(1, len(accuracy) + 1)
plt.suptitle('Accuracy Comparison')
plt.scatter(recorded_rounds, accuracy, c=["red"])
plt.show()
plt.suptitle('Loss Comparison')
plt.scatter(recorded_rounds, loss, c=["red"])
plt.show()