In [None]:
# Uncomment the lines below if running on Google Colab
# !pip install hickle
# from google.colab import drive
# drive.mount('/content/drive/')
# %cd drive/MyDrive/PerCom2021-FL-master/

In [None]:
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.layers import Conv1D, Conv2D, MaxPool1D
from tensorflow.keras.layers import Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras import Sequential
import tensorflow as tf
import csv
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.utils import shuffle
from sklearn.utils import class_weight
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import time
import os
import hickle as hkl 
import copy
from scipy.spatial import distance_matrix
import sys
import random
import math

In [None]:
# Experiment configuration: every tunable of the notebook lives in this cell.

# Which GPU to use (value of CUDA_VISIBLE_DEVICES); candidate values: "-1", "0", "1"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Model architecture built for the server and the clients: "DNN" or "CNN"
modelType = "CNN"

# Federated algorithm: "FEDAVG" or "FEDPER"
algorithm = "FEDAVG"

# Dataset to load: 'UCI' or 'REALWORLD_CLIENT'
dataSetName = 'REALWORLD_CLIENT'

# Client data partitioning: "BALANCED" (stratified folds) or "UNBALANCED" (plain K-fold)
dataConfig = "BALANCED"

# Optimizer used when compiling the models: "ADAM" or "SGD"
optimizer = "SGD"

# Also evaluate every client model on the pooled (central) train and test sets
ClientAllTest = True

# Compute the per-round client-vs-server weight distance statistics
euclid = True

# Asynchronous client test: clients are moved in and out of the training pool over the rounds
asyncTest = False

# Number of clients in the initial training pool for the asynchronous test;
# if 0, about a third of the clients (int(clientCount * 0.34)) is used
startingTrainPool = 0

# Number of clients removed from / added to the training pool at each pool-change round
clientDeleteCount = 3
clientAddCount = 5

# Pool-change interval in rounds; only used when clientAddCount == clientDeleteCount,
# otherwise the interval is derived automatically from the counts above
asyncInterval = 5

# Save each result curve family in a separate graph instead of one combined figure
seperateGraph = False

# Save the client models as .h5 files
# NOTE(review): not read anywhere in the visible part of the notebook - confirm it is still used
savedClientModel = 0

# Verbosity passed to Keras fit/evaluate: 0 or 1
showTrainVerbose = 0

# Input window size (first dimension of the model input shape)
segment_size = 128

# Number of input channels (second dimension of the model input shape)
num_input_channels = 6

# Client learning rate
learningRate = 0.01

# Dropout rate of the model's Dropout layers
dropout_rate = 0.5

# Local epochs each client trains per communication round
localEpoch = 5

# Number of communication rounds
communicationRound = 10

# Seed for data partitioning and TF training
randomSeed = 1


In [None]:
# Activity labels of the selected dataset and the directory where results are stored
if(dataSetName == 'UCI'):
    ACTIVITY_LABEL = ['WALKING', 'WALKING_UPSTAIRS','WALKING_DOWNSTAIRS', 'SITTING', 'STANDING', 'LAYING']
else:
    ACTIVITY_LABEL = ['climbingdown', 'climbingup', 'jumping','lying', 'running', 'sitting', 'standing', 'walking']
activityCount = len(ACTIVITY_LABEL)

# NOTE(review): the 'LR_' token is not preceded by the learning rate value (only the
# local-epoch and round counts are embedded); left unchanged so existing result
# directories keep their names.
architectureType = str(algorithm)+'_'+'LR_'+str(localEpoch)+'LE_'+str(communicationRound)+'CR_'+str(modelType)

mainDir = ''
filepath = mainDir + 'savedModels/'+architectureType+'/'+dataSetName+'/'
os.makedirs(filepath, exist_ok=True)
# NOTE(review): tensorflow is already imported at this point, so this log-level
# setting may come too late to take effect - confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
if(dataSetName=='UCI'):
    clientCount = 5
else:
    clientCount = 15
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

# Seed every random source the notebook draws from. The stdlib `random` module
# drives the client selection of the asynchronous test, so it must be seeded
# as well for runs to be reproducible.
random.seed(randomSeed)
np.random.seed(randomSeed)
tf.random.set_seed(randomSeed)

In [None]:
# Empty containers for the dataset partitions; the loading cell fills them.

# one entry per client
clientDataTrain, clientLabelTrain = [], []
clientDataTest, clientLabelTest = [], []

# pooled over all clients
centralTrainData, centralTrainLabel = [], []
centralTestData, centralTestLabel = [], []

In [None]:
# Load the selected dataset and partition it into per-client train/test sets.
# Every splitter is seeded with randomSeed so that re-running this cell yields
# the same partitions.
if(dataSetName == "UCI"):

    def load_file(filepath):
        """Read a header-less CSV file and return its content as a NumPy array."""
        dataframe = pd.read_csv(filepath, header=None)
        return dataframe.values


    def load_group(filenames, prefix=''):
        """Load each file of `filenames` (prepended with `prefix`) and stack the results depth-wise."""
        loaded = [load_file(prefix + name) for name in filenames]
        return np.dstack(loaded)


    def load_dataset(group, prefix=''):
        """Load the six sensor-axis files and the label file of one group ('train' or 'eval').

        Returns a tuple (X, y): X is the depth-wise stack of the six axis files,
        y the content of the label file.
        """
        groupDir = mainDir + 'datasetStandardized/'+prefix + '/' + group + '/'
        filenames = [axis+prefix+'.csv'
                     for axis in ('AccX', 'AccY', 'AccZ', 'GyroX', 'GyroY', 'GyroZ')]
        X = load_group(filenames, groupDir)
        y = load_file(groupDir + 'Label'+prefix+'.csv')
        return X, y

    trainData, trainLabel = load_dataset('train', dataSetName)
    evalData, evalLabel = load_dataset('eval', dataSetName)
    allData = np.float32(np.vstack((trainData, evalData)))
    allLabel = np.vstack((trainLabel, evalLabel))

    # split data into 80 - 20: the held-out parts of 5 stratified folds,
    # the first four form the training set and the last one the test set
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=randomSeed)
    partitionedData = list()
    partitionedLabel = list()
    for _, test_index in skf.split(allData, allLabel):
        partitionedData.append(allData[test_index])
        partitionedLabel.append(allLabel[test_index])

    centralTrainData = np.vstack((partitionedData[:4]))
    centralTrainLabel = np.vstack((partitionedLabel[:4]))
    centralTestData = partitionedData[4]
    centralTestLabel = partitionedLabel[4]

    trainData = list()
    trainLabel = list()
    testData = list()
    testLabel = list()

    # one training partition per client: stratified when balanced, plain K-fold otherwise
    if(dataConfig == "BALANCED"):
        trainSplitter = StratifiedKFold(n_splits=clientCount, shuffle=True, random_state=randomSeed)
        trainFolds = trainSplitter.split(centralTrainData, centralTrainLabel)
    else:
        trainSplitter = KFold(n_splits=clientCount, shuffle=True, random_state=randomSeed)
        trainFolds = trainSplitter.split(centralTrainData)
    for _, test_index in trainFolds:
        trainData.append(centralTrainData[test_index])
        trainLabel.append(centralTrainLabel[test_index].ravel())

    # one test partition per client; always uses its own clientCount-fold stratified
    # splitter instead of whichever splitter object happened to be bound last
    testSplitter = StratifiedKFold(n_splits=clientCount, shuffle=True, random_state=randomSeed)
    for _, test_index in testSplitter.split(centralTestData, centralTestLabel):
        testData.append(centralTestData[test_index])
        testLabel.append(centralTestLabel[test_index].ravel())

    clientDataTrain = trainData
    clientLabelTrain = trainLabel
    clientDataTest = testData
    clientLabelTest = testLabel

    centralTrainData = (np.vstack((clientDataTrain)))
    centralTrainLabel = (np.hstack((clientLabelTrain)))

    centralTestData = (np.vstack((clientDataTest)))
    centralTestLabel = (np.hstack((clientLabelTest)))
else:
    clientData = []
    clientLabel = []

    # any dataset name other than 'UCI' is treated as the per-client REALWORLD dataset
    dataSetName = 'REALWORLD_CLIENT'
    sensorAxes = ('AccX', 'AccY', 'AccZ', 'GyroX', 'GyroY', 'GyroZ')
    for i in range(clientCount):
        clientDir = mainDir + 'datasetStandardized/'+dataSetName+'/'+str(i)+'/'
        channels = [hkl.load(clientDir + axis + dataSetName + '.hkl') for axis in sensorAxes]
        label = hkl.load(clientDir + 'Label' + dataSetName + '.hkl')
        clientData.append(np.dstack(channels))
        clientLabel.append(label)

    # per client 80 - 20 split: held-out parts of 5 folds, four for training, one for testing
    for i in range(clientCount):
        if(dataConfig == "BALANCED"):
            splitter = StratifiedKFold(n_splits=5, shuffle=True, random_state=randomSeed)
            folds = splitter.split(clientData[i], clientLabel[i])
        else:
            splitter = KFold(n_splits=5, shuffle=True, random_state=randomSeed)
            folds = splitter.split(clientData[i])
        partitionedData = list()
        partitionedLabel = list()
        for _, test_index in folds:
            partitionedData.append(clientData[i][test_index])
            partitionedLabel.append(clientLabel[i][test_index])
        clientDataTrain.append((np.vstack((partitionedData[:4]))))
        clientLabelTrain.append((np.hstack((partitionedLabel[:4]))))
        clientDataTest.append((partitionedData[4]))
        clientLabelTest.append((partitionedLabel[4]))

    centralTrainData = (np.vstack((clientDataTrain)))
    centralTrainLabel = (np.hstack((clientLabelTrain)))

    centralTestData = (np.vstack((clientDataTest)))
    centralTestLabel = (np.hstack((clientLabelTest)))

In [None]:
# CNN architecture: a factory for the client models plus the server instance
if(modelType == "CNN"):
    def create_keras_model():
        """Build the (uncompiled) 1-D CNN: Conv1D -> MaxPool1D -> Flatten -> Dense -> Dropout -> softmax."""
        cnn = Sequential()
        cnn.add(Conv1D(196, 16, input_shape=(segment_size,num_input_channels),
                       activation='relu', padding='same', name='base1'))
        cnn.add(MaxPool1D(pool_size=4, padding='same', name='base2'))
        cnn.add(Flatten(name='flatten'))
        cnn.add(Dense(units=1024, activation='relu'))
        cnn.add(Dropout(dropout_rate))
        cnn.add(Dense(activityCount, activation='softmax'))
        return cnn

    # the server shares the client architecture, so it comes from the same factory
    serverModel = create_keras_model()

In [None]:
# DNN architecture: client factory and server instance share one private builder
if(modelType == "DNN"):
    def _buildDnn(hiddenName, outputName):
        """Build the (uncompiled) DNN; only the names of the second hidden layer and the output layer vary."""
        dnn = Sequential()
        dnn.add(Flatten(input_shape=(segment_size,num_input_channels), name='flatten'))
        dnn.add(Dense(units=400, activation='relu', name='base'))
        dnn.add(Dropout(dropout_rate))
        dnn.add(Dense(units=100, activation='relu', name=hiddenName))
        dnn.add(Dropout(dropout_rate))
        dnn.add(Dense(activityCount, activation='softmax', name=outputName))
        return dnn

    def create_keras_model():
        """Build a client DNN (layers 'flatten', 'base', 'personalized', 'clientSoft')."""
        return _buildDnn('personalized', 'clientSoft')

    # initialize server (layers 'flatten', 'base', 'serverP', 'serverSoft')
    serverModel = _buildDnn('serverP', 'serverSoft')


In [None]:
# Compile the server model with the configured optimizer (SGD, otherwise Adam)
serverOptimizerClass = SGD if optimizer == "SGD" else Adam
serverModel.compile(
    optimizer=serverOptimizerClass(learning_rate=learningRate),
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)

# Persist the initial server weights; clients load them at the start of each round
serverModel.save_weights(filepath+'serverWeights.h5')
weights = serverModel.get_weights()

In [None]:
# One fresh model per client, keyed by client index; training histories are filled per round
local_histories = {}
local_nets = {clientId: create_keras_model() for clientId in range(clientCount)}

In [None]:
# Metric containers filled once per communication round by the training loop.

# client models tested against their own train/test set (mean and std over clients)
trainLossHistory, trainAccHistory, testLossHistory, testAccHistory = [], [], [], []
stdTrainLossHistory, stdTrainAccHistory, stdTestLossHistory, stdTestAccHistory = [], [], [], []

# client models tested against the pooled train/test set (mean and std over clients)
clientTrainLossHistory, clientTrainAccHistory, clientTestLossHistory, clientTestAccHistory = [], [], [], []
clientStdTrainLossHistory, clientStdTrainAccHistory, clientStdTestLossHistory, clientStdTestAccHistory = [], [], [], []

# server model tested against the pooled train/test set
serverTrainLossHistory, serverTrainAccHistory, serverTestLossHistory, serverTestAccHistory = [], [], [], []

# client-vs-server weight distance: per client, per layer, and over all layers
meanHistoryDist, stdHistoryDist = [], []
meanRoundLayerHistory, stdRoundLayerHistory = [], []
meanRoundGeneralLayerHistory, stdRoundGeneralLayerHistory = [], []

# best-round bookkeeping
bestModelRound, serverbestModelRound = 0, 0
currentAccuracy, serverCurrentAccuracy = 0.0, 0.0
bestServerModel, bestServerModelWeights = None, None
best_local_nets, best_local_weights = {}, {}

stage = 1
clientParticipant = clientCount

In [None]:
# Type code of every weighted server layer, in model order: 0 for layers whose class
# name contains "Conv", 1 for those containing "Dense"; all other layers are left out.
serverLayerClassNames = [layer.__class__.__name__ for layer in serverModel.layers]
layerType = [
    0 if "Conv" in className else 1
    for className in serverLayerClassNames
    if "Conv" in className or "Dense" in className
]

In [None]:
# reshaping model weights into one 2-D matrix per layer
def computeWeights(modelWeight, layerTypes=None):
    """Flatten each (kernel, bias) pair of a model into one 2-D matrix.

    Parameters
    ----------
    modelWeight : sequence of arrays
        Weights as returned by ``model.get_weights()``: alternating kernel and
        bias arrays, one pair per weighted layer.
    layerTypes : sequence of int, optional
        Type code per weighted layer (0 = convolution with a 3-D kernel,
        1 = dense). Defaults to the notebook-level ``layerType`` list.

    Returns
    -------
    numpy.ndarray
        1-D object array with one matrix per layer. Each matrix has one row per
        output unit: the flattened kernel followed by the bias as last column.
        Layers with any other type code are skipped.
    """
    if layerTypes is None:
        layerTypes = layerType

    modelWeightsPrep = []
    # The weights are deliberately kept as a plain sequence: layers have different
    # shapes, and turning such a ragged list into a regular array is an error on
    # recent NumPy versions.
    for i in range(len(modelWeight) // 2):
        kernel = np.asarray(modelWeight[i*2])
        biasReshaped = np.asarray(modelWeight[i*2+1]).reshape(-1,1)
        if(layerTypes[i] == 0):
            # conv: (kernelSize, inputChannel, outputChannel) -> (outputChannel, kernelSize*inputChannel)
            kernelSize = kernel.shape[0]
            inputChannel = kernel.shape[1]
            outputChannel = kernel.shape[2]
            weightReshaped = kernel.reshape(kernelSize*inputChannel,outputChannel).T
        elif(layerTypes[i] == 1):
            # dense: (inputs, units) -> (units, inputs)
            weightReshaped = kernel.T
        else:
            continue
        modelWeightsPrep.append(np.hstack((weightReshaped,biasReshaped)))

    # Explicit object array so callers can keep using `.shape[0]` and indexing
    # regardless of whether the layer matrices share a shape.
    result = np.empty(len(modelWeightsPrep), dtype=object)
    for idx, layerMatrix in enumerate(modelWeightsPrep):
        result[idx] = layerMatrix
    return result

In [None]:
# Aggregation coefficient per client: its share of the pooled training samples
pooledTrainSize = np.float32(len(centralTrainLabel))
local_coeffs = {
    clientId: np.float32(len(clientLabelTrain[clientId])) / pooledTrainSize
    for clientId in range(clientCount)
}

In [None]:
# calculating activities weight for weighted training per client
local_class_weights = {}
for i in range(0,clientCount):
    clientLabels = np.asarray(clientLabelTrain[i]).ravel()
    presentClasses = np.unique(clientLabels)
    # keyword arguments: recent scikit-learn versions reject positional ones here
    presentWeights = class_weight.compute_class_weight(class_weight='balanced',
                                                       classes=presentClasses,
                                                       y=clientLabels)
    # Key the weights by the actual class id, not by the position in the list of
    # classes this client happens to have; otherwise a client missing one activity
    # would train with weights shifted onto the wrong classes. Classes without
    # samples get a neutral weight so the dict always covers 0..activityCount-1.
    weightByClass = {classId: 1.0 for classId in range(activityCount)}
    for classId, classWeight in zip(presentClasses, presentWeights):
        weightByClass[int(classId)] = float(classWeight)
    local_class_weights[i] = weightByClass
    

In [None]:
# Client-pool setup. In the synchronous case every client trains every round; in the
# asynchronous test a starting pool is chosen and roundEnd lists the pool-change rounds.
roundEnd = []
if(not asyncTest):
    trainPool = range(clientCount)
else:
    # size of the starting pool
    if(startingTrainPool != 0):
        initialClientCount = startingTrainPool
    else:
        initialClientCount = int(clientCount *0.34)
        if(initialClientCount == 1):
            initialClientCount = 2

    # the first clients train, the remaining ones start idle
    trainPool = list(range(initialClientCount))
    idlePool = list(range(initialClientCount,clientCount))

    # number of pool changes and the spacing (in rounds) between them
    if(clientDeleteCount == clientAddCount):
        intervals = asyncInterval
        stages = int(communicationRound / intervals)
    else:
        stages = math.ceil((clientCount  - len(trainPool)) / (clientAddCount - clientDeleteCount))
        intervals = int(communicationRound / (stages * 2))

    roundEnd.extend(intervals * clientChangeRound for clientChangeRound in range(1,stages+1))

In [None]:
# Per-client distance buffers: one zero-initialized slot per communication round
clientEuclidDistMean = {clientId: np.zeros(communicationRound) for clientId in range(clientCount)}
clientEuclidDistStd = {clientId: np.zeros(communicationRound) for clientId in range(clientCount)}

In [None]:
# Federated learning training.
# Each round: (optionally) rotate the client pool, train every participating client
# starting from the saved server weights, record the metrics, then sum the
# coefficient-scaled client weights into the new server weights.
for roundNum in range(0,communicationRound):
    start_time = time.time()

    # per-client metrics of this round
    trainAcc = []
    trainLoss = []

    testAcc = []
    testLoss = []

    clientTrainAcc = []
    clientTrainLoss = []

    clientTestAcc = []
    clientTestLoss = []

    local_weights = {}

    if(asyncTest):
        if(roundNum in roundEnd):
            # pool change: move random clients to the idle pool, then random idle
            # clients (possibly the ones just removed) into the training pool
            for i in range(clientDeleteCount):
                if(len(trainPool) != 0):
                    selection = random.choice(list(enumerate(trainPool)))
                    del trainPool[selection[0]]
                    idlePool.append(selection[1])
            for i in range(clientAddCount):
                if(len(idlePool) != 0):
                    selection = random.choice(list(enumerate(idlePool)))
                    del idlePool[selection[0]]
                    trainPool.append(selection[1])

        # aggregation coefficients relative to the samples of the current participants
        participantDataInstance = []
        for i in trainPool:
            participantDataInstance.append(clientLabelTrain[i])
        participantDataInstance = (np.hstack((participantDataInstance)))
        local_coeffs = {}
        for i in trainPool:
            local_coeffs[i] = np.float32(len(clientLabelTrain[i])) / np.float32(len(participantDataInstance))

    for i in trainPool:
        print("Status: Round #"+ str(roundNum)+ " Client #"+ str(i))

        # FEDPER loads only the layers whose names match the server's; otherwise
        # the full server weights are loaded
        if(algorithm=="FEDPER"):
            local_nets[i].load_weights(filepath+'serverWeights.h5',by_name=True)
        else:
            local_nets[i].load_weights(filepath+'serverWeights.h5',by_name=False)
        if(optimizer == "SGD"):
            local_nets[i].compile(optimizer=SGD(learning_rate=learningRate),loss='sparse_categorical_crossentropy', metrics=['acc'])
        else:
            local_nets[i].compile(optimizer=Adam(learning_rate=learningRate),loss='sparse_categorical_crossentropy', metrics=['acc'])
        local_histories[i] = local_nets[i].fit(clientDataTrain[i], clientLabelTrain[i], class_weight=local_class_weights[i], epochs = localEpoch,verbose=showTrainVerbose)

        local_weights[i] = local_nets[i].get_weights()
        trainAcc.append(local_histories[i].history['acc'])
        trainLoss.append(local_histories[i].history['loss'])
        testModelMetrics = local_nets[i].evaluate(clientDataTest[i], clientLabelTest[i],verbose = showTrainVerbose)
        testAcc.append(testModelMetrics[1])
        testLoss.append(testModelMetrics[0])

        if(ClientAllTest == True):
            clientTrainModelMetrics = local_nets[i].evaluate(centralTrainData, centralTrainLabel, verbose=showTrainVerbose)
            clientTrainAcc.append(clientTrainModelMetrics[1])
            clientTrainLoss.append(clientTrainModelMetrics[0])

            clientTestModelMetrics = local_nets[i].evaluate(centralTestData, centralTestLabel,verbose = showTrainVerbose)
            clientTestAcc.append(clientTestModelMetrics[1])
            clientTestLoss.append(clientTestModelMetrics[0])

        # scale the client's weights by its aggregation coefficient
        for j in range(0,len(local_weights[i])):
            local_weights[i][j] = local_weights[i][j] * local_coeffs[i]

    if(euclid):
        # element-wise distance between the (pre-aggregation) server weights and
        # each participant's trained weights
        meanServerClient = []
        stdServerClient = []
        serverShape = np.asarray(computeWeights(serverModel.get_weights()))
        localMeanClientLayer = []
        localStdClientLayer = []
        for clientIndex in trainPool:
            localMeanServerClient = []
            localStdServerClient = []

            localShape = np.asarray(computeWeights(local_nets[clientIndex].get_weights()))
            if(algorithm != 'FEDPER'):
                for i in range(serverShape.shape[0]):
                    newLayerDist = np.sqrt((serverShape[i] - localShape[i])**2)
                    localMeanServerClient.append(np.mean(newLayerDist))
                    localStdServerClient.append(np.std(newLayerDist))
            else:
                # FEDPER: only the first weighted layer is compared
                newLayerDist = np.sqrt((serverShape[0] - localShape[0])**2)
                localMeanServerClient.append(np.mean(newLayerDist))
                localStdServerClient.append(np.std(newLayerDist))

            localMeanClientLayer.append(localMeanServerClient)
            localStdClientLayer.append(localStdServerClient)
            meanServerClient.append(np.mean(localMeanServerClient))
            stdServerClient.append(np.mean(localStdServerClient))
            clientEuclidDistMean[clientIndex][roundNum] = np.mean(localMeanServerClient)
            clientEuclidDistStd[clientIndex][roundNum] = np.mean(localStdServerClient)

        # per participating client
        meanHistoryDist.append(np.asarray(meanServerClient))
        stdHistoryDist.append(np.asarray(stdServerClient))

        # per layer distance
        meanRoundLayerHistory.append(np.mean(localMeanClientLayer,axis = 0))
        stdRoundLayerHistory.append(np.mean(localStdClientLayer,axis=0))

        # all layer distance
        meanRoundGeneralLayerHistory.append(np.mean(localMeanClientLayer))
        stdRoundGeneralLayerHistory.append(np.mean(localStdClientLayer))

    trainAccHistory.append(np.mean(trainAcc))
    stdTrainAccHistory.append(np.std(trainAcc))
    trainLossHistory.append(np.mean(trainLoss))
    stdTrainLossHistory.append(np.std(trainLoss))

    meanTestAcc = np.mean(testAcc)

    testAccHistory.append(meanTestAcc)
    stdTestAccHistory.append(np.std(testAcc))
    testLossHistory.append(np.mean(testLoss))
    stdTestLossHistory.append(np.std(testLoss))

    if(meanTestAcc > currentAccuracy):
        for index in local_nets:
            best_local_nets[index] = copy.copy(local_nets[index])
            # copy.copy is shallow, so the copied model keeps training along with
            # the original; the weight snapshot is what actually preserves the
            # state of the best round.
            best_local_weights[index] = local_nets[index].get_weights()
        currentAccuracy = meanTestAcc
        bestModelRound = roundNum + 1

    if(ClientAllTest == True):
        clientTrainLossHistory.append(np.mean(clientTrainLoss))
        clientTrainAccHistory.append(np.mean(clientTrainAcc))
        clientTestLossHistory.append(np.mean(clientTestLoss))
        clientTestAccHistory.append(np.mean(clientTestAcc))

        clientStdTrainLossHistory.append(np.std(clientTrainLoss))
        clientStdTrainAccHistory.append(np.std(clientTrainAcc))
        clientStdTestLossHistory.append(np.std(clientTestLoss))
        clientStdTestAccHistory.append(np.std(clientTestAcc))

    # return weights to server and sum all the (already scaled) model weights,
    # one weight tensor at a time. The result stays a plain list: the tensors have
    # different shapes and cannot be packed into one regular NumPy array.
    weights = list(local_weights.values())
    new_weights = [np.sum(tensorsAcrossClients, axis=0)
                   for tensorsAcrossClients in zip(*weights)]

    serverModel.set_weights(new_weights)
    serverModel.save_weights(filepath+'serverWeights.h5')
    if(algorithm != 'FEDPER'):
        serverTrainMetrics = serverModel.evaluate(centralTrainData, centralTrainLabel,verbose = showTrainVerbose)
        serverTrainLossHistory.append(serverTrainMetrics[0])
        serverTrainAccHistory.append(serverTrainMetrics[1])
        serverTestMetrics = serverModel.evaluate(centralTestData, centralTestLabel,verbose = showTrainVerbose)
        serverTestLossHistory.append(serverTestMetrics[0])
        serverTestAccHistory.append(serverTestMetrics[1])
        if(serverTestMetrics[1]>serverCurrentAccuracy):
            serverCurrentAccuracy = serverTestMetrics[1]
            serverbestModelRound = roundNum + 1
            bestServerModel = copy.copy(serverModel)
            # same shallow-copy caveat as for the clients: keep a real snapshot
            bestServerModelWeights = serverModel.get_weights()
# NOTE(review): start_time is reset every round, so this is the duration of the
# last round only, not of the whole training - confirm which one is wanted.
endTime = time.time() - start_time

In [None]:
# Shift every pool-change round by one so it lines up with the 1-based round axis of the plots
roundEnd = [changeRound + 1 for changeRound in roundEnd]

In [None]:
# Convert the collected histories from Python lists to NumPy arrays.

# std over clients, own data
stdTrainLossHistory, stdTrainAccHistory, stdTestLossHistory, stdTestAccHistory = (
    np.asarray(history) for history in
    (stdTrainLossHistory, stdTrainAccHistory, stdTestLossHistory, stdTestAccHistory)
)

# std over clients, pooled data
clientStdTrainLossHistory, clientStdTrainAccHistory, clientStdTestLossHistory, clientStdTestAccHistory = (
    np.asarray(history) for history in
    (clientStdTrainLossHistory, clientStdTrainAccHistory, clientStdTestLossHistory, clientStdTestAccHistory)
)

if(euclid):
    # transposed so that the first axis is the client / layer and the second the round
    meanHistoryDist, stdHistoryDist, meanRoundLayerHistory, stdRoundLayerHistory = (
        np.asarray(history).T for history in
        (meanHistoryDist, stdHistoryDist, meanRoundLayerHistory, stdRoundLayerHistory)
    )
    meanRoundGeneralLayerHistory, stdRoundGeneralLayerHistory = (
        np.asarray(history) for history in
        (meanRoundGeneralLayerHistory, stdRoundGeneralLayerHistory)
    )

# mean over clients, own data
trainLossHistory, trainAccHistory, testLossHistory, testAccHistory = (
    np.asarray(history) for history in
    (trainLossHistory, trainAccHistory, testLossHistory, testAccHistory)
)

# mean over clients, pooled data
clientTrainLossHistory, clientTrainAccHistory, clientTestLossHistory, clientTestAccHistory = (
    np.asarray(history) for history in
    (clientTrainLossHistory, clientTrainAccHistory, clientTestLossHistory, clientTestAccHistory)
)

# server histories exist only when the server model is evaluated
if(algorithm != 'FEDPER'):
    serverTrainLossHistory, serverTrainAccHistory, serverTestLossHistory, serverTestAccHistory = (
        np.asarray(history) for history in
        (serverTrainLossHistory, serverTrainAccHistory, serverTestLossHistory, serverTestAccHistory)
    )

In [None]:
# Persist the training statistics as one .hkl file per history under <filepath>/trainingStats/
os.makedirs(filepath+'trainingStats', exist_ok=True)

def _dumpStats(statsByName):
    """Dump every value to <filepath>trainingStats/<name>.hkl, in insertion order."""
    for statName, statValue in statsByName.items():
        hkl.dump(statValue, filepath + "trainingStats/" + statName + ".hkl")

# clients on their own data
_dumpStats({
    "trainLossHistory": trainLossHistory,
    "trainAccHistory": trainAccHistory,
    "stdTrainLossHistory": stdTrainLossHistory,
    "stdTrainAccHistory": stdTrainAccHistory,
    "testLossHistory": testLossHistory,
    "testAccHistory": testAccHistory,
    "stdTestLossHistory": stdTestLossHistory,
    "stdTestAccHistory": stdTestAccHistory,
})

# client-vs-server distances (the per-client ones are stored as plain lists)
if(euclid):
    _dumpStats({
        "meanHistoryDist": meanHistoryDist.tolist(),
        "stdHistoryDist": stdHistoryDist.tolist(),
        "meanRoundLayerHistory": meanRoundLayerHistory,
        "stdRoundLayerHistory": stdRoundLayerHistory,
        "meanRoundGeneralLayerHistory": meanRoundGeneralLayerHistory,
        "stdRoundGeneralLayerHistory": stdRoundGeneralLayerHistory,
    })

# clients on the pooled data
if(ClientAllTest == True):
    _dumpStats({
        "clientStdTrainLossHistory": clientStdTrainLossHistory,
        "clientStdTrainAccHistory": clientStdTrainAccHistory,
        "clientStdTestLossHistory": clientStdTestLossHistory,
        "clientStdTestAccHistory": clientStdTestAccHistory,
        "clientTrainLossHistory": clientTrainLossHistory,
        "clientTrainAccHistory": clientTrainAccHistory,
        "clientTestLossHistory": clientTestLossHistory,
        "clientTestAccHistory": clientTestAccHistory,
    })

# server on the pooled data
if(algorithm != 'FEDPER'):
    _dumpStats({
        "serverTrainLossHistory": serverTrainLossHistory,
        "serverTrainAccHistory": serverTrainAccHistory,
        "serverTestLossHistory": serverTestLossHistory,
        "serverTestAccHistory": serverTestAccHistory,
    })

In [None]:
# finish, save and clear the current line chart
def saveGraph(title = "",accuracyOrLoss = "Accuracy",asyTest = False,legendLoc = 'lower right'):
    """Decorate the current matplotlib figure, save it as a PNG and clear it.

    title          -- figure title; with spaces removed it is also the file name
    accuracyOrLoss -- y-axis label
    asyTest        -- when true, draw a blue vertical line at every round in roundEnd
    legendLoc      -- legend location passed to plt.legend
    """
    if(asyTest):
        for changeRound in roundEnd:
            plt.axvline(changeRound, 0, 1,color ="blue")
    plt.title(title)
    plt.ylabel(accuracyOrLoss)
    plt.xlabel('Communication Round')
    plt.legend(loc=legendLoc)
    outputName = title.replace(" ", "") + '.png'
    plt.savefig(filepath + outputName, dpi=100)
    plt.clf()


In [None]:
# Plotting results: accuracy and loss curves of the server and the clients,
# either one figure per curve family (seperateGraph) or one combined figure per metric.
epoch_range = range(1, communicationRound+1)


def _markBest(history, pickBest, color):
    """Draw a single dot on the round selected by pickBest (np.argmax or np.argmin)."""
    plt.plot(epoch_range, history, markevery=[pickBest(history)], ls="", marker="o", color=color)


def _plotServerCurves(trainHistory, testHistory, pickBest):
    """Plot the server train/test curves and mark the best round of each."""
    plt.plot(epoch_range, trainHistory, label = 'Server Train')
    plt.plot(epoch_range, testHistory, label= 'Server Test')
    _markBest(trainHistory, pickBest, "blue")
    _markBest(testHistory, pickBest, "orange")


def _plotClientCurves(labelPrefix, histories, trainColor, testColor, pickBest, colorBars):
    """Plot client mean curves with std error bars and mark the best round of each.

    histories is (trainMean, trainStd, testMean, testStd). The error-bar curves take
    the marker colors only when colorBars is true; otherwise matplotlib's default
    color cycle is used for them.
    """
    trainMean, trainStd, testMean, testStd = histories
    trainStyle = {'color': trainColor} if colorBars else {}
    testStyle = {'color': testColor} if colorBars else {}
    plt.errorbar(epoch_range, trainMean, yerr=trainStd, label=labelPrefix+' Train', alpha=0.6, **trainStyle)
    plt.errorbar(epoch_range, testMean, yerr=testStd, label=labelPrefix+' Test', alpha=0.6, **testStyle)
    _markBest(trainMean, pickBest, trainColor)
    _markBest(testMean, pickBest, testColor)


def _plotMetric(metricName, pickBest, legendLoc, serverHistories, ownHistories, allHistories,
                combinedTitle, combinedFile):
    """Plot one metric ('Accuracy' or 'Loss') for server, client-own and client-all curves.

    pickBest selects the round to highlight: np.argmax for accuracy, np.argmin for
    loss, so that the marked round is always the best one.
    """
    metricLower = metricName.lower()
    if(seperateGraph):
        if(algorithm != "FEDPER"):
            _plotServerCurves(serverHistories[0], serverHistories[1], pickBest)
            saveGraph("Server " + metricLower, metricName, asyTest = asyncTest, legendLoc = legendLoc)

        _plotClientCurves('Client Own', ownHistories, "green", "red", pickBest, False)
        saveGraph("Client own " + metricLower, metricName, asyTest = asyncTest, legendLoc = legendLoc)

        if(ClientAllTest == True):
            _plotClientCurves('Client All', allHistories, "purple", "brown", pickBest, False)
            saveGraph("Client all " + metricLower, metricName, asyTest = asyncTest, legendLoc = legendLoc)
        return

    # combined figure
    if(algorithm != "FEDPER"):
        _plotServerCurves(serverHistories[0], serverHistories[1], pickBest)
    _plotClientCurves('Client Own', ownHistories, "green", "red", pickBest, True)
    if(ClientAllTest == True):
        _plotClientCurves('Client All', allHistories, "purple", "brown", pickBest, True)

    if(asyncTest):
        for changeRound in roundEnd:
            plt.axvline(changeRound, 0, 1,color ="blue")
    plt.title(combinedTitle)
    plt.ylabel(metricName)
    plt.xlabel('Communication Round')
    plt.legend(loc=legendLoc)
    plt.savefig(filepath+combinedFile, dpi=100)
    plt.clf()


_plotMetric('Accuracy', np.argmax, 'lower right',
            (serverTrainAccHistory, serverTestAccHistory),
            (trainAccHistory, stdTrainAccHistory, testAccHistory, stdTestAccHistory),
            (clientTrainAccHistory, clientStdTrainAccHistory, clientTestAccHistory, clientStdTestAccHistory),
            'Model accuracy', 'LearningAccuracy.png')

_plotMetric('Loss', np.argmin, 'upper right',
            (serverTrainLossHistory, serverTestLossHistory),
            (trainLossHistory, stdTrainLossHistory, testLossHistory, stdTestLossHistory),
            (clientTrainLossHistory, clientStdTrainLossHistory, clientTestLossHistory, clientStdTestLossHistory),
            'Model Loss', 'LearningLoss.png')

In [None]:
# cohenD effect size normalize function
def cohenDNormalize(mean1,mean2,std1,std2):
    """Scale a (mean, std) pair by its Cohen's d effect size against a reference.

    Cohen's d is computed as (mean1 - mean2) / sqrt((std1**2 + std2**2) / 2),
    i.e. the mean difference over the pooled standard deviation. Scalars and
    NumPy arrays are both accepted (operations are element-wise).

    Returns (mean1 * d, std1 * d).
    """
    numerator = (mean1 - mean2)
    # pooled standard deviation: the SUM of both variances is halved
    denominater = np.sqrt(((std1**2) + (std2**2)) / 2)
    cohenDs = numerator / denominater
    meanNormalized = mean1 * cohenDs
    stdNormalized = std1 * cohenDs
    return meanNormalized,stdNormalized


In [None]:
# Generating charts and graphs for dissimilarity measurements:
#   1. per-client distance to the server model -> allClientEuclid.png
#   2. per-layer distance to the server model  -> LayerClientEuclid.png

if(euclid):
    # Vertical markers at the end of each asynchronous stage. Guarded by
    # asyncTest so it matches the other stage-marker loops in this notebook.
    if(asyncTest):
        for stageEnd in roundEnd:
            plt.axvline(stageEnd, 0, 1,color ="blue")

    for i in range(clientCount):
        if(asyncTest):
            # A recorded mean of exactly 0 is treated as "no value for this
            # round" and masked out so it is not drawn.
            maskedIndex = [
                1 if clientEuclidDistMean[i][comRound] == 0 else 0
                for comRound in range(communicationRound)
            ]
            meanMask = np.ma.masked_array(clientEuclidDistMean[i], mask=maskedIndex)
            stdMask = np.ma.masked_array(clientEuclidDistStd[i], mask=maskedIndex)
            # One curve per client, labelled with that client's own index.
            # (Previously a nested loop re-plotted every client for each i.)
            plt.errorbar(epoch_range, meanMask, yerr=stdMask, label='Client '+str(i+1),alpha=0.6)
        else:
            plt.errorbar(epoch_range, meanHistoryDist[i], yerr=stdHistoryDist[i], label='Client '+str(i+1))

    plt.title('Distance between client & server model')
    plt.ylabel('Euclidiance Distance')
    plt.xlabel('Communication Round')
    plt.savefig(filepath+'allClientEuclid.png', dpi=100)
    plt.clf()


    if(algorithm != "FEDPER"):
        for i in range(len(layerType)):
            # Work on local copies of the references so the history lists are
            # not overwritten; re-running this cell then gives the same plot.
            layerMean = meanRoundLayerHistory[i]
            layerStd = stdRoundLayerHistory[i]
            # hardcoded: layer index 1 is the reference every other layer is
            # normalized against; it is plotted un-normalized itself.
            if(i != 1):
                layerMean, layerStd = cohenDNormalize(layerMean, meanRoundLayerHistory[1], layerStd, stdRoundLayerHistory[1])
            # Cohen's d can be negative, which makes the scaled std negative;
            # error-bar extents must be non-negative, so plot the magnitude.
            plt.errorbar(epoch_range, layerMean, yerr=np.abs(layerStd), label='Layer '+str(i+1),alpha=0.6)
    else:
        plt.errorbar(epoch_range, meanRoundLayerHistory[0], yerr=stdRoundLayerHistory[0], label='Layer '+str(0+1),alpha=0.6)
    if(asyncTest):
        for stageEnd in roundEnd:
            plt.axvline(stageEnd, 0, 1,color ="blue")
    plt.title('Layer distance between client & server model')
    plt.ylabel('Euclidiance Distance')
    plt.xlabel('Communication Round')
    plt.legend(loc='upper right')
    plt.savefig(filepath+'LayerClientEuclid.png', dpi=100)
    plt.clf()

In [None]:
# Helper: average the input, then round the average
def roundNumber(toRoundNb):
    """Return the mean of ``toRoundNb`` rounded to four decimal places.

    Args:
        toRoundNb: A number or any array-like accepted by ``np.mean``.

    Returns:
        The mean value rounded to 4 decimals.
    """
    decimals = 4
    averaged = np.mean(toRoundNb)
    return round(averaged, decimals)

In [None]:
# Generating personalized accuracy: every client's best model is scored on
# that client's own test split, and the averaged metrics are written to CSV.
indiAccTest = []
indiWeightedTest = []
indiMicroTest = []
indiMacroTest = []
os.makedirs(filepath+'models/' , exist_ok=True)
for i, clientModel in enumerate(best_local_nets):
    # Compile so that evaluate() can report loss and the 'acc' metric.
    clientModel.compile(optimizer=SGD(learning_rate=learningRate),loss='sparse_categorical_crossentropy', metrics=['acc'])
    results = clientModel.evaluate(clientDataTest[i], clientLabelTest[i])
    # Sequential.predict_classes() was removed from Keras (TF >= 2.6). Taking
    # the argmax over the last axis of predict() is the documented
    # replacement for multi-class outputs.
    y_pred = np.argmax(clientModel.predict(clientDataTest[i], verbose=0), axis=-1)
    y_test = clientLabelTest[i]

    _weightVal_f1 = f1_score(y_test, y_pred,average='weighted' )
    _microVal_f1 = f1_score(y_test, y_pred,average='micro' )
    _macroVal_f1 = f1_score(y_test, y_pred,average='macro' )
    # results[1] is the 'acc' metric requested in compile(); results[0] is the loss.
    indiAccTest.append(results[1])
    indiWeightedTest.append(_weightVal_f1)
    indiMicroTest.append(_microVal_f1)
    indiMacroTest.append(_macroVal_f1)
    if(savedClientModel == 1):
        clientModel.save(filepath+'models/clientModel'+str(i+1)+'.h5')

# roundNumber() already averages its argument, so the per-client lists are
# passed directly.
modelStatistics = {
    "Results on individual client models on their own tests" : '',
    "BestModelRound:": bestModelRound,
    "accuracy:" : roundNumber(indiAccTest),
    "weighted f1:" : roundNumber(indiWeightedTest),
    "micro f1:": roundNumber(indiMicroTest),
    "macro f1:": roundNumber(indiMacroTest),
}
# newline='' is required by the csv module to avoid blank rows on platforms
# that translate line endings.
with open(filepath +'indivualClientsMeasure.csv','w', newline='') as f:
    w = csv.writer(f)
    w.writerows(modelStatistics.items())

In [None]:
# Generating generalized accuracy: every client's best model is scored on the
# central test set, and the averaged metrics are written to CSV.
# NOTE: evaluate() needs compiled models; they are compiled in the
# personalized-accuracy cell that precedes this one.
indiAccTest = []
indiWeightedTest = []
indiMicroTest = []
indiMacroTest = []

# The ground truth is the same for every client model.
y_test = centralTestLabel
for clientModel in best_local_nets:
    results = clientModel.evaluate(centralTestData, centralTestLabel)
    # Sequential.predict_classes() was removed from Keras (TF >= 2.6). Taking
    # the argmax over the last axis of predict() is the documented
    # replacement for multi-class outputs.
    y_pred = np.argmax(clientModel.predict(centralTestData, verbose=0), axis=-1)

    _weightVal_f1 = f1_score(y_test, y_pred,average='weighted' )
    _microVal_f1 = f1_score(y_test, y_pred,average='micro' )
    _macroVal_f1 = f1_score(y_test, y_pred,average='macro' )
    # results[1] is the metric value reported after the loss.
    indiAccTest.append(results[1])
    indiWeightedTest.append(_weightVal_f1)
    indiMicroTest.append(_microVal_f1)
    indiMacroTest.append(_macroVal_f1)

modelStatistics = {
    "Results on individual client models on ALL testsets" : '',
    "Client Best Model Round:": bestModelRound,
    "Client Accuracy:" : roundNumber(np.mean(indiAccTest)),
    "Client weighted f1:" : roundNumber(np.mean(indiWeightedTest)),
    "Client micro f1:": roundNumber(np.mean(indiMicroTest)),
    "Client macro f1:": roundNumber(np.mean(indiMacroTest)),
}
# newline='' is required by the csv module to avoid blank rows on platforms
# that translate line endings.
with open(filepath +'AllClientsMeasure.csv','w', newline='') as f:
    w = csv.writer(f)
    w.writerows(modelStatistics.items())

In [None]:
# Generating Global accuracy: score the best server model on the central test
# set, save it, and write its metrics to CSV. Skipped for FEDPER.
if(algorithm != "FEDPER"):
    # NOTE(review): `results` is only displayed by evaluate(); the accuracy
    # written below comes from serverCurrentAccuracy, not results[1].
    # Confirm that the two agree for the best server model.
    results = bestServerModel.evaluate(centralTestData, centralTestLabel)
    # Sequential.predict_classes() was removed from Keras (TF >= 2.6). Taking
    # the argmax over the last axis of predict() is the documented
    # replacement for multi-class outputs.
    y_pred = np.argmax(bestServerModel.predict(centralTestData, verbose=0), axis=-1)
    y_test = centralTestLabel
    weightVal_f1 = f1_score(y_test, y_pred,average='weighted' )
    microVal_f1 = f1_score(y_test, y_pred,average='micro')
    macroVal_f1 = f1_score(y_test, y_pred,average='macro')

    # Make sure the target directory exists even if this cell is run on its own.
    os.makedirs(filepath+'models/', exist_ok=True)
    bestServerModel.save(filepath+'models/serverModel.h5')
    modelStatistics = {
    "Results on server model on ALL testsets" : '',
    "Server Best Model Round": serverbestModelRound,
    "Server Accuracy:" : roundNumber(serverCurrentAccuracy),
    "Server weighted f1:" : roundNumber(weightVal_f1),
    "Server micro f1:": roundNumber(microVal_f1),
    "Server macro f1:": roundNumber(macroVal_f1),
    }
    # newline='' is required by the csv module to avoid blank rows on
    # platforms that translate line endings.
    with open(filepath +'ServerMeasure.csv','w', newline='') as f:
        w = csv.writer(f)
        w.writerows(modelStatistics.items())

In [None]:
# Saves the recorded training time (endTime) to a one-row CSV file
modelStatistics = {
    "Training Time:": endTime,
}
# newline='' is required by the csv module to avoid blank rows on platforms
# that translate line endings.
with open(filepath +'traingTime.csv','w', newline='') as f:
    w = csv.writer(f)
    w.writerows(modelStatistics.items())

In [None]:
# Completion marker: printed when the notebook reaches its last cell.
print("Training finished")