In [206]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from sklearn.metrics import confusion_matrix
import sys
import matplotlib.pyplot as plt
import tensorflow as tf
import plotly.express as px
%matplotlib inline

In [207]:
def genesis_train(file):
    data = pd.read_csv(file)
    del data['Unnamed: 32']
    print('Number of datapoints in Training dataset: ',len(data))
    X_train = data.iloc[:, 2:].values
    y_train = data.iloc[:, 1].values
    
    test = pd.read_csv('./data/test.csv')
    del test['Unnamed: 32']
    print('Number of datapoints in Testing dataset: ',len(test))
    X_test = test.iloc[:, 2:].values
    y_test = test.iloc[:, 1].values

    labelencoder = LabelEncoder()
    y_train = labelencoder.fit_transform(y_train)
    y_test = labelencoder.fit_transform(y_test)

    model = Sequential()

    model.add(Dense(16, activation='relu', input_dim=30))
    model.add(Dropout(0.1))
    model.add(Dense(16, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    model.fit(X_train, y_train, batch_size=100, epochs=5)

    scores = model.evaluate(X_test, y_test)
    print("Loss: ", scores[0])        #Loss
    print("Accuracy: ", scores[1])    #Accuracy

    #Saving Model
    model.save("./weights/global1.h5")
    return len(data), scores[1]

In [208]:
def local_train(name,file, globalId):
    data = pd.read_csv(file)
    del data['Unnamed: 32']
    X_train = data.iloc[:, 2:].values
    y_train = data.iloc[:, 1].values
    
    test = pd.read_csv('./data/test.csv')
    del test['Unnamed: 32']
    print('Number of datapoints in Testing dataset: ',len(test))
    X_test = test.iloc[:, 2:].values
    y_test = test.iloc[:, 1].values

    labelencoder = LabelEncoder()
    y_train = labelencoder.fit_transform(y_train)
    y_test = labelencoder.fit_transform(y_test)

    model = Sequential()

    model.add(Dense(16, activation='relu', input_dim=30))
    model.add(Dropout(0.1))
    model.add(Dense(16, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(1, activation='sigmoid'))

    model.load_weights("./weights/global"+str(globalId)+".h5")

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    model.fit(X_train, y_train, batch_size=100, epochs=5)

    scores = model.evaluate(X_test, y_test)
    print("Loss: ", scores[0])        #Loss
    print("Accuracy: ", scores[1])    #Accuracy

    #Saving Model
    model.save("./weights/" + str(name) + ".h5")
    return len(data), float(scores[1])

In [209]:
#### FedAvg ####

def getDataLen(trainingDf):
    n = 0
    for w in trainingDf.iloc():
        n += w.DataSize
    print('Total number of data points after this round: ', n)
    return n

def assignWeights(trainingDf):
    n = getDataLen(trainingDf)
    trainingDf['Weightage'] = trainingDf['DataSize'].apply(lambda x: x/n)
    return trainingDf, n
    
def scale(weight, scaler):
    scaledWeights = []
    for i in range(len(weight)):
        scaledWeights.append(scaler * weight[i])
    return scaledWeights

def getScaledWeight(m, scaler):
    model = Sequential()
    model.add(Dense(16, activation='relu', input_dim=30))
    model.add(Dropout(0.1))
    model.add(Dense(16, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(1, activation='sigmoid'))
    fpath = "./weights/"+m+".h5"
    model.load_weights(fpath)
    weight = model.get_weights()
    return scale(weight, scaler)

def avgWeights(scaledWeights):
    avg = list()
    for weight_list_tuple in zip(*scaledWeights):
        layer_mean = tf.math.reduce_sum(weight_list_tuple, axis=0)
        avg.append(layer_mean)
    return avg

def FedAvg(trainingDict):
    trainingDf = pd.DataFrame.from_dict(trainingDict, orient='index', columns=['DataSize', 'Accuracy']) 
    models = []
    for i in trainingDict.keys():
        if 'global' not in i:
            models.append(i)
    scaledWeights = []
    trainingDf, dataLen = assignWeights(trainingDf)
    for m in models:
        scaledWeights.append(getScaledWeight(m, trainingDf.loc[m]['Weightage']))
    fedAvgWeight = avgWeights(scaledWeights)
    return fedAvgWeight, dataLen

def saveModel(weights, n):
    
    test = pd.read_csv('./data/test.csv')
    del test['Unnamed: 32']
    print('Number of datapoints in Testing dataset: ',len(test))
    X_test = test.iloc[:, 2:].values
    y_test = test.iloc[:, 1].values

    labelencoder = LabelEncoder()
    y_test = labelencoder.fit_transform(y_test)

    model = Sequential()

    model.add(Dense(16, activation='relu', input_dim=30))
    model.add(Dropout(0.1))
    model.add(Dense(16, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(1, activation='sigmoid'))
    
    model.set_weights(weights)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    scores = model.evaluate(X_test, y_test)
    print("Loss: ", scores[0])        #Loss
    print("Accuracy: ", scores[1])    #Accuracy

    #Saving Model
    fpath = "./weights/global"+n+".h5"
    model.save(fpath)
    return float(scores[1])

In [210]:
# Genesis Training

In [211]:
globalDict = dict()
trainingDict = dict()
trainingDict['global1'] = genesis_train('./data/genesis.csv')
globalDict['global1'] = trainingDict['global1']

Number of datapoints in Training dataset:  20
Number of datapoints in Testing dataset:  75
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  9.955900192260742
Accuracy:  0.746666669845581


In [212]:
# Three Local 

In [213]:
trainingDict['A'] = local_train('A','./data/dataA.csv', 1)
trainingDict['B'] = local_train('B','./data/dataB.csv', 1)
trainingDict['C'] = local_train('C','./data/dataC.csv', 1)

Number of datapoints in Testing dataset:  75
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.6843255758285522
Accuracy:  0.7866666913032532
Number of datapoints in Testing dataset:  75
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.7082008123397827
Accuracy:  0.7866666913032532
Number of datapoints in Testing dataset:  75
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.64097660779953
Accuracy:  0.8266666531562805


In [214]:
trainingDict

{'global1': (20, 0.746666669845581),
 'A': (49, 0.7866666913032532),
 'B': (46, 0.7866666913032532),
 'C': (44, 0.8266666531562805)}

In [215]:
NewGlobal, dataLen = FedAvg(trainingDict)
# print(NewGlobal)

Total number of data points after this round:  159.0


In [216]:
trainingDict = {}
trainingDict['global2'] = (dataLen, saveModel(NewGlobal, '2'))
globalDict['global2'] = trainingDict['global2']
trainingDict['D'] = local_train('D','./data/dataD.csv', 2)
trainingDict['E'] = local_train('E','./data/dataE.csv', 2)
trainingDict['F'] = local_train('F','./data/dataF.csv', 2)
NewGlobal, dataLen = FedAvg(trainingDict)

Number of datapoints in Testing dataset:  75
Loss:  0.5066019892692566
Accuracy:  0.7866666913032532
Number of datapoints in Testing dataset:  75
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  3.0815844535827637
Accuracy:  0.3866666555404663
Number of datapoints in Testing dataset:  75
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  1.4230366945266724
Accuracy:  0.6800000071525574
Number of datapoints in Testing dataset:  75
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  1.3793429136276245
Accuracy:  0.6133333444595337
Total number of data points after this round:  264.0


In [217]:
trainingDict = {}
trainingDict['global3'] = (dataLen, saveModel(NewGlobal, '3'))
globalDict['global3'] = trainingDict['global3']
trainingDict['G'] = local_train('G','./data/dataG.csv', 3)
trainingDict['H'] = local_train('H','./data/dataH.csv', 3)
trainingDict['I'] = local_train('I','./data/dataI.csv', 3)
NewGlobal, dataLen = FedAvg(trainingDict)

Number of datapoints in Testing dataset:  75
Loss:  0.6305674314498901
Accuracy:  0.5333333611488342
Number of datapoints in Testing dataset:  75
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.7048617601394653
Accuracy:  0.36000001430511475
Number of datapoints in Testing dataset:  75
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.6833865642547607
Accuracy:  0.746666669845581
Number of datapoints in Testing dataset:  75
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.4546230137348175
Accuracy:  0.8799999952316284
Total number of data points after this round:  408.0


In [218]:
trainingDict = {}
trainingDict['global4'] = (dataLen, saveModel(NewGlobal, '4'))
globalDict['global4'] = trainingDict['global4']
trainingDict['J'] = local_train('J','./data/dataG.csv', 4)
trainingDict['K'] = local_train('K','./data/dataH.csv', 4)
NewGlobal, dataLen = FedAvg(trainingDict)

Number of datapoints in Testing dataset:  75
Loss:  0.6801868677139282
Accuracy:  0.8799999952316284
Number of datapoints in Testing dataset:  75
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.7060788869857788
Accuracy:  0.25333333015441895
Number of datapoints in Testing dataset:  75
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.6848666667938232
Accuracy:  0.746666669845581
Total number of data points after this round:  505.0


In [219]:
trainingDict = {}
trainingDict['global5'] = (dataLen, saveModel(NewGlobal, '5'))
globalDict['global5'] = trainingDict['global5']

Number of datapoints in Testing dataset:  75
Loss:  0.6928359270095825
Accuracy:  0.9066666960716248


In [220]:
globalDict

{'global1': (20, 0.746666669845581),
 'global2': (159.0, 0.7866666913032532),
 'global3': (264.0, 0.5333333611488342),
 'global4': (408.0, 0.8799999952316284),
 'global5': (505.0, 0.9066666960716248)}