In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from sklearn.metrics import confusion_matrix
import sys
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
import plotly.express as px

In [3]:
def genesis_train(file):
    data = pd.read_csv(file)
    del data['Unnamed: 32']
    print('Number of datapoints in Training dataset: ',len(data))
    X_train = data.iloc[:, 2:].values
    y_train = data.iloc[:, 1].values
    
    test = pd.read_csv('./data/test.csv')
    del test['Unnamed: 32']
    print('Number of datapoints in Testing dataset: ',len(test))
    X_test = test.iloc[:, 2:].values
    y_test = test.iloc[:, 1].values

    labelencoder = LabelEncoder()
    y_train = labelencoder.fit_transform(y_train)
    y_test = labelencoder.fit_transform(y_test)

#     sc = StandardScaler()
#     X_train = sc.fit_transform(X_train)
#     X_test = sc.transform(X_test)

    model = Sequential()

    model.add(Dense(16, activation='relu', input_dim=30))
    model.add(Dropout(0.1))
    model.add(Dense(16, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    model.fit(X_train, y_train, batch_size=100, epochs=5)

    scores = model.evaluate(X_test, y_test)
    print("Loss: ", scores[0])        #Loss
    print("Accuracy: ", scores[1])    #Accuracy

    #Saving Model
    model.save("./weights/output.h5")
    return len(data), scores[1]

In [4]:
def update_train(name,file):
    data = pd.read_csv(file)
    del data['Unnamed: 32']
    X_train = data.iloc[:, 2:].values
    y_train = data.iloc[:, 1].values
    
    test = pd.read_csv('./data/test.csv')
    del test['Unnamed: 32']
    print('Number of datapoints in Testing dataset: ',len(test))
    X_test = test.iloc[:, 2:].values
    y_test = test.iloc[:, 1].values

    labelencoder = LabelEncoder()
    y_train = labelencoder.fit_transform(y_train)
    y_test = labelencoder.fit_transform(y_test)

#     sc = StandardScaler()
#     X_train = sc.fit_transform(X_train)
#     X_test = sc.transform(X_test)

    model = Sequential()

    model.add(Dense(16, activation='relu', input_dim=30))
    model.add(Dropout(0.1))
    model.add(Dense(16, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(1, activation='sigmoid'))

    model.load_weights("./weights/output.h5")

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    model.fit(X_train, y_train, batch_size=100, epochs=5)

    scores = model.evaluate(X_test, y_test)
    print("Loss: ", scores[0])        #Loss
    print("Accuracy: ", scores[1])    #Accuracy

    #Saving Model
    model.save("./weights/" + str(name) + ".h5")
    return len(data), float(scores[1])

In [5]:
FLAccuracy = dict()
FLAccuracy['Genesis'] = genesis_train('./data/genesis.csv')
FLAccuracy['A'] = update_train('A','./data/dataA.csv')
FLAccuracy['B'] = update_train('B','./data/dataB.csv')
FLAccuracy['C'] = update_train('C','./data/dataC.csv')
FLAccuracy['D'] = update_train('D','./data/dataD.csv')
FLAccuracy['E'] = update_train('E','./data/dataE.csv')
FLAccuracy['F'] = update_train('F','./data/dataF.csv')
FLAccuracy['G'] = update_train('G','./data/dataG.csv')
FLAccuracy['H'] = update_train('H','./data/dataH.csv')
FLAccuracy['I'] = update_train('I','./data/dataI.csv')
FLAccuracy['J'] = update_train('J','./data/dataJ.csv')
FLAccuracy['K'] = update_train('K','./data/dataK.csv')

Number of datapoints in Training dataset:  20
Number of datapoints in Testing dataset:  75
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  18.744049072265625
Accuracy:  0.746666669845581
Number of datapoints in Testing dataset:  75
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  12.368022918701172
Accuracy:  0.746666669845581
Number of datapoints in Testing dataset:  75
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  12.4560546875
Accuracy:  0.746666669845581
Number of datapoints in Testing dataset:  75
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  12.383003234863281
Accuracy:  0.746666669845581
Number of datapoints in Testing dataset:  75
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  12.357634544372559
Accuracy:  0.746666669845581
Number of datapoints in Testing dataset:  75
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  12.734356880187988
Accuracy:  0.746666669845581
Number of datapoints in Testing dataset:  75
Epoch 1/5
Epoc

Loss:  12.341400146484375
Accuracy:  0.746666669845581
Number of datapoints in Testing dataset:  75
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  12.538736343383789
Accuracy:  0.746666669845581
Number of datapoints in Testing dataset:  75
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  12.556865692138672
Accuracy:  0.746666669845581
Number of datapoints in Testing dataset:  75
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  12.435154914855957
Accuracy:  0.746666669845581
Number of datapoints in Testing dataset:  75
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  12.660170555114746
Accuracy:  0.746666669845581


In [6]:
FLAccuracy

{'Genesis': (20, 0.746666669845581),
 'A': (49, 0.746666669845581),
 'B': (46, 0.746666669845581),
 'C': (44, 0.746666669845581),
 'D': (41, 0.746666669845581),
 'E': (18, 0.746666669845581),
 'F': (46, 0.746666669845581),
 'G': (52, 0.746666669845581),
 'H': (45, 0.746666669845581),
 'I': (47, 0.746666669845581),
 'J': (42, 0.746666669845581),
 'K': (44, 0.746666669845581)}

In [7]:
FLAccuracyDF = pd.DataFrame.from_dict(FLAccuracy, orient='index', columns=['DataSize', 'Accuracy'])
FLAccuracyDF

Unnamed: 0,DataSize,Accuracy
Genesis,20,0.746667
A,49,0.746667
B,46,0.746667
C,44,0.746667
D,41,0.746667
E,18,0.746667
F,46,0.746667
G,52,0.746667
H,45,0.746667
I,47,0.746667


In [8]:
FLAccuracyDF.index

Index(['Genesis', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K'], dtype='object')

In [9]:
n = 0
for w in FLAccuracyDF.iloc():
    n += w.DataSize
print('Total number of data points in this round: ', n)

NotImplementedError: ix is not iterable

In [169]:
FLAccuracyDF['Weightage'] = FLAccuracyDF['DataSize'].apply(lambda x: x/n)

In [170]:
FLAccuracyDF

Unnamed: 0,DataSize,Accuracy,Weightage
Genesis,20,0.24,0.040486
A,49,0.253333,0.09919
B,46,0.253333,0.093117
C,44,0.293333,0.089069
D,41,0.253333,0.082996
E,18,0.253333,0.036437
F,46,0.32,0.093117
G,52,0.253333,0.105263
H,45,0.733333,0.091093
I,47,0.293333,0.095142


In [171]:
def scale(weight, scaler):
    scaledWeights = []
    for i in range(len(weight)):
        scaledWeights.append(scaler * weight[i])
    return scaledWeights

def getScaledWeight(m, scaler):
    
    model = Sequential()
    model.add(Dense(16, activation='relu', input_dim=30))
    model.add(Dropout(0.1))
    model.add(Dense(16, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(1, activation='sigmoid'))
    
    fpath = "./weights/"+m+".h5"
    model.load_weights(fpath)
    weight = model.get_weights()
    scaledWeight = scale(weight, scaler)

    return scaledWeight

In [172]:
def avgWeights(scaledWeights):
    avg = list()
    for weight_list_tuple in zip(*scaledWeights):
        layer_mean = tf.math.reduce_sum(weight_list_tuple, axis=0)
        avg.append(layer_mean)
    return avg

In [173]:
def FedAvg(models):
    
    scaledWeights = []
    for m in models:
        scaledWeights.append(getScaledWeight(m, FLAccuracyDF.loc[m]['Weightage']))
    
    avgWeight = avgWeights(scaledWeights)
    return avgWeight

In [174]:
models = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K']
avgWeight = FedAvg(models)
print(avgWeight)

[<tf.Tensor: shape=(30, 16), dtype=float32, numpy=
array([[ 0.2057464 , -0.2657927 , -0.25285888, -0.24868095, -0.27177823,
         0.32082295,  0.17813817, -0.2762046 ,  0.15874425, -0.19060329,
         0.04024782,  0.2358241 , -0.06689931, -0.18333879,  0.2137917 ,
         0.27995768],
       [-0.26607943, -0.15886399,  0.00093831, -0.28260505, -0.00071617,
         0.0422643 ,  0.18987222,  0.19548495,  0.06701235, -0.07441676,
        -0.303952  ,  0.01502862, -0.21088326, -0.22428352, -0.23013252,
        -0.25867918],
       [-0.08973237,  0.24541888,  0.00931878,  0.18099858,  0.20778519,
         0.04198923,  0.21883336,  0.33724394,  0.0916836 , -0.10704709,
        -0.15457837,  0.29787248, -0.25978634,  0.1385961 , -0.08112531,
        -0.09184094],
       [ 0.17914766,  0.05103307,  0.12961589,  0.00284287,  0.19499469,
        -0.19313732,  0.09373255,  0.17487596,  0.07619544, -0.15862216,
        -0.31945598, -0.10518299, -0.09112987,  0.1427899 , -0.15514548,
       

In [175]:
def testNewGlobal(weight):

    test = pd.read_csv('./data/test.csv')
    del test['Unnamed: 32']
    print('Number of datapoints in Testing dataset: ',len(test))
    X_test = test.iloc[:, 2:].values
    y_test = test.iloc[:, 1].values

    labelencoder = LabelEncoder()
    y_test = labelencoder.fit_transform(y_test)

#     sc = StandardScaler()
#     X_test = sc.transform(X_test)

    model = Sequential()

    model.add(Dense(16, activation='relu', input_dim=30))
    model.add(Dropout(0.1))
    model.add(Dense(16, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(1, activation='sigmoid'))
    
    model.set_weights(weight)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    scores = model.evaluate(X_test, y_test)
    print("Loss: ", scores[0])        #Loss
    print("Accuracy: ", scores[1])    #Accuracy

    #Saving Model
    model.save("./weights/output.h5")
    return scores[1]

In [176]:
testNewGlobal(avgWeight)

Number of datapoints in Testing dataset:  75
Loss:  5.1235551834106445
Accuracy:  0.25333333015441895


0.25333333015441895