In [174]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from sklearn.metrics import confusion_matrix
import sys
import matplotlib.pyplot as plt
import tensorflow as tf
import plotly.express as px
%matplotlib inline

In [228]:
def genesis_train(file):
    data = pd.read_csv(file)
    del data['Unnamed: 32']
    print('Number of datapoints in Training dataset: ',len(data))
    X_train = data.iloc[:, 2:].values
    y_train = data.iloc[:, 1].values
    
    test = pd.read_csv('./data/test.csv')
    del test['Unnamed: 32']
    print('Number of datapoints in Testing dataset: ',len(test))
    X_test = test.iloc[:, 2:].values
    y_test = test.iloc[:, 1].values

    labelencoder = LabelEncoder()
    y_train = labelencoder.fit_transform(y_train)
    y_test = labelencoder.fit_transform(y_test)

    model = Sequential()

    model.add(Dense(16, activation='relu', input_dim=30))
    model.add(Dropout(0.1))
    model.add(Dense(16, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    model.fit(X_train, y_train, batch_size=100, epochs=5)

    scores = model.evaluate(X_test, y_test)
    print("Loss: ", scores[0])        #Loss
    print("Accuracy: ", scores[1])    #Accuracy

    #Saving Model
    model.save("./weights/global1.h5")
    return len(data), scores[0], scores[1]

In [229]:
def local_train(name,file, globalId):
    data = pd.read_csv(file)
    del data['Unnamed: 32']
    print('Number of datapoints in Testing dataset: ',len(data))
    X_train = data.iloc[:, 2:].values
    y_train = data.iloc[:, 1].values
    
    test = pd.read_csv('./data/test.csv')
    del test['Unnamed: 32']
    print('Number of datapoints in Testing dataset: ',len(test))
    X_test = test.iloc[:, 2:].values
    y_test = test.iloc[:, 1].values

    labelencoder = LabelEncoder()
    y_train = labelencoder.fit_transform(y_train)
    y_test = labelencoder.fit_transform(y_test)

    model = Sequential()

    model.add(Dense(16, activation='relu', input_dim=30))
    model.add(Dropout(0.1))
    model.add(Dense(16, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(1, activation='sigmoid'))

    model.load_weights("./weights/global"+str(globalId)+".h5")

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    model.fit(X_train, y_train, batch_size=100, epochs=5)

    scores = model.evaluate(X_test, y_test)
    print("Loss: ", scores[0])        #Loss
    print("Accuracy: ", scores[1])    #Accuracy

    #Saving Model
    model.save("./weights/" + str(name) + ".h5")
    return len(data), scores[0], float(scores[1])

In [230]:
def getDataLen(trainingDf):
    n = 0
    for w in trainingDf.iloc():
        n += w.DataSize
    print('Total number of data points after this round: ', n)
    return n

def assignWeights(trainingDf):
    n = getDataLen(trainingDf)
    trainingDf['Weightage'] = trainingDf['DataSize'].apply(lambda x: x/n)
    return trainingDf, n
    
def scale(weight, scaler):
    scaledWeights = []
    for i in range(len(weight)):
        scaledWeights.append(scaler * weight[i])
    return scaledWeights

def getScaledWeight(m, scaler):
    model = Sequential()
    model.add(Dense(16, activation='relu', input_dim=30))
    model.add(Dropout(0.1))
    model.add(Dense(16, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(1, activation='sigmoid'))
    fpath = "./weights/"+m+".h5"
    model.load_weights(fpath)
    weight = model.get_weights()
    return scale(weight, scaler)

def avgWeights(scaledWeights):
    avg = list()
    for weight_list_tuple in zip(*scaledWeights):
        layer_mean = tf.math.reduce_sum(weight_list_tuple, axis=0)
        avg.append(layer_mean)
    return avg

def getWeights(m):
    model = Sequential()
    model.add(Dense(16, activation='relu', input_dim=30))
    model.add(Dropout(0.1))
    model.add(Dense(16, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(1, activation='sigmoid'))
    fpath = "./weights/"+m+".h5"
    model.load_weights(fpath)
    weight = model.get_weights()
    return weight

def avgWeights(weights):
    avg = list()
    for weight_list_tuple in zip(*weights):
        layer_mean = tf.math.reduce_sum(weight_list_tuple, axis=0)
        avg.append(layer_mean)
    return avg

def FedAvg(trainingDict):
    trainingDf = pd.DataFrame.from_dict(trainingDict, orient='index', columns=['DataSize','Loss', 'Accuracy']) 
    models = []
    for i in trainingDict.keys():
        if 'global' not in i:
            models.append(i)
    scaledWeights = []
    trainingDf, dataLen = assignWeights(trainingDf)
    for m in models:
        scaledWeights.append(getScaledWeight(m, trainingDf.loc[m]['Weightage']))
    fedAvgWeight = avgWeights(scaledWeights)
    return fedAvgWeight, dataLen

def ZenoScore(local, g_loss, p):
    scored = {}
    for l in local.keys():
        score = g_loss - local[l][1] - (local[l][0]*p)
        scored[l] = score
    return scored
        

def zeno(trainingDict, p, b):
    
    models = list(trainingDict.keys())
    
    trainingDf = pd.DataFrame.from_dict(trainingDict, orient='index', columns=['DataSize', 'Loss', 'Accuracy']) 
    
    g_loss = 0    
    local = {}
    
    for i in trainingDict.keys():
        if 'global' in i:
            g_loss = trainingDict[i][1]
        else:
            local[i] = (trainingDict[i])
            
    
    scores = ZenoScore(local, g_loss, p)
    b = int(len(scores)*b)
    sortedScores = {k: v for k, v in sorted(scores.items(), key=lambda item: item[1])}
    
    zenoedModels = []
    
    for i in range(b):
        zenoedModels.append((sortedScores.popitem())[0])
        
    newDict = {}
    for i in trainingDict.keys():
        if i not in zenoedModels:
            newDict[i] = trainingDict[i]
            
    print('Zeno Selections: ', zenoedModels)
        
    NewGlobal, dataLen = FedAvg(trainingDict)
    
    return NewGlobal, dataLen

def saveModel(weights, n):
    
    test = pd.read_csv('./data/test.csv')
    del test['Unnamed: 32']
    print('Number of datapoints in Testing dataset: ',len(test))
    X_test = test.iloc[:, 2:].values
    y_test = test.iloc[:, 1].values

    labelencoder = LabelEncoder()
    y_test = labelencoder.fit_transform(y_test)

    model = Sequential()

    model.add(Dense(16, activation='relu', input_dim=30))
    model.add(Dropout(0.1))
    model.add(Dense(16, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(1, activation='sigmoid'))
    
    model.set_weights(weights)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    scores = model.evaluate(X_test, y_test)
    print("Loss: ", scores[0])        #Loss
    print("Accuracy: ", scores[1])    #Accuracy

    #Saving Model
    fpath = "./weights/global"+n+".h5"
    model.save(fpath)
    return (scores[0], scores[1])

In [231]:
#### Zeno ####
globalDict = dict()
trainingDict = dict()
trainingDict['global1'] = genesis_train('./data/genesis.csv')
globalDict['global1'] = trainingDict['global1']
trainingDict['A'] = local_train('A','./data/dataA.csv', 1)
trainingDict['B'] = local_train('B','./data/dataB.csv', 1)
trainingDict['C'] = local_train('C','./data/dataC.csv', 1)
trainingDict['D'] = local_train('B','./data/dataD.csv', 1)
trainingDict['E'] = local_train('C','./data/dataE.csv', 1)

Number of datapoints in Training dataset:  20
Number of datapoints in Testing dataset:  75
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  66.0795669555664
Accuracy:  0.25333333015441895
Number of datapoints in Testing dataset:  49
Number of datapoints in Testing dataset:  75
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  48.39285659790039
Accuracy:  0.25333333015441895
Number of datapoints in Testing dataset:  46
Number of datapoints in Testing dataset:  75
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  47.581783294677734
Accuracy:  0.25333333015441895
Number of datapoints in Testing dataset:  44
Number of datapoints in Testing dataset:  75
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  47.828617095947266
Accuracy:  0.25333333015441895
Number of datapoints in Testing dataset:  41
Number of datapoints in Testing dataset:  75
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  47.59942626953125
Accuracy:  0.25333333015441895
Number of datapoin

Loss:  47.89811706542969
Accuracy:  0.25333333015441895


In [232]:
trainingDict

{'global1': (20, 66.0795669555664, 0.25333333015441895),
 'A': (49, 48.39285659790039, 0.25333333015441895),
 'B': (46, 47.581783294677734, 0.25333333015441895),
 'C': (44, 47.828617095947266, 0.25333333015441895),
 'D': (41, 47.59942626953125, 0.25333333015441895),
 'E': (18, 47.89811706542969, 0.25333333015441895)}

In [233]:
NewGlobal, dataLen = zeno(trainingDict, 0.0005, 0.75)

Zeno Selections:  ['B', 'D', 'C']
Total number of data points after this round:  218.0


In [234]:
trainingDict = {}
newModelLoss, newModelAccuracy = saveModel(NewGlobal, '2')
trainingDict['global2'] = (dataLen, newModelLoss, newModelAccuracy)
globalDict['global2'] = trainingDict['global2']
trainingDict['F'] = local_train('F','./data/dataF.csv', 2)
trainingDict['G'] = local_train('G','./data/dataG.csv', 2)
trainingDict['H'] = local_train('H','./data/dataH.csv', 2)
trainingDict['I'] = local_train('I','./data/dataI.csv', 2)
trainingDict['J'] = local_train('K','./data/dataJ.csv', 2)

Number of datapoints in Testing dataset:  75
Loss:  8.928487777709961
Accuracy:  0.746666669845581
Number of datapoints in Testing dataset:  46
Number of datapoints in Testing dataset:  75
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.584122896194458
Accuracy:  0.9066666960716248
Number of datapoints in Testing dataset:  52
Number of datapoints in Testing dataset:  75
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.5824934840202332
Accuracy:  0.9066666960716248
Number of datapoints in Testing dataset:  45
Number of datapoints in Testing dataset:  75
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.6103257536888123
Accuracy:  0.9066666960716248
Number of datapoints in Testing dataset:  47
Number of datapoints in Testing dataset:  75
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.5847291946411133
Accuracy:  0.9066666960716248
Number of datapoints in Testing dataset:  42
Number of datapoints in Testing dataset:  75
Epoch 1/5
Epoch 2/5
Epoch 3/

Loss:  0.6300356388092041
Accuracy:  0.9200000166893005


In [235]:
NewGlobal, dataLen = zeno(trainingDict, 0.0005, 0.75)

Zeno Selections:  ['F', 'I', 'G']
Total number of data points after this round:  450.0


In [236]:
trainingDict = {}
newModelLoss, newModelAccuracy = saveModel(NewGlobal, '3')
trainingDict['global3'] = (dataLen, newModelLoss, newModelAccuracy)
globalDict['global3'] = trainingDict['global3']

Number of datapoints in Testing dataset:  75
Loss:  0.4605107009410858
Accuracy:  0.8266666531562805
