In [44]:
from PIL import Image
import numpy as np
import os
import cv2
import keras
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Conv2D,MaxPooling2D,Dense,Flatten,Dropout
import pandas as pd
import sys
import tensorflow as tf
%matplotlib inline
import matplotlib.pyplot as plt
import plotly.express as px

In [45]:
def readData(filepath, label):
    cells = []
    labels = []
    file = os.listdir(filepath)
    for img in file:
        try:
            image = cv2.imread(filepath + img)
            image_from_array = Image.fromarray(image, 'RGB')
            size_image = image_from_array.resize((50, 50))
            cells.append(np.array(size_image))
            labels.append(label)
        except AttributeError as e:
            print('Skipping file: ', img, e)
    print(len(cells), ' Data Points Read!')
    return np.array(cells), np.array(labels)

In [46]:
def genesis_train(file):
    
    print('Reading Training Data')
    
    ParasitizedCells, ParasitizedLabels = readData(file + '/Parasitized/', 1)
    UninfectedCells, UninfectedLabels  = readData(file + '/Uninfected/', 0)
    Cells = np.concatenate((ParasitizedCells, UninfectedCells))
    Labels = np.concatenate((ParasitizedLabels, UninfectedLabels))
    
    print('Reading Testing Data')
    
    TestParasitizedCells, TestParasitizedLabels = readData('./input/fed/test/Parasitized/', 1)
    TestUninfectedCells, TestUninfectedLabels  = readData('./input/fed/test/Uninfected/', 0)
    TestCells = np.concatenate((TestParasitizedCells, TestUninfectedCells))
    TestLabels = np.concatenate((TestParasitizedLabels, TestUninfectedLabels))
    
    s = np.arange(Cells.shape[0])
    np.random.shuffle(s)
    Cells = Cells[s]
    Labels = Labels[s]
    
    sTest = np.arange(TestCells.shape[0])
    np.random.shuffle(sTest)
    TestCells = TestCells[sTest]
    TestLabels = TestLabels[sTest]
    
    num_classes=len(np.unique(Labels))
    len_data=len(Cells)
    print(len_data, ' Data Points')
    
    (x_train,x_test)=Cells, TestCells
    (y_train,y_test)=Labels, TestLabels
    
    # Since we're working on image data, we normalize data by divinding 255.
    x_train = x_train.astype('float32')/255 
    x_test = x_test.astype('float32')/255
    train_len=len(x_train)
    test_len=len(x_test)
    
    #Doing One hot encoding as classifier has multiple classes
    y_train=keras.utils.to_categorical(y_train,num_classes)
    y_test=keras.utils.to_categorical(y_test,num_classes)
    
    #creating sequential model
    model=Sequential()
    model.add(Conv2D(filters=16,kernel_size=2,padding="same",activation="relu",input_shape=(50,50,3)))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(filters=32,kernel_size=2,padding="same",activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(filters=64,kernel_size=2,padding="same",activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(500,activation="relu"))
    model.add(Dropout(0.2))
    model.add(Dense(2,activation="softmax"))#2 represent output layer neurons 
#     model.summary()

    # compile the model with loss as categorical_crossentropy and using adam optimizer
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    #Fit the model with min batch size as 50[can tune batch size to some factor of 2^power ] 
    model.fit(x_train, y_train, batch_size=100, epochs=5, verbose=1)
    
    scores = model.evaluate(x_test, y_test)
    print("Loss: ", scores[0])        #Loss
    print("Accuracy: ", scores[1])    #Accuracy

    #Saving Model
    model.save("./output.h5")
    return len_data, scores[1]

In [47]:
def update_train(file, d):
    
    print('Reading Training Data')
    
    ParasitizedCells, ParasitizedLabels = readData(file + '/Parasitized/', 1)
    UninfectedCells, UninfectedLabels  = readData(file + '/Uninfected/', 0)
    Cells = np.concatenate((ParasitizedCells, UninfectedCells))
    Labels = np.concatenate((ParasitizedLabels, UninfectedLabels))
    
    print('Reading Testing Data')
    
    TestParasitizedCells, TestParasitizedLabels = readData('./input/fed/test/Parasitized/', 1)
    TestUninfectedCells, TestUninfectedLabels  = readData('./input/fed/test/Uninfected/', 0)
    TestCells = np.concatenate((TestParasitizedCells, TestUninfectedCells))
    TestLabels = np.concatenate((TestParasitizedLabels, TestUninfectedLabels))
    
    s = np.arange(Cells.shape[0])
    np.random.shuffle(s)
    Cells = Cells[s]
    Labels = Labels[s]
    
    sTest = np.arange(TestCells.shape[0])
    np.random.shuffle(sTest)
    TestCells = TestCells[sTest]
    TestLabels = TestLabels[sTest]
    
    num_classes=len(np.unique(Labels))
    len_data=len(Cells)
    print(len_data, ' Data Points')
    
    (x_train,x_test)=Cells, TestCells
    (y_train,y_test)=Labels, TestLabels
    
    # Since we're working on image data, we normalize data by divinding 255.
    x_train = x_train.astype('float32')/255 
    x_test = x_test.astype('float32')/255
    train_len=len(x_train)
    test_len=len(x_test)
    
    #Doing One hot encoding as classifier has multiple classes
    y_train=keras.utils.to_categorical(y_train,num_classes)
    y_test=keras.utils.to_categorical(y_test,num_classes)
    
    #creating sequential model
    model=Sequential()
    model.add(Conv2D(filters=16,kernel_size=2,padding="same",activation="relu",input_shape=(50,50,3)))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(filters=32,kernel_size=2,padding="same",activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(filters=64,kernel_size=2,padding="same",activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(500,activation="relu"))
    model.add(Dropout(0.2))
    model.add(Dense(2,activation="softmax"))#2 represent output layer neurons 
    # model.summary()

    model.load_weights("./output.h5")
    
    # compile the model with loss as categorical_crossentropy and using adam optimizer
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    #Fit the model with min batch size as 50[can tune batch size to some factor of 2^power ] 
    model.fit(x_train, y_train, batch_size=100, epochs=5, verbose=1)
    
    
    scores = model.evaluate(x_test, y_test)
    print("Loss: ", scores[0])        #Loss
    print("Accuracy: ", scores[1])    #Accuracy

    #Saving Model
    model.save("./weights/" + str(d) + ".h5")
    return len_data, scores[1]

In [48]:
FLAccuracy = {}
# FLAccuracy['Complete Dataset'] = genesis_train('./input/cell_images')
FLAccuracy['Genesis'] = genesis_train('./input/fed/genesis')
FLAccuracy['d1'] = update_train('./input/fed/d1', 'd1')
FLAccuracy['d2'] = update_train('./input/fed/d2', 'd2')
FLAccuracy['d3'] = update_train('./input/fed/d3', 'd3')
FLAccuracy['d4'] = update_train('./input/fed/d4', 'd4')
FLAccuracy['d5'] = update_train('./input/fed/d5', 'd5')
FLAccuracy['d6'] = update_train('./input/fed/d6', 'd6')
FLAccuracy['d7'] = update_train('./input/fed/d7', 'd7')
FLAccuracy['d8'] = update_train('./input/fed/d8', 'd8')
FLAccuracy['d9'] = update_train('./input/fed/d9', 'd9')
FLAccuracy['d10'] = update_train('./input/fed/d10', 'd10')
FLAccuracy['d11'] = update_train('./input/fed/d11', 'd11')
FLAccuracy['d12'] = update_train('./input/fed/d12', 'd12')
FLAccuracy['d13'] = update_train('./input/fed/d13', 'd13')
FLAccuracy['d14'] = update_train('./input/fed/d14', 'd14')
FLAccuracy['d15'] = update_train('./input/fed/d15', 'd15')
FLAccuracy['d16'] = update_train('./input/fed/d16', 'd16')
FLAccuracy['d17'] = update_train('./input/fed/d17', 'd17')
FLAccuracy['d18'] = update_train('./input/fed/d18', 'd18')
FLAccuracy['d19'] = update_train('./input/fed/d19', 'd19')
FLAccuracy['d20'] = update_train('./input/fed/d20', 'd20')

Reading Training Data
686  Data Points Read!
696  Data Points Read!
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
1382  Data Points
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.47488918900489807
Accuracy:  0.7899692058563232
Reading Training Data
528  Data Points Read!
533  Data Points Read!
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
1061  Data Points
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.3024612367153168
Accuracy:  0.8748868107795715
Reading Training Data
522  Data Points Read!
528  Data Points Read!
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
1050  Data Points
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.31824126839637756
Accuracy:  0.8803186416625977
Reading Training Data
692  Data Points Read!
655  Data Points Read!
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
1347  Data Points
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.2

Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.26874953508377075
Accuracy:  0.8852072954177856
Reading Training Data
557  Data Points Read!
577  Data Points Read!
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
1134  Data Points
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.3197242021560669
Accuracy:  0.8569617867469788
Reading Training Data
827  Data Points Read!
796  Data Points Read!
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
1623  Data Points
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.2423737496137619
Accuracy:  0.8998732566833496
Reading Training Data
395  Data Points Read!
425  Data Points Read!
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
820  Data Points
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.3773098886013031
Accuracy:  0.8334238529205322
Reading Training Data
513  Data Points Read!
528  Data Points Read!
Reading Testing Data
2740  Data Points Read!
2783  Data Points 

Loss:  0.42869099974632263
Accuracy:  0.816947340965271


In [49]:
FLAccuracy

{'Genesis': (1382, 0.7899692058563232),
 'd1': (1061, 0.8748868107795715),
 'd2': (1050, 0.8803186416625977),
 'd3': (1347, 0.8839398622512817),
 'd4': (858, 0.8479087352752686),
 'd5': (1676, 0.901502788066864),
 'd6': (1166, 0.8725330233573914),
 'd7': (813, 0.8647474050521851),
 'd8': (1445, 0.8971573710441589),
 'd9': (1102, 0.8913633823394775),
 'd10': (1396, 0.8852072954177856),
 'd11': (1134, 0.8569617867469788),
 'd12': (1623, 0.8998732566833496),
 'd13': (820, 0.8334238529205322),
 'd14': (1041, 0.843925416469574),
 'd15': (565, 0.8428390622138977),
 'd16': (828, 0.8680065274238586),
 'd17': (831, 0.8768784999847412),
 'd18': (521, 0.8326995968818665),
 'd19': (814, 0.8772406578063965),
 'd20': (562, 0.816947340965271)}

In [50]:
FLAccuracyDF = pd.DataFrame.from_dict(FLAccuracy, orient='index', columns=['DataSize', 'Accuracy'])
FLAccuracyDF

Unnamed: 0,DataSize,Accuracy
Genesis,1382,0.789969
d1,1061,0.874887
d2,1050,0.880319
d3,1347,0.88394
d4,858,0.847909
d5,1676,0.901503
d6,1166,0.872533
d7,813,0.864747
d8,1445,0.897157
d9,1102,0.891363


In [51]:
FLAccuracyDF.index

Index(['Genesis', 'd1', 'd2', 'd3', 'd4', 'd5', 'd6', 'd7', 'd8', 'd9', 'd10',
       'd11', 'd12', 'd13', 'd14', 'd15', 'd16', 'd17', 'd18', 'd19', 'd20'],
      dtype='object')

In [52]:
n = 0
for w in FLAccuracy:
    if 'Complete' in w:
        continue
    n += FLAccuracy[w][0]
print('Total number of data points in this round: ', n)

Total number of data points in this round:  22035


In [53]:
FLAccuracyDF['Weightage'] = FLAccuracyDF['DataSize'].apply(lambda x: x/n)

In [54]:
FLAccuracyDF

Unnamed: 0,DataSize,Accuracy,Weightage
Genesis,1382,0.789969,0.062718
d1,1061,0.874887,0.048151
d2,1050,0.880319,0.047651
d3,1347,0.88394,0.06113
d4,858,0.847909,0.038938
d5,1676,0.901503,0.076061
d6,1166,0.872533,0.052916
d7,813,0.864747,0.036896
d8,1445,0.897157,0.065577
d9,1102,0.891363,0.050011


In [55]:
def scale(weight, scaler):
    scaledWeights = []
    for i in range(len(weight)):
        scaledWeights.append(scaler * weight[i])
    return scaledWeights

def getScaledWeight(d, scaler):
    
    #creating sequential model
    model=Sequential()
    model.add(Conv2D(filters=16,kernel_size=2,padding="same",activation="relu",input_shape=(50,50,3)))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(filters=32,kernel_size=2,padding="same",activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(filters=64,kernel_size=2,padding="same",activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(500,activation="relu"))
    model.add(Dropout(0.2))
    model.add(Dense(2,activation="softmax"))#2 represent output layer neurons 
    
    fpath = "./weights/"+d+".h5"
    model.load_weights(fpath)
    weight = model.get_weights()
    scaledWeight = scale(weight, scaler)

    return scaledWeight

In [56]:
def avgWeights(scaledWeights):
    avg = list()
    for weight_list_tuple in zip(*scaledWeights):
        layer_mean = tf.math.reduce_sum(weight_list_tuple, axis=0)
        avg.append(layer_mean)
    return avg

def FedAvg(models):
    
    scaledWeights = []
    for m in models:
        scaledWeights.append(getScaledWeight(m, FLAccuracyDF.loc[m]['Weightage']))
    avgWeight = avgWeights(scaledWeights)
    return avgWeight

In [57]:
models = ['d1', 'd2', 'd3', 'd4', 'd5', 'd6', 'd7', 'd8', 'd9', 'd10', 'd11', 'd12', 'd13', 'd14', 'd15', 'd16', 'd17', 'd18', 'd19', 'd20']
avgWeight = FedAvg(models)
print(avgWeight)

[<tf.Tensor: shape=(2, 2, 3, 16), dtype=float32, numpy=
array([[[[ 6.29709959e-02,  3.05600781e-02,  4.73186262e-02,
          -4.84639332e-02, -1.09765619e-01, -2.40126792e-02,
          -2.00001881e-01,  4.64794785e-03, -9.27647501e-02,
          -1.66123241e-01,  2.24855661e-01, -7.29905367e-02,
           1.65846452e-01, -1.77608833e-01,  8.64765197e-02,
          -2.02627137e-01],
         [ 1.22623229e-02, -2.69816726e-01, -8.74007195e-02,
          -2.52788186e-01, -1.04290135e-01,  1.57856569e-01,
          -2.61925399e-01, -1.27280667e-01,  1.26418844e-01,
          -1.46524444e-01, -2.43654959e-02,  2.42929533e-01,
          -2.60935754e-01,  2.79806722e-02,  1.43053783e-02,
          -1.92507386e-01],
         [-1.32187217e-01, -1.47404790e-01, -3.16053033e-02,
           1.14077598e-01, -1.20645165e-01,  1.56122614e-02,
           2.14044124e-01,  1.66435450e-01, -1.01623848e-01,
          -2.04821333e-01, -9.87461675e-03, -2.57626027e-01,
          -2.19773769e-01, -1.4948

In [58]:
def testNewGlobal(weight):
    
    print('Reading Testing Data')
    
    TestParasitizedCells, TestParasitizedLabels = readData('./input/fed/test/Parasitized/', 1)
    TestUninfectedCells, TestUninfectedLabels  = readData('./input/fed/test/Uninfected/', 0)
    TestCells = np.concatenate((TestParasitizedCells, TestUninfectedCells))
    TestLabels = np.concatenate((TestParasitizedLabels, TestUninfectedLabels))
    
    
    sTest = np.arange(TestCells.shape[0])
    np.random.shuffle(sTest)
    TestCells = TestCells[sTest]
    TestLabels = TestLabels[sTest]
    
    num_classes=len(np.unique(TestLabels))
    
    (x_test) = TestCells
    (y_test) = TestLabels
    
    # Since we're working on image data, we normalize data by divinding 255.
    x_test = x_test.astype('float32')/255
    test_len=len(x_test)
    
    #Doing One hot encoding as classifier has multiple classes
    y_test=keras.utils.to_categorical(y_test,num_classes)
    
    #creating sequential model
    model=Sequential()
    model.add(Conv2D(filters=16,kernel_size=2,padding="same",activation="relu",input_shape=(50,50,3)))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(filters=32,kernel_size=2,padding="same",activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(filters=64,kernel_size=2,padding="same",activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(500,activation="relu"))
    model.add(Dropout(0.2))
    model.add(Dense(2,activation="softmax"))#2 represent output layer neurons 
#     model.summary()

    model.set_weights(weight)

    # compile the model with loss as categorical_crossentropy and using adam optimizer
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    scores = model.evaluate(x_test, y_test)
    print("Loss: ", scores[0])        #Loss
    print("Accuracy: ", scores[1])    #Accuracy

    #Saving Model
    model.save("./output.h5")
    return scores[1]

In [59]:
testNewGlobal(avgWeight)

Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
Loss:  0.3326050043106079
Accuracy:  0.8701792359352112


0.8701792359352112

In [60]:
FLAccuracyDF

Unnamed: 0,DataSize,Accuracy,Weightage
Genesis,1382,0.789969,0.062718
d1,1061,0.874887,0.048151
d2,1050,0.880319,0.047651
d3,1347,0.88394,0.06113
d4,858,0.847909,0.038938
d5,1676,0.901503,0.076061
d6,1166,0.872533,0.052916
d7,813,0.864747,0.036896
d8,1445,0.897157,0.065577
d9,1102,0.891363,0.050011
