In [42]:
from PIL import Image
import numpy as np
import os
import cv2
import keras
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Conv2D,MaxPooling2D,Dense,Flatten,Dropout
import pandas as pd
import sys
%matplotlib inline
import matplotlib.pyplot as plt
import plotly.express as px
import tensorflow as tf

In [43]:
def readData(filepath, label):
    """Load every readable image in a directory as a 50x50, 3-channel array.

    Parameters
    ----------
    filepath : str
        Directory containing the image files. A trailing path separator is
        accepted but not required.
    label : int
        Class label attached to every image read from this directory.

    Returns
    -------
    tuple of (np.ndarray, np.ndarray)
        The image arrays and a parallel array of labels. Both are empty
        (shape ``(0,)``) when the directory yields no decodable image.
    """
    cells = []
    labels = []
    for img in os.listdir(filepath):
        # os.path.join works whether or not `filepath` ends with a separator,
        # unlike plain string concatenation.
        image = cv2.imread(os.path.join(filepath, img))
        if image is None:
            # cv2.imread reports an unreadable / non-image entry by returning
            # None instead of raising, so test for it explicitly.
            print('Skipping file: ', img, '(not a readable image)')
            continue
        # NOTE: cv2 decodes colour images in BGR order while the array is
        # tagged 'RGB' here, so red and blue are swapped. Left unchanged on
        # purpose: existing checkpoints were trained on this channel order.
        image_from_array = Image.fromarray(image, 'RGB')
        size_image = image_from_array.resize((50, 50))
        cells.append(np.array(size_image))
        labels.append(label)
    print(len(cells), ' Data Points Read!')
    return np.array(cells), np.array(labels)

In [44]:
def genesis_train(file):
    """Train the initial ("genesis") global model from scratch and save it.

    Reads the ``Parasitized/`` and ``Uninfected/`` images under `file` for
    training and the hold-out images under ``./input/fed/test`` for
    evaluation, fits a small CNN (5 epochs, batch size 100, Adam,
    categorical cross-entropy) and writes it to ``./weights/global1.h5``.

    Parameters
    ----------
    file : str
        Directory holding the ``Parasitized/`` and ``Uninfected/`` folders.

    Returns
    -------
    tuple of (int, float)
        Number of training images and accuracy on the hold-out set.

    Raises
    ------
    ValueError
        If no training image or no test image could be read.
    """
    # Binary task (1 = Parasitized, 0 = Uninfected). Fixed rather than derived
    # from the labels so the one-hot width always matches the output layer.
    num_classes = 2

    def _stack(parts, what):
        """Concatenate (cells, labels) pairs, ignoring folders that gave nothing."""
        # readData returns 1-D empty arrays for an empty folder; np.concatenate
        # cannot mix those with 4-D image batches, so drop them first.
        parts = [(cells, labels) for cells, labels in parts if len(cells)]
        if not parts:
            raise ValueError('No ' + what + ' images could be read')
        return (np.concatenate([cells for cells, _ in parts]),
                np.concatenate([labels for _, labels in parts]))

    print('Reading Training Data')
    Cells, Labels = _stack([readData(file + '/Parasitized/', 1),
                            readData(file + '/Uninfected/', 0)], 'training')

    print('Reading Testing Data')
    TestCells, TestLabels = _stack([readData('./input/fed/test/Parasitized/', 1),
                                    readData('./input/fed/test/Uninfected/', 0)], 'test')

    # Mix the two classes, which were loaded one after the other. The test set
    # is left in load order: evaluation metrics do not depend on sample order.
    s = np.arange(Cells.shape[0])
    np.random.shuffle(s)
    Cells = Cells[s]
    Labels = Labels[s]

    len_data = len(Cells)
    print(len_data, ' Data Points')

    # Scale 8-bit pixel values into [0, 1].
    x_train = Cells.astype('float32') / 255
    x_test = TestCells.astype('float32') / 255

    # One-hot encode the labels to match the softmax output.
    y_train = keras.utils.to_categorical(Labels, num_classes)
    y_test = keras.utils.to_categorical(TestLabels, num_classes)

    # NOTE: the same architecture is repeated in local_train, getScaledWeight
    # and saveModel; the layouts must stay identical for weight loading to work.
    model = Sequential()
    model.add(Conv2D(filters=16, kernel_size=2, padding="same", activation="relu", input_shape=(50, 50, 3)))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(filters=32, kernel_size=2, padding="same", activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(filters=64, kernel_size=2, padding="same", activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(500, activation="relu"))
    model.add(Dropout(0.2))
    model.add(Dense(num_classes, activation="softmax"))  # one unit per class

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Mini-batches of 100 images, 5 passes over the training data.
    model.fit(x_train, y_train, batch_size=100, epochs=5, verbose=1)

    scores = model.evaluate(x_test, y_test)
    print("Loss: ", scores[0])        #Loss
    print("Accuracy: ", scores[1])    #Accuracy

    # Make sure the output folder exists before saving.
    os.makedirs("./weights", exist_ok=True)
    model.save("./weights/global1.h5")
    return len_data, scores[1]

In [45]:
def local_train(file, d, globalId):
    """Fine-tune a global model on one client's data and save the result.

    Builds the CNN, initialises it from ``./weights/global<globalId>.h5``,
    trains it on the images under `file` (5 epochs, batch size 100),
    evaluates it on the hold-out images under ``./input/fed/test`` and
    writes it to ``./weights/<d>.h5``.

    Parameters
    ----------
    file : str
        Directory holding the ``Parasitized/`` and ``Uninfected/`` folders.
        A client with images for only one of the two classes is accepted.
    d : str
        Name under which the trained model is saved.
    globalId : int or str
        Suffix of the global checkpoint to start from.

    Returns
    -------
    tuple of (int, float)
        Number of training images and accuracy on the hold-out set.

    Raises
    ------
    ValueError
        If no training image or no test image could be read.
    """
    # Binary task (1 = Parasitized, 0 = Uninfected). Fixed rather than derived
    # from the labels so the one-hot width always matches the output layer,
    # even when this client only holds one class.
    num_classes = 2

    def _stack(parts, what):
        """Concatenate (cells, labels) pairs, ignoring folders that gave nothing."""
        # readData returns 1-D empty arrays for an empty folder; np.concatenate
        # cannot mix those with 4-D image batches, so drop them first.
        parts = [(cells, labels) for cells, labels in parts if len(cells)]
        if not parts:
            raise ValueError('No ' + what + ' images could be read')
        return (np.concatenate([cells for cells, _ in parts]),
                np.concatenate([labels for _, labels in parts]))

    print('Reading Training Data')
    Cells, Labels = _stack([readData(file + '/Parasitized/', 1),
                            readData(file + '/Uninfected/', 0)], 'training')

    print('Reading Testing Data')
    TestCells, TestLabels = _stack([readData('./input/fed/test/Parasitized/', 1),
                                    readData('./input/fed/test/Uninfected/', 0)], 'test')

    # Mix the two classes, which were loaded one after the other. The test set
    # is left in load order: evaluation metrics do not depend on sample order.
    s = np.arange(Cells.shape[0])
    np.random.shuffle(s)
    Cells = Cells[s]
    Labels = Labels[s]

    len_data = len(Cells)
    print(len_data, ' Data Points')

    # Scale 8-bit pixel values into [0, 1].
    x_train = Cells.astype('float32') / 255
    x_test = TestCells.astype('float32') / 255

    # One-hot encode the labels to match the softmax output.
    y_train = keras.utils.to_categorical(Labels, num_classes)
    y_test = keras.utils.to_categorical(TestLabels, num_classes)

    # NOTE: the same architecture is repeated in genesis_train, getScaledWeight
    # and saveModel; the layouts must stay identical for weight loading to work.
    model = Sequential()
    model.add(Conv2D(filters=16, kernel_size=2, padding="same", activation="relu", input_shape=(50, 50, 3)))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(filters=32, kernel_size=2, padding="same", activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(filters=64, kernel_size=2, padding="same", activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(500, activation="relu"))
    model.add(Dropout(0.2))
    model.add(Dense(num_classes, activation="softmax"))  # one unit per class

    # Start from the requested global model instead of random initialisation.
    model.load_weights("./weights/global" + str(globalId) + ".h5")

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Mini-batches of 100 images, 5 passes over the client's data.
    model.fit(x_train, y_train, batch_size=100, epochs=5, verbose=1)

    scores = model.evaluate(x_test, y_test)
    print("Loss: ", scores[0])        #Loss
    print("Accuracy: ", scores[1])    #Accuracy

    # Make sure the output folder exists before saving.
    os.makedirs("./weights", exist_ok=True)
    model.save("./weights/" + str(d) + ".h5")
    return len_data, scores[1]

In [46]:
#### FedAvg ####

def getDataLen(trainingDict):
    """Add up the data sizes (first tuple element) of every entry and report the total."""
    total = sum(entry[0] for entry in trainingDict.values())
    print('Total number of data points after this round: ', total)
    return total

def assignWeights(trainingDf, trainingDict):
    """Add a 'Weightage' column holding each row's share of the total data size.

    The frame is modified in place and also returned, together with the
    total number of data points.
    """
    total = getDataLen(trainingDict)

    def share(size):
        # Fraction of all data points contributed by one model.
        return size / total

    trainingDf['Weightage'] = trainingDf['DataSize'].apply(share)
    return trainingDf, total
    
def scale(weight, scaler):
    """Return a new list in which every entry of `weight` is multiplied by `scaler`."""
    return [scaler * layer for layer in weight]

def getScaledWeight(d, scaler):
    """Load the weights stored in ./weights/<d>.h5 and scale each array by `scaler`.

    `d` is the checkpoint name without extension; the result is the list
    returned by scale() for that model's weights.
    """
    # Skeleton network with the same layer layout as the saved models, so the
    # stored weights can be loaded into it.
    layers = [
        Conv2D(filters=16, kernel_size=2, padding="same", activation="relu", input_shape=(50, 50, 3)),
        MaxPooling2D(pool_size=2),
        Conv2D(filters=32, kernel_size=2, padding="same", activation="relu"),
        MaxPooling2D(pool_size=2),
        Conv2D(filters=64, kernel_size=2, padding="same", activation="relu"),
        MaxPooling2D(pool_size=2),
        Dropout(0.2),
        Flatten(),
        Dense(500, activation="relu"),
        Dropout(0.2),
        Dense(2, activation="softmax"),  # two output neurons, one per class
    ]
    network = Sequential(layers)
    network.load_weights("./weights/" + d + ".h5")
    return scale(network.get_weights(), scaler)

def avgWeights(scaledWeights):
    """Sum several models' pre-scaled weights, layer by layer.

    Parameters
    ----------
    scaledWeights : list of list of np.ndarray
        One weight list per model, as produced by getScaledWeight. Because
        each list is already multiplied by its model's weightage, the
        element-wise sum is the weighted average.

    Returns
    -------
    list of np.ndarray
        One summed array per layer, in the same order as the inputs. NumPy
        arrays are returned (rather than TensorFlow tensors) so the result
        has the same type as what ``model.get_weights()`` produces.
        Empty when `scaledWeights` is empty.
    """
    # zip(*...) groups the i-th array of every model together.
    return [np.sum(layer_group, axis=0) for layer_group in zip(*scaledWeights)]

def FedAvg(trainingDict):
    """Combine the saved models named in `trainingDict` into one weight list.

    Every key takes part, including a 'global...' entry if present; each
    model's weights are scaled by its share of the total data size and the
    scaled lists are summed. Returns the combined weights and the total
    number of data points.
    """
    trainingDf = pd.DataFrame.from_dict(trainingDict, orient='index', columns=['DataSize', 'Accuracy'])
    trainingDf, dataLen = assignWeights(trainingDf, trainingDict)
    scaledWeights = [
        getScaledWeight(name, trainingDf.loc[name, 'Weightage'])
        for name in list(trainingDict.keys())
    ]
    return avgWeights(scaledWeights), dataLen

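# Disabled alternative to FedAvg above: identical except that keys containing
# 'global' are skipped, so a stored global model is left out of the average.
# def FedAvg(trainingDict):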
#     trainingDf = pd.DataFrame.from_dict(trainingDict, orient='index', columns=['DataSize', 'Accuracy']) 
#     models = []
#     for i in trainingDict.keys():
#         if 'global' not in i:
#             models.append(i)
#     scaledWeights = []
#     trainingDf, dataLen = assignWeights(trainingDf, trainingDict)
#     for m in models:
#         scaledWeights.append(getScaledWeight(m, trainingDf.loc[m]['Weightage']))
#     fedAvgWeight = avgWeights(scaledWeights)
#     return fedAvgWeight, dataLen

def saveModel(weight, n):
    """Evaluate aggregated weights on the hold-out set and save them as a new global model.

    Parameters
    ----------
    weight : list
        One array per model weight, in the order expected by
        ``model.set_weights`` (e.g. the output of FedAvg).
    n : str or int
        Suffix of the checkpoint; the model is written to
        ``./weights/global<n>.h5``.

    Returns
    -------
    float
        Accuracy of the model on the images under ``./input/fed/test``.

    Raises
    ------
    ValueError
        If no test image could be read.
    """
    # Binary task (1 = Parasitized, 0 = Uninfected). Fixed rather than derived
    # from the labels so the one-hot width always matches the output layer.
    num_classes = 2

    print('Reading Testing Data')

    # readData returns 1-D empty arrays for an empty folder; np.concatenate
    # cannot mix those with 4-D image batches, so drop them first.
    parts = [(cells, labels)
             for cells, labels in (readData('./input/fed/test/Parasitized/', 1),
                                   readData('./input/fed/test/Uninfected/', 0))
             if len(cells)]
    if not parts:
        raise ValueError('No test images could be read')
    TestCells = np.concatenate([cells for cells, _ in parts])
    TestLabels = np.concatenate([labels for _, labels in parts])

    # Scale 8-bit pixel values into [0, 1]. No shuffling: evaluation metrics
    # do not depend on sample order.
    x_test = TestCells.astype('float32') / 255

    # One-hot encode the labels to match the softmax output.
    y_test = keras.utils.to_categorical(TestLabels, num_classes)

    # NOTE: the same architecture is repeated in genesis_train, local_train
    # and getScaledWeight; the layouts must stay identical for set_weights to fit.
    model = Sequential()
    model.add(Conv2D(filters=16, kernel_size=2, padding="same", activation="relu", input_shape=(50, 50, 3)))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(filters=32, kernel_size=2, padding="same", activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(filters=64, kernel_size=2, padding="same", activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(500, activation="relu"))
    model.add(Dropout(0.2))
    model.add(Dense(num_classes, activation="softmax"))  # one unit per class

    model.set_weights(weight)

    # Compiling is required before evaluate(); no training happens here.
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    scores = model.evaluate(x_test, y_test)
    print("Loss: ", scores[0])        #Loss
    print("Accuracy: ", scores[1])    #Accuracy

    # str(n) lets callers pass the round number as an int as well as a str.
    os.makedirs("./weights", exist_ok=True)
    fpath = "./weights/global" + str(n) + ".h5"
    model.save(fpath)
    return scores[1]

In [47]:
# Round 0: train the genesis model and record it as the first global model.
globalDict, trainingDict = {}, {}
trainingDict['global1'] = genesis_train('./input/fed/genesis')
globalDict.update(global1=trainingDict['global1'])

Reading Training Data
686  Data Points Read!
696  Data Points Read!
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
1382  Data Points
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.6675447225570679
Accuracy:  0.5933369398117065


In [48]:
# Clients d1-d5 each start from global1 and train on their own folder.
for client in ('d1', 'd2', 'd3', 'd4', 'd5'):
    trainingDict[client] = local_train('./input/fed/' + client, client, 1)

Reading Training Data
528  Data Points Read!
533  Data Points Read!
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
1061  Data Points
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.5806125998497009
Accuracy:  0.6949121952056885
Reading Training Data
522  Data Points Read!
528  Data Points Read!
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
1050  Data Points
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.5845358967781067
Accuracy:  0.6891182065010071
Reading Training Data
692  Data Points Read!
655  Data Points Read!
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
1347  Data Points
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.5760866403579712
Accuracy:  0.7131993770599365
Reading Training Data
448  Data Points Read!
410  Data Points Read!
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
858  Data Points
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.5865

In [49]:
# Show the (training-set size, hold-out accuracy) pair recorded for each model so far.
trainingDict

{'global1': (1382, 0.5933369398117065),
 'd1': (1061, 0.6949121952056885),
 'd2': (1050, 0.6891182065010071),
 'd3': (1347, 0.7131993770599365),
 'd4': (858, 0.6959985494613647),
 'd5': (1676, 0.730762243270874)}

In [50]:
# Merge the recorded models into global2, then start a fresh record for the next round.
NewGlobal, dataLen = FedAvg(trainingDict)
trainingDict = dict()
trainingDict.update(global2=(dataLen, saveModel(NewGlobal, '2')))

Total number of data points after this round:  7374
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
Loss:  0.5865991711616516
Accuracy:  0.6860402226448059


In [51]:
# Record global2, then let clients d6-d10 train starting from it.
globalDict.update(global2=trainingDict['global2'])
for client in ('d6', 'd7', 'd8', 'd9', 'd10'):
    trainingDict[client] = local_train('./input/fed/' + client, client, 2)

Reading Training Data
599  Data Points Read!
567  Data Points Read!
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
1166  Data Points
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.5192921161651611
Accuracy:  0.7512221336364746
Reading Training Data
418  Data Points Read!
395  Data Points Read!
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
813  Data Points
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.5596891045570374
Accuracy:  0.7164584398269653
Reading Training Data
716  Data Points Read!
729  Data Points Read!
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
1445  Data Points
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.517432689666748
Accuracy:  0.7325728535652161
Reading Training Data
530  Data Points Read!
572  Data Points Read!
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
1102  Data Points
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.57054

In [52]:
# Merge the recorded models into global3, then start a fresh record for the next round.
NewGlobal, dataLen = FedAvg(trainingDict)
trainingDict = dict()
trainingDict.update(global3=(dataLen, saveModel(NewGlobal, '3')))

Total number of data points after this round:  13296
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
Loss:  0.5690057873725891
Accuracy:  0.6912909746170044


In [53]:
# Record global3, then let clients d11-d15 train starting from it.
globalDict.update(global3=trainingDict['global3'])
for client in ('d11', 'd12', 'd13', 'd14', 'd15'):
    trainingDict[client] = local_train('./input/fed/' + client, client, 3)

Reading Training Data
557  Data Points Read!
577  Data Points Read!
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
1134  Data Points
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.5010061860084534
Accuracy:  0.763715386390686
Reading Training Data
827  Data Points Read!
796  Data Points Read!
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
1623  Data Points
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.4237591326236725
Accuracy:  0.7879775762557983
Reading Training Data
395  Data Points Read!
425  Data Points Read!
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
820  Data Points
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.5612853765487671
Accuracy:  0.7023357152938843
Reading Training Data
513  Data Points Read!
528  Data Points Read!
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
1041  Data Points
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.49740

In [54]:
# Merge the recorded models into global4, then start a fresh record for the next round.
NewGlobal, dataLen = FedAvg(trainingDict)
trainingDict = dict()
trainingDict.update(global4=(dataLen, saveModel(NewGlobal, '4')))

Total number of data points after this round:  18479
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
Loss:  0.548358142375946
Accuracy:  0.7090349197387695


In [55]:
# Record global4, then let clients d16-d20 train starting from it.
globalDict.update(global4=trainingDict['global4'])
for client in ('d16', 'd17', 'd18', 'd19', 'd20'):
    trainingDict[client] = local_train('./input/fed/' + client, client, 4)

Reading Training Data
412  Data Points Read!
416  Data Points Read!
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
828  Data Points
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.5196879506111145
Accuracy:  0.7284084558486938
Reading Training Data
417  Data Points Read!
414  Data Points Read!
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
831  Data Points
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.46278688311576843
Accuracy:  0.7868911623954773
Reading Training Data
269  Data Points Read!
252  Data Points Read!
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
521  Data Points
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.5006759166717529
Accuracy:  0.7593699097633362
Reading Training Data
407  Data Points Read!
407  Data Points Read!
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
814  Data Points
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss:  0.496591

In [57]:
# Merge the recorded models into global5 and record it as the final global model.
NewGlobal, dataLen = FedAvg(trainingDict)
trainingDict = dict()
trainingDict.update(global5=(dataLen, saveModel(NewGlobal, '5')))
globalDict.update(global5=trainingDict['global5'])

Total number of data points after this round:  22035
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
Loss:  0.4008854031562805
Accuracy:  0.8142313957214355


In [58]:
# Per-round summary: total data points counted so far and hold-out accuracy of each global model.
globalDict

{'global1': (1382, 0.5933369398117065),
 'global2': (7374, 0.6860402226448059),
 'global3': (13296, 0.6912909746170044),
 'global4': (18479, 0.7090349197387695),
 'global5': (22035, 0.8142313957214355)}