In [1]:
# Reference: https://www.kaggle.com/sharp1/malaria-cells-classification-through-keras
from PIL import Image
import numpy as np
import os
import cv2
import keras
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Conv2D,MaxPooling2D,Dense,Flatten,Dropout
import pandas as pd
import sys
import tensorflow as tf
%matplotlib inline
import matplotlib.pyplot as plt
import plotly.express as px

In [2]:
def readData(filepath, label):
    cells = []
    labels = []
    file = os.listdir(filepath)
    for img in file:
        try:
            image = cv2.imread(filepath + img)
            image_from_array = Image.fromarray(image, 'RGB')
            size_image = image_from_array.resize((50, 50))
            cells.append(np.array(size_image))
            labels.append(label)
        except AttributeError as e:
            print('Skipping file: ', img, e)
    print(len(cells), ' Data Points Read!')
    return np.array(cells), np.array(labels)

In [3]:
TestParasitizedCells, TestParasitizedLabels = readData('./input/fed/test/Parasitized/', 1)
TestUninfectedCells, TestUninfectedLabels  = readData('./input/fed/test/Uninfected/', 0)

2740  Data Points Read!
2783  Data Points Read!


In [4]:
def genesis_train(file):
    
    print('Reading Training Data')
    
    ParasitizedCells, ParasitizedLabels = readData(file + '/Parasitized/', 1)
    UninfectedCells, UninfectedLabels  = readData(file + '/Uninfected/', 0)
    
    Cells = np.concatenate((ParasitizedCells, UninfectedCells))
    Labels = np.concatenate((ParasitizedLabels, UninfectedLabels))
    
    print('Reading Testing Data')
    
    TestCells = np.concatenate((TestParasitizedCells, TestUninfectedCells))
    TestLabels = np.concatenate((TestParasitizedLabels, TestUninfectedLabels))
    
    s = np.arange(Cells.shape[0])
    np.random.shuffle(s)
    Cells = Cells[s]
    Labels = Labels[s]
    
    sTest = np.arange(TestCells.shape[0])
    np.random.shuffle(sTest)
    TestCells = TestCells[sTest]
    TestLabels = TestLabels[sTest]
    
    num_classes=len(np.unique(Labels))
    len_data=len(Cells)
    print(len_data, ' Data Points')
    
    (x_train,x_test)=Cells, TestCells
    (y_train,y_test)=Labels, TestLabels
    
    # Since we're working on image data, we normalize data by divinding 255.
    x_train = x_train.astype('float32')/255 
    x_test = x_test.astype('float32')/255
    train_len=len(x_train)
    test_len=len(x_test)
    
    #Doing One hot encoding as classifier has multiple classes
    y_train=keras.utils.to_categorical(y_train,num_classes)
    y_test=keras.utils.to_categorical(y_test,num_classes)
    
    #creating sequential model
    model=Sequential()
    model.add(Conv2D(filters=16,kernel_size=2,padding="same",activation="relu",input_shape=(50,50,3)))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(filters=32,kernel_size=2,padding="same",activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(filters=64,kernel_size=2,padding="same",activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(500,activation="relu"))
    model.add(Dropout(0.2))
    model.add(Dense(2,activation="softmax"))#2 represent output layer neurons 
#     model.summary()

    # compile the model with loss as categorical_crossentropy and using adam optimizer
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    #Fit the model with min batch size as 50[can tune batch size to some factor of 2^power ] 
    model.fit(x_train, y_train, batch_size=100, epochs=3, verbose=1)
    
    scores = model.evaluate(x_test, y_test)
    print("Loss: ", scores[0])        #Loss
    print("Accuracy: ", scores[1])    #Accuracy

    #Saving Model
    model.save("./output.h5")
    return len_data, scores[1]

In [5]:
def update_train(file, d):
    
    print('Reading Training Data')
    ParasitizedCells, ParasitizedLabels = readData(file + '/Parasitized/', 1)
    UninfectedCells, UninfectedLabels  = readData(file + '/Uninfected/', 0)
    
    Cells = np.concatenate((ParasitizedCells, UninfectedCells))
    Labels = np.concatenate((ParasitizedLabels, UninfectedLabels))
    
    print('Reading Testing Data')
    
    TestCells = np.concatenate((TestParasitizedCells, TestUninfectedCells))
    TestLabels = np.concatenate((TestParasitizedLabels, TestUninfectedLabels))
    
    s = np.arange(Cells.shape[0])
    np.random.shuffle(s)
    Cells = Cells[s]
    Labels = Labels[s]
    
    sTest = np.arange(TestCells.shape[0])
    np.random.shuffle(sTest)
    TestCells = TestCells[sTest]
    TestLabels = TestLabels[sTest]
    
    num_classes=len(np.unique(Labels))
    len_data=len(Cells)
    print(len_data, ' Data Points')
    
    (x_train,x_test)=Cells, TestCells
    (y_train,y_test)=Labels, TestLabels
    
    # Since we're working on image data, we normalize data by divinding 255.
    x_train = x_train.astype('float32')/255 
    x_test = x_test.astype('float32')/255
    train_len=len(x_train)
    test_len=len(x_test)
    
    #Doing One hot encoding as classifier has multiple classes
    y_train=keras.utils.to_categorical(y_train,num_classes)
    y_test=keras.utils.to_categorical(y_test,num_classes)
    
    #creating sequential model
    model=Sequential()
    model.add(Conv2D(filters=16,kernel_size=2,padding="same",activation="relu",input_shape=(50,50,3)))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(filters=32,kernel_size=2,padding="same",activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(filters=64,kernel_size=2,padding="same",activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(500,activation="relu"))
    model.add(Dropout(0.2))
    model.add(Dense(2,activation="softmax"))#2 represent output layer neurons 
    # model.summary()

    model.load_weights("./output.h5")
    
    # compile the model with loss as categorical_crossentropy and using adam optimizer
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    #Fit the model with min batch size as 50[can tune batch size to some factor of 2^power ] 
    model.fit(x_train, y_train, batch_size=100, epochs=3, verbose=1)
    
    
    scores = model.evaluate(x_test, y_test)
    print("Loss: ", scores[0])        #Loss
    print("Accuracy: ", scores[1])    #Accuracy

    #Saving Model
    model.save("./weights/" + str(d) + ".h5")
    return len_data, scores[1]

In [6]:
FLAccuracy = {}
# FLAccuracy['Complete Dataset'] = genesis_train('./input/cell_images')
FLAccuracy['Genesis'] = genesis_train('./input/fed/genesis')
FLAccuracy['d1'] = update_train('./input/fed/d1', 'd1')
FLAccuracy['d2'] = update_train('./input/fed/d2', 'd2')
FLAccuracy['d3'] = update_train('./input/fed/d3', 'd3')
FLAccuracy['d4'] = update_train('./input/fed/d4', 'd4')
FLAccuracy['d5'] = update_train('./input/fed/d5', 'd5')
FLAccuracy['d6'] = update_train('./input/fed/d6', 'd6')
FLAccuracy['d7'] = update_train('./input/fed/d7', 'd7')
FLAccuracy['d8'] = update_train('./input/fed/d8', 'd8')
FLAccuracy['d9'] = update_train('./input/fed/d9', 'd9')
FLAccuracy['d10'] = update_train('./input/fed/d10', 'd10')
FLAccuracy['d11'] = update_train('./input/fed/d11', 'd11')
FLAccuracy['d12'] = update_train('./input/fed/d12', 'd12')
FLAccuracy['d13'] = update_train('./input/fed/d13', 'd13')
FLAccuracy['d14'] = update_train('./input/fed/d14', 'd14')
FLAccuracy['d15'] = update_train('./input/fed/d15', 'd15')
FLAccuracy['d16'] = update_train('./input/fed/d16', 'd16')
FLAccuracy['d17'] = update_train('./input/fed/d17', 'd17')
FLAccuracy['d18'] = update_train('./input/fed/d18', 'd18')
FLAccuracy['d19'] = update_train('./input/fed/d19', 'd19')
FLAccuracy['d20'] = update_train('./input/fed/d20', 'd20')

Reading Training Data
686  Data Points Read!
696  Data Points Read!
Reading Testing Data
1382  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Loss:  0.6367493867874146
Accuracy:  0.6527249813079834
Reading Training Data
528  Data Points Read!
533  Data Points Read!
Reading Testing Data
1061  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Loss:  0.5931140184402466
Accuracy:  0.6719174385070801
Reading Training Data
522  Data Points Read!
528  Data Points Read!
Reading Testing Data
1050  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Loss:  0.5853269696235657
Accuracy:  0.6912909746170044
Reading Training Data
692  Data Points Read!
655  Data Points Read!
Reading Testing Data
1347  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Loss:  0.5811020135879517
Accuracy:  0.6959985494613647
Reading Training Data
448  Data Points Read!
410  Data Points Read!
Reading Testing Data
858  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Loss:  0.6002377867698669
Accuracy:  0.7079485654830933
Reading Training Data
838  Data Poin

284  Data Points Read!
281  Data Points Read!
Reading Testing Data
565  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Loss:  0.6365188360214233
Accuracy:  0.6291870474815369
Reading Training Data
412  Data Points Read!
416  Data Points Read!
Reading Testing Data
828  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Loss:  0.5948529243469238
Accuracy:  0.6940068602561951
Reading Training Data
417  Data Points Read!
414  Data Points Read!
Reading Testing Data
831  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Loss:  0.5907285213470459
Accuracy:  0.68966144323349
Reading Training Data
269  Data Points Read!
252  Data Points Read!
Reading Testing Data
521  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Loss:  0.620616614818573
Accuracy:  0.6690204739570618
Reading Training Data
407  Data Points Read!
407  Data Points Read!
Reading Testing Data
814  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Loss:  0.5924577713012695
Accuracy:  0.6733659505844116
Reading Training Data
286  Data Points Read!
276  Data Points Rea

In [7]:
FLAccuracy

{'Genesis': (1382, 0.6527249813079834),
 'd1': (1061, 0.6719174385070801),
 'd2': (1050, 0.6912909746170044),
 'd3': (1347, 0.6959985494613647),
 'd4': (858, 0.7079485654830933),
 'd5': (1676, 0.7238819599151611),
 'd6': (1166, 0.7052326798439026),
 'd7': (813, 0.6925584077835083),
 'd8': (1445, 0.7153720855712891),
 'd9': (1102, 0.6692014932632446),
 'd10': (1396, 0.7077675461769104),
 'd11': (1134, 0.6775303483009338),
 'd12': (1623, 0.7320296764373779),
 'd13': (820, 0.6800651550292969),
 'd14': (1041, 0.7101213335990906),
 'd15': (565, 0.6291870474815369),
 'd16': (828, 0.6940068602561951),
 'd17': (831, 0.68966144323349),
 'd18': (521, 0.6690204739570618),
 'd19': (814, 0.6733659505844116),
 'd20': (562, 0.6541734337806702)}

In [8]:
FLAccuracyDF = pd.DataFrame.from_dict(FLAccuracy, orient='index', columns=['DataSize', 'Accuracy'])
FLAccuracyDF

Unnamed: 0,DataSize,Accuracy
Genesis,1382,0.652725
d1,1061,0.671917
d2,1050,0.691291
d3,1347,0.695999
d4,858,0.707949
d5,1676,0.723882
d6,1166,0.705233
d7,813,0.692558
d8,1445,0.715372
d9,1102,0.669201


In [9]:
FLAccuracyDF.index

Index(['Genesis', 'd1', 'd2', 'd3', 'd4', 'd5', 'd6', 'd7', 'd8', 'd9', 'd10',
       'd11', 'd12', 'd13', 'd14', 'd15', 'd16', 'd17', 'd18', 'd19', 'd20'],
      dtype='object')

In [10]:
n = 0
for w in FLAccuracy:
    if 'Complete' in w:
        continue
    n += FLAccuracy[w][0]
print('Total number of data points in this round: ', n)

Total number of data points in this round:  22035


In [11]:
FLAccuracyDF['Weightage'] = FLAccuracyDF['DataSize'].apply(lambda x: x/n)

In [12]:
FLAccuracyDF

Unnamed: 0,DataSize,Accuracy,Weightage
Genesis,1382,0.652725,0.062718
d1,1061,0.671917,0.048151
d2,1050,0.691291,0.047651
d3,1347,0.695999,0.06113
d4,858,0.707949,0.038938
d5,1676,0.723882,0.076061
d6,1166,0.705233,0.052916
d7,813,0.692558,0.036896
d8,1445,0.715372,0.065577
d9,1102,0.669201,0.050011


In [13]:
def scale(weight, scaler):
    scaledWeights = []
    for i in range(len(weight)):
        scaledWeights.append(scaler * weight[i])
    return scaledWeights

def getScaledWeight(d, scaler):
    
    #creating sequential model
    model=Sequential()
    model.add(Conv2D(filters=16,kernel_size=2,padding="same",activation="relu",input_shape=(50,50,3)))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(filters=32,kernel_size=2,padding="same",activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(filters=64,kernel_size=2,padding="same",activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(500,activation="relu"))
    model.add(Dropout(0.2))
    model.add(Dense(2,activation="softmax"))#2 represent output layer neurons 
    
    fpath = "./weights/"+d+".h5"
    model.load_weights(fpath)
    weight = model.get_weights()
    scaledWeight = scale(weight, scaler)

    return scaledWeight

In [14]:
def avgWeights(scaledWeights):
    avg = list()
    for weight_list_tuple in zip(*scaledWeights):
        layer_mean = tf.math.reduce_sum(weight_list_tuple, axis=0)
        avg.append(layer_mean)
    return avg

def FedAvg(models):
    
    scaledWeights = []
    for m in models:
        scaledWeights.append(getScaledWeight(m, FLAccuracyDF.loc[m]['Weightage']))
    avgWeight = avgWeights(scaledWeights)
    return avgWeight

In [15]:
models = ['d1', 'd2', 'd3', 'd4', 'd5', 'd6', 'd7', 'd8', 'd9', 'd10', 'd11', 'd12', 'd13', 'd14', 'd15', 'd16', 'd17', 'd18', 'd19', 'd20']
avgWeight = FedAvg(models)
print(avgWeight)

[<tf.Tensor: shape=(2, 2, 3, 16), dtype=float32, numpy=
array([[[[-0.01278405,  0.1480821 , -0.18427667,  0.02990142,
          -0.09053817,  0.25427815,  0.19300961,  0.01253956,
          -0.04350141, -0.06555912, -0.04546903, -0.15567143,
           0.25756803,  0.14353284, -0.04832796,  0.19892983],
         [ 0.2230724 ,  0.20676409, -0.23267385, -0.14192563,
           0.05304413, -0.02690437,  0.05242201,  0.1132592 ,
          -0.02114039,  0.08733422,  0.05635345, -0.01730666,
           0.05016673, -0.04955962,  0.21671656,  0.08668724],
         [-0.17331281,  0.18996766,  0.17913473, -0.10117243,
           0.15795584,  0.23815058, -0.08311746, -0.2694573 ,
           0.10260476, -0.06708112,  0.00096423,  0.07271762,
          -0.2258053 ,  0.17079078, -0.17367807,  0.09707605]],

        [[ 0.20702285,  0.05424436,  0.20706458, -0.17511858,
           0.11578032, -0.11868624,  0.25299823, -0.09753612,
           0.24851714,  0.13223283, -0.21930216, -0.24676576,
         

In [16]:
def testNewGlobal(weight):
    
    print('Reading Testing Data')
    
    TestCells = np.concatenate((TestParasitizedCells, TestUninfectedCells))
    TestLabels = np.concatenate((TestParasitizedLabels, TestUninfectedLabels))
    
    
    sTest = np.arange(TestCells.shape[0])
    np.random.shuffle(sTest)
    TestCells = TestCells[sTest]
    TestLabels = TestLabels[sTest]
    
    num_classes=len(np.unique(TestLabels))
    
    (x_test) = TestCells
    (y_test) = TestLabels
    
    # Since we're working on image data, we normalize data by divinding 255.
    x_test = x_test.astype('float32')/255
    test_len=len(x_test)
    
    #Doing One hot encoding as classifier has multiple classes
    y_test=keras.utils.to_categorical(y_test,num_classes)
    
    #creating sequential model
    model=Sequential()
    model.add(Conv2D(filters=16,kernel_size=2,padding="same",activation="relu",input_shape=(50,50,3)))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(filters=32,kernel_size=2,padding="same",activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(filters=64,kernel_size=2,padding="same",activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(500,activation="relu"))
    model.add(Dropout(0.2))
    model.add(Dense(2,activation="softmax"))#2 represent output layer neurons 
#     model.summary()

    model.set_weights(weight)

    # compile the model with loss as categorical_crossentropy and using adam optimizer
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    scores = model.evaluate(x_test, y_test)
    print("Loss: ", scores[0])        #Loss
    print("Accuracy: ", scores[1])    #Accuracy

    #Saving Model
    model.save("./output.h5")
    return scores[1]

In [17]:
testNewGlobal(avgWeight)

Reading Testing Data
Loss:  0.5878175497055054
Accuracy:  0.7065001130104065


0.7065001130104065

In [18]:
FLAccuracyDF

Unnamed: 0,DataSize,Accuracy,Weightage
Genesis,1382,0.652725,0.062718
d1,1061,0.671917,0.048151
d2,1050,0.691291,0.047651
d3,1347,0.695999,0.06113
d4,858,0.707949,0.038938
d5,1676,0.723882,0.076061
d6,1166,0.705233,0.052916
d7,813,0.692558,0.036896
d8,1445,0.715372,0.065577
d9,1102,0.669201,0.050011
