In [362]:
from PIL import Image
import numpy as np
import os
import cv2
import keras
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Conv2D,MaxPooling2D,Dense,Flatten,Dropout
import pandas as pd
import sys
%matplotlib inline
from scipy.spatial.distance import euclidean as euc
import matplotlib.pyplot as plt
import random
import plotly.express as px
import numpy
import tensorflow as tf
import requests

In [363]:
def readData(filepath, label):
    cells = []
    labels = []
    file = os.listdir(filepath)
    for img in file:
        try:
            image = cv2.imread(filepath + img)
            image_from_array = Image.fromarray(image, 'RGB')
            size_image = image_from_array.resize((50, 50))
            cells.append(np.array(size_image))
            labels.append(label)
        except AttributeError as e:
            print('Skipping file: ', img, e)
    print(len(cells), ' Data Points Read!')
    return np.array(cells), np.array(labels)

In [364]:
def update(name, Cells, Labels, globalId):
    
    s = np.arange(Cells.shape[0])
    np.random.shuffle(s)
    Cells = Cells[s]
    Labels = Labels[s]
    
    num_classes=len(np.unique(Labels))
    len_data=len(Cells)
    print(len_data, ' Data Points')
    
    (x_train)=Cells
    (y_train)=Labels
    
    # Since we're working on image data, we normalize data by divinding 255.
    x_train = x_train.astype('float32')/255 
    train_len=len(x_train)
    
    #Doing One hot encoding as classifier has multiple classes
    y_train=keras.utils.to_categorical(y_train,num_classes)
    
    #creating sequential model
    model=Sequential()
    model.add(Conv2D(filters=16,kernel_size=2,padding="same",activation="relu",input_shape=(50,50,3)))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(filters=32,kernel_size=2,padding="same",activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(filters=64,kernel_size=2,padding="same",activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(500,activation="relu"))
    model.add(Dropout(0.2))
    model.add(Dense(2,activation="softmax"))#2 represent output layer neurons 
    # model.summary()
    
    if globalId != 1:
        model.load_weights("./weights/global"+str(globalId)+".h5")

    # compile the model with loss as categorical_crossentropy and using adam optimizer
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    #Fit the model with min batch size as 50[can tune batch size to some factor of 2^power ] 
    model.fit(x_train, y_train, batch_size=10, epochs=3, verbose=1)
    
    #Saving Model
    model.save("./weights/"+str(name)+".h5")
    return len_data, model

In [365]:
def unison_shuffled_copies(a, b):
    assert len(a) == len(b)
    p = numpy.random.permutation(len(a))
    return a[p], b[p]

In [366]:
print('Reading Training Data')
ParasitizedCells, ParasitizedLabels = readData('./input/cell_images/Parasitized/', 1)
UninfectedCells, UninfectedLabels  = readData('./input/cell_images/Uninfected/', 0)

Reading Training Data
Skipping file:  Thumbs.db 'NoneType' object has no attribute '__array_interface__'
13779  Data Points Read!
Skipping file:  Thumbs.db 'NoneType' object has no attribute '__array_interface__'
13779  Data Points Read!


In [367]:
Cells = np.concatenate((ParasitizedCells, UninfectedCells))
Labels = np.concatenate((ParasitizedLabels, UninfectedLabels))
Cells, Labels = unison_shuffled_copies(Cells, Labels)

In [368]:
def getDataLen(trainingDict):
    n = 0
    for w in trainingDict:
#         print(w)
        n += trainingDict[w]
    print('Total number of data points after this round: ', n)
    return n

def assignWeights(trainingDf, trainingDict):
    n = getDataLen(trainingDict)
    trainingDf['Weightage'] = trainingDf['DataSize'].apply(lambda x: x/n)
    return trainingDf, n
    
def scale(weight, scaler):
    scaledWeights = []
    for i in range(len(weight)):
        scaledWeights.append(scaler * weight[i])
    return scaledWeights

def getWeight(d):
    #creating sequential model
    model=Sequential()
    model.add(Conv2D(filters=16,kernel_size=2,padding="same",activation="relu",input_shape=(50,50,3)))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(filters=32,kernel_size=2,padding="same",activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(filters=64,kernel_size=2,padding="same",activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(500,activation="relu"))
    model.add(Dropout(0.2))
    model.add(Dense(2,activation="softmax"))#2 represent output layer neurons 
    # model.summary()
    
    fpath = "./weights/"+d+".h5"
    model.load_weights(fpath)
    weight = model.get_weights()
    return weight

def getScaledWeight(d, scaler):
    #creating sequential model
    model=Sequential()
    model.add(Conv2D(filters=16,kernel_size=2,padding="same",activation="relu",input_shape=(50,50,3)))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(filters=32,kernel_size=2,padding="same",activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(filters=64,kernel_size=2,padding="same",activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(500,activation="relu"))
    model.add(Dropout(0.2))
    model.add(Dense(2,activation="softmax"))#2 represent output layer neurons 
    # model.summary()
    
    fpath = "./weights/"+d+".h5"
    model.load_weights(fpath)
    weight = model.get_weights()
    return scale(weight, scaler)

def avgWeights(scaledWeights):
    avg = list()
    for weight_list_tuple in zip(*scaledWeights):
        layer_mean = tf.math.reduce_sum(weight_list_tuple, axis=0)
        avg.append(layer_mean)
    return avg

def FedAvg(trainingDict):
    trainingDf = pd.DataFrame.from_dict(trainingDict, orient='index', columns=['DataSize']) 
    models = list(trainingDict.keys())
    scaledWeights = []
    trainingDf, dataLen = assignWeights(trainingDf, trainingDict)
    for m in models:
        scaledWeights.append(getScaledWeight(m, trainingDf.loc[m]['Weightage']))
    fedAvgWeight = avgWeights(scaledWeights)
    return fedAvgWeight, dataLen

def saveModel(weight, n):
    
    print('Reading Testing Data')
    
    TestParasitizedCells, TestParasitizedLabels = readData('./input/fed/test/Parasitized/', 1)
    TestUninfectedCells, TestUninfectedLabels  = readData('./input/fed/test/Uninfected/', 0)
    TestCells = np.concatenate((TestParasitizedCells, TestUninfectedCells))
    TestLabels = np.concatenate((TestParasitizedLabels, TestUninfectedLabels))
    
    sTest = np.arange(TestCells.shape[0])
    np.random.shuffle(sTest)
    TestCells = TestCells[sTest]
    TestLabels = TestLabels[sTest]
    
    num_classes=len(np.unique(TestLabels))
    
    (x_test) = TestCells
    (y_test) = TestLabels
    
    # Since we're working on image data, we normalize data by divinding 255.
    x_test = x_test.astype('float32')/255
    test_len=len(x_test)
    
    #Doing One hot encoding as classifier has multiple classes
    y_test=keras.utils.to_categorical(y_test,num_classes)

    #creating sequential model
    model=Sequential()
    model.add(Conv2D(filters=16,kernel_size=2,padding="same",activation="relu",input_shape=(50,50,3)))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(filters=32,kernel_size=2,padding="same",activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(filters=64,kernel_size=2,padding="same",activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(500,activation="relu"))
    model.add(Dropout(0.2))
    model.add(Dense(2,activation="softmax"))#2 represent output layer neurons 
    # model.summary()
    
    model.set_weights(weight)

    # compile the model with loss as categorical_crossentropy and using adam optimizer
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    scores = model.evaluate(x_test, y_test)
    print("Loss: ", scores[0])        #Loss
    print("Accuracy: ", scores[1])    #Accuracy

    #Saving Model
    fpath = "./weights/global"+str(n)+".h5"
    model.save(fpath)
    return scores[0], scores[1]

def euclidean(m, n):
    distance = []
    for i in range(len(m)):
#         print(i)
        distance.append(euc(m[i].reshape(-1,1), n[i].reshape(-1,1)))
#     print(distance)
    distance = sum(distance)/len(m)
    return distance

def merge(trainingDict, b):
#     print(trainingDict)
    models = list(trainingDict.keys())
#     print(models)
    trainingDf = pd.DataFrame.from_dict(trainingDict, orient='index', columns=['DataSize'])
    l_weights = []
    g_weight = {}
#     print(models)
    for m in models:
#         print(m)
        if 'global' in m:
            g_weight['name'] = m
            g_weight['weight'] = getWeight(m)
        else:
            l_weights.append({
                'name': m,
                'weight': getWeight(m)
            })
#     print(g_weight)
    scores = {}
            
    for m in l_weights:
        scores[m['name']] = euclidean(m['weight'], g_weight['weight'])

    sortedScores = {k: v for k, v in sorted(scores.items(), key=lambda item: item[1])}
    
#     print(scores)
#     print(sortedScores)

    b = int(len(scores)*b)
    
    selected = []
    
    for i in range(b):
        selected.append((sortedScores.popitem())[0])

    newDict = {}
    for i in trainingDict.keys():
        if (i not in selected) and ('global' not in i):
            newDict[i] = trainingDict[i]

    print('Selections: ', newDict)
    
    NewGlobal, dataLen = FedAvg(newDict)
    
    return NewGlobal, dataLen
    

In [369]:
per_client_batch_size = 100

In [370]:
curr_local = 0
curr_global = 0

In [371]:
local = {}
loss_array = []
acc_array = []
for i in range(0, len(Cells), per_client_batch_size):
    if int(curr_global) == 0:
        curr_global += 1
        name = 'global' + str(curr_global)
        l, m = update(name, Cells[i:i+100], Labels[i:i+100], curr_global)
        local[name] = l
    elif (curr_local != 0) and (int(curr_local)%5 == 0):
        curr_global += 1
        print('Current Global: ', curr_global)
        name = 'global' + str(curr_global)
        m, l = merge(local, 0.25)
        loss, acc = saveModel(m, curr_global)
        loss_array.append(loss)
        acc_array.append(acc)
        curr_local += 1
        local = {}
        local[name] = l
    else:
        print('Current Local: ', curr_local)
        name = str('local'+str(curr_local))
        curr_local += 1
        l, m = update(name, Cells[i:i+100], Labels[i:i+100], curr_global)
        local[name] = l

100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  0
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  1
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  2
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  3
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  4
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Global:  2
Selections:  {'local0': 100, 'local2': 100, 'local3': 100, 'local4': 100}
Total number of data points after this round:  400
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
Loss:  0.6931493878364563
Accuracy:  0.49610719084739685
Current Local:  6
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  7
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  8
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  9
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Global:  3
Selections:  {'local7': 100, 'local8': 100, 'local9': 100}
Total number of data

Epoch 2/3
Epoch 3/3
Current Local:  22
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  23
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  24
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Global:  6
Selections:  {'local21': 100, 'local22': 100, 'local24': 100}
Total number of data points after this round:  300
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
Loss:  0.6925294995307922
Accuracy:  0.5038928389549255
Current Local:  26
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  27
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  28
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  29
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Global:  7
Selections:  {'local27': 100, 'local28': 100, 'local29': 100}
Total number of data points after this round:  300
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
Loss:  0.6924129724502563
Accuracy:  0.504254937171936
Curre

Current Local:  46
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  47
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  48
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  49
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Global:  11
Selections:  {'local46': 100, 'local48': 100, 'local49': 100}
Total number of data points after this round:  300
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
Loss:  0.6851301193237305
Accuracy:  0.521998941898346
Current Local:  51
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  52
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  53
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  54
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Global:  12
Selections:  {'local52': 100, 'local53': 100, 'local54': 100}
Total number of data points after this round:  300
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
Loss:  0.6810

Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
Loss:  0.5997427105903625
Accuracy:  0.6864023208618164
Current Local:  96
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  97
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  98
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  99
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Global:  21
Selections:  {'local97': 100, 'local98': 100, 'local99': 100}
Total number of data points after this round:  300
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
Loss:  0.6116018891334534
Accuracy:  0.6807894110679626
Current Local:  101
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  102
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  103
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  104
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Global:  22
Selections:  {'local101': 100, 'local102': 100, 'local103': 100}


Current Local:  144
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Global:  30
Selections:  {'local141': 100, 'local142': 100, 'local143': 100}
Total number of data points after this round:  300
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
Loss:  0.5070445537567139
Accuracy:  0.7497736811637878
Current Local:  146
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  147
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  148
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  149
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Global:  31
Selections:  {'local146': 100, 'local148': 100, 'local149': 100}
Total number of data points after this round:  300
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
Loss:  0.49703407287597656
Accuracy:  0.750316858291626
Current Local:  151
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  152
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current L

Epoch 3/3
Current Local:  193
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  194
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Global:  40
Selections:  {'local192': 100, 'local193': 100, 'local194': 100}
Total number of data points after this round:  300
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
Loss:  0.2859310805797577
Accuracy:  0.8929929137229919
Current Local:  196
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  197
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  198
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  199
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Global:  41
Selections:  {'local197': 100, 'local198': 100, 'local199': 100}
Total number of data points after this round:  300
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
Loss:  0.2660815715789795
Accuracy:  0.9009596109390259
Current Local:  201
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3

Epoch 2/3
Epoch 3/3
Current Local:  242
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  243
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  244
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Global:  50
Selections:  {'local241': 100, 'local242': 100, 'local244': 100}
Total number of data points after this round:  300
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
Loss:  0.224176287651062
Accuracy:  0.9263081550598145
Current Local:  246
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  247
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  248
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Local:  249
100  Data Points
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Global:  51
Selections:  {'local246': 100, 'local248': 100, 'local249': 100}
Total number of data points after this round:  300
Reading Testing Data
2740  Data Points Read!
2783  Data Points Read!
Loss:  0.22156310081481934
Accuracy:  0.922686

In [None]:
#accuracy
fig = px.line(y=acc_array)
fig.show()

In [378]:
#loss
fig = px.line(y=loss_array)
fig.show()