In [None]:
# Mount Google Drive at /content/drive so that later cells can reach the
# project folder and CSV files stored there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%cd drive/MyDrive/FTSL/1000_participants

/content/drive/.shortcut-targets-by-id/1CI1F_BNtclDb589EJiZx_ypJ_TSazyxZ/FTSL/1000_participants


In [None]:
!pip3 install tensorflow_model_optimization

Collecting tensorflow_model_optimization
[?25l  Downloading https://files.pythonhosted.org/packages/55/38/4fd48ea1bfcb0b6e36d949025200426fe9c3a8bfae029f0973d85518fa5a/tensorflow_model_optimization-0.5.0-py2.py3-none-any.whl (172kB)
[K     |██                              | 10kB 19.0MB/s eta 0:00:01[K     |███▉                            | 20kB 24.9MB/s eta 0:00:01[K     |█████▊                          | 30kB 21.2MB/s eta 0:00:01[K     |███████▋                        | 40kB 24.7MB/s eta 0:00:01[K     |█████████▌                      | 51kB 24.7MB/s eta 0:00:01[K     |███████████▍                    | 61kB 27.5MB/s eta 0:00:01[K     |█████████████▎                  | 71kB 18.0MB/s eta 0:00:01[K     |███████████████▏                | 81kB 19.0MB/s eta 0:00:01[K     |█████████████████               | 92kB 18.4MB/s eta 0:00:01[K     |███████████████████             | 102kB 18.4MB/s eta 0:00:01[K     |████████████████████▉           | 112kB 18.4MB/s eta 0:00:01[K  

In [None]:
import os
import sys
import tempfile

import cv2
import keras
import numpy as np
from numpy import dstack 
import pandas as pd
from keras.layers import (Conv2D, Dense, Dropout, Flatten, GaussianNoise,
                          MaxPooling2D, MaxPool2D , Activation)
from keras.preprocessing.image import ImageDataGenerator
from keras.layers.convolutional import Conv1D, MaxPooling1D
from keras.models import Sequential
from keras.utils import np_utils, to_categorical
from matplotlib import pyplot
from numpy import dstack, mean, std
from pandas import read_csv
from PIL import Image
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

%matplotlib inline
import random

import matplotlib.pyplot as plt
import numpy
import plotly.express as px
import requests
import tensorflow as tf
import tensorflow_model_optimization as tfmot
from scipy.spatial.distance import euclidean as euc

# Short alias for the tfmot Keras pruning wrapper; the model-building
# functions below apply it when Gradient_Pruning is enabled.
prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude

from plotly.offline import download_plotlyjs, init_notebook_mode, iplot

# Initialise plotly's offline notebook mode before any figure is shown.
init_notebook_mode(connected=True)

In [None]:
# MNIST in CSV form, read from the current working directory.
# preprocess() expects a 'label' column and reshapes the remaining columns
# into 28x28x1 images.
Train = pd.read_csv('mnist_train.csv')
Test = pd.read_csv('mnist_test.csv')

In [None]:
# ----------------------------- #
# ---------- SETTINGS ----------#
# ----------------------------- #
# Tunable experiment parameters; the functions below read them as globals.

NUM_Clients = 5 # number of local models trained between two aggregation rounds

# ML
Cluster_Size = 250 # rows of Train handed to each participant; smaller trailing chunks are skipped
Batch_Size = 32 # mini-batch size passed to model.fit
NUM_Epoch = 3 # local epochs per participant
verbose = 1 # verbosity level -- presumably meant for Keras model.fit; confirm it is passed through

# Krum
krum_f = 0.0 # fraction (0.0-1.0, not a percentage) of local models dropped per round as suspected byzantine

# Differential Privacy
Gaussian_Noise = False # when True, a GaussianNoise layer is inserted after the first convolution
Gaussian_Noise_Std_Dev = 0.05 # standard deviation given to that GaussianNoise layer

Gradient_Clipping = False # when True, train() uses Adam(clipnorm=Clip_Norm)
Clip_Norm = 0.45

Gradient_Pruning = False # when True, models are wrapped with tfmot prune_low_magnitude
initial_sparsity = 0.00 # start value of the PolynomialDecay pruning schedule
final_sparsity = 0.45 # end value of the PolynomialDecay pruning schedule

# ---------------------------- #
# ----------------------------- #
# ----------------------------- #

In [None]:
def preprocess(df):
    """Turn a raw MNIST frame into model-ready arrays.

    Args:
        df: DataFrame with a 'label' column; every other column is treated
            as pixel data and reshaped to (-1, 28, 28, 1).

    Returns:
        (X, y): pixel array divided by 255.0, and the labels one-hot encoded
        over 10 classes.
    """
    labels = df.label
    pixels = df.drop(['label'], axis=1).values.reshape(-1, 28, 28, 1)
    images = pixels / 255.0
    one_hot = to_categorical(labels, num_classes=10)
    return images, one_hot

In [None]:
# Test tensors kept as globals; saveModel() evaluates every aggregated
# global model against them.
X_test, y_test = preprocess(Test)

In [None]:
def train(name, X_train, y_train, globalId):
    """Train one participant model on its local shard and save it.

    The network is built from scratch, optionally wrapped for pruning,
    initialised from the current global model (unless this is the very first
    model, ``globalId == 1``), trained, and written to ``./weights/<name>.h5``.

    Args:
        name: file stem for the saved model (e.g. ``'local3'`` or ``'global1'``).
        X_train: training images; the first axis is the sample axis.
        y_train: one-hot labels matching ``X_train``.
        globalId: index of the current global model; when it is not 1 the
            weights in ``./weights/global<globalId>.h5`` are loaded first.

    Returns:
        (n_samples, model): number of training samples and the trained model.
    """
    n_samples = X_train.shape[0]

    model = Sequential()

    model.add(Conv2D(filters=16,kernel_size=2,padding="same",activation="relu",input_shape=(28,28,1)))

    if Gaussian_Noise:
        model.add(GaussianNoise(Gaussian_Noise_Std_Dev))

    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(filters=32,kernel_size=2,padding="same",activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(filters=64,kernel_size=2,padding="same",activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(500,activation="relu"))
    model.add(Dropout(0.2))
    model.add(Dense(10,activation="softmax"))  # 10 output neurons, one per digit class

    # Extra fit() callbacks; only pruning needs any.
    callbacks = []
    if Gradient_Pruning:
        # Total number of optimiser steps over all local epochs.
        end_step = np.ceil(n_samples / Batch_Size).astype(np.int32) * NUM_Epoch

        pruning_params = {
              'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(initial_sparsity=initial_sparsity,
                                                                       final_sparsity=final_sparsity,
                                                                       begin_step=0,
                                                                       end_step=end_step)
        }

        logdir = tempfile.mkdtemp()

        callbacks = [
          tfmot.sparsity.keras.UpdatePruningStep(),
          tfmot.sparsity.keras.PruningSummaries(log_dir=logdir),
        ]

        model = prune_low_magnitude(model, **pruning_params)

    # NOTE(review): forces the built flag by hand; presumably needed so that
    # load_weights works on the (possibly pruning-wrapped) model -- confirm.
    model.built = True

    if globalId != 1:
        model.load_weights("./weights/global"+str(globalId)+".h5")

    if Gradient_Clipping:
        optimizer = keras.optimizers.Adam(clipnorm=Clip_Norm)
    else:
        optimizer = 'adam'
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    # Honour the notebook-level `verbose` setting instead of a hard-coded 1.
    model.fit(X_train, y_train, batch_size=Batch_Size, epochs=NUM_Epoch,
              verbose=verbose, callbacks=callbacks)

    # Saving Model (make sure the target directory exists on a fresh checkout)
    os.makedirs("./weights", exist_ok=True)
    model.save("./weights/"+str(name)+".h5")
    return n_samples, model

In [None]:
def euclidean(m, n):
    """Mean per-layer Euclidean distance between two models.

    Args:
        m: list of weight arrays of the first model.
        n: list of weight arrays of the second model, layer-aligned with ``m``.

    Returns:
        The Euclidean distances between corresponding (flattened) weight
        arrays, averaged over the number of arrays.

    Raises:
        ValueError: if the lists are empty or differ in length.
    """
    if len(m) == 0:
        raise ValueError("euclidean() needs at least one weight array per model")
    if len(m) != len(n):
        raise ValueError("both models must have the same number of weight arrays")

    # scipy's euclidean() only accepts 1-D vectors, so flatten every array
    # (a column vector of shape (k, 1) is rejected by current SciPy releases).
    per_layer = [
        euc(np.asarray(a).reshape(-1), np.asarray(b).reshape(-1))
        for a, b in zip(m, n)
    ]
    return sum(per_layer) / len(per_layer)

def saveModel(weight, n):
    """Build the network, load aggregated weights, evaluate and save it.

    The model is evaluated on the notebook-level ``X_test`` / ``y_test`` and
    written to ``./weights/global<n>.h5``.

    Args:
        weight: list of weight arrays/tensors in ``model.set_weights`` order.
        n: index of the global model, used in the output file name.

    Returns:
        (loss, accuracy) as reported by ``model.evaluate``.
    """
    model = Sequential()

    model.add(Conv2D(filters=16,kernel_size=2,padding="same",activation="relu",input_shape=(28,28,1)))

    if Gaussian_Noise == True:
        model.add(GaussianNoise(Gaussian_Noise_Std_Dev))

    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(filters=32,kernel_size=2,padding="same",activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(filters=64,kernel_size=2,padding="same",activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(500,activation="relu"))
    model.add(Dropout(0.2))
    model.add(Dense(10,activation="softmax"))  # 10 output neurons, one per digit class

    if Gradient_Pruning == True:
        # NOTE(review): 16500 is a hard-coded sample count whose origin is not
        # visible here. The model is never trained in this function, so no
        # pruning callbacks or log directory are created.
        end_step = np.ceil(16500 / Batch_Size).astype(np.int32) * NUM_Epoch

        pruning_params = {
            'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(initial_sparsity=initial_sparsity,
                                                                     final_sparsity=final_sparsity,
                                                                     begin_step=0,
                                                                     end_step=end_step)
        }

        model = prune_low_magnitude(model, **pruning_params)

    model.set_weights(weight)

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    scores = model.evaluate(X_test, y_test)
    print("Loss: ", scores[0])        #Loss
    print("Accuracy: ", scores[1])    #Accuracy

    #Saving Model
    fpath = "./weights/global"+str(n)+".h5"
    model.save(fpath)
    return scores[0], scores[1]

def getDataLen(trainingDict):
    """Sum the data sizes stored as values of ``trainingDict``.

    Prints the total and returns it.
    """
    total = sum(trainingDict.values())
    print('Total number of data points after this round: ', total)
    return total

def assignWeights(trainingDf, trainingDict):
    """Add a 'Weightage' column holding each row's share of the total data.

    Args:
        trainingDf: frame with a 'DataSize' column; it is modified in place.
        trainingDict: mapping whose values are summed (via getDataLen) to
            obtain the total.

    Returns:
        (trainingDf, total): the same frame with the new column, and the total.
    """
    total = getDataLen(trainingDict)

    def share(size):
        return size/total

    trainingDf['Weightage'] = trainingDf['DataSize'].apply(share)
    return trainingDf, total
    
def scale(weight, scaler):
    """Return a new list in which every entry of ``weight`` is multiplied by ``scaler``."""
    return [scaler * layer for layer in weight]

def getScaledWeight(d, scaler):
    """Load the weights saved in ``./weights/<d>.h5`` and multiply them by ``scaler``.

    Model construction and loading are delegated to ``getWeight`` so that the
    architecture is defined in one place instead of being copied here.

    Args:
        d: file stem of the saved model (without directory or extension).
        scaler: factor applied to every weight array.

    Returns:
        List of scaled weight arrays, in ``model.get_weights()`` order.
    """
    return scale(getWeight(d), scaler)

def getWeight(d):
    """Rebuild the network and return the weights stored in ``./weights/<d>.h5``.

    Args:
        d: file stem of the saved model (without directory or extension).

    Returns:
        The list produced by ``model.get_weights()`` after loading the file.
    """
    # Assemble the layer stack first, then hand it to Sequential in one go.
    stack = [Conv2D(filters=16,kernel_size=2,padding="same",activation="relu",input_shape=(28,28,1))]

    if Gaussian_Noise == True:
        stack.append(GaussianNoise(Gaussian_Noise_Std_Dev))

    stack.extend([
        MaxPooling2D(pool_size=2),
        Conv2D(filters=32,kernel_size=2,padding="same",activation="relu"),
        MaxPooling2D(pool_size=2),
        Conv2D(filters=64,kernel_size=2,padding="same",activation="relu"),
        MaxPooling2D(pool_size=2),
        Dropout(0.2),
        Flatten(),
        Dense(500,activation="relu"),
        Dropout(0.2),
        Dense(10,activation="softmax"),  # 10 output neurons, one per digit class
    ])
    net = Sequential(stack)

    # Saved files contain the pruning wrappers when pruning is on, so the
    # skeleton has to be wrapped the same way before loading.
    if Gradient_Pruning == True:
        net = prune_low_magnitude(net)

    net.load_weights("./weights/"+d+".h5")
    return net.get_weights()

def avgWeights(scaledWeights):
    """Sum the models' weights layer by layer.

    Args:
        scaledWeights: list with one weight list per model.

    Returns:
        A list with one tensor per layer position: the element-wise sum, over
        all models, of the arrays at that position.
    """
    return [
        tf.math.reduce_sum(same_layer, axis=0)
        for same_layer in zip(*scaledWeights)
    ]

def FedAvg(trainingDict):
    """Aggregate saved models, weighting each by its share of the data.

    Args:
        trainingDict: mapping ``model name -> data size``; each name must
            correspond to a file ``./weights/<name>.h5``.

    Returns:
        (weights, dataLen): the aggregated weight list and the total number of
        data points across all models in ``trainingDict``.

    Raises:
        ValueError: if ``trainingDict`` is empty. Aggregating nothing would
            yield an empty weight list that cannot be loaded into a model.
    """
    if not trainingDict:
        raise ValueError("FedAvg needs at least one trained model to aggregate")

    trainingDf = pd.DataFrame.from_dict(trainingDict, orient='index', columns=['DataSize'])
    trainingDf, dataLen = assignWeights(trainingDf, trainingDict)

    # Each model is pre-scaled by its share, so summing the scaled weights in
    # avgWeights yields the weighted average.
    scaledWeights = [
        getScaledWeight(m, trainingDf.loc[m, 'Weightage'])
        for m in trainingDict
    ]
    fedAvgWeight = avgWeights(scaledWeights)
    return fedAvgWeight, dataLen

def MK(trainingDict, b):
    """Drop the local models farthest from the global model, then average the rest.

    Every local model is scored by its ``euclidean`` distance to the global
    model of this round. The ``int(len(local models) * b)`` highest-scoring
    models are excluded; the remaining local models are aggregated by FedAvg.

    Args:
        trainingDict: mapping ``model name -> data size``. Names containing
            ``'global'`` identify the reference model; all others are local.
        b: fraction of local models to discard.

    Returns:
        (weights, dataLen) as returned by ``FedAvg`` for the kept models.

    Raises:
        ValueError: if there are local models but no global model to compare
            them against.
    """
    global_weight = None
    local_weights = {}
    for model_name in trainingDict:
        if 'global' in model_name:
            global_weight = getWeight(model_name)
        else:
            local_weights[model_name] = getWeight(model_name)

    if local_weights and global_weight is None:
        raise ValueError("MK needs a 'global' entry in trainingDict as reference model")

    scores = {
        model_name: euclidean(weight, global_weight)
        for model_name, weight in local_weights.items()
    }
    # Ascending by distance: the models to discard sit at the end of the list.
    ranked = sorted(scores, key=scores.get)

    b = int(len(scores)*b)
    b = max(0, min(b, len(ranked)))
    selected = set(ranked[len(ranked) - b:])

    newDict = {
        model_name: size
        for model_name, size in trainingDict.items()
        if model_name not in selected and 'global' not in model_name
    }

    print('Selections: ', newDict)
    NewGlobal, dataLen = FedAvg(newDict)
    return NewGlobal, dataLen


In [None]:
import time
def BEAS():
    """Run the whole federated simulation over ``Train``.

    ``Train`` is consumed in consecutive chunks of ``Cluster_Size`` rows. The
    first chunk trains the initial global model; every later chunk trains one
    local model starting from the current global model. Whenever the local
    counter reaches a multiple of ``NUM_Clients`` the collected models are
    aggregated with ``MK``, evaluated and saved with ``saveModel``, and a new
    round begins. At the end the settings and the per-round accuracies are
    printed and the accuracies are plotted.
    """
    started_at = time.time()
    local_count = 0
    global_count = 0
    round_models = {}   # model name -> data size for the current round
    losses = []
    accuracies = []

    for offset in range(0, len(Train), Cluster_Size):
        print("--- %s seconds ---" % (time.time() - started_at))
        chunk = Train[offset:offset+Cluster_Size]

        if int(global_count) == 0:
            # Very first chunk: bootstrap the initial global model.
            global_count += 1
            model_name = 'global' + str(global_count)
            X_train, y_train = preprocess(chunk)
            n_points, _ = train(model_name, X_train, y_train, global_count)
            round_models[model_name] = n_points
            continue

        print('Current Local: ', local_count)
        model_name = str('local'+str(local_count))
        local_count += 1
        X_train, y_train = preprocess(chunk)
        if X_train.shape[0] < Cluster_Size:
            # Incomplete trailing chunk: do not train on it.
            continue
        n_points, _ = train(model_name, X_train, y_train, global_count)
        round_models[model_name] = n_points

        round_complete = (int(local_count)%NUM_Clients == 0) and (local_count != 0)
        if not round_complete:
            continue

        global_count += 1
        print('Current Global: ', global_count)
        model_name = 'global' + str(global_count)
        aggregated, n_points = MK(round_models, krum_f)
        loss, acc = saveModel(aggregated, global_count)
        losses.append(loss)
        accuracies.append(acc)
        # The next round starts with only the freshly saved global model.
        round_models = {model_name: n_points}

    print('----------------------------------------')
    print('Number of Clients: ', NUM_Clients)
    print('Cluster Size: ', Cluster_Size)
    print('Batch Size: ', Batch_Size)
    print('Number of Local Epochs: ', NUM_Epoch)
    print('F: ', krum_f)
    print('Gaussian_Noise: ', Gaussian_Noise)
    if Gaussian_Noise:
        print('Noise Std Dev: ', Gaussian_Noise_Std_Dev)
    print('Gradient_Clipping: ', Gradient_Clipping)
    if Gradient_Clipping:
        print('Clip Norm: ', Clip_Norm)
    print('Gradient_Pruning: ', Gradient_Pruning)
    if Gradient_Pruning:
        print('Pruning Sparcity: ', final_sparsity)
    print('----------------------------------------')
    print(accuracies)
    px.line(y=accuracies).show()

In [None]:
# Run the full simulation; BEAS prints its own progress, settings summary
# and accuracy plot.
BEAS()

--- 2.0503997802734375e-05 seconds ---
Epoch 1/3
Epoch 2/3
Epoch 3/3
--- 0.8988182544708252 seconds ---
Current Local:  0
Epoch 1/3
Epoch 2/3
Epoch 3/3
--- 1.8161649703979492 seconds ---
Current Local:  1
Epoch 1/3
Epoch 2/3
Epoch 3/3
--- 2.7594215869903564 seconds ---
Current Local:  2
Epoch 1/3
Epoch 2/3
Epoch 3/3
--- 3.994166374206543 seconds ---
Current Local:  3
Epoch 1/3
Epoch 2/3
Epoch 3/3
--- 4.974301338195801 seconds ---
Current Local:  4
Epoch 1/3
Epoch 2/3
Epoch 3/3
Current Global:  2
Selections:  {'local0': 250, 'local1': 250, 'local2': 250, 'local3': 250, 'local4': 250}
Total number of data points after this round:  1250
Loss:  2.299086809158325
Accuracy:  0.16840000450611115
--- 8.209159851074219 seconds ---
Current Local:  5
Epoch 1/3
Epoch 2/3
Epoch 3/3
--- 9.1921546459198 seconds ---
Current Local:  6
Epoch 1/3
Epoch 2/3
Epoch 3/3
--- 10.18203592300415 seconds ---
Current Local:  7
Epoch 1/3
Epoch 2/3
Epoch 3/3
--- 11.166312456130981 seconds ---
Current Local:  8
Epoch

KeyboardInterrupt: ignored