# Server

## Setting variables

In [1]:
# Experiment configuration read by the cells below.
rounds = 100  # number of communication rounds run by the training loop
num_users = 10 # number of clients
target_test_acc = 0.99  # test-accuracy threshold drawn on the plot and searched for afterwards
lrs = [0.1]  # learning rates iterated over by the training loop

# C, E and B are only used in the results filename and the plot title in this notebook.
# NOTE(review): presumably the FedAvg client fraction (C) and local epochs (E) — confirm against the client code.
C = 1
E = 5
B = 10 # 'all' for a single minibatch

In [2]:
import os

import socket
import struct
import pickle
import sys

from threading import Thread
from threading import Lock

import copy

import logging
import math
import random
import re
import time
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from datetime import timedelta
from keras import backend as K
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelBinarizer
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Flatten
from tensorflow.keras.optimizers import SGD
from tqdm import tqdm
tf.get_logger().setLevel(logging.ERROR)

2023-05-09 03:44:51.216984: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Device

In [3]:
# if tf.test.gpu_device_name():
#     device = "gpu"
# else:
#     device = "cpu"

# print(device)

## Model

A CNN with two 5x5 convolution layers (the first with 32 channels, the second with 64, each followed by 2x2 max pooling), a fully connected layer with 512 units and ReLU activation, and a final softmax output layer (1,663,370 total parameters).

In [4]:
class CNN:
    """Factory for the convolutional classifier used as the global model."""

    @staticmethod
    def build(input_shape):
        """Return an uncompiled Sequential CNN for inputs of shape `input_shape`.

        Layers, in order: two 5x5 'same'-padded ReLU convolutions (32, then 64
        filters), each followed by 2x2 max pooling; a flatten; a 512-unit ReLU
        dense layer; and a 10-unit softmax output layer.
        """
        # Both convolutions share everything except the filter count.
        conv_kwargs = dict(kernel_size=(5,5), padding='same', activation='relu')
        layers = [
            Conv2D(filters=32, input_shape=input_shape, **conv_kwargs),
            MaxPooling2D(pool_size=(2, 2)),
            Conv2D(filters=64, **conv_kwargs),
            MaxPooling2D(pool_size=(2, 2)),
            Flatten(),
            Dense(512, activation='relu'),
            Dense(10, activation='softmax'),
        ]
        return Sequential(layers)

Initialize the global model

In [5]:
# CNN.build is a staticmethod, so this instance only serves as a handle to call it.
model = CNN()
  
# Build the global model for 28x28 single-channel inputs and keep its freshly
# initialised weights; they seed `global_weights` in the shared-state cell.
global_model = model.build((28,28,1))
initial_weights = global_model.get_weights()

global_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 28, 28, 32)        832       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 14, 14, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 14, 14, 64)        51264     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 7, 7, 64)         0         
 2D)                                                             
                                                                 
 flatten (Flatten)           (None, 3136)              0         
                                                                 
 dense (Dense)               (None, 512)               1

2023-05-09 03:44:53.972154: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:266] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


In [6]:
# # Save the entire model as a SavedModel.
# checkpoint_path = "training/cp.ckpt"
# checkpoint_dir = os.path.dirname(checkpoint_path)

# # Create a callback that saves the model's weights
# cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
#                                                  save_weights_only=True,
#                                                  verbose=1)

# Write the freshly initialised global model to disk; the training loop
# overwrites the same path after every round.
global_model.save('saved_model/global_model')



# Load data

In [7]:
# Load the MNIST train/test splits through the Keras dataset helper.
mnist = tf.keras.datasets.mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [8]:
# Preprocess in place: cast images to float32 and divide by 255, append a
# trailing channel axis, and one-hot encode the labels into 10 classes.
# NOTE: this cell reassigns its own inputs, so it is not idempotent — re-running
# it without re-running the load cell rescales and re-expands the arrays again.
X_train = X_train.astype("float32")/255
X_test = X_test.astype("float32")/255
X_train = np.expand_dims(X_train, -1)
X_test = np.expand_dims(X_test, -1)
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)

print("x_train shape:", X_train.shape)
print(X_train.shape[0], "train samples")
print(X_test.shape[0], "test samples")

x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


In [9]:
# Each dataset is batched with a batch size equal to the full split, so the
# evaluation loops in the training cell iterate exactly once per split.
train_batched = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(len(y_train)) # for testing on train set
test_batched = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(len(y_test))

## variables

In [10]:
# Shared server state, read and updated by the client-handler threads.
# (A `global` statement at module scope has no effect, so none is used here;
# the functions that rebind these names declare them `global` themselves.)

# Most recently accepted connection for each client slot (0 = not yet connected).
clientsoclist = [0]*num_users

start_time = 0     # set by run_thread at the start of every round
weight_count = 0   # number of clients that have reported for the current round

global_weights = initial_weights   # current global model weights
weights_list = {}                  # client id -> weights most recently received

lock = Lock()   # guards weight_count / weights_list / global_weights in receive()

## Socket initialization
### Set host address and port number

### Required socket functions

In [11]:
def send_msg(sock, msg):
    """Pickle `msg` and send it over `sock` behind a 4-byte length header.

    The header is the payload length packed as an unsigned 32-bit integer in
    network (big-endian) byte order.

    Returns:
        The payload length in bytes (header not included).
    """
    payload = pickle.dumps(msg)
    payload_len = len(payload)
    header = struct.pack('>I', payload_len)
    sock.sendall(header + payload)
    return payload_len

def recv_msg(sock):
    """Receive one length-prefixed pickled message from `sock`.

    Reads a 4-byte big-endian length header followed by that many payload
    bytes, and returns the unpickled object.

    Returns:
        The unpickled message, or None if the connection is closed before a
        complete header or a complete payload has been received.
    """
    # read message length and unpack it into an integer
    raw_msglen = recvall(sock, 4)
    if not raw_msglen:
        return None
    msglen = struct.unpack('>I', raw_msglen)[0]
    # read the message data
    payload = recvall(sock, msglen)
    if payload is None:
        # Peer closed mid-message: report EOF the same way as a missing header
        # instead of letting pickle.loads(None) raise TypeError.
        return None
    # SECURITY: pickle.loads can execute arbitrary code embedded in the data;
    # only use this with trusted clients on a trusted network.
    return pickle.loads(payload)

def recvall(sock, n):
    """Receive exactly `n` bytes from `sock`.

    Returns:
        A bytes object of length `n`, or None if EOF is hit first.
    """
    # A bytearray grows in place; repeated `bytes +=` would re-copy the whole
    # buffer on every packet, which is quadratic for multi-megabyte messages.
    buf = bytearray()
    while len(buf) < n:
        packet = sock.recv(n - len(buf))
        if not packet:
            return None
        buf.extend(packet)
    return bytes(buf)

In [12]:
def average_weights(weights_list):
    """Average the clients' weights layer by layer.

    For every layer index of the module-level `global_weights`, the tensors
    `weights_list[k][j]` for k in range(num_users) are averaged element-wise
    with `tf.math.reduce_mean(..., axis=0)`.

    Args:
        weights_list: indexable by client id 0..num_users-1; each entry is a
            per-layer sequence of weights.

    Returns:
        A list with one averaged tensor per layer.
    """
    n_layers = len(global_weights)
    return [
        tf.math.reduce_mean(
            [weights_list[client][layer] for client in range(num_users)],
            axis=0,
        )
        for layer in range(n_layers)
    ]

## Receive users for aggregation

In [13]:
def receive(userid, r, num_users, conn): #thread for receive clients
    """Serve one client for one round: send the global weights, collect its update.

    Sends a message with the current round, the total number of rounds, the
    client id and the current global weights over `conn`, then blocks until
    the client replies. Under `lock`, the reply's weights are stored in
    `weights_list[userid]`; if the reply is tagged with round `r` the report
    counter is incremented, and once `num_users` reports are counted the
    global weights are replaced by their average and the counter is reset.

    Args:
        userid: slot index of this client; used as the key in `weights_list`.
        r: index of the current communication round.
        num_users: number of client reports that triggers aggregation.
        conn: connected socket for this client.

    Raises:
        ConnectionError: if the client closes the connection before replying.
    """
    global weight_count
    global global_weights
    global weights_list

    msg = {
        'current_round': r,
        'rounds': rounds,
        'client_id': userid,
        'weight': global_weights
    }
    send_msg(conn, msg)    # send global weight
    reply = recv_msg(conn)    # get weights from clients
    if reply is None:
        # recv_msg signals a closed connection with None; fail with a clear
        # error instead of a TypeError from subscripting None below.
        raise ConnectionError(
            'client {} disconnected before returning weights for round {}'.format(userid, r))
    with lock:
        # NOTE(review): the weights are stored even when the reply is tagged
        # with a different round; only the counter is gated on the round tag.
        weights_list[userid] = reply['weight']
        if reply['current_round'] == r:
            weight_count += 1

        if weight_count == num_users:
            global_weights = average_weights(weights_list)
            weight_count = 0

## Thread define

In [14]:
def run_thread(func, num_user, r):
    """Accept `num_user` client connections and serve each on its own thread.

    Every accepted connection is stored in `clientsoclist` at its slot index
    and handed to `func(slot, r, num_user, conn)` on a new thread. The call
    returns once all threads have finished, after printing the elapsed time.
    The module-level `start_time` is reset at the start of every call.
    """
    global clientsoclist
    global start_time

    workers = []
    print("timer start!")
    start_time = time.time()    # store start time
    for slot in range(num_user):
        conn, addr = s.accept()
        print('Conntected with', addr)
        # remember the client socket for this slot
        clientsoclist[slot] = conn
        worker = Thread(target=func, args=(slot, r, num_user, conn))
        workers.append(worker)
        worker.start()
    # wait for every client handler to finish before timing the round
    for worker in workers:
        worker.join()
    end_time = time.time()  # store end time
    print("TrainingTime: {} sec".format(end_time - start_time))

In [15]:
# Resolve this machine's address from its hostname; the server socket below
# binds to (host_name, port_number).
host_name = socket.gethostbyname(socket.gethostname())
# host_name = '172.31.2.147'
port_number = 12345
print(host_name)

172.31.26.96


In [16]:
# Repeats the output of the previous cell.
print(host_name)

172.31.26.96


### Open the server socket

In [17]:
# Listening socket that run_thread accepts client connections on.
s = socket.socket()
s.bind((host_name, port_number))
s.listen(10)  # backlog of 10 pending connections

## Communication overhead

In [None]:
# Server-side federated training loop.
# For each learning rate and each round: run_thread() hands the current global
# weights to every client and waits for their updates (receive() averages them
# into `global_weights`), then the aggregated model is evaluated on the full
# train and test sets and saved to disk.
loss='categorical_crossentropy'
metrics = ['accuracy']
cce = tf.keras.losses.CategoricalCrossentropy()

result_per_lr = {}   # lr -> per-round metric histories
start = time.time()  # wall-clock start of the whole experiment

# Path prefix for the results pickle written by a later cell.
# NOTE(review): shadows the builtin `dir`; kept because that later cell reads this name.
dir = ''

for lr in lrs:
    # Start every learning-rate run from the same initial weights; otherwise a
    # later run would continue from the previous run's trained model and the
    # per-lr histories would not be comparable.
    # NOTE(review): `lr` is not part of the message receive() sends to clients,
    # so in this notebook it only labels the results.
    global_weights = initial_weights

    train_losses = []
    train_accs = []
    test_losses = []
    test_accs = []

    print('\nlearning rate: {}'.format(lr))

    for r in range(rounds):
        train_loss = 0
        train_acc = 0
        test_loss = 0
        test_acc = 0

        # blocks until all clients have connected, trained and reported back
        run_thread(receive, num_users, r)

        # evaluate current round (timed separately so `start` keeps the
        # experiment-wide start time)
        eval_start = time.time()
        global_model.set_weights(global_weights)

        # test global model on full training set
        for (X,y) in train_batched:
            preds = global_model.predict(X)
            train_loss = cce(y, preds)
            # accuracy_score takes (y_true, y_pred)
            train_acc = accuracy_score(tf.argmax(y, axis=1), tf.argmax(preds, axis=1))
            train_losses.append(train_loss.numpy())
            train_accs.append(train_acc)

        # test global model on testing set
        for(X, y) in test_batched:
            preds = global_model.predict(X)
            test_loss = cce(y, preds)
            test_acc = accuracy_score(tf.argmax(y, axis=1), tf.argmax(preds, axis=1))
            test_losses.append(test_loss.numpy())
            test_accs.append(test_acc)

        elapsed = (time.time() - eval_start)

        print('comm_round: {}/{} | test_acc: {:.3%} | test_loss: {:.3} | train_acc: {:.3%} | train_loss: {:.3} | elapsed: {}'.format(r+1, rounds, test_acc, test_loss, train_acc, train_loss, timedelta(seconds=elapsed)))
        print('\n')
        # checkpoint the aggregated model after every round
        global_model.save('saved_model/global_model')

    result_per_lr[lr] = {
        'train_accs' : train_accs,
        'test_accs' : test_accs,
        'train_losses' : train_losses,
        'test_losses' : test_losses
                          }
    
    


learning rate: 0.1
timer start!
Conntected with ('172.31.26.96', 59218)
Conntected with ('172.31.26.96', 59232)
Conntected with ('172.31.26.96', 59246)
Conntected with ('172.31.26.38', 39090)
Conntected with ('172.31.26.38', 39100)
Conntected with ('172.31.26.38', 39110)
Conntected with ('172.31.23.109', 40580)
Conntected with ('172.31.23.109', 36106)
Conntected with ('172.31.23.109', 36122)
Conntected with ('172.31.24.39', 42912)
TrainingTime: 166.5627269744873 sec


2023-05-09 03:47:42.606024: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype float and shape [60000,10]
	 [[{{node Placeholder/_1}}]]


  1/313 [..............................] - ETA: 24s

2023-05-09 03:47:58.476050: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype float and shape [10000,10]
	 [[{{node Placeholder/_1}}]]


comm_round: 1/100 | test_acc: 53.910% | test_loss: 1.86 | train_acc: 53.253% | train_loss: 1.87 | elapsed: 0:00:18.861891






timer start!
Conntected with ('172.31.24.39', 37800)
Conntected with ('172.31.26.38', 51570)
Conntected with ('172.31.26.96', 45414)
Conntected with ('172.31.26.96', 45422)
Conntected with ('172.31.26.96', 45432)
Conntected with ('172.31.26.38', 40604)
Conntected with ('172.31.26.38', 40614)
Conntected with ('172.31.23.109', 40646)
Conntected with ('172.31.23.109', 40650)
Conntected with ('172.31.23.109', 40660)
TrainingTime: 97.55454158782959 sec
comm_round: 2/100 | test_acc: 65.120% | test_loss: 0.875 | train_acc: 64.952% | train_loss: 0.892 | elapsed: 0:00:17.989822






timer start!
Conntected with ('172.31.24.39', 38728)
Conntected with ('172.31.23.109', 54260)
Conntected with ('172.31.26.96', 60454)
Conntected with ('172.31.23.109', 54276)
Conntected with ('172.31.26.38', 57338)
Conntected with ('172.31.23.109', 53228)
Conntected with ('172.31.26.96', 33894)
Conntected with ('172.31.26.38', 57352)
Conntected with ('172.31.26.38', 57358)
Conntected with ('172.31.26.96', 33906)
TrainingTime: 97.9536623954773 sec
comm_round: 3/100 | test_acc: 87.280% | test_loss: 0.411 | train_acc: 86.593% | train_loss: 0.429 | elapsed: 0:00:17.884068






timer start!
Conntected with ('172.31.24.39', 51856)
Conntected with ('172.31.26.96', 43436)
Conntected with ('172.31.26.38', 38044)
Conntected with ('172.31.26.38', 38046)
Conntected with ('172.31.26.96', 43438)
Conntected with ('172.31.23.109', 33150)
Conntected with ('172.31.26.38', 38060)
Conntected with ('172.31.26.96', 43452)
Conntected with ('172.31.23.109', 33152)
Conntected with ('172.31.23.109', 33158)
TrainingTime: 97.2976701259613 sec
comm_round: 4/100 | test_acc: 94.410% | test_loss: 0.221 | train_acc: 93.965% | train_loss: 0.234 | elapsed: 0:00:17.803191






timer start!
Conntected with ('172.31.24.39', 55354)
Conntected with ('172.31.26.38', 45010)
Conntected with ('172.31.26.96', 51842)
Conntected with ('172.31.23.109', 38524)
Conntected with ('172.31.26.96', 42658)
Conntected with ('172.31.26.38', 49378)
Conntected with ('172.31.23.109', 38530)
Conntected with ('172.31.26.38', 49384)
Conntected with ('172.31.23.109', 38544)
Conntected with ('172.31.26.96', 42664)
TrainingTime: 97.7203140258789 sec
comm_round: 5/100 | test_acc: 94.680% | test_loss: 0.187 | train_acc: 94.123% | train_loss: 0.2 | elapsed: 0:00:18.117958






timer start!
Conntected with ('172.31.24.39', 44568)
Conntected with ('172.31.26.38', 43576)
Conntected with ('172.31.26.96', 59312)
Conntected with ('172.31.23.109', 58536)
Conntected with ('172.31.26.38', 43580)
Conntected with ('172.31.26.38', 43592)
Conntected with ('172.31.23.109', 58552)
Conntected with ('172.31.26.96', 59316)
Conntected with ('172.31.26.96', 51456)
Conntected with ('172.31.23.109', 41400)
TrainingTime: 97.63223433494568 sec
comm_round: 6/100 | test_acc: 95.970% | test_loss: 0.139 | train_acc: 95.628% | train_loss: 0.148 | elapsed: 0:00:18.422992






timer start!
Conntected with ('172.31.24.39', 48054)
Conntected with ('172.31.23.109', 36942)
Conntected with ('172.31.26.96', 48474)
Conntected with ('172.31.26.96', 48486)
Conntected with ('172.31.26.38', 35232)
Conntected with ('172.31.23.109', 36944)
Conntected with ('172.31.26.38', 35248)
Conntected with ('172.31.26.38', 35254)
Conntected with ('172.31.23.109', 36946)
Conntected with ('172.31.26.96', 48494)
TrainingTime: 97.81545233726501 sec
comm_round: 7/100 | test_acc: 96.460% | test_loss: 0.122 | train_acc: 96.132% | train_loss: 0.13 | elapsed: 0:00:17.722595






timer start!
Conntected with ('172.31.24.39', 42704)
Conntected with ('172.31.26.38', 59586)
Conntected with ('172.31.23.109', 48896)
Conntected with ('172.31.23.109', 60906)
Conntected with ('172.31.23.109', 60922)
Conntected with ('172.31.26.96', 52182)
Conntected with ('172.31.26.38', 60938)
Conntected with ('172.31.26.96', 52194)
Conntected with ('172.31.26.96', 52198)
Conntected with ('172.31.26.38', 60944)
TrainingTime: 96.30521750450134 sec
comm_round: 8/100 | test_acc: 96.670% | test_loss: 0.108 | train_acc: 96.360% | train_loss: 0.117 | elapsed: 0:00:18.384609






timer start!
Conntected with ('172.31.24.39', 60146)
Conntected with ('172.31.26.38', 48184)
Conntected with ('172.31.26.96', 33340)
Conntected with ('172.31.26.38', 48188)
Conntected with ('172.31.26.96', 33342)
Conntected with ('172.31.23.109', 60210)
Conntected with ('172.31.23.109', 60226)
Conntected with ('172.31.26.38', 48196)
Conntected with ('172.31.23.109', 60240)
Conntected with ('172.31.26.96', 33356)
TrainingTime: 96.59703087806702 sec
comm_round: 9/100 | test_acc: 97.080% | test_loss: 0.0979 | train_acc: 96.692% | train_loss: 0.106 | elapsed: 0:00:17.792020






timer start!
Conntected with ('172.31.24.39', 59486)
Conntected with ('172.31.23.109', 47530)
Conntected with ('172.31.26.96', 42914)
Conntected with ('172.31.26.38', 48168)
Conntected with ('172.31.23.109', 58226)
Conntected with ('172.31.26.38', 48170)
Conntected with ('172.31.23.109', 58240)
Conntected with ('172.31.26.96', 42930)
Conntected with ('172.31.26.96', 42932)
Conntected with ('172.31.26.38', 48186)
TrainingTime: 97.75461316108704 sec
comm_round: 10/100 | test_acc: 96.680% | test_loss: 0.111 | train_acc: 96.123% | train_loss: 0.117 | elapsed: 0:00:17.790711






timer start!
Conntected with ('172.31.24.39', 53270)
Conntected with ('172.31.23.109', 52502)
Conntected with ('172.31.26.38', 36512)
Conntected with ('172.31.26.96', 60502)
Conntected with ('172.31.26.38', 36528)
Conntected with ('172.31.23.109', 52506)
Conntected with ('172.31.26.96', 60510)
Conntected with ('172.31.26.38', 36544)
Conntected with ('172.31.26.96', 47286)
Conntected with ('172.31.23.109', 36368)
TrainingTime: 95.68799567222595 sec
comm_round: 11/100 | test_acc: 97.010% | test_loss: 0.0919 | train_acc: 96.848% | train_loss: 0.0954 | elapsed: 0:00:17.949393






timer start!
Conntected with ('172.31.24.39', 53580)
Conntected with ('172.31.23.109', 42740)
Conntected with ('172.31.26.38', 43006)
Conntected with ('172.31.26.96', 53630)
Conntected with ('172.31.23.109', 42746)
Conntected with ('172.31.23.109', 42754)
Conntected with ('172.31.26.38', 43012)
Conntected with ('172.31.26.96', 53640)
Conntected with ('172.31.26.96', 53646)
Conntected with ('172.31.26.38', 43024)
TrainingTime: 97.23591160774231 sec
comm_round: 12/100 | test_acc: 97.470% | test_loss: 0.0815 | train_acc: 97.267% | train_loss: 0.0859 | elapsed: 0:00:18.108026






timer start!
Conntected with ('172.31.24.39', 35152)
Conntected with ('172.31.26.38', 42302)
Conntected with ('172.31.23.109', 37178)
Conntected with ('172.31.23.109', 37182)
Conntected with ('172.31.26.38', 42318)
Conntected with ('172.31.26.96', 33126)
Conntected with ('172.31.26.96', 33138)
Conntected with ('172.31.26.38', 42324)
Conntected with ('172.31.23.109', 40016)
Conntected with ('172.31.26.96', 33246)
TrainingTime: 97.19141674041748 sec
comm_round: 13/100 | test_acc: 97.450% | test_loss: 0.0831 | train_acc: 97.187% | train_loss: 0.0866 | elapsed: 0:00:17.999547






timer start!
Conntected with ('172.31.24.39', 58238)
Conntected with ('172.31.26.96', 43818)
Conntected with ('172.31.26.38', 55730)
Conntected with ('172.31.23.109', 57384)
Conntected with ('172.31.26.96', 43828)
Conntected with ('172.31.26.38', 55732)
Conntected with ('172.31.23.109', 57388)
Conntected with ('172.31.23.109', 57404)
Conntected with ('172.31.26.38', 55748)
Conntected with ('172.31.26.96', 43842)
TrainingTime: 98.09150815010071 sec
comm_round: 14/100 | test_acc: 97.650% | test_loss: 0.0746 | train_acc: 97.485% | train_loss: 0.0775 | elapsed: 0:00:18.281270






timer start!
Conntected with ('172.31.24.39', 35998)
Conntected with ('172.31.23.109', 58228)
Conntected with ('172.31.23.109', 58244)
Conntected with ('172.31.26.96', 52264)
Conntected with ('172.31.26.38', 36572)
Conntected with ('172.31.26.38', 36578)
Conntected with ('172.31.23.109', 58256)
Conntected with ('172.31.26.96', 52272)
Conntected with ('172.31.26.96', 52286)
Conntected with ('172.31.26.38', 36594)
TrainingTime: 97.41075348854065 sec
comm_round: 15/100 | test_acc: 97.750% | test_loss: 0.0721 | train_acc: 97.517% | train_loss: 0.0757 | elapsed: 0:00:18.120926






timer start!
Conntected with ('172.31.24.39', 54602)
Conntected with ('172.31.23.109', 49162)
Conntected with ('172.31.26.38', 50416)
Conntected with ('172.31.26.38', 50428)
Conntected with ('172.31.26.96', 40278)
Conntected with ('172.31.26.96', 40288)
Conntected with ('172.31.26.38', 50430)
Conntected with ('172.31.23.109', 49176)
Conntected with ('172.31.23.109', 49188)
Conntected with ('172.31.26.96', 51660)
TrainingTime: 96.34517216682434 sec
comm_round: 16/100 | test_acc: 97.900% | test_loss: 0.068 | train_acc: 97.703% | train_loss: 0.0707 | elapsed: 0:00:17.731767






timer start!
Conntected with ('172.31.24.39', 34604)
Conntected with ('172.31.23.109', 56672)
Conntected with ('172.31.26.96', 35562)
Conntected with ('172.31.26.38', 57686)
Conntected with ('172.31.23.109', 56676)
Conntected with ('172.31.26.38', 57692)
Conntected with ('172.31.23.109', 56678)
Conntected with ('172.31.26.38', 57702)
Conntected with ('172.31.26.96', 35570)
Conntected with ('172.31.26.96', 35582)
TrainingTime: 96.03097224235535 sec
comm_round: 17/100 | test_acc: 97.920% | test_loss: 0.0632 | train_acc: 97.847% | train_loss: 0.0659 | elapsed: 0:00:18.031370






timer start!
Conntected with ('172.31.24.39', 49624)
Conntected with ('172.31.26.96', 34032)
Conntected with ('172.31.26.38', 55102)
Conntected with ('172.31.23.109', 59286)
Conntected with ('172.31.23.109', 59288)
Conntected with ('172.31.26.96', 34048)
Conntected with ('172.31.26.96', 60694)
Conntected with ('172.31.23.109', 59304)
Conntected with ('172.31.26.38', 55112)
Conntected with ('172.31.26.38', 55116)
TrainingTime: 96.84680652618408 sec
comm_round: 18/100 | test_acc: 97.930% | test_loss: 0.0621 | train_acc: 97.965% | train_loss: 0.0634 | elapsed: 0:00:17.883153






timer start!
Conntected with ('172.31.24.39', 44012)
Conntected with ('172.31.26.96', 37738)
Conntected with ('172.31.23.109', 54762)
Conntected with ('172.31.26.38', 43684)
Conntected with ('172.31.26.38', 43694)
Conntected with ('172.31.26.96', 37746)
Conntected with ('172.31.26.96', 37762)
Conntected with ('172.31.23.109', 54764)
Conntected with ('172.31.26.38', 43708)
Conntected with ('172.31.23.109', 54780)
TrainingTime: 96.5436282157898 sec
comm_round: 19/100 | test_acc: 97.910% | test_loss: 0.0645 | train_acc: 97.802% | train_loss: 0.0658 | elapsed: 0:00:18.228810






timer start!
Conntected with ('172.31.24.39', 49756)
Conntected with ('172.31.26.38', 52388)
Conntected with ('172.31.26.96', 41532)
Conntected with ('172.31.23.109', 49560)
Conntected with ('172.31.26.38', 52390)
Conntected with ('172.31.23.109', 49566)
Conntected with ('172.31.23.109', 49568)
Conntected with ('172.31.26.96', 39366)
Conntected with ('172.31.26.38', 43124)
Conntected with ('172.31.26.96', 39372)
TrainingTime: 96.7830400466919 sec
comm_round: 20/100 | test_acc: 98.070% | test_loss: 0.0611 | train_acc: 98.003% | train_loss: 0.0607 | elapsed: 0:00:17.795671






timer start!
Conntected with ('172.31.24.39', 54650)
Conntected with ('172.31.26.38', 58856)
Conntected with ('172.31.26.96', 56936)
Conntected with ('172.31.26.38', 58860)
Conntected with ('172.31.23.109', 47168)
Conntected with ('172.31.23.109', 47184)
Conntected with ('172.31.23.109', 47188)
Conntected with ('172.31.26.38', 58862)
Conntected with ('172.31.26.96', 56938)
Conntected with ('172.31.26.96', 56948)
TrainingTime: 96.79198408126831 sec
comm_round: 21/100 | test_acc: 98.090% | test_loss: 0.0585 | train_acc: 98.027% | train_loss: 0.0594 | elapsed: 0:00:17.848130






timer start!
Conntected with ('172.31.24.39', 57744)
Conntected with ('172.31.23.109', 48154)
Conntected with ('172.31.26.96', 51384)
Conntected with ('172.31.26.38', 60822)
Conntected with ('172.31.26.38', 60828)
Conntected with ('172.31.26.96', 47920)
Conntected with ('172.31.23.109', 37862)
Conntected with ('172.31.23.109', 37872)
Conntected with ('172.31.26.38', 41454)
Conntected with ('172.31.26.96', 47930)
TrainingTime: 96.69057083129883 sec
comm_round: 22/100 | test_acc: 97.240% | test_loss: 0.0787 | train_acc: 97.147% | train_loss: 0.0811 | elapsed: 0:00:17.879801






timer start!
Conntected with ('172.31.24.39', 53002)
Conntected with ('172.31.23.109', 50350)
Conntected with ('172.31.26.38', 50376)
Conntected with ('172.31.26.38', 50390)
Conntected with ('172.31.23.109', 50352)
Conntected with ('172.31.26.96', 48260)
Conntected with ('172.31.26.96', 48270)
Conntected with ('172.31.26.38', 50406)
Conntected with ('172.31.26.96', 48284)
Conntected with ('172.31.23.109', 50356)
TrainingTime: 96.41162061691284 sec
comm_round: 23/100 | test_acc: 98.230% | test_loss: 0.0533 | train_acc: 98.297% | train_loss: 0.0512 | elapsed: 0:00:17.955060






timer start!
Conntected with ('172.31.24.39', 55824)
Conntected with ('172.31.26.38', 51014)
Conntected with ('172.31.23.109', 60552)
Conntected with ('172.31.26.96', 57996)
Conntected with ('172.31.26.96', 57998)
Conntected with ('172.31.26.38', 48482)
Conntected with ('172.31.23.109', 36932)
Conntected with ('172.31.26.38', 48490)
Conntected with ('172.31.26.96', 41956)
Conntected with ('172.31.23.109', 36934)
TrainingTime: 97.22859191894531 sec
comm_round: 24/100 | test_acc: 98.360% | test_loss: 0.0534 | train_acc: 98.228% | train_loss: 0.0522 | elapsed: 0:00:17.944553






timer start!
Conntected with ('172.31.24.39', 46206)
Conntected with ('172.31.23.109', 38884)
Conntected with ('172.31.26.38', 48338)
Conntected with ('172.31.23.109', 38886)
Conntected with ('172.31.26.96', 38464)
Conntected with ('172.31.26.38', 48342)
Conntected with ('172.31.23.109', 38890)
Conntected with ('172.31.26.96', 38468)
Conntected with ('172.31.26.38', 48348)
Conntected with ('172.31.26.96', 38470)
TrainingTime: 96.25351810455322 sec
comm_round: 25/100 | test_acc: 98.240% | test_loss: 0.0518 | train_acc: 98.255% | train_loss: 0.0524 | elapsed: 0:00:18.195901






timer start!
Conntected with ('172.31.24.39', 51472)
Conntected with ('172.31.26.96', 42988)
Conntected with ('172.31.23.109', 53250)
Conntected with ('172.31.23.109', 48842)
Conntected with ('172.31.26.38', 38126)
Conntected with ('172.31.26.96', 54750)
Conntected with ('172.31.23.109', 48846)
Conntected with ('172.31.26.38', 38138)
Conntected with ('172.31.26.96', 54756)
Conntected with ('172.31.26.38', 38142)
TrainingTime: 96.98613739013672 sec
comm_round: 26/100 | test_acc: 98.240% | test_loss: 0.0522 | train_acc: 98.275% | train_loss: 0.051 | elapsed: 0:00:18.151077






timer start!
Conntected with ('172.31.24.39', 51736)
Conntected with ('172.31.23.109', 43974)
Conntected with ('172.31.26.38', 34296)
Conntected with ('172.31.26.96', 37818)
Conntected with ('172.31.26.96', 37830)
Conntected with ('172.31.23.109', 43988)
Conntected with ('172.31.26.38', 34310)
Conntected with ('172.31.26.38', 34324)
Conntected with ('172.31.26.96', 37838)
Conntected with ('172.31.23.109', 44000)
TrainingTime: 96.79574608802795 sec
comm_round: 27/100 | test_acc: 98.320% | test_loss: 0.0522 | train_acc: 98.348% | train_loss: 0.05 | elapsed: 0:00:34.517645






timer start!
Conntected with ('172.31.24.39', 55634)
Conntected with ('172.31.23.109', 52946)
Conntected with ('172.31.26.96', 54328)
Conntected with ('172.31.26.38', 53694)
Conntected with ('172.31.23.109', 52960)
Conntected with ('172.31.23.109', 52974)
Conntected with ('172.31.26.38', 53700)
Conntected with ('172.31.26.96', 54334)
Conntected with ('172.31.26.38', 53714)
Conntected with ('172.31.26.96', 54340)
TrainingTime: 282.9335286617279 sec
comm_round: 28/100 | test_acc: 98.270% | test_loss: 0.0517 | train_acc: 98.367% | train_loss: 0.048 | elapsed: 0:01:10.497851






timer start!
Conntected with ('172.31.24.39', 48018)
Conntected with ('172.31.23.109', 50926)
Conntected with ('172.31.26.38', 44960)
Conntected with ('172.31.23.109', 50938)
Conntected with ('172.31.26.38', 44970)
Conntected with ('172.31.23.109', 50942)
Conntected with ('172.31.26.38', 44976)
Conntected with ('172.31.26.96', 36292)
Conntected with ('172.31.26.96', 36294)
Conntected with ('172.31.26.96', 35040)
TrainingTime: 287.38916659355164 sec
comm_round: 29/100 | test_acc: 97.820% | test_loss: 0.0663 | train_acc: 97.880% | train_loss: 0.0633 | elapsed: 0:01:02.054289






timer start!
Conntected with ('172.31.24.39', 49430)
Conntected with ('172.31.26.38', 43866)
Conntected with ('172.31.23.109', 60928)
Conntected with ('172.31.23.109', 60932)
Conntected with ('172.31.26.38', 43874)
Conntected with ('172.31.26.38', 51298)
Conntected with ('172.31.23.109', 46180)
Conntected with ('172.31.26.96', 42790)
Conntected with ('172.31.26.96', 41338)
Conntected with ('172.31.26.96', 54446)
TrainingTime: 276.99858927726746 sec
comm_round: 30/100 | test_acc: 98.400% | test_loss: 0.0486 | train_acc: 98.567% | train_loss: 0.0435 | elapsed: 0:00:32.128488






timer start!
Conntected with ('172.31.24.39', 49510)
Conntected with ('172.31.23.109', 42536)
Conntected with ('172.31.26.38', 51672)
Conntected with ('172.31.23.109', 42538)
Conntected with ('172.31.23.109', 33494)
Conntected with ('172.31.26.38', 52330)
Conntected with ('172.31.26.38', 52334)
Conntected with ('172.31.26.96', 54842)
Conntected with ('172.31.26.96', 48678)
Conntected with ('172.31.26.96', 54106)
TrainingTime: 293.0154411792755 sec
comm_round: 31/100 | test_acc: 98.450% | test_loss: 0.0482 | train_acc: 98.573% | train_loss: 0.0431 | elapsed: 0:00:18.112338






timer start!
Conntected with ('172.31.24.39', 56552)
Conntected with ('172.31.23.109', 33496)
Conntected with ('172.31.23.109', 33510)
Conntected with ('172.31.26.38', 57022)
Conntected with ('172.31.26.38', 57030)
Conntected with ('172.31.23.109', 33516)
Conntected with ('172.31.26.38', 55814)
Conntected with ('172.31.26.96', 59588)
Conntected with ('172.31.26.96', 53308)
Conntected with ('172.31.26.96', 53316)
TrainingTime: 292.7904863357544 sec
comm_round: 32/100 | test_acc: 98.450% | test_loss: 0.0492 | train_acc: 98.540% | train_loss: 0.0444 | elapsed: 0:00:18.152330






timer start!
Conntected with ('172.31.24.39', 45676)
Conntected with ('172.31.26.38', 58072)
Conntected with ('172.31.23.109', 35322)
Conntected with ('172.31.26.38', 50338)
Conntected with ('172.31.23.109', 39860)
Conntected with ('172.31.26.38', 50340)
Conntected with ('172.31.23.109', 39866)
Conntected with ('172.31.26.96', 54122)
Conntected with ('172.31.26.96', 54138)
Conntected with ('172.31.26.96', 54148)
TrainingTime: 294.4372627735138 sec
comm_round: 33/100 | test_acc: 98.340% | test_loss: 0.0499 | train_acc: 98.458% | train_loss: 0.046 | elapsed: 0:00:19.234332






timer start!
Conntected with ('172.31.24.39', 58510)
Conntected with ('172.31.26.38', 36100)
Conntected with ('172.31.23.109', 59718)
Conntected with ('172.31.26.38', 36108)
Conntected with ('172.31.23.109', 59724)
Conntected with ('172.31.26.38', 36114)
Conntected with ('172.31.23.109', 59736)
Conntected with ('172.31.26.96', 44768)
Conntected with ('172.31.26.96', 50822)
Conntected with ('172.31.26.96', 42848)
TrainingTime: 292.00776743888855 sec
comm_round: 34/100 | test_acc: 98.460% | test_loss: 0.0478 | train_acc: 98.632% | train_loss: 0.0407 | elapsed: 0:01:01.866038






timer start!
Conntected with ('172.31.24.39', 46900)
Conntected with ('172.31.26.38', 36948)
Conntected with ('172.31.23.109', 60656)
Conntected with ('172.31.23.109', 60662)
Conntected with ('172.31.26.38', 36956)
Conntected with ('172.31.23.109', 60666)
Conntected with ('172.31.26.38', 36962)
Conntected with ('172.31.26.96', 51794)
Conntected with ('172.31.26.96', 51804)
Conntected with ('172.31.26.96', 48376)
TrainingTime: 270.45665884017944 sec
comm_round: 35/100 | test_acc: 98.570% | test_loss: 0.0449 | train_acc: 98.715% | train_loss: 0.037 | elapsed: 0:01:10.383458






timer start!
Conntected with ('172.31.24.39', 49090)
Conntected with ('172.31.26.38', 53340)
Conntected with ('172.31.23.109', 37648)
Conntected with ('172.31.26.38', 53878)
Conntected with ('172.31.23.109', 37650)
Conntected with ('172.31.23.109', 37654)
Conntected with ('172.31.26.38', 53890)
Conntected with ('172.31.26.96', 35094)
Conntected with ('172.31.26.96', 35108)
Conntected with ('172.31.26.96', 49078)
TrainingTime: 277.59100246429443 sec
comm_round: 36/100 | test_acc: 98.510% | test_loss: 0.0454 | train_acc: 98.730% | train_loss: 0.0375 | elapsed: 0:01:10.564137






timer start!
Conntected with ('172.31.24.39', 33714)
Conntected with ('172.31.26.38', 36504)
Conntected with ('172.31.23.109', 36616)
Conntected with ('172.31.26.38', 36520)
Conntected with ('172.31.23.109', 36632)
Conntected with ('172.31.23.109', 36646)
Conntected with ('172.31.26.38', 36528)
Conntected with ('172.31.26.96', 46752)
Conntected with ('172.31.26.96', 46768)
Conntected with ('172.31.26.96', 46776)
TrainingTime: 300.65967559814453 sec
comm_round: 37/100 | test_acc: 98.500% | test_loss: 0.0476 | train_acc: 98.612% | train_loss: 0.0409 | elapsed: 0:01:10.653892






timer start!
Conntected with ('172.31.24.39', 34852)
Conntected with ('172.31.26.38', 40034)
Conntected with ('172.31.23.109', 53234)
Conntected with ('172.31.26.38', 40572)
Conntected with ('172.31.23.109', 53236)
Conntected with ('172.31.23.109', 53250)
Conntected with ('172.31.26.38', 40588)
Conntected with ('172.31.26.96', 58772)
Conntected with ('172.31.26.96', 35246)
Conntected with ('172.31.26.96', 58516)


In [None]:
# Persist the metric histories for all learning rates in one pickle file.
# NOTE: `lr` is the variable left over from the training loop's `for lr in lrs`,
# so the filename carries only the last learning rate that was iterated.
with open(dir+'result_per_lr_{}_{}_{}_{}.pickle'.format(B,C,E, lr), 'wb') as handle:
    pickle.dump(result_per_lr, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
def get_plotted_metrics(result_per_lr, c_rounds):
    """Build best-so-far metric curves across all learning rates.

    For each round c in range(c_rounds) the best value over all learning
    rates is taken (highest accuracy, lowest loss). Each returned curve then
    carries the best value seen up to and including that round, so accuracy
    curves never decrease and loss curves never increase.

    Args:
        result_per_lr: mapping lr -> dict with 'train_accs', 'test_accs',
            'train_losses' and 'test_losses' sequences indexable by round.
        c_rounds: number of rounds to include.

    Returns:
        Tuple (plotted_train_accs, plotted_test_accs, plotted_train_losses,
        plotted_test_losses); each is a list of length c_rounds.

    Raises:
        IndexError: if any history has fewer than c_rounds entries.
    """
    plotted_train_accs = []
    plotted_test_accs = []
    plotted_train_losses = []
    plotted_test_losses = []

    for c in range(c_rounds):
        # best value of round c over all learning rates
        best_train_acc = 0
        best_test_acc = 0
        best_train_loss = math.inf
        best_test_loss = math.inf
        for history in result_per_lr.values():
            best_train_acc = max(best_train_acc, history['train_accs'][c])
            best_test_acc = max(best_test_acc, history['test_accs'][c])
            best_train_loss = min(best_train_loss, history['train_losses'][c])
            best_test_loss = min(best_test_loss, history['test_losses'][c])

        # Fold in the running best. This must run once per round, i.e. inside
        # the loop; from the second round on, compare with the previous point.
        if c > 0:
            best_train_acc = max(plotted_train_accs[-1], best_train_acc)
            best_test_acc = max(plotted_test_accs[-1], best_test_acc)
            best_train_loss = min(plotted_train_losses[-1], best_train_loss)
            best_test_loss = min(plotted_test_losses[-1], best_test_loss)

        plotted_train_accs.append(best_train_acc)
        plotted_test_accs.append(best_test_acc)
        plotted_train_losses.append(best_train_loss)
        plotted_test_losses.append(best_test_loss)

    return plotted_train_accs, plotted_test_accs, plotted_train_losses, plotted_test_losses

In [None]:
# Best-so-far curves over all configured communication rounds. `rounds` is the
# round count set in the configuration cell (no `c_rounds` name is defined in
# this notebook).
plotted_train_accs, plotted_test_accs, plotted_train_losses, plotted_test_losses = get_plotted_metrics(result_per_lr, rounds)

In [None]:
# Accuracy vs. communication round, with the target test accuracy marked.

# MODEL and DATA are not defined in any visible cell of this notebook; use them
# if they have been set, otherwise fall back to labels matching the CNN built
# and the MNIST data loaded above, so the title does not raise NameError.
model_label = globals().get('MODEL', 'CNN')
data_label = globals().get('DATA', 'MNIST')

fig, ax = plt.subplots()
ax.plot(range(1, len(plotted_train_accs)+1), plotted_train_accs, label='train')
ax.plot(range(1,len(plotted_test_accs)+1), plotted_test_accs, label='test')
ax.set_xticks(np.arange(0, len(plotted_test_accs)+1, 100))
ax.axhline(y=target_test_acc, color='grey', linestyle='-', linewidth=0.5)
ax.set_ylabel('accuracy')

ax.set_xlabel('communication rounds')
ax.set_title('B={}, C={}, E={}, Model={}, Data={}'.format(B, C, E, model_label, data_label))
ax.legend()

# Secondary y-axis sharing the primary limits, with a single tick at the target.
ax2 = ax.twinx()
ax2.set_ylim(ax.get_ylim())
ax2.set_yticks([target_test_acc])

plt.show()

In [None]:
# Find the first (1-based) round whose best-so-far test accuracy reaches the
# target; n_round_at_target stays None if the target is never reached.
n_round_at_target = None
for round_no, acc in enumerate(plotted_test_accs, start=1):
    if acc < target_test_acc:
        continue
    print("the number of rounds to achieve target test-accuracy: ")
    n_round_at_target = round_no
    print(n_round_at_target)
    break

In [None]:
# Look up the best-so-far test loss at the round where the target accuracy was
# first reached; loss_at_target stays None if the target was never reached.
# Indexing directly, rather than looping with a variable named `loss`, avoids
# rebinding the module-level `loss` string set in the training cell.
loss_at_target = None
if n_round_at_target and 1 <= n_round_at_target <= len(plotted_test_losses):
    print("loss at target test-accuracy: ")
    loss_at_target = plotted_test_losses[n_round_at_target - 1]
    print(loss_at_target)

In [None]:
# end_time = time.time()  # store end time
# print("TrainingTime: {} sec".format(end_time - start_time))

In [None]:
# NOTE(review): start_time is reassigned by run_thread at the start of every
# round, so this reports the time since the most recent round began rather than
# since the experiment started.
end_time = time.time()  # store end time
print("WorkingTime: {} sec".format(end_time - start_time))