# Server

## Setting variables

In [1]:
rounds = 100
num_users = 10 # number of clients
target_test_acc = 0.99
lrs = [0.1]

C = 1
E = 5
B = 10 # 'all' for a single minibatch

In [2]:
import os

import socket
import struct
import pickle
import sys

from threading import Thread
from threading import Lock

import copy

import logging
import math
import random
import re
import time
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from datetime import timedelta
from keras import backend as K
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelBinarizer
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Flatten
from tensorflow.keras.optimizers import SGD
from tqdm import tqdm
tf.get_logger().setLevel(logging.ERROR)

2023-05-08 08:14:03.956831: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Device

In [3]:
# if tf.test.gpu_device_name():
#     device = "gpu"
# else:
#     device = "cpu"

# print(device)

## Model

A CNN with two 5x5 convolution layers (the first with 32 channels, the second with 64, each followed with 2x2 max pooling), a fully connected layer with 512 units and ReLu activation, and a final softmax output layer (1,663,370 total parameters)

In [4]:
class CNN:
    @staticmethod
    def build(input_shape):
        model = Sequential()
        model.add(Conv2D(filters=32, kernel_size=(5,5), padding='same', activation='relu', input_shape=input_shape))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Conv2D(filters=64, padding='same', kernel_size=(5,5), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Flatten())
        model.add(Dense(512, activation='relu'))
        model.add(Dense(10, activation='softmax'))
        return model

initialize global model

In [5]:
model = CNN()
  
global_model = model.build((28,28,1))
initial_weights = global_model.get_weights()

global_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 28, 28, 32)        832       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 14, 14, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 14, 14, 64)        51264     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 7, 7, 64)         0         
 2D)                                                             
                                                                 
 flatten (Flatten)           (None, 3136)              0         
                                                                 
 dense (Dense)               (None, 512)               1

2023-05-08 08:14:06.856776: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:266] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


In [6]:
# Save the entire model as a SavedModel.
!mkdir -p saved_model
global_model.save('saved_model/global_model')



# Load data

In [7]:
mnist = tf.keras.datasets.mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [8]:
X_train = X_train.astype("float32")/255
X_test = X_test.astype("float32")/255
X_train = np.expand_dims(X_train, -1)
X_test = np.expand_dims(X_test, -1)
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)

print("x_train shape:", X_train.shape)
print(X_train.shape[0], "train samples")
print(X_test.shape[0], "test samples")

x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


In [9]:
train_batched = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(len(y_train)) # for testing on train set
test_batched = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(len(y_test))

## variables

In [10]:
global global_weights    

clientsoclist = [0]*num_users

start_time = 0
weight_count = 0

global_weights = initial_weights
weights_list = {}

lock = Lock()

## Socket initialization
### Set host address and port number

### Required socket functions

In [11]:
def send_msg(sock, msg):
    # prefix each message with a 4-byte length in network byte order
    msg = pickle.dumps(msg)
    l_send = len(msg)
    msg = struct.pack('>I', l_send) + msg
    sock.sendall(msg)
    return l_send

def recv_msg(sock):
    # read message length and unpack it into an integer
    raw_msglen = recvall(sock, 4)
    if not raw_msglen:
        return None
    msglen = struct.unpack('>I', raw_msglen)[0]
    # read the message data
    msg =  recvall(sock, msglen)
    msg = pickle.loads(msg)
    return msg

def recvall(sock, n):
    # helper function to receive n bytes or return None if EOF is hit
    data = b''
    while len(data) < n:
        packet = sock.recv(n - len(data))
        if not packet:
            return None
        data += packet
    return data

In [12]:
def average_weights(weights_list):
    avg_weights = list()
    for j in range(len(global_weights)):
        weights = [weights_list[k][j] for k in range(num_users)]
        layer_mean = tf.math.reduce_mean(weights, axis=0)
        avg_weights.append(layer_mean)
        
    return avg_weights

## Receive users for aggregation

In [13]:
def receive(userid, num_users, conn): #thread for receive clients
    global weight_count
    global global_weights
    global weights_list
    
    msg = {
        'rounds': rounds,
        'client_id': userid,
        'weight': global_weights
    }
    distribute = send_msg(conn, msg)    #send global weight
    r = recv_msg(conn)    # get weights from clients
    with lock:
        weights_list[userid] = r['weight']
        weight_count += 1
        
        if weight_count == num_users:
            global_weights = average_weights(weights_list)
            weight_count = 0

## Thread define

In [14]:
def run_thread(func, num_user):
    global clientsoclist
    global start_time
    
    thrs = []
    print("timer start!")
    start_time = time.time()    # store start time
    for i in range(num_user):
        conn, addr = s.accept()
        print('Conntected with', addr)
        # append client socket on list
        clientsoclist[i] = conn
        args = (i, num_user, conn)
        thread = Thread(target=func, args=args)
        thrs.append(thread)
        thread.start()
    for thread in thrs:
        thread.join()
    end_time = time.time()  # store end time
    print("TrainingTime: {} sec".format(end_time - start_time))

In [15]:
host_name = socket.gethostbyname(socket.gethostname())
# host_name = '172.31.2.147'
port_number = 12345
print(host_name)

172.31.26.96


In [16]:
print(host_name)

172.31.26.96


### Open the server socket

In [17]:
s = socket.socket()
s.bind((host_name, port_number))
s.listen(10)

## Comunication overhead

In [18]:
loss='categorical_crossentropy'
metrics = ['accuracy']
cce = tf.keras.losses.CategoricalCrossentropy()

result_per_lr = {}
start = time.time()

dir = ''
    
for lr in lrs:
    train_losses = []
    train_accs = []
    test_losses = []
    test_accs = []
    
    print('\nlearning rate: {}'.format(lr))
    
    for r in range(rounds):
        train_loss = 0
        train_acc = 0
        test_loss = 0
        test_acc = 0
        
        run_thread(receive, num_users)
        
        # save global model
        weights_path = './checkpoints/checkpoint_round_' + str(r)
        global_model.save_weights(weights_path)
    
        # evaluate current round
        start = time.time()
        global_model.set_weights(global_weights)
        global_model.save('saved_model/global_model')
        
        # test global model on full training set
        for (X,y) in train_batched:
            preds = global_model.predict(X)
            train_loss = cce(y, preds)
            train_acc = accuracy_score(tf.argmax(preds, axis=1), tf.argmax(y, axis=1))
            train_losses.append(train_loss.numpy())
            train_accs.append(train_acc)

        # test global model on testing set
        for(X, y) in test_batched:
            preds = global_model.predict(X)
            test_loss = cce(y, preds)
            test_acc = accuracy_score(tf.argmax(preds, axis=1), tf.argmax(y, axis=1))
            test_losses.append(test_loss.numpy())
            test_accs.append(test_acc)

        elapsed = (time.time() - start)

        print('comm_round: {}/{} | test_acc: {:.3%} | test_loss: {:.3} | train_acc: {:.3%} | train_loss: {:.3} | elapsed: {}'.format(r+1, rounds, test_acc, test_loss, train_acc, train_loss, timedelta(seconds=elapsed)))
        print('\n')
        
    result_per_lr[lr] = {
        'train_accs' : train_accs,
        'test_accs' : test_accs,
        'train_losses' : train_losses,
        'test_losses' : test_losses
                          }
    
    


learning rate: 0.1
timer start!
Conntected with ('172.31.26.96', 50192)
Conntected with ('172.31.26.96', 50202)
Conntected with ('172.31.26.38', 49742)
Conntected with ('172.31.26.38', 49756)
Conntected with ('172.31.26.38', 49766)
Conntected with ('172.31.26.38', 49782)
Conntected with ('172.31.23.109', 47954)
Conntected with ('172.31.23.109', 47970)
Conntected with ('172.31.23.109', 47974)
Conntected with ('172.31.23.109', 47976)
TrainingTime: 411.32035970687866 sec


2023-05-08 08:21:01.186509: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype float and shape [60000,10]
	 [[{{node Placeholder/_1}}]]


  1/313 [..............................] - ETA: 25s

2023-05-08 08:21:17.912379: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype float and shape [10000,10]
	 [[{{node Placeholder/_1}}]]


comm_round: 1/100 | test_acc: 57.920% | test_loss: 1.99 | train_acc: 57.238% | train_loss: 1.99 | elapsed: 0:00:20.574748


timer start!
Conntected with ('172.31.26.96', 57766)
Conntected with ('172.31.26.96', 57772)
Conntected with ('172.31.26.38', 58204)
Conntected with ('172.31.23.109', 42428)
Conntected with ('172.31.23.109', 48724)
Conntected with ('172.31.26.38', 36150)
Conntected with ('172.31.23.109', 60560)
Conntected with ('172.31.23.109', 33186)
Conntected with ('172.31.26.38', 54732)
Conntected with ('172.31.26.38', 54740)
TrainingTime: 130.95165634155273 sec




comm_round: 2/100 | test_acc: 72.690% | test_loss: 0.838 | train_acc: 72.485% | train_loss: 0.854 | elapsed: 0:00:19.547050


timer start!
Conntected with ('172.31.26.96', 49260)
Conntected with ('172.31.26.96', 34958)
Conntected with ('172.31.23.109', 39650)
Conntected with ('172.31.26.38', 42784)
Conntected with ('172.31.26.38', 36944)
Conntected with ('172.31.23.109', 45488)
Conntected with ('172.31.23.109', 36374)
Conntected with ('172.31.26.38', 33948)
Conntected with ('172.31.23.109', 40418)
Conntected with ('172.31.26.38', 52512)
TrainingTime: 130.48813605308533 sec




comm_round: 3/100 | test_acc: 87.660% | test_loss: 0.396 | train_acc: 87.010% | train_loss: 0.41 | elapsed: 0:00:19.738871


timer start!
Conntected with ('172.31.26.96', 38422)
Conntected with ('172.31.26.38', 50626)
Conntected with ('172.31.26.96', 40462)
Conntected with ('172.31.23.109', 35512)
Conntected with ('172.31.23.109', 45606)
Conntected with ('172.31.26.38', 52344)
Conntected with ('172.31.23.109', 56130)
Conntected with ('172.31.23.109', 56132)
Conntected with ('172.31.26.38', 47604)
Conntected with ('172.31.26.38', 44224)
TrainingTime: 136.54810976982117 sec




comm_round: 4/100 | test_acc: 91.910% | test_loss: 0.265 | train_acc: 91.338% | train_loss: 0.277 | elapsed: 0:00:19.798662


timer start!
Conntected with ('172.31.26.96', 37338)
Conntected with ('172.31.26.96', 47412)
Conntected with ('172.31.26.38', 40506)
Conntected with ('172.31.23.109', 52548)
Conntected with ('172.31.26.38', 44532)
Conntected with ('172.31.23.109', 41694)
Conntected with ('172.31.23.109', 41710)
Conntected with ('172.31.26.38', 55630)
Conntected with ('172.31.23.109', 55318)
Conntected with ('172.31.26.38', 48562)
TrainingTime: 138.73277473449707 sec




comm_round: 5/100 | test_acc: 94.230% | test_loss: 0.179 | train_acc: 94.122% | train_loss: 0.189 | elapsed: 0:00:20.006162


timer start!
Conntected with ('172.31.26.96', 45760)
Conntected with ('172.31.26.96', 45774)
Conntected with ('172.31.26.38', 36984)
Conntected with ('172.31.23.109', 36284)
Conntected with ('172.31.23.109', 40414)
Conntected with ('172.31.26.38', 48258)
Conntected with ('172.31.26.38', 48262)
Conntected with ('172.31.26.38', 49024)
Conntected with ('172.31.23.109', 37106)
Conntected with ('172.31.23.109', 37120)
TrainingTime: 129.24039912223816 sec




comm_round: 6/100 | test_acc: 95.960% | test_loss: 0.137 | train_acc: 95.812% | train_loss: 0.145 | elapsed: 0:00:20.067992


timer start!
Conntected with ('172.31.26.96', 44200)
Conntected with ('172.31.26.96', 44206)
Conntected with ('172.31.23.109', 59566)
Conntected with ('172.31.26.38', 51860)
Conntected with ('172.31.26.38', 35714)
Conntected with ('172.31.23.109', 50694)
Conntected with ('172.31.23.109', 37294)
Conntected with ('172.31.26.38', 42480)
Conntected with ('172.31.26.38', 42484)
Conntected with ('172.31.23.109', 37306)
TrainingTime: 129.65172028541565 sec




comm_round: 7/100 | test_acc: 95.840% | test_loss: 0.139 | train_acc: 95.623% | train_loss: 0.146 | elapsed: 0:00:19.666954


timer start!
Conntected with ('172.31.26.96', 51436)
Conntected with ('172.31.26.96', 51438)
Conntected with ('172.31.26.38', 39682)
Conntected with ('172.31.23.109', 36538)
Conntected with ('172.31.26.38', 48730)
Conntected with ('172.31.26.38', 48744)
Conntected with ('172.31.23.109', 33534)
Conntected with ('172.31.23.109', 33542)
Conntected with ('172.31.23.109', 33552)
Conntected with ('172.31.26.38', 41822)
TrainingTime: 126.32215309143066 sec




comm_round: 8/100 | test_acc: 97.080% | test_loss: 0.1 | train_acc: 96.852% | train_loss: 0.106 | elapsed: 0:00:19.545352


timer start!
Conntected with ('172.31.26.96', 46858)
Conntected with ('172.31.26.96', 52112)
Conntected with ('172.31.23.109', 54106)
Conntected with ('172.31.26.38', 33058)
Conntected with ('172.31.26.38', 33070)
Conntected with ('172.31.26.38', 45860)
Conntected with ('172.31.23.109', 52286)
Conntected with ('172.31.26.38', 57162)
Conntected with ('172.31.23.109', 52298)
Conntected with ('172.31.23.109', 52314)
TrainingTime: 140.76223015785217 sec




comm_round: 9/100 | test_acc: 96.480% | test_loss: 0.107 | train_acc: 96.442% | train_loss: 0.112 | elapsed: 0:00:19.778455


timer start!
Conntected with ('172.31.26.96', 42618)
Conntected with ('172.31.26.96', 55762)
Conntected with ('172.31.23.109', 35920)
Conntected with ('172.31.26.38', 52542)
Conntected with ('172.31.26.38', 52554)
Conntected with ('172.31.26.38', 55334)
Conntected with ('172.31.23.109', 45162)
Conntected with ('172.31.23.109', 52870)
Conntected with ('172.31.23.109', 38364)
Conntected with ('172.31.26.38', 59574)
TrainingTime: 132.7366271018982 sec




comm_round: 10/100 | test_acc: 97.280% | test_loss: 0.09 | train_acc: 97.115% | train_loss: 0.0952 | elapsed: 0:00:19.569041


timer start!
Conntected with ('172.31.26.96', 53852)
Conntected with ('172.31.26.96', 41610)
Conntected with ('172.31.26.38', 56886)
Conntected with ('172.31.23.109', 37512)
Conntected with ('172.31.26.38', 39186)
Conntected with ('172.31.26.38', 58648)
Conntected with ('172.31.23.109', 52514)
Conntected with ('172.31.23.109', 57838)
Conntected with ('172.31.26.38', 50152)
Conntected with ('172.31.23.109', 57842)
TrainingTime: 134.40747213363647 sec




comm_round: 11/100 | test_acc: 97.440% | test_loss: 0.079 | train_acc: 97.368% | train_loss: 0.084 | elapsed: 0:00:19.637222


timer start!
Conntected with ('172.31.26.96', 56708)
Conntected with ('172.31.26.96', 50392)
Conntected with ('172.31.23.109', 48818)
Conntected with ('172.31.26.38', 36532)
Conntected with ('172.31.23.109', 48820)
Conntected with ('172.31.26.38', 36534)
Conntected with ('172.31.26.38', 36538)
Conntected with ('172.31.23.109', 33028)
Conntected with ('172.31.26.38', 47430)
Conntected with ('172.31.23.109', 45478)
TrainingTime: 133.0425100326538 sec




comm_round: 12/100 | test_acc: 97.660% | test_loss: 0.0744 | train_acc: 97.555% | train_loss: 0.0781 | elapsed: 0:00:19.989146


timer start!
Conntected with ('172.31.26.96', 51258)
Conntected with ('172.31.26.96', 40590)
Conntected with ('172.31.23.109', 42168)
Conntected with ('172.31.23.109', 58274)
Conntected with ('172.31.26.38', 38828)
Conntected with ('172.31.26.38', 38842)
Conntected with ('172.31.26.38', 56520)
Conntected with ('172.31.26.38', 56524)
Conntected with ('172.31.23.109', 46444)
Conntected with ('172.31.23.109', 46448)
TrainingTime: 129.63949418067932 sec




comm_round: 13/100 | test_acc: 97.700% | test_loss: 0.0722 | train_acc: 97.585% | train_loss: 0.0752 | elapsed: 0:00:19.567651


timer start!
Conntected with ('172.31.26.96', 48156)
Conntected with ('172.31.26.96', 51380)
Conntected with ('172.31.23.109', 47574)
Conntected with ('172.31.26.38', 49206)
Conntected with ('172.31.26.38', 49208)
Conntected with ('172.31.23.109', 51326)
Conntected with ('172.31.26.38', 41208)
Conntected with ('172.31.23.109', 51336)
Conntected with ('172.31.26.38', 41220)
Conntected with ('172.31.23.109', 43416)
TrainingTime: 138.86871528625488 sec




comm_round: 14/100 | test_acc: 97.750% | test_loss: 0.0725 | train_acc: 97.495% | train_loss: 0.0754 | elapsed: 0:00:19.765812


timer start!
Conntected with ('172.31.26.96', 34910)
Conntected with ('172.31.26.96', 34926)
Conntected with ('172.31.26.38', 45692)
Conntected with ('172.31.23.109', 52220)
Conntected with ('172.31.23.109', 39134)
Conntected with ('172.31.26.38', 42268)
Conntected with ('172.31.26.38', 42282)
Conntected with ('172.31.23.109', 48092)
Conntected with ('172.31.26.38', 36868)
Conntected with ('172.31.23.109', 41864)
TrainingTime: 128.66335773468018 sec




comm_round: 15/100 | test_acc: 97.660% | test_loss: 0.0713 | train_acc: 97.392% | train_loss: 0.0775 | elapsed: 0:00:19.742828


timer start!
Conntected with ('172.31.26.96', 40048)
Conntected with ('172.31.26.96', 40060)
Conntected with ('172.31.23.109', 33028)
Conntected with ('172.31.26.38', 46374)
Conntected with ('172.31.26.38', 46388)
Conntected with ('172.31.23.109', 58080)
Conntected with ('172.31.26.38', 51920)
Conntected with ('172.31.26.38', 51922)
Conntected with ('172.31.23.109', 58090)
Conntected with ('172.31.23.109', 58100)
TrainingTime: 127.14950227737427 sec




comm_round: 16/100 | test_acc: 98.080% | test_loss: 0.0611 | train_acc: 97.913% | train_loss: 0.0633 | elapsed: 0:00:19.677063


timer start!
Conntected with ('172.31.26.96', 45596)
Conntected with ('172.31.26.96', 45604)
Conntected with ('172.31.26.38', 37068)
Conntected with ('172.31.23.109', 47818)
Conntected with ('172.31.23.109', 39902)
Conntected with ('172.31.23.109', 59540)
Conntected with ('172.31.26.38', 45254)
Conntected with ('172.31.23.109', 52234)
Conntected with ('172.31.26.38', 45260)
Conntected with ('172.31.26.38', 45268)
TrainingTime: 136.62696313858032 sec




comm_round: 17/100 | test_acc: 98.000% | test_loss: 0.0628 | train_acc: 97.807% | train_loss: 0.0646 | elapsed: 0:00:20.019844


timer start!
Conntected with ('172.31.26.96', 60214)
Conntected with ('172.31.26.96', 60894)
Conntected with ('172.31.26.38', 46496)
Conntected with ('172.31.23.109', 38404)
Conntected with ('172.31.26.38', 42068)
Conntected with ('172.31.23.109', 34802)
Conntected with ('172.31.23.109', 49906)
Conntected with ('172.31.23.109', 46364)
Conntected with ('172.31.26.38', 46406)
Conntected with ('172.31.26.38', 53576)
TrainingTime: 127.69984555244446 sec




comm_round: 18/100 | test_acc: 98.120% | test_loss: 0.0611 | train_acc: 97.965% | train_loss: 0.0623 | elapsed: 0:00:20.287091


timer start!
Conntected with ('172.31.26.96', 60270)
Conntected with ('172.31.26.96', 57334)
Conntected with ('172.31.26.38', 59796)
Conntected with ('172.31.26.38', 59804)
Conntected with ('172.31.23.109', 48404)
Conntected with ('172.31.23.109', 57610)
Conntected with ('172.31.26.38', 34488)
Conntected with ('172.31.23.109', 58650)
Conntected with ('172.31.23.109', 58662)
Conntected with ('172.31.26.38', 34504)
TrainingTime: 127.95372366905212 sec




comm_round: 19/100 | test_acc: 98.290% | test_loss: 0.0564 | train_acc: 98.018% | train_loss: 0.0585 | elapsed: 0:00:20.206012


timer start!
Conntected with ('172.31.26.96', 36834)
Conntected with ('172.31.26.96', 36838)
Conntected with ('172.31.23.109', 39032)
Conntected with ('172.31.23.109', 39042)
Conntected with ('172.31.26.38', 53306)
Conntected with ('172.31.26.38', 53310)
Conntected with ('172.31.23.109', 58904)
Conntected with ('172.31.26.38', 53432)
Conntected with ('172.31.23.109', 50442)
Conntected with ('172.31.26.38', 53436)
TrainingTime: 135.87059330940247 sec




comm_round: 20/100 | test_acc: 98.340% | test_loss: 0.0537 | train_acc: 98.163% | train_loss: 0.0546 | elapsed: 0:00:20.161236


timer start!
Conntected with ('172.31.26.96', 38278)
Conntected with ('172.31.26.96', 41640)
Conntected with ('172.31.23.109', 42856)
Conntected with ('172.31.26.38', 44182)
Conntected with ('172.31.23.109', 42862)
Conntected with ('172.31.23.109', 57266)
Conntected with ('172.31.26.38', 60182)
Conntected with ('172.31.26.38', 60194)
Conntected with ('172.31.23.109', 37074)
Conntected with ('172.31.26.38', 37676)
TrainingTime: 134.5206527709961 sec




comm_round: 21/100 | test_acc: 97.970% | test_loss: 0.0598 | train_acc: 97.885% | train_loss: 0.0618 | elapsed: 0:00:20.182167


timer start!
Conntected with ('172.31.26.96', 46610)
Conntected with ('172.31.26.96', 41062)
Conntected with ('172.31.23.109', 37344)
Conntected with ('172.31.26.38', 42166)
Conntected with ('172.31.26.38', 42170)
Conntected with ('172.31.23.109', 52416)
Conntected with ('172.31.23.109', 52430)
Conntected with ('172.31.26.38', 51218)
Conntected with ('172.31.23.109', 35334)
Conntected with ('172.31.26.38', 43414)
TrainingTime: 126.78040790557861 sec




comm_round: 22/100 | test_acc: 98.100% | test_loss: 0.0584 | train_acc: 97.985% | train_loss: 0.0596 | elapsed: 0:00:19.780733


timer start!
Conntected with ('172.31.26.96', 55304)
Conntected with ('172.31.26.96', 41464)
Conntected with ('172.31.23.109', 43614)
Conntected with ('172.31.23.109', 53482)
Conntected with ('172.31.26.38', 37470)
Conntected with ('172.31.26.38', 37472)
Conntected with ('172.31.23.109', 59248)
Conntected with ('172.31.26.38', 45808)
Conntected with ('172.31.26.38', 45810)
Conntected with ('172.31.23.109', 45358)
TrainingTime: 140.8441390991211 sec




comm_round: 23/100 | test_acc: 98.430% | test_loss: 0.0476 | train_acc: 98.378% | train_loss: 0.0466 | elapsed: 0:00:20.248866


timer start!
Conntected with ('172.31.26.96', 44340)
Conntected with ('172.31.26.96', 55548)
Conntected with ('172.31.26.38', 48582)
Conntected with ('172.31.23.109', 58888)
Conntected with ('172.31.23.109', 58894)
Conntected with ('172.31.26.38', 43790)
Conntected with ('172.31.23.109', 36728)
Conntected with ('172.31.23.109', 36744)
Conntected with ('172.31.26.38', 46696)
Conntected with ('172.31.26.38', 46700)
TrainingTime: 139.51449489593506 sec




comm_round: 24/100 | test_acc: 98.370% | test_loss: 0.0492 | train_acc: 98.293% | train_loss: 0.0497 | elapsed: 0:00:19.885100


timer start!
Conntected with ('172.31.26.96', 58482)
Conntected with ('172.31.26.96', 51580)
Conntected with ('172.31.26.38', 50840)
Conntected with ('172.31.23.109', 46756)
Conntected with ('172.31.23.109', 46770)
Conntected with ('172.31.26.38', 60248)
Conntected with ('172.31.26.38', 35576)
Conntected with ('172.31.26.38', 47498)
Conntected with ('172.31.23.109', 58554)
Conntected with ('172.31.23.109', 58570)
TrainingTime: 140.67708897590637 sec




comm_round: 25/100 | test_acc: 98.460% | test_loss: 0.0469 | train_acc: 98.363% | train_loss: 0.0474 | elapsed: 0:00:19.951712


timer start!
Conntected with ('172.31.26.96', 33864)
Conntected with ('172.31.26.96', 43584)
Conntected with ('172.31.23.109', 45574)
Conntected with ('172.31.26.38', 50772)
Conntected with ('172.31.26.38', 50774)
Conntected with ('172.31.23.109', 40422)
Conntected with ('172.31.23.109', 51624)
Conntected with ('172.31.26.38', 41084)
Conntected with ('172.31.26.38', 40624)
Conntected with ('172.31.23.109', 34164)
TrainingTime: 129.15554785728455 sec




comm_round: 26/100 | test_acc: 98.520% | test_loss: 0.0452 | train_acc: 98.433% | train_loss: 0.0447 | elapsed: 0:00:19.896803


timer start!
Conntected with ('172.31.26.96', 40276)
Conntected with ('172.31.26.96', 53296)
Conntected with ('172.31.23.109', 33008)
Conntected with ('172.31.26.38', 48630)
Conntected with ('172.31.26.38', 48644)
Conntected with ('172.31.23.109', 46772)
Conntected with ('172.31.23.109', 46776)
Conntected with ('172.31.26.38', 41784)
Conntected with ('172.31.26.38', 46776)
Conntected with ('172.31.23.109', 53334)
TrainingTime: 141.32919120788574 sec




comm_round: 27/100 | test_acc: 96.760% | test_loss: 0.0948 | train_acc: 96.662% | train_loss: 0.0988 | elapsed: 0:00:20.152180


timer start!
Conntected with ('172.31.26.96', 57694)
Conntected with ('172.31.26.96', 37016)
Conntected with ('172.31.23.109', 36890)
Conntected with ('172.31.26.38', 59818)
Conntected with ('172.31.26.38', 50526)
Conntected with ('172.31.23.109', 52356)
Conntected with ('172.31.23.109', 36802)
Conntected with ('172.31.23.109', 36810)
Conntected with ('172.31.26.38', 59620)
Conntected with ('172.31.26.38', 59632)
TrainingTime: 265.31303215026855 sec




comm_round: 28/100 | test_acc: 98.300% | test_loss: 0.0523 | train_acc: 98.205% | train_loss: 0.0524 | elapsed: 0:00:20.144928


timer start!
Conntected with ('172.31.26.96', 35770)
Conntected with ('172.31.26.96', 56938)
Conntected with ('172.31.23.109', 52570)
Conntected with ('172.31.23.109', 54426)
Conntected with ('172.31.23.109', 52528)
Conntected with ('172.31.23.109', 46542)
Conntected with ('172.31.26.38', 49352)
Conntected with ('172.31.26.38', 57474)
Conntected with ('172.31.26.38', 44176)
Conntected with ('172.31.26.38', 54710)
TrainingTime: 425.53310108184814 sec






Exception in thread Thread-295 (receive):
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/local/lib/python3.10/threading.py", line 953, in run
Exception in thread Thread-296 (receive):
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self._target(*self._args, **self._kwargs)
  File "/tmp/ipykernel_12749/1254998431.py", line 11, in receive
  File "/tmp/ipykernel_12749/2362980962.py", line 6, in send_msg
    self.run()
  File "/usr/local/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/tmp/ipykernel_12749/1254998431.py", line 11, in receive
  File "/tmp/ipykernel_12749/2362980962.py", line 6, in send_msg
BrokenPipeError: [Errno 32] Broken pipe
BrokenPipeError: [Errno 32] Broken pipe


comm_round: 29/100 | test_acc: 98.330% | test_loss: 0.0533 | train_acc: 98.265% | train_loss: 0.051 | elapsed: 0:00:20.183615


timer start!
Conntected with ('172.31.26.96', 33860)
Conntected with ('172.31.26.96', 51932)
Conntected with ('172.31.23.109', 56038)
Conntected with ('172.31.23.109', 33754)
Conntected with ('172.31.26.38', 54076)
Conntected with ('172.31.26.38', 45106)
Conntected with ('172.31.23.109', 56580)
Conntected with ('172.31.26.38', 53944)
Conntected with ('172.31.26.38', 35646)
Conntected with ('172.31.23.109', 45834)


KeyboardInterrupt: 

In [None]:
with open(dir+'result_per_lr_{}_{}_{}_{}.pickle'.format(B,C,E, lr), 'wb') as handle:
    pickle.dump(result_per_lr, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# end_time = time.time()  # store end time
# print("TrainingTime: {} sec".format(end_time - start_time))

In [None]:
end_time = time.time()  # store end time
print("WorkingTime: {} sec".format(end_time - start_time))