In [31]:
import argparse
import json
import os
import threading
import time
from random import random

# TF_CPP_MIN_LOG_LEVEL must be in the environment *before* TensorFlow is imported:
# the native runtime already logs while the package loads, so assigning the variable
# afterwards is too late to silence those messages. Level '2' hides INFO and WARNING
# output and still lets errors through.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import matplotlib.pyplot as plt
import numpy as np
import requests
import tensorflow as tf

class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that also understands NumPy containers and scalars.

    Pass it as ``cls=NumpyEncoder`` to ``json.dumps``. Arrays are emitted as
    (nested) lists and NumPy scalar types such as ``np.float32`` or
    ``np.int64`` as the matching plain Python value. Anything else is handed
    to the base class, which raises ``TypeError`` for unsupported objects.
    """

    def default(self, o):
        if isinstance(o, np.ndarray):
            return o.tolist()
        if isinstance(o, np.generic):
            # NumPy scalars (e.g. np.float32) are not JSON serialisable as-is.
            return o.item()
        return super().default(o)

class Client:
    """Federated-learning client that trains a CNN on a local slice of MNIST.

    A client polls the server for the current global round, trains the shared
    model on its own data whenever that round matches its local round, and
    uploads the resulting weights. ``task`` runs that loop (typically in a
    thread) and ``kill`` asks a running loop to stop.
    """

    # Passed as ``timeout=`` to every ``requests`` call so that an unresponsive
    # server cannot block a client forever. Set to ``None`` to wait indefinitely.
    REQUEST_TIMEOUT = 60

    # MNIST has ten digit classes (labels 0-9).
    NUM_CLASSES = 10

    def __init__(self, max_round: int, time_delay = 5, suppress=True, num_samples=600, client_id = 0, experiment = 1,
                 base_url = "http://127.0.0.1:9103/"):
        """
        @params:
            max_round : the maximum number of rounds that should be trained (arbitrary integer)
            time_delay : seconds to wait before the next check of the server round (arbitrary positive number).
                         Increase it when one round of training takes much longer than the current value:
                         every poll issued while the other clients are still training is wasted network
                         traffic. A higher value makes communication more stable at the cost of a longer
                         total run time.
            suppress : flag intended to silence log output. It is stored on the instance; no method of
                       this class reads it at the moment.
            num_samples : the number of training images requested for this client (see data_split)
            client_id : identifier of this client, used in the upload and bookkeeping urls
            experiment : desired data split type (1~4), see data_split
            base_url : root url of the federated server

        @return:
            None : Initializes the variables
                   Sets up the urls for communication
                   Fetches the current global round from the server
                   Downloads the MNIST dataset and splits it
                   Builds the model
        """

        '''
        Urls
        '''
        # Normalise so that exactly one slash separates the base url from each route.
        self.base_url = base_url.rstrip("/") + "/"
        self.put_weight_url = self.base_url + f"put_weight/{client_id}"
        self.get_weight_url = self.base_url + "get_server_weight" # Url from which the global weights are fetched
        self.round_url = self.base_url + "get_server_round"

        '''
        Initial setup
        '''
        self.experiment = experiment
        self.client_id = client_id
        self.time_delay = time_delay
        self.suppress = suppress
        self._stop_event = threading.Event() # set by kill() to end task()
        self.global_round = self.request_global_round()
        self.current_round = 0
        self.max_round = max_round # Set the maximum number of rounds

        '''
        Downloads MNIST dataset and prepares (train_x, train_y), (test_x, test_y)
        '''
        self.train_images, self.train_labels = None, None
        self.test_images, self.test_labels = None, None
        self.prepare_images()

        self.train_index_list = None
        self.test_index_list = None
        self.split_train_images = []
        self.split_train_labels = []

        self.local_data_num = 0
        self.data_split(num_samples=num_samples)

        '''
        Builds model
        '''
        self.model = None
        self.build_cnn_model()

    def prepare_images(self):
        """
        @return:
            None : Loads MNIST through keras, scales the pixel values by 1/255 and
                   reshapes the images to (-1, 28, 28, 1) for the CNN
        """
        mnist = tf.keras.datasets.mnist
        (self.train_images, self.train_labels), (self.test_images, self.test_labels) = mnist.load_data()
        self.train_images, self.test_images = self.train_images / 255, self.test_images / 255

        # For CNN, add dummy channel to feed the images to CNN
        self.train_images = self.train_images.reshape(-1, 28, 28, 1)
        self.test_images = self.test_images.reshape(-1, 28, 28, 1)

    def build_cnn_model(self):
        """
        @params:
            None

        @return:
            None : saves the compiled CNN model in self.model
        """
        # This model definition must be same in the server (Federated.py)
        self.model = tf.keras.models.Sequential([
            tf.keras.layers.Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)),
            tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
            tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
            tf.keras.layers.Dropout(0.25),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dropout(0.5),
            tf.keras.layers.Dense(10, activation='softmax')
        ])

        self.model.compile(optimizer=tf.keras.optimizers.SGD(),
                      loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                      metrics=['accuracy'])

    def data_split(self, num_samples):
        """
        @params:
            num_samples : The number of sample images requested for this client. Used by
                          experiments 1 and 2 as the dataset size and by experiment 3 as the
                          exclusive upper bound of the random size (so it must be at least 2
                          there). Ignored by experiment 4.

        @return:
            None : Splits the dataset depending on the self.experiment value, stores the result in
                   self.split_train_images / self.split_train_labels, sets self.local_data_num to the
                   number of rows actually selected and reports that number to the server.

                If self.experiment is 1: Uniform data split: num_samples // 10 images from each class (iid)
                If self.experiment is 2: Random data split1: num_samples images, not uniformly distributed across classes
                If self.experiment is 3: Random data split2: a random amount of images, not uniformly distributed across classes
                If self.experiment is 4: Skewed: 50-59 images for 1-9 randomly picked classes, 1-9 images for the others

            Individual images are drawn with replacement (the np.random.choice default),
            so the same image may appear more than once.

        @raises:
            ValueError : if self.experiment is not one of 1, 2, 3, 4
        """
        if self.train_index_list is None or self.test_index_list is None:
            # Bucket the sample positions by label once; later calls reuse the buckets.
            self.train_index_list = [[] for _ in range(self.NUM_CLASSES)]
            self.test_index_list = [[] for _ in range(self.NUM_CLASSES)]
            for position, label in enumerate(self.train_labels):
                self.train_index_list[label].append(position)

            for position, label in enumerate(self.test_labels):
                self.test_index_list[label].append(position)

        total_train = len(self.train_labels)

        if self.experiment == 1: # uniform data split
            per_class = num_samples // self.NUM_CLASSES
            picked = [np.random.choice(indices, size=per_class) for indices in self.train_index_list]

        elif self.experiment == 2: # Randomly selected, equally sized dataset
            picked = [np.random.choice(total_train, size=num_samples)]

        elif self.experiment == 3: # Randomly selected, differently sized dataset
            size = np.random.randint(1, num_samples)
            picked = [np.random.choice(total_train, size=size)]

        elif self.experiment == 4: # Skewed
            all_classes = list(range(self.NUM_CLASSES))
            # replace=False: every skewed class is picked once, so the skewed and
            # non-skewed groups are disjoint and no class is sampled twice.
            skewed_numbers = np.random.choice(all_classes, np.random.randint(1, 10), replace=False)
            non_skewed_numbers = sorted(set(all_classes) - set(skewed_numbers.tolist()))

            picked = []
            for label in skewed_numbers:
                picked.append(np.random.choice(self.train_index_list[label], size=np.random.randint(50, 60)))

            for label in non_skewed_numbers:
                picked.append(np.random.choice(self.train_index_list[label], size=np.random.randint(1, 10)))

        else:
            raise ValueError(f"experiment must be 1, 2, 3 or 4, got {self.experiment!r}")

        selected = np.concatenate(picked)
        self.split_train_images = self.train_images[selected]
        self.split_train_labels = self.train_labels[selected]

        # Report what was really selected; for experiment 1 this differs from
        # num_samples whenever num_samples is not a multiple of ten.
        self.local_data_num = len(self.split_train_labels)
        self.update_total_num_data(self.local_data_num)

    def update_total_num_data(self, num_data):
        """
        num_data : the number of training images that the current client has

        update the total number of training images that is stored in the server

        Raises requests.HTTPError when the server answers with an error status.
        """
        update_num_data_url = self.base_url + f"update_num_data/{self.client_id}/{num_data}"
        response = requests.get(update_num_data_url, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()

    def request_global_round(self):
        """
        result : Current global round that the server is in (the decoded JSON body)

        Raises requests.HTTPError when the server answers with an error status.
        """
        response = requests.get(self.round_url, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()

    def request_global_weight(self):
        """
        global_weight : Up-to-date version of the model parameters as a list of float32
                        arrays (one per entry of the server's JSON list), or None when
                        the server returns JSON null

        Raises requests.HTTPError when the server answers with an error status.
        """
        response = requests.get(self.get_weight_url, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        payload = response.json()

        if payload is None:
            return None
        return [np.array(layer, dtype=np.float32) for layer in payload]

    def upload_local_weight(self, local_weight):
        """
        local_weight : the local weight that current client has converged to

        Add current client's weights to the server (Server accumulates these from multiple clients and computes the global weight)

        Raises requests.HTTPError when the server answers with an error status.
        """
        local_weight_to_json = json.dumps(local_weight, cls=NumpyEncoder)
        response = requests.put(self.put_weight_url, data=local_weight_to_json, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()

    def train_local_model(self):
        """
        local_weight : local weight of the current client after training

        Loads the server's global weights into the model when the server has any,
        trains for 10 epochs on the local split and returns the model's weights.
        """
        global_weight = self.request_global_weight()
        if global_weight is not None:
            # Hand the per-layer list over as is: the layers have different shapes,
            # so packing them into a single ndarray fails on current NumPy.
            self.model.set_weights(global_weight)

        self.model.fit(self.split_train_images, self.split_train_labels, epochs=10, batch_size=16, verbose=0)
        return self.model.get_weights()

    def task(self):
        """
        Federated learning task
        1. If the current round has reached the max round that we set, finish
        2. If the global round = current client's round, train, upload and wait time_delay
        3. Otherwise, wait 2 * time_delay until the other clients finish

        Runs as a loop (not recursion), so a long wait cannot exhaust the call stack.
        Returns None when max_round is reached or after kill() was called.
        """
        while not self._stop_event.is_set():
            if self.current_round >= self.max_round:
                print(f"Client {self.client_id} finished")
                return

            # this is for executing on multiple devices
            self.global_round = self.request_global_round()

            if self.global_round == self.current_round: # need update
                local_weight = self.train_local_model()
                self.upload_local_weight(local_weight)
                self.current_round += 1
                pause = self.time_delay
            else: # need to wait until other clients finish
                pause = self.time_delay * 2

            # Sleeps for `pause` seconds, but wakes up at once when kill() is called.
            self._stop_event.wait(pause)

    def kill(self):
        """
        Ask a running task() loop to stop.

        The request takes effect at the next wait or loop check; a training step
        that is already running is not interrupted.
        """
        self._stop_event.set()

# Federated learning on a local computer

In [33]:
# Number of simulated clients that run side by side on this machine.
NUM_CLIENTS = 5

# Build every client first (each constructor contacts the server and prepares its
# data split) and keep the references so the clients can be inspected or stopped later.
clients = []
for client_id in range(NUM_CLIENTS):
    clients.append(
        Client(max_round=5,
               time_delay=5,
               num_samples=100,
               suppress=True,
               client_id=client_id,
               experiment=1)
    )

# One worker thread per client. The handles are kept so the run can be monitored
# or waited for, e.g. `for worker in threads: worker.join()`.
threads = []
for client in clients:
    worker = threading.Thread(target=client.task, name=f"client-{client.client_id}")
    worker.start()
    threads.append(worker)


Exception in thread Thread-10:
Traceback (most recent call last):
  File "C:\Users\david\miniconda3\envs\test_fl\lib\site-packages\urllib3\connection.py", line 174, in _new_conn
    conn = connection.create_connection(
  File "C:\Users\david\miniconda3\envs\test_fl\lib\site-packages\urllib3\util\connection.py", line 96, in create_connection
    raise err
  File "C:\Users\david\miniconda3\envs\test_fl\lib\site-packages\urllib3\util\connection.py", line 86, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [WinError 10061] 대상 컴퓨터에서 연결을 거부했으므로 연결하지 못했습니다

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\david\miniconda3\envs\test_fl\lib\site-packages\urllib3\connectionpool.py", line 699, in urlopen
    httplib_response = self._make_request(
  File "C:\Users\david\miniconda3\envs\test_fl\lib\site-packages\urllib3\connectionpool.py", line 394, in _make_request
    conn.request(method, url, **httplib_requ

Exception in thread Thread-8:
Traceback (most recent call last):
  File "C:\Users\david\miniconda3\envs\test_fl\lib\site-packages\urllib3\connection.py", line 174, in _new_conn
    conn = connection.create_connection(
  File "C:\Users\david\miniconda3\envs\test_fl\lib\site-packages\urllib3\util\connection.py", line 96, in create_connection
    raise err
  File "C:\Users\david\miniconda3\envs\test_fl\lib\site-packages\urllib3\util\connection.py", line 86, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [WinError 10061] 대상 컴퓨터에서 연결을 거부했으므로 연결하지 못했습니다

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\david\miniconda3\envs\test_fl\lib\site-packages\urllib3\connectionpool.py", line 699, in urlopen
    httplib_response = self._make_request(
  File "C:\Users\david\miniconda3\envs\test_fl\lib\site-packages\urllib3\connectionpool.py", line 394, in _make_request
    conn.request(method, url, **httplib_reque

In [30]:
# Ask every client created above to stop. This relies on the Client class
# providing a kill() method -- NOTE(review): confirm the class definition that is
# currently loaded in the kernel has one before running this cell.
for running_client in clients:
    running_client.kill()