# Import Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt  
from sklearn.utils import shuffle 
import math
import scipy
import socket
import pickle

# Load, Shuffle and Normalize Local Data

In [None]:
# Load this client's local dataset. The last column is used as the class
# label and every other column as an input feature (pixel value).
# Cast to float up front: if every cell in the sheet is an integer, `.values`
# yields an integer array, and writing the normalized values back into it
# would silently truncate them to whole numbers.
data = pd.read_excel('mnist_client4.xlsx').values.astype(float)  # 4000 samples

# Shuffle the rows with a fixed seed so the train/test split is reproducible
data = shuffle(data, random_state=42)

num_data, num_col = data.shape
num_class = len(np.unique(data[:, num_col - 1]))

# Scale every feature column at once: (pixel + 10) / 255, where 255 is the
# maximum pixel value. The label column (last one) is left untouched.
# NOTE(review): an earlier comment described this as "shift 1 unit and
# normalize between 0,1", but the code has always added 10, which maps
# 0..255 to roughly 0.04..1.04. The offset is kept unchanged - confirm intent.
data[:, :num_col - 1] = (data[:, :num_col - 1] + 10) / 255

# Set Parameters

In [None]:
# Train/test split: the first 70% of the (already shuffled) rows are used for
# training and the remaining 30% for testing.
percent_train = 0.7
num_train = round(percent_train * num_data)
num_test = num_data - num_train

# One-hot targets (needed for multi-class classification): picking rows of
# the identity matrix by integer label yields one indicator row per sample.
_label_index = data[:, num_col - 1].astype(int)
_identity = np.eye(num_class)
y_one_hot_train = _identity[_label_index[:num_train]]
y_one_hot_test = _identity[_label_index[num_train:]]

# Layer sizes: input -> hidden 1 -> hidden 2 -> output.
# (A larger variant with n1 = 35 and n2 = 25 was tried previously.)
n0 = num_col - 1
n1 = 20
n2 = 15
n3 = num_class

# One learning rate per layer (1 = first hidden layer, 3 = output layer)
learning_rate1 = 0.003
learning_rate2 = 0.002
learning_rate3 = 0.001

# Number of local epochs per federated round
epoch = 50

# Weight-decay (regularization) coefficient
lambda_reg = 1e-05

# Activation Functions

In [None]:
# Name of the activation used by default in the hidden layers, and the slope
# that leaky ReLU applies to non-positive inputs.
ACTIVATION_FUNC = 'leaky_relu'
leaky_relu_alpha = 0.01


def activation_function(x, fun_name=ACTIVATION_FUNC):
    """Apply the named activation element-wise to ``x``.

    Parameters
    ----------
    x : numpy array (or scalar) of pre-activations.
    fun_name : one of ``'relu'``, ``'logsig'`` (logistic sigmoid),
        ``'tansig'`` (hyperbolic tangent) or ``'leaky_relu'``.
        Defaults to ``ACTIVATION_FUNC``.

    Returns
    -------
    The activated values, same shape as ``x``.

    Raises
    ------
    ValueError
        If ``fun_name`` is not a supported activation (previously this
        silently returned ``None``).
    """
    if fun_name == 'relu':
        return np.maximum(0, x)
    if fun_name == 'logsig':
        return 1 / (1 + np.exp(-x))
    if fun_name == 'tansig':
        # 2 / (1 + e^(-2x)) - 1 is tanh(x); np.tanh avoids the overflow the
        # explicit exponential hits for large negative inputs.
        return np.tanh(x)
    if fun_name == 'leaky_relu':
        return np.where(x > 0, x, leaky_relu_alpha * x)
    raise ValueError(f"Unknown activation function: {fun_name!r}")


def activation_function_derivative(x, fun_name=ACTIVATION_FUNC):
    """Return the element-wise derivative of the named activation at ``x``.

    ``x`` is the pre-activation (net input), not the activated output.
    Accepts the same ``fun_name`` values as ``activation_function`` and
    raises ``ValueError`` for anything else.
    """
    if fun_name == 'relu':
        return np.where(x > 0, 1, 0)
    if fun_name == 'logsig':
        # fun_name must be forwarded explicitly; otherwise the default
        # activation would be evaluated instead of the sigmoid.
        logsig_x = activation_function(x, fun_name)
        return logsig_x * (1 - logsig_x)
    if fun_name == 'tansig':
        tansig_x = activation_function(x, fun_name)
        return 1 - tansig_x ** 2
    if fun_name == 'leaky_relu':
        return np.where(x > 0, 1, leaky_relu_alpha)
    raise ValueError(f"Unknown activation function: {fun_name!r}")

# Softmax Function

In [None]:
# Return the softmax output of a vector
# epsilon was originally added to the denominator to avoid a division by
# zero. The max-shift below keeps the denominator >= 1, so softmax no longer
# needs it; the name stays defined for any other cell that refers to it.
epsilon = 0.000001
def softmax(z):
    """Return the softmax of ``z``, rounded to 3 decimals.

    The normalization runs over every element of ``z`` (no axis), so ``z``
    is treated as a single vector of logits whatever its shape; the result
    has the same shape as ``z``.

    The largest logit is subtracted before exponentiating. This leaves the
    mathematical result unchanged but prevents ``exp`` from overflowing
    (which produced NaN for large logits) and from underflowing to an
    all-zero numerator (which made the fixed epsilon dominate the sum).

    NOTE(review): rounding to 3 decimals is kept for compatibility, but it
    zeroes out probabilities below 0.0005 and quantizes the gradient
    ``z - target`` used during training - consider removing it.
    """
    z = np.asarray(z, dtype=float)
    if z.size == 0:
        # Nothing to normalize; np.max would raise on an empty array.
        return z
    exp_z = np.exp(z - np.max(z))
    return np.round(exp_z / exp_z.sum(), 3)

# Local Train Section

In [None]:
def train_local_model(initials_parameters):
    """Train the three-layer network on this client's local training rows.

    Runs ``epoch`` passes of per-sample gradient descent with momentum and
    weight decay over the first ``num_train`` rows of ``data``, plotting the
    predictions and the error curve after every pass.

    Parameters
    ----------
    initials_parameters : mapping with ``"weights"`` and ``"bias"`` entries,
        each holding the three layer arrays at indices 0..2.
        Presumably w1/w2/w3 are (n1, n0)/(n2, n1)/(n3, n2) and the biases are
        column vectors, matching the momentum buffers below - confirm
        against the server.

    Returns
    -------
    dict with the client id, the updated ``"weights"`` and ``"bias"`` lists,
    ``"num_samples"``, the last epoch's ``"Train_MSE"`` and the
    ``"Train_Accuracy"`` of the last epoch's predictions.

    The arrays passed in are not modified; every update builds new arrays.
    """
    mse_train = np.zeros(epoch)
    w1 = initials_parameters["weights"][0]
    w2 = initials_parameters["weights"][1]
    w3 = initials_parameters["weights"][2]

    b1 = initials_parameters["bias"][0]
    b2 = initials_parameters["bias"][1]
    b3 = initials_parameters["bias"][2]

    # Momentum buffers (exponential moving averages of the gradients)
    beta = 0.9
    vw1 = np.zeros((n1, n0))
    vw2 = np.zeros((n2, n1))
    vw3 = np.zeros((n3, n2))

    vb1 = np.zeros((n1, 1))
    vb2 = np.zeros((n2, 1))
    vb3 = np.zeros((n3, 1))

    # Smallest probability fed to log10. softmax can return exactly 0 for
    # the true class; `np.log10(z, where=z > 0)` without `out=` left those
    # entries uninitialized, so the recorded error could be arbitrary.
    log_floor = 1e-6

    for t in range(epoch):
        error_data_train = np.zeros(num_train)
        output_data_train = np.zeros(num_train)

        for i in range(num_train):

            # ******************************* feed-forward ******************************
            input_data = data[i, :num_col - 1].reshape(-1, 1)
            target = y_one_hot_train[i].reshape(-1, 1)
            net1 = w1 @ input_data + b1
            o1 = activation_function(net1)
            net2 = w2 @ o1 + b2
            o2 = activation_function(net2)
            net3 = w3 @ o2 + b3
            z = softmax(net3)  # output layer is linear followed by softmax
            output_data_train[i] = np.argmax(z)

            # cross entropy error of this sample (base-10 logarithm)
            error_data_train[i] = -np.sum(
                target * np.log10(np.maximum(z, log_floor)))

            # ****************************** Backpropagation for a single sample ******************************
            # All error signals are computed from the weights used in the
            # forward pass, BEFORE any of them is updated. (Previously dw2
            # was computed from the already-updated w3 while db2, dw1 and db1
            # used the pre-update copy.)
            # delta3 = dE/dnet3 = z - target
            delta3 = z - target
            # delta2 = dE/dnet2 = f'(net2) * (w3^T delta3)
            delta2 = activation_function_derivative(net2) * (w3.T @ delta3)
            # delta1 = dE/dnet1 = f'(net1) * (w2^T delta2)
            delta1 = activation_function_derivative(net1) * (w2.T @ delta2)

            # Output layer: dw3 = delta3 * o2^T, db3 = delta3
            vw3 = beta * vw3 + (1 - beta) * (delta3 @ o2.T)
            w3 = w3 - learning_rate3 * vw3 - lambda_reg * w3
            vb3 = beta * vb3 + (1 - beta) * delta3
            b3 = b3 - learning_rate3 * vb3 - lambda_reg * b3

            # Second hidden layer: dw2 = delta2 * o1^T, db2 = delta2
            vw2 = beta * vw2 + (1 - beta) * (delta2 @ o1.T)
            w2 = w2 - learning_rate2 * vw2 - lambda_reg * w2
            vb2 = beta * vb2 + (1 - beta) * delta2
            b2 = b2 - learning_rate2 * vb2 - lambda_reg * b2

            # First hidden layer: dw1 = delta1 * input^T, db1 = delta1
            vw1 = beta * vw1 + (1 - beta) * (delta1 @ input_data.T)
            w1 = w1 - learning_rate1 * vw1 - lambda_reg * w1
            vb1 = beta * vb1 + (1 - beta) * delta1
            b1 = b1 - learning_rate1 * vb1 - lambda_reg * b1

        # Mean of the squared per-sample cross-entropy errors for this epoch
        mse_train[t] = np.mean(error_data_train ** 2)

        # Plotting the training output
        plt.figure(figsize=(20, 8))
        plt.subplot(2, 2, 1)
        plt.plot(data[:num_train, num_col - 1], '-sr')
        plt.plot(output_data_train, '-*b')
        plt.xlabel('Train Data')
        plt.ylabel('Output')

        # Plotting the training MSE, including the epoch just finished
        # (the previous slice stopped one epoch short).
        plt.subplot(2, 2, 2)
        plt.semilogy(np.arange(1, t + 2), mse_train[:t + 1])
        plt.xlabel('Epoch')
        plt.ylabel('MSE Train')

        plt.tight_layout()
        plt.show()

        print('Epoch: {} \t'.format(t+1))
        print('MSE_train: ', mse_train[t])
        print("\n\033[1;m" + "*" * 125)

    # Plotting the training Regression (predicted class vs. actual class)
    train_labels = data[:num_train, num_col - 1]
    plt.figure(2)
    m_train, b_train = np.polyfit(train_labels, output_data_train, 1)
    plt.scatter(train_labels, output_data_train, facecolors='none', edgecolors='#104E8B')
    plt.plot(train_labels, m_train * train_labels + b_train, 'r')
    plt.title('Regression Train')
    mse_train_result = mse_train[-1]

    plot_confusion_matrix(output_data_train, "Train Confusion Matrix")

    # train_accuracy
    train_accuracy = np.mean(output_data_train == train_labels)
    print(f"Accuracy on the train set: {train_accuracy * 100}%")

    # return local updated parameters
    return {"client_id":"C4" ,"weights":[w1,w2,w3],"bias":[b1,b2,b3],"num_samples":num_data, "Train_MSE":mse_train_result,'Train_Accuracy':train_accuracy}

# Test Global Model (Aggregated Model) on Client Local Data

In [None]:
def evaluate_aggregated_model(aggregated_parameters):
    """Evaluate the server's aggregated model on this client's test rows.

    Runs a forward pass over the rows of ``data`` from ``num_train`` onward,
    stores the mean squared cross-entropy error in the module-level
    ``mse_test`` array at index ``current_server_round``, and plots the
    predictions, the error curve, a regression fit and a confusion matrix.

    Parameters
    ----------
    aggregated_parameters : mapping with ``"weights"`` and ``"bias"`` (three
        arrays each, indices 0..2) and ``"current_server_round"``, which is
        used as a 0-based index into ``mse_test``.

    Returns
    -------
    dict with the client id, ``"Test_MSE"`` and ``"Test_Accuracy"``.
    """
    w1 = aggregated_parameters["weights"][0]
    w2 = aggregated_parameters["weights"][1]
    w3 = aggregated_parameters["weights"][2]

    b1 = aggregated_parameters["bias"][0]
    b2 = aggregated_parameters["bias"][1]
    b3 = aggregated_parameters["bias"][2]

    current_server_round = aggregated_parameters["current_server_round"]

    error_data_test = np.zeros(num_test)
    output_data_test = np.zeros(num_test)

    # Smallest probability fed to log10. softmax can return exactly 0 for
    # the true class; `np.log10(z, where=z > 0)` without `out=` left those
    # entries uninitialized, so the recorded error could be arbitrary.
    log_floor = 1e-6

    for i in range(num_test):
        input_data = data[num_train + i, :num_col - 1].reshape(-1, 1)
        target = y_one_hot_test[i].reshape(-1, 1)
        net1 = w1 @ input_data + b1
        o1 = activation_function(net1)
        net2 = w2 @ o1 + b2
        o2 = activation_function(net2)
        net3 = w3 @ o2 + b3
        z = softmax(net3)  # output layer is linear followed by softmax
        output_data_test[i] = np.argmax(z)
        # cross entropy error of this sample (base-10 logarithm)
        error_data_test[i] = -np.sum(
            target * np.log10(np.maximum(z, log_floor)))

    mse_test[current_server_round] = np.mean(error_data_test ** 2)

    test_labels = data[num_train:, num_col - 1]

    # Plotting the test output
    plt.figure(figsize=(20, 8))
    plt.subplot(2, 2, 1)
    plt.plot(test_labels, '-sr')
    plt.plot(output_data_test, '-*b')
    plt.xlabel('Test Data')
    plt.ylabel('Output')

    # Plotting the test MSE, including the round just evaluated
    # (the previous slice stopped one round short).
    plt.subplot(2, 2, 2)
    plt.semilogy(np.arange(1, current_server_round + 2),
                 mse_test[:current_server_round + 1])
    plt.xlabel('Epoch')
    plt.ylabel('MSE Test')

    # Plotting Regression Test (predicted class vs. actual class)
    plt.figure()
    m_test, b_test = np.polyfit(test_labels, output_data_test, 1)
    plt.scatter(test_labels, output_data_test, facecolors='none', edgecolors='#104E8B')
    plt.plot(test_labels, m_test * test_labels + b_test, 'r')
    plt.title('Regression Test')

    plt.tight_layout()
    plt.show()

    plot_confusion_matrix(output_data_test, "Test Confusion Matrix")

    print('current_server_round: {} \t'.format(current_server_round+1))
    print('MSE_Test: ', mse_test[current_server_round])

    test_accuracy = np.mean(output_data_test == test_labels)
    print(f"Accuracy on the test set: {test_accuracy * 100}%")

    return {"client_id":"C4" ,"Test_MSE": mse_test[current_server_round],'Test_Accuracy':test_accuracy}

# Plot Confusion Matrix

In [None]:
def plot_confusion_matrix(predicted_classes, title, actual_classes=None):
    """Plot a confusion matrix of predicted versus actual class labels.

    Parameters
    ----------
    predicted_classes : sequence of predicted class indices.
    title : title shown above the plot.
    actual_classes : optional sequence of true class indices. When omitted,
        the labels are taken from ``data``: the test rows (from
        ``num_train`` onward) if the number of predictions matches the size
        of the test split and differs from ``num_train``, otherwise the
        training rows. Previously the training labels were always used, so
        a matrix built from test predictions was paired with the wrong
        labels.

    If the two sequences differ in length, only the leading pairs are
    counted (the behaviour of the original ``zip``-based loop).
    """
    predicted = np.asarray(predicted_classes)

    if actual_classes is None:
        all_labels = data[:, num_col - 1]
        held_out = all_labels.shape[0] - num_train
        if len(predicted) == held_out and held_out != num_train:
            actual = all_labels[num_train:]
        else:
            actual = all_labels[:num_train]
    else:
        actual = np.asarray(actual_classes)

    pair_count = min(len(actual), len(predicted))
    row_index = actual[:pair_count].astype(int)
    col_index = predicted[:pair_count].astype(int)

    # Rows are actual classes, columns are predicted classes.
    # np.add.at accumulates correctly when the same cell occurs repeatedly.
    confusion_matrix = np.zeros((num_class, num_class))
    np.add.at(confusion_matrix, (row_index, col_index), 1)

    # Plot the confusion matrix
    plt.figure()
    plt.imshow(confusion_matrix, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title(title)
    plt.colorbar()

    # Annotate every cell with its count (x = predicted, y = actual)
    for i in range(num_class):
        for j in range(num_class):
            plt.text(j, i, str(int(confusion_matrix[i, j])), fontsize=12, ha='center', va='center')

    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.xticks(np.arange(num_class), np.arange(0, num_class))
    plt.yticks(np.arange(num_class), np.arange(0, num_class))

    plt.show()


# Client-Server Communication Section

In [None]:
# Receive data until fully received (For Huge data => chunk by chunk)
def receive_all(socket, length):
    """Read exactly ``length`` bytes from ``socket``.

    Keeps calling ``recv`` for the bytes still missing until the requested
    amount has arrived. Returns the bytes read, or ``None`` if ``recv``
    returns an empty result before ``length`` bytes were received.
    """
    pieces = []
    missing = length
    while missing > 0:
        piece = socket.recv(missing)
        if not piece:
            return None
        pieces.append(piece)
        missing -= len(piece)
    return b''.join(pieces)

# Send data in chunks(For Huge data => chunk by chunk)
def send_all(socket, data):
    """Pickle ``data`` and send it over ``socket`` with a length prefix.

    The wire format is a 4-byte big-endian payload size followed by the
    pickled payload, which is written in slices of at most 4096 bytes.
    """
    payload = pickle.dumps(data)
    payload_size = len(payload)

    # Send data size first so the receiver knows how much to read
    socket.sendall(payload_size.to_bytes(4, 'big'))

    for start in range(0, payload_size, 4096):
        socket.sendall(payload[start:start + 4096])

def _receive_message(sock):
    """Read one length-prefixed pickled message from the server.

    The 4-byte big-endian size header is read with ``receive_all`` because a
    single ``recv(4)`` may legally return fewer than 4 bytes. Raises
    ``ConnectionError`` if the server closes the connection mid-message.
    """
    header = receive_all(sock, 4)
    if header is None:
        raise ConnectionError('Server closed the connection before sending a message size')
    payload = receive_all(sock, int.from_bytes(header, 'big'))
    if payload is None:
        raise ConnectionError('Server closed the connection in the middle of a message')
    # NOTE(security): pickle.loads can execute arbitrary code. This is only
    # acceptable while the server is fully trusted.
    return pickle.loads(payload)


# Initialize client socket; the `with` block closes the connection on exit,
# including when training or communication raises.
server_address = ('localhost', 1111)
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as client_socket:
    client_socket.connect(server_address)
    print('Client socket initialized')

    # Receive initial weights from server
    initial_parameters = _receive_message(client_socket)
    print('Received initial weights from server')

    # initialize mse_test according to server_rounds (after each aggregation
    # process the global model is evaluated on the local test data)
    server_rounds = initial_parameters["server_rounds"]
    mse_test = np.zeros(server_rounds)

    while True:
        # get server signal => ready: start new learning process on client
        # local data | terminate: federated-learning process ended by server
        # NOTE(review): the signal is sent without a length prefix, so it is
        # read with a single recv; this relies on it arriving in one piece.
        raw_signal = client_socket.recv(4096)
        if not raw_signal:
            raise ConnectionError('Server closed the connection while waiting for a signal')
        signal = pickle.loads(raw_signal)
        print('Server Signal: ', signal)

        if 'terminate' in signal:
            print("Received termination signal. Terminating client.")
            break

        print('Start Local Training')

        # train the model on local data, starting from the parameters
        # already received from the server
        data_to_send = train_local_model(initial_parameters)
        data_to_send['num_samples'] = num_data

        print('End Local Training')

        # send trained parameters back to server to aggregate
        send_all(client_socket, data_to_send)

        print('Sent Updated Params To Server')

        # receive aggregated model from server
        aggregated_parameters = _receive_message(client_socket)

        print('Received Aggregated Model From Server')

        print('Start Evaluating Aggregated Model With Local Test Data')

        # evaluate aggregated model with test-data
        test_result = evaluate_aggregated_model(aggregated_parameters)

        print('End Evaluating Aggregated Model With Local Test Data')

        send_all(client_socket, test_result)

        print('Sent Evaluated Aggregated Model With Local Test Data Results To Server')

        # the aggregated parameters become the starting point of the next
        # federated-learning local training round on this client
        initial_parameters = aggregated_parameters

        print("\n\033[1;m" + "*" * 125)
