In [125]:
from sklearn.datasets import load_iris
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.utils import shuffle

class FedLearning:
    """Small federated-learning helper built around softmax regression.

    The dataset is shuffled once, split row-wise between clients, and each
    client runs plain gradient descent on a multinomial logistic-regression
    (softmax) model. The server averages what the clients learned.

    The model is multinomial rather than binomial because the target has
    three classes, hence softmax instead of sigmoid and cross-entropy
    instead of log loss.
    """

    # Number of target classes supported by the encoder and the initial weights.
    N_CLASSES = 3

    # Added inside log() so that a probability of exactly 0 does not yield -inf.
    _LOG_EPS = 10 ** -100

    def __init__(self, obj_data, random_state=None):
        """Load a dataset bunch into a shuffled feature/target pair.

        Args:
            obj_data: Object exposing ``data``, ``feature_names`` and
                ``target`` attributes (e.g. the result of ``load_iris()``).
            random_state: Optional seed forwarded to ``shuffle`` so that the
                client split can be reproduced. ``None`` keeps the original
                non-deterministic shuffle.
        """
        self.data = obj_data
        df = pd.DataFrame(data=self.data.data, columns=self.data.feature_names)
        df['target'] = self.data.target
        # Shuffle so that a contiguous row slice per client mixes the classes.
        df = shuffle(df, random_state=random_state)
        self.x_data = df.iloc[:, :-1]
        self.y_data = df.iloc[:, [-1]]

    def client_data(self, num):
        """Split the data between ``num`` clients, then 80/20 train/test.

        Every client receives ``len(x_data) // num`` consecutive rows; rows
        left over when the length is not divisible by ``num`` are not
        assigned to any client.

        Args:
            num: Number of clients (positive integer).

        Returns:
            Tuple ``(client, client_ans, client_test, client_ans_test)`` of
            lists with one DataFrame per client: training features, training
            targets, test features and test targets.

        Raises:
            ValueError: If ``num`` is smaller than 1.
        """
        if num < 1:
            raise ValueError(f"num must be a positive integer, got {num!r}")

        client, client_ans, client_test, client_ans_test = [], [], [], []
        split_loc = len(self.x_data) // num
        for i in range(num):
            rows = slice(split_loc * i, split_loc * (i + 1))
            temp_x = self.x_data.iloc[rows, :]
            temp_y = self.y_data.iloc[rows, :]
            # First 80% of the shard is used for training, the rest for test.
            cut = int(len(temp_x) * 0.8)
            client.append(temp_x.iloc[:cut, :])
            client_ans.append(temp_y.iloc[:cut, :])
            client_test.append(temp_x.iloc[cut:, :])
            client_ans_test.append(temp_y.iloc[cut:, :])
        return client, client_ans, client_test, client_ans_test

    def weight_ini(self):
        """Create the initial model parameters.

        Returns:
            Tuple ``(w, b)``: ``w`` has shape ``(n_features, 3)`` with every
            row equal to ``[0.1, 0.2, 0.3]`` (``n_features`` is the number of
            columns of ``x_data``), and ``b`` is a ``(3, 1)`` array of ones.
        """
        n_features = self.x_data.shape[1]
        w = np.tile(np.array([0.1, 0.2, 0.3]), (n_features, 1))
        b = np.ones((self.N_CLASSES, 1))
        return w, b

    def server_aggre(self, client_weight, client_bias):
        """Average the parameters reported by the clients.

        Args:
            client_weight: Sequence of per-client weight arrays (same shape).
            client_bias: Sequence of per-client biases (same shape, or
                scalars).

        Returns:
            Tuple ``(avg_weight, avg_bias)``; both are averaged across
            clients only, so each keeps the shape of a single client's value.
        """
        avg_weight = np.mean(client_weight, axis=0)
        # axis=0 averages across clients; without it the per-class biases
        # would be collapsed into one scalar.
        avg_bias = np.mean(client_bias, axis=0)
        return avg_weight, avg_bias

    def client(self, weight, bias, client, client_ans, iteration,
               learning_rate, verbose=True):
        """Run gradient descent on one client's data.

        Args:
            weight: Starting weights, shape ``(n_features, n_classes)``.
            bias: Accepted for interface compatibility; it is currently not
                applied to the model and is not updated.
            client: Training features, shape ``(n_samples, n_features)``.
            client_ans: Training labels with values in ``{0, 1, 2}``.
            iteration: Number of gradient-descent steps.
            learning_rate: Step size.
            verbose: When true (default), print the weights and the loss
                after every step.

        Returns:
            Tuple ``(weight, loss)``: the updated weights and the
            cross-entropy measured in the last step, before that step's
            update. ``loss`` is ``None`` when no step was run.

        Raises:
            ValueError: If ``client_ans`` contains an unknown label.
        """
        # Convert once up front; neither the features nor the targets change
        # between iterations.
        features = np.asarray(client, dtype=float)
        target = np.asarray(self.one_hot_encoding(client_ans))
        weight = np.asarray(weight, dtype=float)

        loss = None
        for _ in range(iteration):
            result = np.matmul(features, weight)

            # softmax() returns (n_classes, n_samples); flip it back.
            prob_result = np.asarray(self.softmax(result)).T

            loss = self.cross_entropy(target, prob_result)

            gradient = self.cost_gradient(target, prob_result, features)
            weight = weight - learning_rate * gradient

            if verbose:
                print(weight, loss)

        return weight, loss

    def softmax(self, result):
        """Turn row-wise scores into class probabilities.

        Args:
            result: 2-D scores, shape ``(n_samples, n_classes)``; an array
                or a DataFrame.

        Returns:
            Probabilities in transposed layout, shape
            ``(n_classes, n_samples)``. A DataFrame input yields a DataFrame
            whose index/columns are the input's columns/index; any other
            input yields an ndarray.
        """
        logits = np.asarray(result, dtype=float)
        # Subtracting the row maximum leaves the result unchanged but keeps
        # exp() from overflowing for large scores.
        exps = np.exp(logits - logits.max(axis=1, keepdims=True))
        final_result = (exps / exps.sum(axis=1, keepdims=True)).T
        if isinstance(result, pd.DataFrame):
            return pd.DataFrame(final_result, index=result.columns,
                                columns=result.index)
        return final_result

    def one_hot_encoding(self, y_data):
        """Convert class labels into one-hot rows.

        Args:
            y_data: Labels with values in ``{0, 1, 2}`` (setosa, versicolor,
                virginica); any array-like, flattened before encoding.

        Returns:
            List with one length-3 integer array per label.

        Raises:
            ValueError: If a label is not one of the known classes.
        """
        labels = np.asarray(y_data).ravel()
        known = np.isin(labels, np.arange(self.N_CLASSES))
        if not known.all():
            bad = np.unique(labels[~known]).tolist()
            raise ValueError(f"unknown class label(s): {bad}")
        identity = np.eye(self.N_CLASSES, dtype=int)
        return list(identity[labels.astype(int)])

    def cross_entropy(self, y_data, y_pred):
        """Return the mean cross-entropy between targets and predictions.

        Args:
            y_data: One-hot targets, shape ``(n_samples, n_classes)``.
            y_pred: Predicted probabilities, same shape.
        """
        y_true = np.asarray(y_data)
        probs = np.asarray(y_pred)
        loss = -np.sum(y_true * np.log(probs + self._LOG_EPS)) / len(y_true)
        return loss

    def cost_gradient(self, y_data, y_pred, X):
        """Return the gradient of the mean cross-entropy w.r.t. the weights.

        Args:
            y_data: One-hot targets, shape ``(n_samples, n_classes)``.
            y_pred: Predicted probabilities, same shape.
            X: Features, shape ``(n_samples, n_features)``.

        Returns:
            Array of shape ``(n_features, n_classes)``.
        """
        features = np.asarray(X, dtype=float)
        residual = np.asarray(y_data) - np.asarray(y_pred)
        grad = -np.dot(features.T, residual) / len(features)
        return grad

    

# Wrap the iris dataset in the federated-learning helper.
iris = load_iris()
FL = FedLearning(iris)

# Three clients, each with its own train/test features and targets.
client, client_ans, client_test, client_ans_test = FL.client_data(3)

# Show the first client's four partitions, then the sizes of the third's.
print(*(part[0] for part in (client, client_ans, client_test, client_ans_test)))
print(*(len(part[2]) for part in (client, client_ans, client_test, client_ans_test)))

# Train on the first client only: 10 gradient steps with a 0.005 step size.
w, b = FL.weight_ini()
FL.client(w, b, client[0], client_ans[0], iteration=10, learning_rate=0.005)

'''
LR = LogisticRegression()
LR.fit(x_data, y_data.values.ravel())
y_pred = LR.predict(x_data)
print("LR's Accuracy is", accuracy_score(y_data, y_pred))
coef = LR.coef_
intercept = LR.intercept_
print(coef, intercept)
   
'''

     sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
90                 5.5               2.6                4.4               1.2
109                7.2               3.6                6.1               2.5
27                 5.2               3.5                1.5               0.2
18                 5.7               3.8                1.7               0.3
102                7.1               3.0                5.9               2.1
59                 5.2               2.7                3.9               1.4
64                 5.6               2.9                3.6               1.3
91                 6.1               3.0                4.6               1.4
86                 6.7               3.1                4.7               1.5
131                7.9               3.8                6.4               2.0
104                6.5               3.0                5.8               2.2
45                 4.8               3.0                1.4     

'\nLR = LogisticRegression()\nLR.fit(x_data, y_data.values.ravel())\ny_pred = LR.predict(x_data)\nprint("LR\'s Accuracy is", accuracy_score(y_data, y_pred))\ncoef = LR.coef_\nintercept = LR.intercept_\nprint(coef, intercept)\n   \n'