In [None]:
pip install tensorflow-privacy

In [None]:
pip install phe

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting phe
  Downloading phe-1.5.0-py2.py3-none-any.whl (53 kB)
[K     |████████████████████████████████| 53 kB 1.8 MB/s 
[?25hInstalling collected packages: phe
Successfully installed phe-1.5.0


In [None]:
# Import Dependencies
import numpy as np
from sklearn.datasets import load_diabetes
import phe as paillier
import random

In [None]:
# Dataset Description: load the dataset and print the description text it carries
diabetes = load_diabetes()
print(diabetes.DESCR)

.. _diabetes_dataset:

Diabetes dataset
----------------

Ten baseline variables, age, sex, body mass index, average blood
pressure, and six blood serum measurements were obtained for each of n =
442 diabetes patients, as well as the response of interest, a
quantitative measure of disease progression one year after baseline.

**Data Set Characteristics:**

  :Number of Instances: 442

  :Number of Attributes: First 10 columns are numeric predictive values

  :Target: Column 11 is a quantitative measure of disease progression one year after baseline

  :Attribute Information:
      - age     age in years
      - sex
      - bmi     body mass index
      - bp      average blood pressure
      - s1      tc, total serum cholesterol
      - s2      ldl, low-density lipoproteins
      - s3      hdl, high-density lipoproteins
      - s4      tch, total cholesterol / HDL
      - s5      ltg, possibly log of serum triglycerides level
      - s6      glu, blood sugar level

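Note: Each of these 10 feature variables has been mean centered and scaled by the standard deviation times the square root of `n_samples` (i.e. the sum of squares of each column totals 1).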

In [None]:
# Function to Get the Data
def get_data(n_clients):
    """
    Load the diabetes dataset and carve it into per-client training shards.

    A column of ones is appended to the features (intercept term), the rows
    are shuffled, 50 randomly chosen rows are held out as a test set, and the
    remaining rows are cut into `n_clients` consecutive, equally sized chunks.
    Rows left over after the equal split are not given to any client.

    Returns `(X, y, X_test, y_test)`, where `X` and `y` are lists holding one
    feature matrix / target vector per client.
    """
    print("Loading data")
    dataset = load_diabetes()
    features, targets = dataset.data, dataset.target
    n_rows = features.shape[0]

    # The extra all-ones column plays the role of the intercept
    features = np.column_stack((features, np.ones(n_rows)))

    # Put the rows in a random order
    order = np.random.permutation(n_rows)
    features, targets = features[order], targets[order]

    # Hold out a random test subset; every other row is training data
    n_test = 50
    held_out = np.random.choice(n_rows, size=n_test, replace=False)
    in_train = np.ones(n_rows, dtype=bool)
    in_train[held_out] = False
    X_test, y_test = features[held_out], targets[held_out]
    X_train, y_train = features[in_train], targets[in_train]

    # Splitting train among multiple clients.
    chunk = X_train.shape[0] // n_clients
    bounds = [(chunk * k, chunk * (k + 1)) for k in range(n_clients)]
    client_X = [X_train[lo:hi] for lo, hi in bounds]
    client_y = [y_train[lo:hi] for lo, hi in bounds]

    return client_X, client_y, X_test, y_test

In [None]:
# MSE for Linear Regression
def mean_square_error(y_pred, y):
    r"""Mean squared error: 1/m * \sum_{i=1..m} (y_pred_i - y_i)^2.

    Args:
        y_pred: predicted values (array-like).
        y: true values (array-like), broadcast-compatible with `y_pred`.

    Returns:
        The mean of the squared element-wise differences.
    """
    # asarray lets plain Python sequences be passed as well as ndarrays
    residual = np.asarray(y) - np.asarray(y_pred)
    return np.mean(residual ** 2)

In [None]:
# Function to take in the Public Key and Encrypt the Data
def encrypt_vector(public_key, x):
    """Encrypt every element of `x` with `public_key.encrypt`; returns a list."""
    ciphertexts = []
    for value in x:
        ciphertexts.append(public_key.encrypt(value))
    return ciphertexts

In [None]:
# Function to take in the Private key and Decrypt the Data
def decrypt_vector(private_key, x):
    """Decrypt every element of `x` with `private_key.decrypt`; returns a NumPy array."""
    plaintexts = []
    for ciphertext in x:
        plaintexts.append(private_key.decrypt(ciphertext))
    return np.array(plaintexts)

In [None]:
# Function to Sum up the Encrypted Values
def sum_encrypted_vectors(x, y):
    """Add two equally long vectors element by element using `+`.

    Raises ValueError when the two vectors differ in length.
    """
    if len(x) == len(y):
        return [left + right for left, right in zip(x, y)]
    raise ValueError('Encrypted vectors must have the same size')

SERVER

In [None]:
class Server:
    """Owner of the key pair. It alone can decrypt the aggregated gradient."""

    def __init__(self, key_length):
        # key_length is forwarded to the key generator as n_length
        self.pubkey, self.privkey = paillier.generate_paillier_keypair(
            n_length=key_length)

    def decrypt_aggregate(self, input_model, n_clients):
        """Decrypt the encrypted vector and divide it by `n_clients`."""
        decrypted_sum = decrypt_vector(self.privkey, input_model)
        return decrypted_sum / n_clients

CLIENT

In [None]:
class Client:
    """A data holder that trains a linear model by gradient descent.

    It can descend on its own data only (`fit`) or apply a gradient handed in
    from outside (`gradient_step`). Gradients computed locally can be
    encrypted with the public key the client was given.
    """

    def __init__(self, name, X, y, pubkey):
        self.name = name
        self.pubkey = pubkey
        self.X = X
        self.y = y
        # One weight per feature column, all starting at zero
        self.weights = np.zeros(X.shape[1])

    def fit(self, n_iter, eta=0.01):
        """Run `n_iter` gradient-descent steps on the local data."""
        for _step in range(n_iter):
            self.gradient_step(self.compute_gradient(), eta)

    def gradient_step(self, gradient, eta=0.01):
        """Move the weights against `gradient`, scaled by the step size `eta`."""
        scaled = eta * gradient
        self.weights -= scaled

    def compute_gradient(self):
        """Gradient of the current model on the local training set:
        the prediction residuals multiplied into the feature matrix.
        """
        predictions = self.predict(self.X)
        residual = predictions - self.y
        return residual.dot(self.X)

    def predict(self, X):
        """Linear prediction: `X` times the current weights."""
        return X.dot(self.weights)

    def encrypted_gradient(self, sum_to=None):
        """Compute the local gradient and encrypt it.

        When `sum_to` is given, the encrypted gradient is added onto it and
        the sum is returned; otherwise the encrypted gradient itself is.
        """
        encrypted = encrypt_vector(self.pubkey, self.compute_gradient())
        if sum_to is None:
            return encrypted
        return sum_encrypted_vectors(sum_to, encrypted)

FEDERATED LEARNING

In [None]:
def _report_test_mse(clients, X_test, y_test):
    """Print each client's mean squared error on the held-out test set."""
    for c in clients:
        mse = mean_square_error(c.predict(X_test), y_test)
        print('{:s}:\t{:.2f}'.format(c.name, mse))


def federated_learning(n_iter, eta, n_clients, key_length):
    """Compare local-only training with encrypted federated gradient descent.

    Steps:
      1. Split the data among `n_clients` clients and create a server that
         owns the key pair.
      2. Let every client train for `n_iter` steps on its own data only and
         print its test error.
      3. Run `n_iter` federated rounds: the clients' encrypted gradients are
         summed, the server decrypts and averages the sum, and every client
         applies the averaged gradient. Clients keep the weights from step 2
         when this phase starts.
      4. Print every client's test error again.

    Args:
        n_iter: number of gradient steps, used for both training phases.
        eta: step size for the gradient updates.
        n_clients: number of clients to split the training data among.
        key_length: key size handed to the server's key generator.
    """
    names = ['Hospital {}'.format(i) for i in range(1, n_clients + 1)]

    X, y, X_test, y_test = get_data(n_clients=n_clients)

    # Instantiating the server and generating private and public keys
    server = Server(key_length=key_length)

    # Instantiating the clients; each one only ever receives the public key.
    clients = [Client(names[i], X[i], y[i], server.pubkey)
               for i in range(n_clients)]

    print('Error (MSE) that each client gets on test set by '
          'training only on own local data:')
    for c in clients:
        c.fit(n_iter, eta)
    _report_test_mse(clients, X_test, y_test)

    # The federated learning with gradient descent
    print('Running distributed gradient aggregation for {:d} iterations'
          .format(n_iter))
    for _ in range(n_iter):

        # Start the running encrypted sum with the first client's gradient,
        # then add each of the OTHER clients exactly once. Looping over all
        # clients here would count the first client's gradient twice while
        # the server still divides by n_clients.
        encrypt_aggr = clients[0].encrypted_gradient(sum_to=None)
        for c in clients[1:]:
            encrypt_aggr = c.encrypted_gradient(sum_to=encrypt_aggr)

        # Sending aggregate to server and decrypt it
        aggr = server.decrypt_aggregate(encrypt_aggr, n_clients)

        # Take gradient steps
        for c in clients:
            c.gradient_step(aggr, eta)

    print('Error (MSE) that each client gets after running the protocol:')
    _report_test_mse(clients, X_test, y_test)

In [None]:
# Seed NumPy's global RNG so the shuffle and test split done inside get_data
# are the same on every run, which should make the printed errors repeatable.
np.random.seed(42)
# NOTE(review): a 1024-bit key keeps this demo quick; confirm the key size before any non-demo use.
federated_learning(n_iter=100, eta=0.01, n_clients=3, key_length=1024)

Loading data
Error (MSE) that each client gets on test set by training only on own local data:
Hospital 1:	2992.43
Hospital 2:	2906.90
Hospital 3:	3158.22
Running distributed gradient aggregation for 100 iterations
Error (MSE) that each client gets after running the protocol:
Hospital 1:	2606.76
Hospital 2:	2594.13
Hospital 3:	2679.82
