# DEPENDABLE AND SECURE AI-ML (AI60006) 
# **Abhinav Bohra 18CS30049**
## Assignment 2

In [1]:
%%capture
%cd /content/
!pip install phe
!git clone https://abhinav-bohra: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@github.com/abhinav-bohra/Adversarial-Machine-Learning.git

In [2]:
%cd /content/Adversarial-Machine-Learning/A2
!git pull

/content/Adversarial-Machine-Learning/A2
Already up to date.


In [3]:
# Question 1
!python federated_learning_with_encryption.py

Loading data
Error (MSE) that each client gets on test set by training only on own local data:
Hospital 1:	3810.44
Hospital 2:	3982.58
Hospital 3:	3569.32
Hospital 4:	4144.15
Hospital 5:	3848.39
Running distributed gradient aggregation for 50 iterations
Error (MSE) that each client gets after running the protocol:
Hospital 1:	3775.50
Hospital 2:	3775.50
Hospital 3:	3775.50
Hospital 4:	3775.50
Hospital 5:	3775.50


Time Taken to compute federated learning process = 0.0 mins 51.8663330078125 secs


In [4]:
#-------------------------------------------------------------------------------
# Question 2 | PSEUDO CODE | LOGIC/SYSTEM DEISGN
#-------------------------------------------------------------------------------
# 1. Train SVM Classifier model on server using the encrypted public data
# 2. Encrypt the data on the client. 
# 3. Send the encrypted X_test to the server.
# 4. Use unencrypted model parameters for inference
# 5. Send model predictions back to the client
# 6. On the client, decrypt Y_pred and calculate accuracy.

In [5]:
# Question 2
import os
import math
import time
import numpy as np
import pandas as pd
import phe.encoding
from phe import paillier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

#-------------------------------------------------------------------------------------------------------------------------------------------------
# UTIL FUNCTIONS
#-------------------------------------------------------------------------------------------------------------------------------------------------
class ExampleEncodedNumber(phe.encoding.EncodedNumber):
    BASE = 64
    LOG2_BASE = math.log(BASE, 2)

def encrypt_vector(vec, public_key):
  # encoded_vector = [ExampleEncodedNumber.encode(public_key, v) for v in vec]
  # encrypted_vector = [public_key.encrypt(ev) for ev in encoded_vector]
  encrypted_vector = [public_key.encrypt(ev) for ev in vec]
  return encrypted_vector

def decrypt_vector(vec, private_key):
  # decoded_vector = [private_key.decrypt_encoded(x, ExampleEncodedNumber) for x in vec]
  # decrypted_vector = [d.decode() for d in decoded_vector]
  decrypted_vector = [private_key.decrypt(x) for x in vec]
  return decrypted_vector

def load_data(input_file, target_feature):
  # Load data and separate the features and labels
  data = pd.read_csv(input_file).dropna()
  X = data.drop(target_feature, axis=1).values.tolist()
  y = data[target_feature].values.tolist()
  return X, y
  
#-------------------------------------------------------------------------------------------------------------------------------------------------
# CLIENT CLASS
#-------------------------------------------------------------------------------------------------------------------------------------------------
class Client:

    def __init__(self, key_length):
        # Generate the public and private keys for Paillier encryption
        public_key, private_key = paillier.generate_paillier_keypair(n_length=key_length)
        self.public_key, self.private_key = public_key, private_key

    def encrypt_data(self, input_file, target_feature):        
        # Encrypt the testing data and labels
        self.X_test, self.y_test = load_data(input_file, target_feature)
        self.X_test_encrypted = [encrypt_vector(x, self.public_key) for x in self.X_test]
        self.y_test_encrypted = encrypt_vector(self.y_test, self.public_key)
        return self.X_test_encrypted, self.y_test_encrypted
    
    def eval(self, encrypted_predictions):
        logits = decrypt_vector(encrypted_predictions, self.private_key)
        y_pred = [1 if l>0 else 0 for l in logits]
        test_accuracy = accuracy_score(self.y_test, y_pred)
        return test_accuracy

#-------------------------------------------------------------------------------------------------------------------------------------------------
# SERVER CLASS
#-------------------------------------------------------------------------------------------------------------------------------------------------

class Server:

    def __init__(self, input_file, target_feature):
        self.model = None
        self.X_train, self.y_train = load_data(input_file, target_feature)
        
    def train_model(self,hyperparams):
        # Train the SVM model
        svm_model = SVC(kernel=hyperparams['kernel'], C=hyperparams['C'], gamma=hyperparams['gamma'])
        svm_model.fit(self.X_train, self.y_train)
        self.model = svm_model

    def predict(self, X_test_encrypted):
        # Make predictions on the encrypted test data using the trained model
        encrypted_logits = list()
        w = self.model.coef_[0]
        b = self.model.intercept_[0]
        for x in X_test_encrypted:
          score = b
          for i in range(len(x)):
            score += x[i]*w[i]
          encrypted_logits.append(score)
        return encrypted_logits

#-------------------------------------------------------------------------------------------------------------------------------------------------
# DRIVER CODE
#-------------------------------------------------------------------------------------------------------------------------------------------------
start_time = time.time()

# Parameters
key_length = 1024
target_feature = "Outcome"
hyperparameters = {'kernel':'linear', 'C':1, 'gamma':'auto'}

# Instantiate Server
server = Server("server/train.csv", target_feature)
# Train SVM Classifier
svm_model = server.train_model(hyperparameters)

# Instantiate Client
client = Client(key_length)
# Encrypt data on client machine
X_test_encrypted, y_test_encrypted = client.encrypt_data("client/test.csv", target_feature)

# Send encrypted data to server for inference
encrypted_preds = server.predict(X_test_encrypted)

# Send encrypted predictions back to client and evaluate accuracy
test_accuracy = client.eval(encrypted_preds)
print(f'\n\nTest accuracy for Privacy-preserving SVM Model is {round(test_accuracy, 2)}')

end_time = time.time()
time_taken = (end_time - start_time)
print(f'\n\nTotal Time Taken: = {time_taken//60} mins {round(time_taken%60,2)} secs')



Test accuracy for Privacy-preserving SVM Model is 0.75


Total Time Taken: = 0.0 mins 33.56 secs


## Verifying Results


In [8]:
#------------------------------------------------------------------------------------------------------------------------------
# SVM CLASSIFIER WITH UNENCRYPTED DATA AND UNENCRYPTED MODEL
#------------------------------------------------------------------------------------------------------------------------------
%cd /content/Adversarial-Machine-Learning/A2
import os
import math
import time
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

def load_data(input_file, target_feature):
  data = pd.read_csv(input_file).dropna()
  X = data.drop(target_feature, axis=1).values.tolist()
  y = data[target_feature].values.tolist()
  return X, y

start_time = time.time()
target_feature = "Outcome"
hyperparameters = {'kernel':'linear', 'C':1, 'gamma':'auto'}

X_train, y_train = load_data("server/train.csv", target_feature)
X_test, y_test = load_data("client/test.csv", target_feature)

svm_model = SVC(kernel=hyperparameters['kernel'], C=hyperparameters['C'], gamma=hyperparameters['gamma'])
svm_model.fit(X_train, y_train)

y_pred = svm_model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)

print(f'\n\nTest accuracy for Normal SVM Model is {round(test_accuracy, 2)}')
end_time = time.time()
time_taken = (end_time - start_time)
print(f'\n\nTotal Time Taken: = {time_taken//60} mins {round(time_taken%60,2)} secs')

/content/Adversarial-Machine-Learning/A2


Test accuracy for Normal SVM Model is 0.75


Total Time Taken: = 0.0 mins 3.31 secs


In [9]:
#!/usr/bin/env python3.4
import math
import phe.encoding
from phe import paillier

class ExampleEncodedNumber(phe.encoding.EncodedNumber):
    BASE = 64
    LOG2_BASE = math.log(BASE, 2)

public_key, private_key = paillier.generate_paillier_keypair()

a=10.5
b=20

encoded_a = ExampleEncodedNumber.encode(public_key, a)
encrypted_a = public_key.encrypt(encoded_a)

encoded_b = ExampleEncodedNumber.encode(public_key, b)
encrypted_b = public_key.encrypt(encoded_b)

x = 2*encrypted_a + 3*encrypted_b*encrypted_b
xd = private_key.decrypt_encoded(x, ExampleEncodedNumber)
print("Decrypted: {}".format(xd.decode()))

NotImplementedError: ignored

Therefore, Paillier Partially Homomorphic Encryption supports addition and multiplication of an encrypted number by a scalar (constant), but it does not support multiplication of two encrypted numbers directly.

# Push Updates

In [6]:
%cd /content/Adversarial-Machine-Learning/
!git config --global user.email "abhinavbohra@iitkgp.ac.in"
!git config --global user.name "abhinav-bohra"
!git add .
!git status

/content/Adversarial-Machine-Learning
On branch main
Your branch is up to date with 'origin/main'.

nothing to commit, working tree clean


In [7]:
!git commit -m "added results"
!git push

On branch main
Your branch is up to date with 'origin/main'.

nothing to commit, working tree clean
Everything up-to-date
