In [1]:
import os
import math
import time
import numpy as np
import pandas as pd
import phe.encoding
from phe import paillier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

  from pandas.core import (


In [2]:
class ExampleEncodedNumber(phe.encoding.EncodedNumber):
    """`phe.encoding.EncodedNumber` subclass that overrides the encoding base.

    Only the two class attributes below are redefined; all behaviour is
    inherited from the parent class.
    """
    # Base override. Presumably the base the parent class uses for its
    # exponent representation — TODO confirm against the phe documentation.
    BASE = 64
    # Base-2 logarithm of BASE, derived from BASE so the two stay consistent.
    LOG2_BASE = math.log(BASE, 2)

In [3]:
def encrypt_vector(vec, public_key):
    """Encrypt every element of ``vec`` with ``public_key``.

    If the key object exposes an ``encode`` attribute, each value is encoded
    first and the encoded value is encrypted. Otherwise each raw value is
    passed straight to ``public_key.encrypt``.

    Args:
        vec: Iterable of plaintext values (a list or a one-shot iterator).
        public_key: Object providing ``encrypt(value)`` and, optionally,
            ``encode(value)``.

    Returns:
        list: One ciphertext per element of ``vec``, in the same order.

    Raises:
        Any exception raised by ``encode`` or ``encrypt`` propagates to the
        caller, including ``AttributeError``.
    """
    # Materialise once so a one-shot iterator is never partially consumed.
    values = list(vec)

    # Decide up front whether encoding is available. Catching AttributeError
    # around the whole pipeline would also swallow AttributeErrors raised
    # inside encode()/encrypt() and silently retry on the raw values.
    if hasattr(public_key, "encode"):
        encoded_vector = [public_key.encode(v) for v in values]
        return [public_key.encrypt(ev) for ev in encoded_vector]

    # Encoding is not available: encrypt the raw values directly.
    return [public_key.encrypt(v) for v in values]

In [4]:
def decrypt_vector(vec, private_key):
    """Decrypt each ciphertext in ``vec`` with ``private_key``.

    Args:
        vec: Iterable of ciphertexts.
        private_key: Object providing ``decrypt(ciphertext)``.

    Returns:
        list: The values returned by ``private_key.decrypt``, in input order.
    """
    plaintexts = []
    for ciphertext in vec:
        plaintexts.append(private_key.decrypt(ciphertext))
    return plaintexts




In [5]:
def load_data(input_file, target_feature):
    """Read a CSV and split it into feature rows and labels.

    Rows containing any missing value are dropped before the split.

    Args:
        input_file: Source handed to ``pd.read_csv``.
        target_feature: Name of the label column.

    Returns:
        tuple: ``(X, y)``. ``X`` is a list with one list of feature values per
        row (every column except ``target_feature``); ``y`` is the list of
        values from the ``target_feature`` column.
    """
    frame = pd.read_csv(input_file)
    complete_rows = frame.dropna()
    labels = complete_rows[target_feature].values.tolist()
    features = complete_rows.drop(columns=target_feature).values.tolist()
    return features, labels

In [6]:
class Client:
    """Data owner: holds the Paillier key pair, encrypts its test data and
    decrypts the scores computed on that data."""

    def __init__(self, key_length):
        """Generate a Paillier key pair using ``key_length`` as ``n_length``."""
        self.public_key, self.private_key = paillier.generate_paillier_keypair(
            n_length=key_length
        )

    def encrypt_data(self, input_file, target_feature):
        """Load the test set and encrypt its features and labels.

        The plaintext data is kept on the instance (``X_test``, ``y_test``)
        next to the encrypted copies (``X_test_encrypted``,
        ``y_test_encrypted``).

        Args:
            input_file: Source passed on to ``load_data``.
            target_feature: Name of the label column.

        Returns:
            tuple: ``(X_test_encrypted, y_test_encrypted)``.
        """
        features, labels = load_data(input_file, target_feature)
        self.X_test, self.y_test = features, labels

        # Encrypt the feature rows one at a time, then the label vector.
        encrypted_rows = []
        for row in self.X_test:
            encrypted_rows.append(encrypt_vector(row, self.public_key))
        self.X_test_encrypted = encrypted_rows
        self.y_test_encrypted = encrypt_vector(self.y_test, self.public_key)

        return self.X_test_encrypted, self.y_test_encrypted

    def eval(self, encrypted_predictions):
        """Decrypt the scores and return accuracy against the stored labels.

        A decrypted score strictly greater than zero is mapped to class 1,
        anything else to class 0. Reads ``self.y_test``, which is set by
        ``encrypt_data``.
        """
        scores = decrypt_vector(encrypted_predictions, self.private_key)

        y_pred = []
        for score in scores:
            y_pred.append(1 if score > 0 else 0)

        return accuracy_score(self.y_test, y_pred)

In [7]:
class Server:
    """Model owner: trains a plaintext SVM and scores encrypted feature rows."""

    def __init__(self, input_file, target_feature):
        """Load the training set; ``self.model`` stays ``None`` until
        ``train_model`` has been called."""
        self.model = None
        self.X_train, self.y_train = load_data(input_file, target_feature)

    def train_model(self, hyperparams):
        """Fit an ``SVC`` on the training data and store it in ``self.model``.

        Args:
            hyperparams: Mapping with the keys ``'kernel'``, ``'C'`` and
                ``'gamma'``.

        Returns:
            The fitted ``SVC`` instance (the same object as ``self.model``),
            so callers that bind the result get the model rather than ``None``.
        """
        svm_model = SVC(
            kernel=hyperparams['kernel'],
            C=hyperparams['C'],
            gamma=hyperparams['gamma'],
        )
        svm_model.fit(self.X_train, self.y_train)
        self.model = svm_model
        return svm_model

    def predict(self, X_test_encrypted):
        """Compute ``w . x + b`` for every encrypted feature row.

        Only the first row of ``coef_`` and the first entry of ``intercept_``
        are used.
        NOTE(review): scikit-learn exposes ``coef_`` only for the linear
        kernel, and a single row covers only the binary case — confirm other
        configurations are out of scope.

        Args:
            X_test_encrypted: Iterable of rows; each row is a sequence of
                values supporting ``value * float`` and addition.

        Returns:
            list: One score per input row, in the same order.

        Raises:
            RuntimeError: If no model has been trained yet.
            ValueError: If a row's length differs from the number of weights.
        """
        if self.model is None:
            raise RuntimeError("Server.predict() called before train_model()")

        # Plain Python floats keep the arithmetic on the row values' own
        # operators instead of whatever scalar type the model stores.
        weights = [float(w) for w in self.model.coef_[0]]
        bias = float(self.model.intercept_[0])

        encrypted_logits = []
        for x in X_test_encrypted:
            # A short row would otherwise be scored on a subset of the
            # weights without any error.
            if len(x) != len(weights):
                raise ValueError(
                    f"expected {len(weights)} features per row, got {len(x)}"
                )
            score = bias
            for x_i, w_i in zip(x, weights):
                score += x_i * w_i
            encrypted_logits.append(score)
        return encrypted_logits

In [8]:
# End-to-end run: the server trains a plaintext SVM, the client encrypts its
# data, the server scores the ciphertexts, and the client decrypts the scores
# and reports accuracy. The wall-clock time of the whole run is printed last.
start_time = time.time()

# Parameters
key_length = 1024
target_feature = "Outcome"
hyperparameters = {'kernel':'linear', 'C':1, 'gamma':'auto'}
# One place for the dataset location, used by both server and client below.
# NOTE(review): training and evaluation read the same source, so the reported
# figure is accuracy on the training data — confirm whether a held-out split
# was intended.
data_url = r"https://api.csvgetter.com/vS230MWnrcErZ0148aNZ"

# Instantiate Server
server = Server(data_url, target_feature)
# Train SVM Classifier. The fitted model is read from the server afterwards,
# so `svm_model` holds the model regardless of what train_model() returns.
server.train_model(hyperparameters)
svm_model = server.model

# Instantiate Client
client = Client(key_length)
# Encrypt data on client machine
X_test_encrypted, y_test_encrypted = client.encrypt_data(data_url, target_feature)

# Send encrypted data to server for inference
encrypted_preds = server.predict(X_test_encrypted)

# Send encrypted predictions back to client and evaluate accuracy
test_accuracy = client.eval(encrypted_preds)
print(f'\n\nTest accuracy for Privacy-preserving SVM Model is {round(test_accuracy, 2)}')

end_time = time.time()
time_taken = (end_time - start_time)
print(f'\n\nTotal Time Taken: = {time_taken//60} mins {round(time_taken%60,2)} secs')



Test accuracy for Privacy-preserving SVM Model is 0.82


Total Time Taken: = 0.0 mins 33.36 secs


 SVM CLASSIFIER WITH UNENCRYPTED DATA AND UNENCRYPTED MODEL