Below is the list of required files that need to be uploaded for this script to run:

- `lr_utils.py`
- `utils.py`
- `diabetes_binary_5050split_health_indicators_BRFSS2015.csv`


In [1]:
pip install tenseal

Collecting tenseal
  Downloading tenseal-0.3.14-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.9/4.9 MB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tenseal
Successfully installed tenseal-0.3.14


In [2]:
import torch
import tenseal as ts
import pandas as pd
import random
from time import time
import numpy as np

from utils import load_diabetes_data_5050, print_metrics
from lr_utils import LR, train, evaluate_model

In [3]:
# Load the BRFSS diabetes dataset (50/50 class split) as train/test features and labels.
# NOTE(review): presumably reads diabetes_binary_5050split_health_indicators_BRFSS2015.csv
# through utils.py — confirm against load_diabetes_data_5050.
x_train, x_test, y_train, y_test = load_diabetes_data_5050()

## Training LR Model

In [4]:
# Define training parameters
SEED = 42          # fixed seed so a Restart & Run All repeats the same training run
LEARNING_RATE = 1  # SGD step size

# Seed every RNG this notebook imports *before* the model is constructed, so any
# random initialisation inside LR / shuffling inside train is repeatable.
# NOTE: the recorded outputs in this notebook were produced before the seed was
# added, so freshly re-run numbers may differ slightly from the ones shown.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

n_features = x_train.shape[1]  # size of the second axis of the training features
model = LR(n_features)
optim = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE)
criterion = torch.nn.BCELoss()  # binary cross-entropy on the model's outputs

In [5]:
# Fit the logistic-regression model on the training split; the per-epoch training
# loss is printed as the cell output.
# NOTE(review): `model` is rebound from its own previous value, so re-running this
# cell on its own presumably continues training the already-trained model rather
# than starting fresh — re-run the parameter-definition cell first.
model = train(model, optim, criterion, x_train, y_train)

Epoch: 1 	Training Loss: 0.751095
Epoch: 2 	Training Loss: 0.574077
Epoch: 3 	Training Loss: 0.550781
Epoch: 4 	Training Loss: 0.539773
Epoch: 5 	Training Loss: 0.532945
Epoch: 6 	Training Loss: 0.528282
Epoch: 7 	Training Loss: 0.524941
Epoch: 8 	Training Loss: 0.522473
Epoch: 9 	Training Loss: 0.520607
Epoch: 10 	Training Loss: 0.519172


In [6]:
# Evaluate the plaintext model on the test split and print accuracy, precision,
# recall, F1 and the confusion matrix.
accuracy, precision, recall, f1, confusion  = evaluate_model(model, x_test, y_test)

print_metrics(accuracy, precision, recall, f1, confusion)

Accuracy: 0.7479
Precision: 0.7400
Recall: 0.7656
F1 Score: 0.7526
Confusion Matrix:
 [[5153 1905]
 [1660 5421]]


## Encrypted Evaluation

In [7]:
class EncryptedLR:
    """Linear part of a trained logistic regression, applicable to encrypted vectors.

    The weights and bias are copied out of the trained model as plain Python
    lists; only the input passed to `forward` is expected to be encrypted.
    No sigmoid is applied here — the caller receives the raw linear output.
    """

    def __init__(self, torch_lr):
        """Copy parameters from `torch_lr.lr` (an object exposing `weight` and `bias`)."""
        linear = torch_lr.lr
        # weight.data.tolist() is nested; the first (only used) row is the weight vector.
        self.weight = linear.weight.data.tolist()[0]
        self.bias = linear.bias.data.tolist()

    def forward(self, enc_x):
        """Return `enc_x . weight + bias`, computed through `enc_x`'s own `dot` and `+`."""
        return enc_x.dot(self.weight) + self.bias

    def __call__(self, *args, **kwargs):
        """Make the instance callable; simply delegates to `forward`."""
        return self.forward(*args, **kwargs)

# Wrap the trained plaintext model so it can be applied to encrypted inputs.
eelr = EncryptedLR(model)

In [8]:
# CKKS encryption parameters
poly_mod_degree = 8192  # polynomial modulus degree
coeff_mod_bit_sizes = [30, 26, 30]  # bit sizes of the coefficient-modulus primes
# Create the TenSEAL context used for encrypted evaluation.
# NOTE(review): the -1 is presumably the plain-modulus slot, unused for CKKS — confirm against the TenSEAL docs.
ctx_eval = ts.context(ts.SCHEME_TYPE.CKKS, poly_mod_degree, -1, coeff_mod_bit_sizes)
# Scale of ciphertext to use; 2**26 lines up with the 26-bit middle entry of coeff_mod_bit_sizes.
ctx_eval.global_scale = 2 ** 26
# Galois keys are needed for the dot-product operation used by EncryptedLR.forward.
ctx_eval.generate_galois_keys()

In [9]:
# Encrypt every test sample as its own CKKS vector and time the whole pass.
t_start = time()
enc_x_test = [ts.ckks_vector(ctx_eval, x.tolist()) for x in x_test]  # one ciphertext per entry of x_test
t_end = time()
# Elapsed wall-clock time, truncated to whole seconds.
print(f"Encryption of the test-set took {int(t_end - t_start)} seconds")

Encryption of the test-set took 85 seconds


In [10]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

def encrypted_evaluation1(model, enc_x_test, y_test):
    """Evaluate a model on encrypted inputs and score it against plaintext labels.

    For each (encrypted sample, label) pair the model is applied to the
    ciphertext, the result is decrypted, passed through a sigmoid and
    thresholded at 0.5 to obtain the predicted class.

    Args:
        model: Callable taking one encrypted sample and returning an object
            with a ``decrypt()`` method (e.g. an ``EncryptedLR`` instance).
        enc_x_test: Sized iterable of encrypted samples.
        y_test: Iterable of label tensors, aligned with ``enc_x_test``; each
            label must support ``.view(-1).tolist()``.

    Returns:
        Tuple ``(accuracy, precision, recall, f1, confusion)`` computed with
        the corresponding ``sklearn.metrics`` functions.

    Side effects:
        Prints the number of entries and the elapsed wall-clock seconds.
    """
    t_start = time()
    y_true = []
    y_pred = []

    for enc_x, y in zip(enc_x_test, y_test):
        # encrypted evaluation
        enc_out = model(enc_x)
        # plain comparison: decrypt, then apply the sigmoid the encrypted model skipped
        output = enc_out.decrypt()
        output = torch.tensor(output)
        output = torch.sigmoid(output)
        predicted = output >= 0.5
        y_true.extend(y.view(-1).tolist())
        y_pred.extend(predicted.view(-1).tolist())

    t_end = time()
    # Report the size of the set that was passed in, not the notebook-global x_test,
    # so the function stays correct when called with any other encrypted set.
    print(f"Evaluated test_set of {len(enc_x_test)} entries in {int(t_end - t_start)} seconds")

    # Calculate metrics
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    confusion = confusion_matrix(y_true, y_pred)

    return accuracy, precision, recall, f1, confusion


In [11]:
# Run the evaluation on the encrypted test set and print the same metric set
# (accuracy, precision, recall, F1, confusion matrix).
accuracy, precision, recall, f1, confusion  = encrypted_evaluation1(eelr, enc_x_test, y_test)

print_metrics(accuracy, precision, recall, f1, confusion)

Evaluated test_set of 14139 entries in 157 seconds
Accuracy: 0.7485
Precision: 0.7382
Recall: 0.7714
F1 Score: 0.7544
Confusion Matrix:
 [[5121 1937]
 [1619 5462]]
