Below is the list of required files that need to uploaded for this script to run:

- `lr_utils.py`
- `utils.py`
- `diabetes_subset_1000.csv`

In [None]:
pip install tenseal



In [33]:
import torch
import tenseal as ts
import pandas as pd
import random
from time import time

import numpy as np
import matplotlib.pyplot as plt

from utils import *
from lr_utils import LR, train, evaluate_model
from sklearn.model_selection import train_test_split

In [34]:
def load_diabetes_data_subset_1000():
    data = pd.read_csv("diabetes_subset_1000.csv")
    data = data.dropna()
    y = torch.tensor(data["Diabetes_binary"].values).float().unsqueeze(1)
    data = data.drop(columns=['Diabetes_binary'])
    data = (data - data.mean()) / data.std()
    x = torch.tensor(data.values).float()
    return train_test_split(x, y, test_size=0.2, random_state=73)

In [35]:
#Load BRFSS dataset subset 1000 with 50/50 split
x_train, x_test, y_train, y_test = load_diabetes_data_subset_1000()

# Training Unencrypted LR Model

In [36]:
n_features = x_train.shape[1]
model = LR(n_features)
optim = torch.optim.SGD(model.parameters(), lr=1)
criterion = torch.nn.BCELoss()
EPOCHS = 5

In [39]:
model = train(model, optim, criterion, x_train, y_train, epochs = EPOCHS)

Epoch: 1 	Training Loss: 0.496032
Epoch: 2 	Training Loss: 0.495556
Epoch: 3 	Training Loss: 0.495187
Epoch: 4 	Training Loss: 0.494895
Epoch: 5 	Training Loss: 0.494659


In [40]:
# Evaluate the model
accuracy, precision, recall, f1, confusion  = evaluate_model(model, x_test, y_test)

print_metrics(accuracy, precision, recall, f1, confusion)

Accuracy: 0.7700
Precision: 0.7500
Recall: 0.8100
F1 Score: 0.7788
Confusion Matrix:
 [[73 27]
 [19 81]]


# Training an Encrypted LR Model on Encrypted Data using Sigmoid Approximation

## Sigmoid Approximation: Degree 1

In [41]:
class EncryptedLR:

    def __init__(self, torch_lr):
        self.weight = torch_lr.lr.weight.data.tolist()[0]
        self.bias = torch_lr.lr.bias.data.tolist()
        self._delta_w = 0
        self._delta_b = 0
        self._count = 0

    def forward(self, enc_x):
        enc_out = enc_x.dot(self.weight) + self.bias
        enc_out = EncryptedLR.sigmoid(enc_out)
        return enc_out

    def backward(self, enc_x, enc_out, enc_y):
        out_minus_y = (enc_out - enc_y)
        self._delta_w += enc_x * out_minus_y
        self._delta_b += out_minus_y
        self._count += 1

    def update_parameters(self):
        if self._count == 0:
            raise RuntimeError("Need at least run one forward iteration")
        self.weight -= self._delta_w * (1 / self._count) + self.weight * 0.05
        self.bias -= self._delta_b * (1 / self._count)
        # reset gradient accumulators and iterations count
        self._delta_w = 0
        self._delta_b = 0
        self._count = 0

    @staticmethod
    def sigmoid(enc_x):
        # sigmoid(x) = 0.5 + 0.125 * x
        return enc_x.polyval([0.5, 0.125])

    def plain_accuracy(self, x_test, y_test):
        w = torch.tensor(self.weight)
        b = torch.tensor(self.bias)
        out = torch.sigmoid(x_test.matmul(w) + b).reshape(-1, 1)
        correct = torch.abs(y_test - out) < 0.5
        return correct.float().mean()

    def encrypt(self, context):
        self.weight = ts.ckks_vector(context, self.weight)
        self.bias = ts.ckks_vector(context, self.bias)

    def decrypt(self):
        self.weight = self.weight.decrypt()
        self.bias = self.bias.decrypt()

    def __call__(self, *args, **kwargs):
        return self.forward(*args, **kwargs)

In [42]:
# create TenSEALContext with multiplicative depth of 5
poly_mod_degree = 8192
coeff_mod_bit_sizes = [40, 21, 21, 21, 21, 21, 40]
ctx_training = ts.context(ts.SCHEME_TYPE.CKKS, poly_mod_degree, -1, coeff_mod_bit_sizes)
ctx_training.global_scale = 2 ** 21
ctx_training.generate_galois_keys()

In [43]:
t_start = time()
enc_x_train = [ts.ckks_vector(ctx_training, x.tolist()) for x in x_train]
enc_y_train = [ts.ckks_vector(ctx_training, y.tolist()) for y in y_train]
t_end = time()
print(f"Encryption of the training_set took {int(t_end - t_start)} seconds")

Encryption of the training_set took 14 seconds


In [44]:
eelr = EncryptedLR(LR(n_features))
accuracy = eelr.plain_accuracy(x_test, y_test)
print(f"Accuracy at epoch #0 is {accuracy}")

times = []
for epoch in range(EPOCHS):
    eelr.encrypt(ctx_training)

    t_start = time()
    for enc_x, enc_y in zip(enc_x_train, enc_y_train):
        enc_out = eelr.forward(enc_x)
        eelr.backward(enc_x, enc_out, enc_y)
    eelr.update_parameters()
    t_end = time()
    times.append(t_end - t_start)

    eelr.decrypt()
    accuracy = eelr.plain_accuracy(x_test, y_test)
    print(f"Accuracy at epoch #{epoch + 1} is {accuracy}")


print(f"\nAverage time per epoch: {int(sum(times) / len(times))} seconds")
print(f"Final accuracy is {accuracy}")

Accuracy at epoch #0 is 0.4749999940395355
Accuracy at epoch #1 is 0.7599999904632568
Accuracy at epoch #2 is 0.75
Accuracy at epoch #3 is 0.7350000143051147
Accuracy at epoch #4 is 0.7350000143051147
Accuracy at epoch #5 is 0.7400000095367432

Average time per epoch: 54 seconds
Final accuracy is 0.7400000095367432


## Sigmoid Approximation: Degree 3

In [45]:
class EncryptedLR:

    def __init__(self, torch_lr):
        self.weight = torch_lr.lr.weight.data.tolist()[0]
        self.bias = torch_lr.lr.bias.data.tolist()
        self._delta_w = 0
        self._delta_b = 0
        self._count = 0

    def forward(self, enc_x):
        enc_out = enc_x.dot(self.weight) + self.bias
        enc_out = EncryptedLR.sigmoid(enc_out)
        return enc_out

    def backward(self, enc_x, enc_out, enc_y):
        out_minus_y = (enc_out - enc_y)
        self._delta_w += enc_x * out_minus_y
        self._delta_b += out_minus_y
        self._count += 1

    def update_parameters(self):
        if self._count == 0:
            raise RuntimeError("Need at least run one forward iteration")
        self.weight -= self._delta_w * (1 / self._count) + self.weight * 0.05
        self.bias -= self._delta_b * (1 / self._count)
        # reset gradient accumulators and iterations count
        self._delta_w = 0
        self._delta_b = 0
        self._count = 0

    @staticmethod
    def sigmoid(enc_x):
        # sigmoid(x) = 0.5 + 0.197 * x - 0.004 * x^3
        return enc_x.polyval([0.5, 0.197, 0, -0.004])

    def plain_accuracy(self, x_test, y_test):
        w = torch.tensor(self.weight)
        b = torch.tensor(self.bias)
        out = torch.sigmoid(x_test.matmul(w) + b).reshape(-1, 1)
        correct = torch.abs(y_test - out) < 0.5
        return correct.float().mean()

    def encrypt(self, context):
        self.weight = ts.ckks_vector(context, self.weight)
        self.bias = ts.ckks_vector(context, self.bias)

    def decrypt(self):
        self.weight = self.weight.decrypt()
        self.bias = self.bias.decrypt()

    def __call__(self, *args, **kwargs):
        return self.forward(*args, **kwargs)

In [46]:
# create TenSEALContext with multiplicative depth of 6
poly_mod_degree = 8192
coeff_mod_bit_sizes = [40, 21, 21, 21, 21, 21, 21, 40]
ctx_training = ts.context(ts.SCHEME_TYPE.CKKS, poly_mod_degree, -1, coeff_mod_bit_sizes)
ctx_training.global_scale = 2 ** 21
ctx_training.generate_galois_keys()

In [47]:
t_start = time()
enc_x_train = [ts.ckks_vector(ctx_training, x.tolist()) for x in x_train]
enc_y_train = [ts.ckks_vector(ctx_training, y.tolist()) for y in y_train]
t_end = time()
print(f"Encryption of the training_set took {int(t_end - t_start)} seconds")

Encryption of the training_set took 16 seconds


In [48]:
eelr = EncryptedLR(LR(n_features))
accuracy = eelr.plain_accuracy(x_test, y_test)
print(f"Accuracy at epoch #0 is {accuracy}")

times = []
for epoch in range(EPOCHS):
    eelr.encrypt(ctx_training)

    t_start = time()
    for enc_x, enc_y in zip(enc_x_train, enc_y_train):
        enc_out = eelr.forward(enc_x)
        eelr.backward(enc_x, enc_out, enc_y)
    eelr.update_parameters()
    t_end = time()
    times.append(t_end - t_start)

    eelr.decrypt()
    accuracy = eelr.plain_accuracy(x_test, y_test)
    print(f"Accuracy at epoch #{epoch + 1} is {accuracy}")


print(f"\nAverage time per epoch: {int(sum(times) / len(times))} seconds")
print(f"Final accuracy is {accuracy}")

Accuracy at epoch #0 is 0.5049999952316284
Accuracy at epoch #1 is 0.7649999856948853
Accuracy at epoch #2 is 0.7599999904632568
Accuracy at epoch #3 is 0.7549999952316284
Accuracy at epoch #4 is 0.7649999856948853
Accuracy at epoch #5 is 0.7699999809265137

Average time per epoch: 83 seconds
Final accuracy is 0.7699999809265137


## Sigmoid Approximation: Degree 5

In [49]:
class EncryptedLR:

    def __init__(self, torch_lr):
        self.weight = torch_lr.lr.weight.data.tolist()[0]
        self.bias = torch_lr.lr.bias.data.tolist()
        self._delta_w = 0
        self._delta_b = 0
        self._count = 0

    def forward(self, enc_x):
        enc_out = enc_x.dot(self.weight) + self.bias
        enc_out = EncryptedLR.sigmoid(enc_out)
        return enc_out

    def backward(self, enc_x, enc_out, enc_y):
        out_minus_y = (enc_out - enc_y)
        self._delta_w += enc_x * out_minus_y
        self._delta_b += out_minus_y
        self._count += 1

    def update_parameters(self):
        if self._count == 0:
            raise RuntimeError("Need at least run one forward iteration")
        self.weight -= self._delta_w * (1 / self._count) + self.weight * 0.05
        self.bias -= self._delta_b * (1 / self._count)
        # reset gradient accumulators and iterations count
        self._delta_w = 0
        self._delta_b = 0
        self._count = 0

    @staticmethod
    def sigmoid(enc_x):
        # sigmoid(x) = 0.5 + 0.191 * x - 0.0046 * x^3 + 0.00004*x^5
        return enc_x.polyval([0.5, 0.191, 0, -0.0046, 0, 0.00004])

    def plain_accuracy(self, x_test, y_test):
        w = torch.tensor(self.weight)
        b = torch.tensor(self.bias)
        out = torch.sigmoid(x_test.matmul(w) + b).reshape(-1, 1)
        correct = torch.abs(y_test - out) < 0.5
        return correct.float().mean()

    def encrypt(self, context):
        self.weight = ts.ckks_vector(context, self.weight)
        self.bias = ts.ckks_vector(context, self.bias)

    def decrypt(self):
        self.weight = self.weight.decrypt()
        self.bias = self.bias.decrypt()

    def __call__(self, *args, **kwargs):
        return self.forward(*args, **kwargs)

In [50]:
# create TenSEALContext with multiplicative depth of 7
poly_mod_degree = 8192*2
coeff_mod_bit_sizes = [40, 30, 30, 30, 30, 30, 30, 30, 40]
ctx_training = ts.context(ts.SCHEME_TYPE.CKKS, poly_mod_degree, -1, coeff_mod_bit_sizes)
ctx_training.global_scale = 2 ** 30
ctx_training.generate_galois_keys()

In [51]:
t_start = time()
enc_x_train = [ts.ckks_vector(ctx_training, x.tolist()) for x in x_train]
enc_y_train = [ts.ckks_vector(ctx_training, y.tolist()) for y in y_train]
t_end = time()
print(f"Encryption of the training_set took {int(t_end - t_start)} seconds")

Encryption of the training_set took 37 seconds


In [52]:
eelr = EncryptedLR(LR(n_features))
accuracy = eelr.plain_accuracy(x_test, y_test)
print(f"Accuracy at epoch #0 is {accuracy}")

times = []
for epoch in range(EPOCHS):
    eelr.encrypt(ctx_training)

    t_start = time()
    for enc_x, enc_y in zip(enc_x_train, enc_y_train):
        enc_out = eelr.forward(enc_x)
        eelr.backward(enc_x, enc_out, enc_y)
    eelr.update_parameters()
    t_end = time()
    times.append(t_end - t_start)

    eelr.decrypt()
    accuracy = eelr.plain_accuracy(x_test, y_test)
    print(f"Accuracy at epoch #{epoch + 1} is {accuracy}")


print(f"\nAverage time per epoch: {int(sum(times) / len(times))} seconds")
print(f"Final accuracy is {accuracy}")

Accuracy at epoch #0 is 0.4699999988079071
Accuracy at epoch #1 is 0.6800000071525574
Accuracy at epoch #2 is 0.7450000047683716
Accuracy at epoch #3 is 0.7450000047683716
Accuracy at epoch #4 is 0.7549999952316284
Accuracy at epoch #5 is 0.7649999856948853

Average time per epoch: 244 seconds
Final accuracy is 0.7649999856948853
