In [7]:
import torch
import tenseal as ts
import random
from time import time

# those are optional and are not necessary for training
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import KBinsDiscretizer


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [8]:

def split_train_test(x, y, test_ratio=0.3):
    idxs = [i for i in range(len(x))]
    random.shuffle(idxs)
    # delimiter between test and train data
    delim = int(len(x) * test_ratio)
    test_idxs, train_idxs = idxs[:delim], idxs[delim:]
    return x[train_idxs], y[train_idxs], x[test_idxs], y[test_idxs]


def german_data_set():
    columnNames=["Status of existing checking account","Duration in month","Credit history",
         "Purpose","Credit amount","Savings account/bonds","Present employment since",
         "Installment rate in percentage of disposable income","Personal status and sex",
         "Other debtors / guarantors","Present residence since","Property","Age in years",
        "Other installment plans","Housing","Number of existing credits at this bank",
        "Job","Number of people being liable to provide maintenance for","Telephone","foreign worker","class"]
    df=pd.read_csv("german.data",sep=" ",header=None)

    df.columns = columnNames    

    numeric_col = ["Age in years","Duration in month","Credit amount"]
    est = KBinsDiscretizer(n_bins=4, encode='ordinal', 
                           strategy='uniform')
    for col in numeric_col:    
        df[col] = est.fit_transform(df[[col]])
    
    #Perform Label encoding in categorical features

    le = preprocessing.LabelEncoder()
    for column in df.columns:
        if df[column].dtype == type(object):
            df[column] = le.fit_transform(df[column])
    
    
        # Spliting the dataframe in to X and Y variables
    dataset = df.values
    X = dataset[:, :-1]
    X = X.astype(str)
    y = dataset[:,-1]

    y = torch.tensor(df["class"].values).float().unsqueeze(1)
    df = df.drop("class", 'columns')
    # standardize data
    df = (df - df.mean()) / df.std()
    x = torch.tensor(df.values).float()
    return split_train_test(x, y)


x_train, y_train, x_test, y_test = german_data_set()

print("############# Data summary #############")
print(f"x_train has shape: {x_train.shape}")
print(f"y_train has shape: {y_train.shape}")
print(f"x_test has shape: {x_test.shape}")
print(f"y_test has shape: {y_test.shape}")
print("#######################################")

############# Data summary #############
x_train has shape: torch.Size([700, 20])
y_train has shape: torch.Size([700, 1])
x_test has shape: torch.Size([300, 20])
y_test has shape: torch.Size([300, 1])
#######################################


  df = df.drop("class", 'columns')


## Training a Logistic Regression Model

We will start by training a logistic regression model (without any encryption), which can be viewed as a single layer neural network with a single node. We will be using this model as a means of comparison against encrypted training and evaluation.

In [106]:
import torch
import torch.nn as nn

# Define the Discriminator (Critic) class
class Discriminator(nn.Module):
    def __init__(self, input_size):
        super(Discriminator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_size, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1),
            nn.Sigmoid()  # Output a probability between 0 and 1
        )
        
        for layer in self.model:
            if isinstance(layer, nn.Linear):
                layer.weight = nn.init.xavier_uniform_(layer.weight)
                layer.bias = nn.init.ones_(layer.bias)

    def forward(self, x):
        return self.model(x)

# Define the Generator class
class Generator(nn.Module):
    def __init__(self, input_size, output_size):
        super(Generator, self).__init__()
        # self.lr = torch.nn.Linear(n_features, 1)
        self.model = nn.Sequential(
            nn.Linear(input_size, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, output_size),  # Output size should match the data you want to generate
            nn.Tanh()  # Output values are typically scaled between -1 and 1
        )
        
        for layer in self.model:
            if isinstance(layer, nn.Linear):
                layer.weight = nn.init.xavier_uniform_(layer.weight)
                layer.bias = nn.init.ones_(layer.bias)
        
                

    def forward(self, x):
        return self.model(x)


In [108]:
# Example usage:
discriminator_input_size =  x_train.shape[1]
generator_input_size =  x_train.shape[1]
generator_output_size = y_train.shape[1]  # Example output size

In [124]:
generator = Generator(generator_input_size, generator_output_size)
discriminator = Discriminator(discriminator_input_size)
#eelrd = Encrypted_classification(discriminator)

In [None]:

eelr = Encrypted_classification(generator)

In [None]:
class LR(torch.nn.Module):

    def __init__(self, n_features):
        super(LR, self).__init__()
        self.lr = torch.nn.Linear(n_features, 1)
   
        
    def forward(self, x):
        out = torch.sigmoid(self.lr(x))
        return out

In [125]:
n_features = x_train.shape[1]
model = discriminator#LR(n_features)
# use gradient descent with a learning_rate=1
optim = torch.optim.SGD(model.parameters(), lr=0.00001)
# use Binary Cross Entropy Loss
criterion = torch.nn.BCELoss()

In [126]:
# define the number of epochs for both plain and encrypted training
EPOCHS = 5

def train(model, optim, criterion, x, y, epochs=EPOCHS):
    for e in range(1, epochs + 1):
        optim.zero_grad()
        out = model(x)
        loss = criterion(out, y)
        loss.backward()
        optim.step()
        print(f"Loss at epoch {e}: {loss.data}")
    return model

model = train(discriminator, optim, criterion, x_train, y_train)

Loss at epoch 1: 0.3819013237953186
Loss at epoch 2: 0.3803880214691162
Loss at epoch 3: 0.3788767457008362
Loss at epoch 4: 0.3773675858974457
Loss at epoch 5: 0.37586045265197754


In [None]:
def accuracy(model, x, y):
    out = model(x)
    correct = torch.abs(y - out) < 0.5
    return correct.float().mean()

plain_accuracy = accuracy(model, x_test, y_test)
print(f"Accuracy on plain test_set: {plain_accuracy}")

Accuracy on plain test_set: 0.7200000286102295


In [127]:
class Encrypted_classification:
    
    def __init__(self, torch_lr):        
        # TenSEAL processes lists and not torch tensors,
        # so we take out the parameters from the PyTorch model        
        for layer in torch_lr.model:
            if isinstance(layer, nn.Linear):
                layer.weight = layer.weight.data.tolist()[0]
                layer.bias = layer.bias.data.tolist()
                       
        # self.weight = torch_lr.lr.weight.data.tolist()[0]
        # self.bias = torch_lr.lr.bias.data.tolist()
    
    def forward(self, enc_x):
        # We don't need to perform sigmoid as this model
        # will only be used for evaluation, and the label
        # can be deduced without applying sigmoid
        enc_out = enc_x.dot(self.weight) + self.bias
        return enc_out
    
    def __call__(self, *args, **kwargs):
        return self.fr(*args, **kwargs)
        

## functions to perform the evaluation with an encrypted model

    
    def encrypt(self, context):
        self.weight = ts.ckks_vector(context, self.weight)
        self.bias = ts.ckks_vector(context, self.bias)
        
    def decrypt(self, context):
        self.weight = self.weight.decrypt()
        self.bias = self.bias.decrypt()
        

eelr = Encrypted_classification(model)
# eelr = Encrypted_classification(discriminator)

AttributeError: 'Discriminator' object has no attribute 'lr'

In [16]:
# parameters
poly_mod_degree = 4096
coeff_mod_bit_sizes = [40, 20, 40]
# create TenSEALContext
ctx_eval = ts.context(ts.SCHEME_TYPE.CKKS, poly_mod_degree, -1, coeff_mod_bit_sizes)
# scale of ciphertext to use
ctx_eval.global_scale = 2 ** 20
# this key is needed for doing dot-product operations
ctx_eval.generate_galois_keys()

We will encrypt the whole test set before the evaluation:

In [17]:
t_start = time()
enc_x_test = [ts.ckks_vector(ctx_eval, x.tolist()) for x in x_test]
t_end = time()
print(f"Encryption of the test-set took {int(t_end - t_start)} seconds")

Encryption of the test-set took 1 seconds


In [10]:
# (optional) encrypt the model's parameters
# eelr.encrypt(ctx_eval)

In [None]:
def encrypted_evaluation(model, enc_x_test, y_test):
    t_start = time()
    
    correct = 0
    for enc_x, y in zip(enc_x_test, y_test):
        # encrypted evaluation
        enc_out = model(enc_x)
        # plain comparison
        out = enc_out.decrypt()
        out = torch.tensor(out)
        out = torch.sigmoid(out)
        if torch.abs(out - y) < 0.5:
            correct += 1
    
    t_end = time()
    print(f"Evaluated test_set of {len(x_test)} entries in {int(t_end - t_start)} seconds")
    print(f"Accuracy: {correct}/{len(x_test)} = {correct / len(x_test)}")
    return correct / len(x_test)
    

encrypted_accuracy = encrypted_evaluation(eelr, enc_x_test, y_test)
diff_accuracy = plain_accuracy - encrypted_accuracy
print(f"Difference between plain and encrypted accuracies: {diff_accuracy}")


Evaluated test_set of 300 entries in 2 seconds
Accuracy: 217/300 = 0.7233333333333334
Difference between plain and encrypted accuracies: -0.0033333301544189453
Oh! We got a better accuracy on the encrypted test-set! The noise was on our side...


In [19]:
class EncryptedLR:
    
    def __init__(self, torch_lr):
        self.weight = torch_lr.lr.weight.data.tolist()[0]
        self.bias = torch_lr.lr.bias.data.tolist()
        # we accumulate gradients and counts the number of iterations
        self._delta_w = 0
        self._delta_b = 0
        self._count = 0
        
    def forward(self, enc_x):
        enc_out = enc_x.dot(self.weight) + self.bias
        enc_out = EncryptedLR.sigmoid(enc_out)
        return enc_out
    
    def backward(self, enc_x, enc_out, enc_y):
        out_minus_y = (enc_out - enc_y)
        self._delta_w += enc_x * out_minus_y
        self._delta_b += out_minus_y
        self._count += 1
        
    def update_parameters(self):
        if self._count == 0:
            raise RuntimeError("You should at least run one forward iteration")
        # update weights
        # We use a small regularization term to keep the output
        # of the linear layer in the range of the sigmoid approximation
        self.weight -= self._delta_w * (1 / self._count) + self.weight * 0.05
        self.bias -= self._delta_b * (1 / self._count)
        # reset gradient accumulators and iterations count
        self._delta_w = 0
        self._delta_b = 0
        self._count = 0
    
    @staticmethod
    def sigmoid(enc_x):
        # We use the polynomial approximation of degree 3
        # sigmoid(x) = 0.5 + 0.197 * x - 0.004 * x^3
        # from https://eprint.iacr.org/2018/462.pdf
        # which fits the function pretty well in the range [-5,5]
        return enc_x.polyval([0.5, 0.197, 0, -0.004])
    
    def plain_accuracy(self, x_test, y_test):
        # evaluate accuracy of the model on
        # the plain (x_test, y_test) dataset
        w = torch.tensor(self.weight)
        b = torch.tensor(self.bias)
        out = torch.sigmoid(x_test.matmul(w) + b).reshape(-1, 1)
        correct = torch.abs(y_test - out) < 0.5
        return correct.float().mean()    
    
    def encrypt(self, context):
        self.weight = ts.ckks_vector(context, self.weight)
        self.bias = ts.ckks_vector(context, self.bias)
        
    def decrypt(self):
        self.weight = self.weight.decrypt()
        self.bias = self.bias.decrypt()
        
    def __call__(self, *args, **kwargs):
        return self.forward(*args, **kwargs)


In [20]:
# parameters
poly_mod_degree = 8192
coeff_mod_bit_sizes = [40, 21, 21, 21, 21, 21, 21, 40]
# create TenSEALContext
ctx_training = ts.context(ts.SCHEME_TYPE.CKKS, poly_mod_degree, -1, coeff_mod_bit_sizes)
ctx_training.global_scale = 2 ** 21
ctx_training.generate_galois_keys()

In [21]:
t_start = time()
enc_x_train = [ts.ckks_vector(ctx_training, x.tolist()) for x in x_train]
enc_y_train = [ts.ckks_vector(ctx_training, y.tolist()) for y in y_train]
t_end = time()
print(f"Encryption of the training_set took {int(t_end - t_start)} seconds")

Encryption of the training_set took 28 seconds


In [None]:
eelr = EncryptedLR(LR(n_features))
accuracy = eelr.plain_accuracy(x_test, y_test)
print(f"Accuracy at epoch #0 is {accuracy}")

times = []
for epoch in range(EPOCHS):
    eelr.encrypt(ctx_training)
    

    t_start = time()
    for enc_x, enc_y in zip(enc_x_train, enc_y_train):
        enc_out = eelr.forward(enc_x)
        eelr.backward(enc_x, enc_out, enc_y)
    eelr.update_parameters()
    t_end = time()
    times.append(t_end - t_start)
    
    eelr.decrypt()
    accuracy = eelr.plain_accuracy(x_test, y_test)
    print(f"Accuracy at epoch #{epoch + 1} is {accuracy}")


print(f"\nAverage time per epoch: {int(sum(times) / len(times))} seconds")
print(f"Final accuracy is {accuracy}")

diff_accuracy = plain_accuracy - accuracy
print(f"Difference between plain and encrypted accuracies: {diff_accuracy}")
if diff_accuracy < 0:
    print("We got a better accuracy when training on encrypted data! The noise was on our side...")

Accuracy at epoch #0 is 0.41999998688697815
Accuracy at epoch #1 is 0.4866666793823242
Accuracy at epoch #2 is 0.6600000262260437
Accuracy at epoch #3 is 0.6333333253860474
Accuracy at epoch #4 is 0.46000000834465027
Accuracy at epoch #5 is 0.28333333134651184

Average time per epoch: 183 seconds
Final accuracy is 0.28333333134651184
Difference between plain and encrypted accuracies: 0.43666669726371765


In [128]:
python --version

NameError: name 'python' is not defined

In [129]:
pip install pyinstaller

Note: you may need to restart the kernel to use updated packages.
Collecting pyinstaller
  Downloading pyinstaller-5.13.2-py3-none-win_amd64.whl (1.3 MB)
     ---------------------------------------- 1.3/1.3 MB 951.9 kB/s eta 0:00:00
Collecting altgraph (from pyinstaller)
  Downloading altgraph-0.17.3-py2.py3-none-any.whl (21 kB)
Collecting pyinstaller-hooks-contrib>=2021.4 (from pyinstaller)
  Downloading pyinstaller_hooks_contrib-2023.8-py2.py3-none-any.whl (282 kB)
     ------------------------------------ 282.9/282.9 kB 970.4 kB/s eta 0:00:00
Collecting pefile>=2022.5.30 (from pyinstaller)
  Downloading pefile-2023.2.7-py3-none-any.whl (71 kB)
     ---------------------------------------- 71.8/71.8 kB 1.3 MB/s eta 0:00:00
Collecting pywin32-ctypes>=0.2.1 (from pyinstaller)
  Downloading pywin32_ctypes-0.2.2-py3-none-any.whl (30 kB)
Installing collected packages: altgraph, pywin32-ctypes, pyinstaller-hooks-contrib, pefile, pyinstaller
  Attempting uninstall: pywin32-ctypes
    Found

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
spyder 5.1.5 requires pyqt5<5.13, which is not installed.
spyder 5.1.5 requires pyqtwebengine<5.13, which is not installed.
