In [1]:
import time
import os
import sys
import logging

import numpy as np
import tenseal as ts

from sklearn.model_selection import train_test_split
# NOTE: this chdir is relative, so it is NOT idempotent -- every re-run of this
# cell moves the working directory one more level up. Run it once per kernel.
os.chdir("..")
# Project-local modules, imported only after the chdir above.
# NOTE(review): presumably they are resolved from the new working directory -- confirm.
from src.features import build_features
from models import encrypted_LR

# Definition of parameters

In [2]:
# Directory handed to build_features.data_import. Swap the two assignments
# below to switch between the full data set and the quick demo subset.
# DATA_PATH = "~/homomorphic-encryption/ckks_titanic/data/raw/"            # whole data set
DATA_PATH = "~/homomorphic-encryption/ckks_titanic/data/quick_demo/"   # subset of the data set, with 15 train_samples and 5 test_samples
# The log file is written to "<LOG_PATH>/<LOG_FILENAME>.log".
LOG_PATH = "."
LOG_FILENAME = "date+all"

In [3]:
# Configure the root logger at DEBUG level with two handlers: one writing to
# "<LOG_PATH>/<LOG_FILENAME>.log" and one echoing to the notebook's stdout.
fileHandler = logging.FileHandler("%s/%s.log" % (LOG_PATH, LOG_FILENAME))
streamHandler = logging.StreamHandler(sys.stdout)
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s  [%(levelname)-8.8s]  %(message)s",
    datefmt='%m/%d/%Y %I:%M:%S %p',
    handlers=[fileHandler, streamHandler],
)

In [4]:
def crytp_array(X, local_context):
    """Encrypt every item of ``X`` as a CKKS vector.

    Args:
        X: iterable whose items are each passed to ``ts.ckks_vector``.
        local_context: context handed to ``ts.ckks_vector`` for every item.

    Returns:
        A list holding one ``ts.ckks_vector`` result per item of ``X``,
        in iteration order (an empty list when ``X`` is empty).
    """
    return [ts.ckks_vector(local_context, row) for row in X]

def refresh(ciphertext, **kwarg):
    """Decrypt ``ciphertext`` and encrypt the result again under ``context``.

    Keyword Args:
        context: required; used both to decide how to decrypt and to build
            the new ``ts.ckks_vector``.
        secret_key: optional; passed to ``ciphertext.decrypt`` when
            ``context.is_private()`` is false.

    Returns:
        A new ``ts.ckks_vector`` built from the decrypted values.

    Raises:
        AssertionError: if ``context`` is missing or falsy.
        AttributeError: if ``context.is_private()`` is false and no (truthy)
            ``secret_key`` was supplied.
    """
    context = kwarg.get("context")
    secret_key = kwarg.get("secret_key")
    assert context, "Context must be provided with the key : context" + str(context)

    # A private context can decrypt without an explicit key.
    if context.is_private():
        return ts.ckks_vector(context, ciphertext.decrypt())

    # Otherwise the caller has to hand over the secret key.
    if secret_key:
        return ts.ckks_vector(context, ciphertext.decrypt(secret_key))

    raise AttributeError("The secret key is not provided and the context provided is public, decryption is not possible")

# Loading and processing the data

In [5]:
# Record the working directory in the log.
logging.info(os.getcwd())
# Load the raw train and test sets from DATA_PATH.
raw_train, raw_test = build_features.data_import(DATA_PATH)
# Build the processed sets. `train` still carries the "Survived" column, which
# is separated from the features further down; `submission_test` is not used
# again in the visible part of this notebook.
train, submission_test = build_features.processing(raw_train, raw_test)

07/15/2020 04:04:26 PM  [INFO    ]  /home/apignet/homomorphic-encryption/ckks_titanic
07/15/2020 04:04:26 PM  [INFO    ]  loading the data into memory (pandas df)
07/15/2020 04:04:26 PM  [INFO    ]  Done
07/15/2020 04:04:26 PM  [INFO    ]  making final data set from raw data
07/15/2020 04:04:26 PM  [INFO    ]  Done


In [6]:
# Hold out 15% of the processed training data for evaluation.
# random_state is pinned so that "Restart & Run All" reproduces the same split
# (and therefore the same losses and accuracy) on every run.
# NOTE: this cell rebinds `train`, so running it twice in a row splits the
# already reduced set again -- re-run the data-loading cell first.
train, test = train_test_split(train, test_size=0.15, random_state=42)

# Separate the "Survived" target from the feature columns for both subsets.
train_labels, test_labels = train.Survived, test.Survived
train_features = train.drop("Survived", axis=1)
test_features = test.drop("Survived", axis=1)

# Definition of safety parameters

In [7]:
logging.info('Definition of safety parameters...')
timer = time.time()
# Alternative parameter sets, kept for reference:
# context = ts.context(ts.SCHEME_TYPE.CKKS, 32768,
# coeff_mod_bit_sizes=[60, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 60])
# context = ts.context(ts.SCHEME_TYPE.CKKS, 8192, coeff_mod_bit_sizes=[40, 21, 21, 21, 21, 21, 21, 40])

# CKKS context: 16384 is the second positional argument (the polynomial
# modulus degree in TenSEAL's API); the modulus chain is 60, seven 40s, 60.
context = ts.context(
    ts.SCHEME_TYPE.CKKS,
    16384,
    coeff_mod_bit_sizes=[60] + [40] * 7 + [60],
)
context.global_scale = 2 ** 40
logging.info("Done. %s seconds", round(time.time() - timer, 2))

logging.info('Generation of the Galois Key...')
timer = time.time()
context.generate_galois_keys()
logging.info("Done. %s seconds", round(time.time() - timer, 2))

# The secret key is extracted while the context still holds it. Making the
# context public is left disabled below; enabling it means every decrypt call
# needs the secret key passed explicitly.
#logging.info('Generation of the secret key...')
#timer = time.time()
secret_key = context.secret_key()
#context.make_context_public()
#logging.info("Done. " + str(round(time.time() - timer, 2)) + " seconds")
#if context.is_public():
#    logging.info("The context is now public, the context do not hold the secret key anymore, and decrypt methods need the secret key to be provide,")


# Keyword arguments forwarded to refresh() by the model.
refresh_arguments = dict(context=context, secret_key=secret_key)

07/15/2020 04:04:27 PM  [INFO    ]  Definition of safety parameters...
07/15/2020 04:04:27 PM  [INFO    ]  Done. 0.45 seconds
07/15/2020 04:04:27 PM  [INFO    ]  Generation of the Galois Key...
07/15/2020 04:04:32 PM  [INFO    ]  Done. 4.73 seconds


# Data encryption

In [8]:
logging.info("Data encryption...")
timer = time.time()
# Features: each row of the feature matrix is encrypted as one vector.
# Labels: reshaped into a single column first, so every label is encrypted
# as its own one-element vector.
encrypted_X = crytp_array(train_features.to_numpy(), context)
encrypted_Y = crytp_array(train_labels.to_numpy().reshape(-1, 1), context)
encrypted_test_X = crytp_array(test_features.to_numpy(), context)
encrypted_test_Y = crytp_array(test_labels.to_numpy().reshape(-1, 1), context)
logging.info("Done. %s seconds", round(time.time() - timer, 2))

07/15/2020 04:04:32 PM  [INFO    ]  Data encryption...
07/15/2020 04:04:33 PM  [INFO    ]  Done. 1.16 seconds


# Initialize the weights

The weights have to be encrypted.

In [9]:
# All-zero starting point. The "*_ne" arrays are the plaintext numpy copies
# handed to the model next to their encrypted counterparts.
# The weight length is taken from the size of the first encrypted test sample.
weight_ne = np.zeros(encrypted_test_X[0].size())
weight = ts.ckks_vector(context, weight_ne.tolist())
bias_ne = np.zeros(1)
bias = ts.ckks_vector(context, bias_ne.tolist())

# Training

In [None]:
logging.info("Training starting")
# Restart the stopwatch here. `timer` was last set when data encryption began,
# so without this reset the "Training done" duration would also include the
# encryption time and anything run in between.
timer = time.time()

# The model receives both the encrypted parameters and their plaintext ("_ne")
# counterparts, plus the refresh callback and its keyword arguments.
# NOTE(review): safety=False presumably enables the decrypting, non-secure mode
# announced by the CRITICAL log line -- confirm in encrypted_LR.
model = encrypted_LR.LogisticRegressionHE(weight=weight,
                                          bias=bias,
                                          weight_ne=weight_ne,
                                          bias_ne=bias_ne,
                                          refresh_function=refresh,
                                          refresh_kwarg=refresh_arguments,
                                          safety=False,
                                          verbose=True,
                                          num_iter=2,
                                          lr=0.8,
                                          reg_para=0.5)
# Fit on the encrypted data; the plaintext copies are passed alongside.
model.fit(encrypted_X, encrypted_Y, X_ne=train_features.to_numpy(), Y_ne=train_labels.to_numpy().reshape((-1, 1)))
logging.info("Training done. %s seconds", round(time.time() - timer, 2))

07/15/2020 04:04:33 PM  [INFO    ]  Training starting
07/15/2020 04:04:33 PM  [CRITICAL]  The data will be decrypted during the process, the protocol is not safe
07/15/2020 04:04:45 PM  [DEBUG   ]  At the first iteration, the error of encryption on the gradient is 0
07/15/2020 04:04:58 PM  [INFO    ]  iteration number 1 is starting
07/15/2020 04:04:58 PM  [INFO    ]  Loss on the unencrypted fit : 8 
07/15/2020 04:04:58 PM  [DEBUG   ]  error 0
07/15/2020 04:05:10 PM  [INFO    ]  iteration number 2 is starting
07/15/2020 04:05:10 PM  [INFO    ]  Loss on the unencrypted fit : 6 
07/15/2020 04:05:10 PM  [DEBUG   ]  error 0
07/15/2020 04:05:10 PM  [INFO    ]  Training done. 37.7 seconds


# Accuracy

In [None]:
# Evaluate the trained model on the encrypted held-out test set.
# Only the first element of the returned value is logged as the accuracy.
acc = model.encrypted_accuracy(encrypted_test_X, encrypted_test_Y)
logging.info("Accuracy : %s " % acc[0])