In [2]:
import logging
import os
import socket
import sys
import time

import numpy as np
import pickle as pk
import tenseal as ts

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, f1_score, recall_score, precision_score, accuracy_score
                           
os.chdir("/home/apignet/homomorphic-encryption/ckks_titanic/")
from src.features import build_features
from models import Alice_LR
from models.Actors import *

In [2]:
# Load the memory_profiler IPython extension; it provides the %%memit cell
# magic used in the weight-initialization cell of this notebook.
%load_ext memory_profiler

In [3]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
1

1

# Definition of parameters

### Paths

In [4]:
# Everything is resolved relative to the directory the notebook runs from
# (the import cell chdir's into the project root before this point).
WORKING_DIR = os.getcwd()

# Directory (with trailing slash) and base name, without extension, of the log file.
LOG_FILENAME = "Alice"
LOG_PATH = f"{WORKING_DIR}/reports/log/"

### Logging

In [5]:
# Configure the root logger so that every record goes both to a log file
# (LOG_PATH/LOG_FILENAME.log) and to the notebook's stdout.

# logging.FileHandler opens the file immediately and raises FileNotFoundError
# when the parent directory is missing, so create it up front.
os.makedirs(LOG_PATH, exist_ok=True)

# os.path.join avoids the doubled separator the previous "{0}/{1}.log" format
# produced, since LOG_PATH already ends with "/".
fileHandler = logging.FileHandler(os.path.join(LOG_PATH, "{0}.log".format(LOG_FILENAME)))
streamHandler = logging.StreamHandler(sys.stdout)

# NOTE: basicConfig does nothing if the root logger already has handlers,
# e.g. when this cell is re-run in the same kernel.
logging.basicConfig(format="%(asctime)s  [%(levelname)-8.8s]  %(message)s",
                    datefmt='%m/%d/%Y %I:%M:%S %p',
                    level=logging.INFO,
                    handlers=[fileHandler, streamHandler]
                    )

### Training variables

In [6]:
# Hyper-parameters handed to the LogisticRegressionHE constructor further down.
EPOCH = 40                  # forwarded as `max_epoch`
LEARNING_RATE = 0.9         # forwarded as `learning_rate`
MOMENTUM_RATE = 0.6         # forwarded as `momentum_rate`
REGULARIZATION_RATE = 0.5   # forwarded as `reg_para`

# Bookkeeping / execution settings for the same constructor.
VERBOSE = 4       # forwarded as `verbose`; also used as the epoch step when the losses are sent to Bob
SAVE_WEIGHT = 4   # forwarded as `save_weight`
N_JOBS = 8        # forwarded as `n_jobs`

### Socket Variables

In [20]:
# Connection settings; all four are passed, in this order, to the Actor constructor.
HOST = "127.0.0.1"          # The server's hostname or IP address
PORT = 65431                # The port used by the server
PACKETS_SIZE = 16 * 1024    # 16384; presumably the socket buffer size in bytes -- confirm in models.Actors
SENTINEL = b"BREAK"         # presumably the end-of-message marker -- confirm in models.Actors

# Establishing the communication with Bob


In [21]:
# Create the Actor object used for every later exchange with Bob
# (reception / transmission / close), from the socket settings defined above.
# NOTE(review): `Actor` is presumably provided by `from models.Actors import *`
# -- confirm; its constructor semantics are not visible from this notebook.
BOB = Actor(HOST, PORT, PACKETS_SIZE, SENTINEL)

08/06/2020 03:45:34 PM  [INFO    ]  b'Connection accepted'


## Receiving the public context

In [9]:
# Receive Bob's serialized TenSEAL context and rebuild it locally.
# NOTE(review): per the section heading this is the *public* context; whether
# the secret key was stripped is decided on Bob's side -- confirm there.
logging.info('Waiting for serialized context')

# Raw serialized context, exactly as received from Bob.
b_context = BOB.reception()
logging.info('Context received. Starting deserialization.')
# Rebuild the context object; in the recorded run this step took a few seconds.
context = ts.context_from(b_context)
logging.info('Context successfully deserialized')

08/06/2020 02:31:03 PM  [INFO    ]  Waiting for serialized context
08/06/2020 02:31:04 PM  [INFO    ]  Context received. Starting deserialization.
08/06/2020 02:31:08 PM  [INFO    ]  Context successfully deserialized


## Receiving serialized data

In [10]:
# The first message is a pickled pair: (number of samples, number of features).
# NOTE(review): pickle.loads on bytes coming from the network can execute
# arbitrary code; this is only acceptable if Bob is a trusted peer.
len_X, nb_features = pk.loads(BOB.reception())

# Pre-allocate one slot per sample; the reception loop fills them by index.
b_X = [None] * len_X
b_Y = [None] * len_X

In [11]:
# Receive the samples one message at a time until Bob sends b'DONE'.
# Each message is a pickled sequence whose items are used as
# (sample index, features entry, label entry); the training log reports these
# entries as already serialized.
# NOTE(review): pk.loads on network data can execute arbitrary code; this is
# only acceptable if Bob is a trusted peer.
n_samples = len(b_X)
while True:
    b_data = BOB.reception()
    if b_data == b'DONE':
        break
    data = pk.loads(b_data)
    sample_idx = data[0]
    b_X[sample_idx] = data[1]
    b_Y[sample_idx] = data[2]

    # Coarse progress report, triggered when the sample sitting at a quartile
    # position arrives.
    if sample_idx == n_samples // 4:
        logging.info("25 % ...")
    elif sample_idx == n_samples // 2:
        logging.info("50 % ...")
    elif sample_idx == 3 * n_samples // 4:
        logging.info("75% ...")

# Every slot must have been filled, for the features AND for the labels.
# Identity tests (`is None`) are used rather than `None in ...`, which relies
# on equality comparison with each element.
missing_X = [idx for idx, e in enumerate(b_X) if e is None]
missing_Y = [idx for idx, e in enumerate(b_Y) if e is None]
assert not missing_X, 'Missing value: ' + str(missing_X)
assert not missing_Y, 'Missing label: ' + str(missing_Y)

08/06/2020 02:32:14 PM  [INFO    ]  25 % ...
08/06/2020 02:33:20 PM  [INFO    ]  50 % ...
08/06/2020 02:34:25 PM  [INFO    ]  75% ...


# Initialize the weights

The weights and the bias have to be encrypted with the context received from Bob.

In [12]:
%%memit
unencrypted_weight = np.random.normal(loc=0,
                                      scale=0.2, size =(nb_features))
logging.info(unencrypted_weight)
weight = ts.ckks_vector(context, unencrypted_weight.tolist())
unencrypted_bias = np.random.random((1))
bias = ts.ckks_vector(context, unencrypted_bias.tolist())

08/06/2020 02:35:31 PM  [INFO    ]  [ 0.14119233  0.13322082  0.10677418  0.14946615 -0.17499522  0.11817762
 -0.09094475  0.05006976  0.1208017  -0.05478917  0.06556012 -0.05331097
 -0.04307331  0.12412614 -0.42812706  0.12723906  0.00043748 -0.06539209
 -0.20330532 -0.29728731  0.10161476 -0.3024797   0.07153177  0.26603781
 -0.17171421 -0.03019694  0.00290262  0.03083542  0.05290126 -0.01780719
 -0.2148345  -0.23494342]
08/06/2020 02:35:31 PM  [INFO    ]  [-0.24488505 -0.40020771  0.26734085  0.06336104 -0.01561979 -0.19677161
  0.29498591 -0.00980197  0.04640455 -0.09566694 -0.12725217  0.3803441
 -0.31543093 -0.10960499 -0.05537333 -0.30819449  0.00613294  0.068314
  0.13851433  0.15245565 -0.16241355  0.28786917 -0.14767324 -0.39154034
 -0.30012324 -0.60976281  0.01604236 -0.12119899  0.04235395  0.12985458
  0.03750433  0.20356318]
peak memory: 2835.16 MiB, increment: 0.98 MiB


# Training the encrypted model

### Initialization of the model...

In [13]:
# Build the encrypted logistic regression from the encrypted initial
# parameters, the link to Bob and the hyper-parameters defined earlier.
# NOTE(review): `encrypted_LR` is not among the explicit imports of this
# notebook (only `Alice_LR` is imported from `models`); presumably it is
# brought in by `from models.Actors import *` -- verify.
logging.info("Model initialization")
model = encrypted_LR.LogisticRegressionHE(
    init_weight=weight,
    init_bias=bias,
    bob=BOB,
    context=context,
    verbose=VERBOSE,
    save_weight=SAVE_WEIGHT,
    max_epoch=EPOCH,
    learning_rate=LEARNING_RATE,
    momentum_rate=MOMENTUM_RATE,
    reg_para=REGULARIZATION_RATE,
    n_jobs=N_JOBS,
)

08/06/2020 02:35:31 PM  [INFO    ]  Model initialization


### Warn Bob that Alice is starting the training
Alice needs Bob to be ready to refresh the weights during the training.

In [14]:
# Tell Bob that training is about to start.
# The recorded output (13) equals the length of the message, so transmission
# apparently returns the number of bytes sent -- confirm in models.Actors.
BOB.transmission(b'start_fitting')

13

In [15]:
# Train on the data received from Bob and log the wall-clock duration.
logging.info("Training starting")
timer = time.time()
model.fit(b_X, b_Y)
# Elapsed time is rounded to a whole number of seconds (still printed as a float, e.g. "12.0").
logging.info(f"Training done. {round(time.time() - timer, 0)} seconds")

08/06/2020 02:36:20 PM  [INFO    ]  Training starting
08/06/2020 02:36:20 PM  [INFO    ]  Data already serialized
08/06/2020 02:36:20 PM  [INFO    ]  Deserialization of the label for the future computations of the loss
08/06/2020 02:36:26 PM  [INFO    ]  Initialization of 8 workers
08/06/2020 02:36:34 PM  [INFO    ]  Initialization done for process Process-3. Len of data : 69
08/06/2020 02:36:34 PM  [INFO    ]  Initialization done for process Process-4. Len of data : 69
08/06/2020 02:36:35 PM  [INFO    ]  Initialization done for process Process-7. Len of data : 68
08/06/2020 02:36:35 PM  [INFO    ]  Initialization done for process Process-5. Len of data : 68
08/06/2020 02:36:35 PM  [INFO    ]  Initialization done for process Process-6. Len of data : 68
08/06/2020 02:36:35 PM  [INFO    ]  Initialization done for process Process-8. Len of data : 68
08/06/2020 02:36:36 PM  [INFO    ]  Initialization done for process Process-9. Len of data : 68
08/06/2020 02:36:36 PM  [INFO    ]  Initializ

Warn Bob that Alice has finished the training

In [16]:
# Tell Bob that training is over, so that he can stop the refresh service
# he was asked to start before the fit.
# The recorded output (12) equals the length of the message.
BOB.transmission(b'STOP_REFRESH')

12

In [17]:
# Send the recorded losses to Bob as one pickled pair (epoch indices, serialized losses).
# Each entry of model.loss_list is serialized with its own .serialize() method.
ser_loss = [loss.serialize() for loss in model.loss_list]
# The k-th recorded loss is labelled with epoch k * VERBOSE.
# NOTE(review): this assumes one loss is recorded every VERBOSE epochs,
# starting at epoch 0 -- confirm against the model's fit loop.
ser_epoch = [position * VERBOSE for position, _ in enumerate(model.loss_list)]
losses_payload = pk.dumps((ser_epoch, ser_loss))
BOB.transmission(losses_payload)

2601739

In [22]:
# Prediction loop: answer each request from Bob until he sends b'STOP_PREDICT'.
# A request is a pickled pair (key, serialized CKKS vector); the reply is a
# pickled pair (same key, serialized prediction).
# NOTE(review): pk.loads on network data can execute arbitrary code; this is
# only acceptable if Bob is a trusted peer.
while True:
    b_data = BOB.reception()
    if b_data == b'STOP_PREDICT':
        break
    key, vector = pk.loads(b_data)
    # Rebuild the encrypted input under the model's own context before predicting.
    encrypted_input = ts.ckks_vector_from(model.context, vector)
    prediction = model.predict(encrypted_input)
    reply = pk.dumps((key, prediction.serialize()))
    BOB.transmission(reply)

In [23]:
# Done talking to Bob: close the Actor (presumably releases the underlying socket -- confirm in models.Actors).
BOB.close()