In [1]:
#Import necessary libraries

import numpy as np
import pandas as pd
import seal
from seal import *
import sourmash as smsh
import time
import matplotlib.pyplot as plt
import pickle

In [3]:
test_sketches = pickle.load(open('test_sketches.dump', 'rb'))
anchor_sketches = pickle.load(open('anchor_sketches.dump', 'rb'))

In [7]:
anchor_sketches

Unnamed: 0,0
4187,<sourmash.minhash.MinHash object at 0x7fda8ea9...
5538,<sourmash.minhash.MinHash object at 0x7fda8d8b...
141,<sourmash.minhash.MinHash object at 0x7fda8cb4...
9,<sourmash.minhash.MinHash object at 0x7fda8bdb...
5350,<sourmash.minhash.MinHash object at 0x7fda8b05...
27,<sourmash.minhash.MinHash object at 0x7fda8a28...
4438,<sourmash.minhash.MinHash object at 0x7fda8952...
3291,<sourmash.minhash.MinHash object at 0x7fda8881...
735,<sourmash.minhash.MinHash object at 0x7fda87a3...
960,<sourmash.minhash.MinHash object at 0x7fda86d2...


In [8]:
#DATA OWNER preprocessing
#Model owner sends data owner sketches of the anchor samples 
#Data owner computes vector of distances to each of the 12 anchors
#for each test sample.
#These vectors will be hidden by the encryption.
import time

start = time.time()
jacc_sim = np.zeros((1000,12))

i=0
for sketch in test_sketches[0]:
    j=0
    for anchor in anchor_sketches[0]:
        jacc_sim[i,j] = round(sketch.jaccard(anchor),4)
        j+=1
    i+=1
        
dist_data = np.zeros((1000,12))

for i in range(1000):
    for j in range(12):
        dist_data[i,j] = -np.log(2*jacc_sim[i,j])+np.log(1+jacc_sim[i,j])
        
end = time.time()
print(f'Time to Preprocess Data: {(end-start):.3f}s')

Time to Preprocess Data: 1.667s


In [9]:
#DATA OWNER preprocessing
#Batches 341 samples into a single plaintext (8192 slots / (12 + 12 empty))
#1000 samples are placed into 3 large vectors
#batch_data[2] has extra 0's at the end
batch_data = np.zeros((3,8192))
for i in range(3):
    for j in range(341):
        if 341*i+j < 1000:
            batch_data[i][24*j:24*j+12] = dist_data[341*i+j]

In [10]:
#DATA OWNER
#Set the parameters of the encryption context.

parms = EncryptionParameters(scheme_type.ckks)
poly_modulus_degree = 16384
parms.set_poly_modulus_degree(poly_modulus_degree)
parms.set_coeff_modulus(CoeffModulus.Create(poly_modulus_degree, [60, 40, 40, 40, 40, 40, 60]))
#320-bit coeff modulus Q. 
#From SEAL manual, security cutoffs for N=16384 are 300 bits for 192-bit security, 438 bits for 128-bit security.
scale = 2.0**40
context = SEALContext(parms)
#print_parameters(context)

print(CoeffModulus.MaxBitCount(poly_modulus_degree))

ckks_encoder = CKKSEncoder(context)
slot_count = ckks_encoder.slot_count()
print(f'Number of slots: {slot_count}')

keygen = KeyGenerator(context)
public_key = keygen.create_public_key()
secret_key = keygen.secret_key()
galois_keys = keygen.create_galois_keys()
relin_keys = keygen.create_relin_keys()

encryptor = Encryptor(context, public_key)
evaluator = Evaluator(context)

438
Number of slots: 8192


In [11]:
#DATA OWNER
#Encode and encrypt data owner's distance vector
pt_data = []
ct_data = []

for i in range(3):
    pt_data.append(ckks_encoder.encode(batch_data[i],scale))
    ct_data.append(encryptor.encrypt(pt_data[i]))

In [13]:
#DATA OWNER
#Save to file: ciphertext data and encryption context,
#including parameters, public key, galois keys, and relinearization keys

pickle.dump(ct_data, open('ct_data','wb'))
pickle.dump(encryptor, open('encryptor', 'wb'))
pickle.dump(evaluator, open('evaluator', 'wb'))
pickle.dump(galois_keys, open('galois_keys', 'wb'))
pickle.dump(relin_keys, open('relin_keys', 'wb'))

TypeError: cannot pickle 'seal.Encryptor' object