In [14]:
import json
import os
import time

import phe as paillier

from model import LinearModel

In [5]:
# Build the model wrapper imported from `model`; later cells read its
# coefficients, results and test split (X_test / y_test) from this object.
linearModel = LinearModel()

In [6]:
# Display the model coefficients that the encrypted evaluation multiplies with
# the inputs (presumably one weight per feature column — TODO confirm).
linearModel.getCoef()

array([ 0.15318276,  0.11040434,  0.16509236,  0.01206011, -0.00332236,
        0.02311403,  0.05003817, -0.00758351, -0.00437855, -0.01109837,
       -0.1167506 ,  0.00674328, -0.00243288,  0.10466852, -0.00075916,
       -0.00165628,  0.02821167,  0.04285777,  0.02512729, -0.00584689,
       -0.01062548])

In [13]:
# Keep the first element of getResults(); the plaintext timing cell calls its
# .predict(), so it is presumably the fitted estimator — TODO confirm.
regression = linearModel.getResults()[0]

In [66]:
# Show one sample (positional index 1) from the test split: its feature values
# followed by its label.
print(linearModel.X_test.iloc[1])
print(linearModel.y_test.iloc[1])

HighBP                   0.0
HighChol                 0.0
CholCheck                1.0
BMI                     20.0
Smoker                   1.0
Stroke                   0.0
HeartDiseaseorAttack     0.0
PhysActivity             1.0
Fruits                   0.0
Veggies                  1.0
HvyAlcoholConsump        0.0
AnyHealthcare            1.0
NoDocbcCost              0.0
GenHlth                  2.0
MentHlth                15.0
PhysHlth                 0.0
DiffWalk                 0.0
Sex                      0.0
Age                     10.0
Education                5.0
Income                   4.0
Name: 11085, dtype: float64
0.0


In [52]:
# Baseline: average wall-clock latency of a plaintext prediction over the first
# 100 rows of the test split.
print("For un-encrypted data:")

times = []
reps = 10
x_test_100 = linearModel.X_test.head(100)

# Run exactly `reps` timed repetitions. perf_counter() is a high-resolution
# clock intended for measuring short intervals such as this one.
for _ in range(reps):
    start_time = time.perf_counter()
    y_pred = regression.predict(x_test_100)
    end_time = time.perf_counter()
    times.append(end_time - start_time)

total_time = sum(times)
avg_time = total_time / len(times)

print("--- %s seconds ---" % (avg_time))
print("--- %s milisec ---" % (avg_time * 1000))

For un-encrypted data:
--- 0.0004352656277743253 seconds ---
--- 0.4352656277743253 milisec ---


In [54]:
def storeKeys():
    """Generate a new Paillier key pair and save it to ``lab_userkeys.json``.

    The file stores the public key's ``n`` and the private key's ``p`` and
    ``q`` as plain JSON values. The file is opened in write mode, so any
    previously stored key pair is replaced.
    """
    pub, priv = paillier.generate_paillier_keypair()
    key_material = {
        "public_key": {"n": pub.n},
        "private_key": {"p": priv.p, "q": priv.q},
    }
    with open("lab_userkeys.json", "w") as key_file:
        json.dump(key_material, key_file)


def getKeys():
    """Load the Paillier key pair stored in ``lab_userkeys.json``.

    Returns:
        tuple: ``(public_key, private_key)`` rebuilt from the stored ``n``,
        ``p`` and ``q`` values.
    """
    with open("lab_userkeys.json", "r") as key_file:
        stored = json.load(key_file)

    secret = stored["private_key"]
    public_key = paillier.PaillierPublicKey(n=int(stored["public_key"]["n"]))
    private_key = paillier.PaillierPrivateKey(public_key, secret["p"], secret["q"])
    return public_key, private_key


def serializeData(public_key, data):
    """Encrypt every value in ``data`` and pack the result into a JSON string.

    Args:
        public_key: key object providing ``encrypt(value)`` and an ``n``
            attribute.
        data: iterable of values to encrypt.

    Returns:
        str: JSON text with the key's ``n`` under ``"public_key"`` and, under
        ``"values"``, one ``[ciphertext-as-string, exponent]`` pair per input.
    """
    encrypted = list(map(public_key.encrypt, data))
    payload = {
        "public_key": {"n": public_key.n},
        "values": [(str(number.ciphertext()), number.exponent) for number in encrypted],
    }
    return json.dumps(payload)

In [33]:
# storeKeys() generates a brand-new key pair and overwrites lab_userkeys.json.
# NOTE(review): re-running this cell replaces the key, so result files produced
# under the previous key will no longer match `pub_key` in the decryption cell.
print("Printing keys and storing in json file.")
storeKeys()

Printing keys and storing in json file.


In [34]:
# Reload the key pair from disk; `pub_key` is used for encryption below and
# `priv_key` for decrypting the server results.
pub_key, priv_key = getKeys()
print("Key retrieved successfully!")
print("\tKey location: lab_userkeys.json\n")

# Last expression of the cell: displays the reprs of both key objects.
pub_key, priv_key

Key retrieved successfully!
	Key location: lab_userkeys.json



(<PaillierPublicKey 1130207542>,
 <PaillierPrivateKey for <PaillierPublicKey 1130207542>>)

In [55]:
# Encrypt the first `desired` test rows, writing one JSON file per row, and
# time the encryption + serialization step. The file write itself happens
# after the timer stops, so it is not part of the reported time.
x = linearModel.X_test
row_num = len(x)
desired = 100
print("Available test data points:", row_num)
print("Desired   test data points:", desired)

# Create the output directory up front so open(..., "w") does not fail when
# the directory does not exist yet.
os.makedirs("enc_data", exist_ok=True)

times = []

# Never index past the end of the test split if `desired` exceeds its size.
for i in range(min(desired, row_num)):
    data_list = x.iloc[i].tolist()
    start_time = time.time()
    serialized = serializeData(public_key=pub_key, data=data_list)
    end_time = time.time()
    times.append(end_time - start_time)

    filename = "enc_data/data_" + str(i) + ".json"
    with open(filename, "w") as file:
        # `serialized` is already a JSON string, so the file holds a
        # JSON-encoded string; getData() decodes twice to match.
        json.dump(serialized, file)


total_time = sum(times)
avg_time = total_time / len(times)

print("Time taken to encrypt test data and dump to file:")
print("--- %s seconds ---" % (avg_time))
print("--- %s milisec ---" % (avg_time * 1000))

Available test data points: 14139
Desired   test data points: 100
Time taken to encrypt test data and dump to file:
--- 8.273035025596618 seconds ---
--- 8273.035025596619 milisec ---


In [62]:
def getData(filename):
    """Read an encrypted-sample file and return its decoded payload.

    The file content is decoded twice: ``json.load`` yields a string, which is
    then parsed again with ``json.loads``.

    Args:
        filename: path of the JSON file to read.

    Returns:
        The object obtained from the second JSON decode.
    """
    with open(filename, "r") as handle:
        inner_json = json.load(handle)
    return json.loads(inner_json)


def computeData(filename):
    """Evaluate the linear model on one encrypted sample stored in ``filename``.

    The payload returned by ``getData`` must provide the key's ``n`` under
    ``"public_key"`` and a list of ``(ciphertext, exponent)`` pairs under
    ``"values"``.

    Args:
        filename: path of the encrypted sample file.

    Returns:
        tuple: ``(results, pubkey, time_needed)`` where ``results`` is the sum
        of each encrypted value multiplied by its model coefficient,
        ``pubkey`` is the public key rebuilt from the file, and
        ``time_needed`` is the wall-clock time in seconds spent on that
        weighted sum only.

    Raises:
        ValueError: if the stored values cannot be rebuilt into encrypted
            numbers, or the file holds fewer values than there are
            coefficients.
    """
    data = getData(filename)
    mycoef = linearModel.getCoef()
    pubkey = paillier.PaillierPublicKey(n=int(data["public_key"]["n"]))

    try:
        # Each stored pair is (ciphertext as decimal string, exponent). They
        # are two separate constructor arguments; the exponent must not be
        # passed to int() as a numeric base.
        enc_nums_rec = [
            paillier.EncryptedNumber(pubkey, int(cipher), int(exponent))
            for cipher, exponent in data["values"]
        ]
    except (KeyError, TypeError, ValueError) as exc:
        # Fail loudly with the offending file name instead of returning None,
        # which callers would then fail to unpack.
        raise ValueError("Error in: %s (malformed encrypted values)" % filename) from exc

    if len(enc_nums_rec) < len(mycoef):
        raise ValueError(
            "Error in: %s (%d encrypted values for %d coefficients)"
            % (filename, len(enc_nums_rec), len(mycoef))
        )

    start_time = time.time()
    # Keep the encrypted operand on the left and hand it a plain Python float.
    # zip() stops after the last coefficient, so surplus values are ignored.
    results = sum(
        enc_value * float(weight) for weight, enc_value in zip(mycoef, enc_nums_rec)
    )
    end_time = time.time()
    time_needed = end_time - start_time
    return results, pubkey, time_needed


def serverSerializeData(filename):
    """Run ``computeData`` on ``filename`` and serialize its encrypted result.

    Args:
        filename: path of the encrypted sample file to evaluate.

    Returns:
        tuple: ``(serialized, time_needed)`` where ``serialized`` is JSON text
        holding the key's ``n`` under ``"pubkey"`` and the result's
        ``[ciphertext-as-string, exponent]`` pair under ``"values"``, and
        ``time_needed`` is the timing reported by ``computeData``.
    """
    enc_result, key, elapsed = computeData(filename)
    payload = {
        "pubkey": {"n": key.n},
        "values": (str(enc_result.ciphertext()), enc_result.exponent),
    }
    return json.dumps(payload), elapsed

In [57]:
import os

# Count the regular files (sub-directories excluded) directly inside `enc_data`.
dir_path = "enc_data"
count = len(
    [
        entry
        for entry in os.listdir(dir_path)
        if os.path.isfile(os.path.join(dir_path, entry))
    ]
)
print('File count in `enc_data`:', count)

File count in `enc_data`: 100


In [None]:
# Server side: evaluate the model on each encrypted input file and store the
# serialized encrypted result under the same file name in `outputs/`.
os.makedirs("outputs", exist_ok=True)

times = []
# Start at index 0 so every input gets an output; the decryption cell reads
# outputs/data_0.json onwards.
for i in range(min(count, desired)):
    output_file = "outputs/data_" + str(i) + ".json"
    input_file = "enc_data/data_" + str(i) + ".json"

    output_data, time_needed = serverSerializeData(input_file)
    times.append(time_needed)
    with open(output_file, "w") as file:
        # `output_data` is already a JSON string; loadAnswer() decodes twice.
        json.dump(output_data, file)

print("Result generated successfully for all files!")

# Guard the average: with no input files there is nothing to divide by.
if times:
    total_time = sum(times)
    avg_time = total_time / len(times)

    print("Time taken to get result on test data:")
    print("--- %s seconds ---" % (avg_time))
    print("--- %s milisec ---" % (avg_time * 1000))
else:
    print("No input files were processed; nothing to time.")

In [None]:
# Count the regular files (sub-directories excluded) directly inside `outputs`.
dir_path = "outputs"
count = sum(
    1
    for entry in os.listdir(dir_path)
    if os.path.isfile(os.path.join(dir_path, entry))
)
print('File count in `outputs`:', count)

In [None]:
def resultSerializeData(public_key, data):
    """Encrypt the values in ``data`` and return them as a JSON string.

    Args:
        public_key: key object providing ``encrypt(value)`` and an ``n``
            attribute.
        data: iterable of values to encrypt.

    Returns:
        str: JSON text with the key's ``n`` under ``"public_key"`` and a list
        of ``[ciphertext-as-string, exponent]`` pairs under ``"values"``.
    """
    encrypted = [public_key.encrypt(item) for item in data]
    modulus = public_key.n
    pairs = []
    for number in encrypted:
        pairs.append((str(number.ciphertext()), number.exponent))
    return json.dumps({"public_key": {"n": modulus}, "values": pairs})


def loadAnswer(filename):
    """Read a server result file and return its decoded payload.

    The file content is decoded twice: ``json.load`` yields a string, which is
    parsed again with ``json.loads``.

    Args:
        filename: path of the result file to read.

    Returns:
        The object obtained from the second JSON decode.
    """
    with open(filename, "r") as handle:
        encoded = json.load(handle)
    return json.loads(encoded)

In [None]:
# Client side: decrypt each server result that was produced under our public
# key and time the decryption call itself.
times = []
results = []
skipped = []  # result files whose public key differs from `pub_key`
for i in range(min(count, desired)):
    output_file = "outputs/data_" + str(i) + ".json"

    answer_file = loadAnswer(output_file)
    answer_key = paillier.PaillierPublicKey(n=int(answer_file["pubkey"]["n"]))
    answer = paillier.EncryptedNumber(
        answer_key, int(answer_file["values"][0]), int(answer_file["values"][1])
    )
    if answer_key == pub_key:
        start_time = time.time()
        raw_output = priv_key.decrypt(answer)
        end_time = time.time()
        times.append(end_time - start_time)
        results.append(raw_output)
    else:
        # Record the mismatch instead of dropping the file silently.
        skipped.append(output_file)


# Only claim full success when no file was skipped.
if skipped:
    print(
        "Skipped %d file(s) encrypted under a different public key: %s"
        % (len(skipped), skipped)
    )
else:
    print("Result decrypted successfully for all files!")

# Guard the average: `times` is empty when every file was skipped.
if times:
    total_time = sum(times)
    avg_time = total_time / len(times)

    print("Time taken to get result on test data:")
    print("--- %s seconds ---" % (avg_time))
    print("--- %s milisec ---" % (avg_time * 1000))
else:
    print("No results were decrypted; nothing to time.")