In [1]:
import pandas as pd
import numpy as np
import glob
from hashlib import sha256

from Pyfhel import Pyfhel, PyCtxt

In [3]:
def init_scheme():
    ckks_settings = glob.glob('ckks-*')
    if len(ckks_settings) == 5:
        HE = Pyfhel()
        HE.load_context('ckks-context')
        HE.load_public_key('ckks-pubkey')
        HE.load_secret_key('ckks-seckey')
        HE.load_relin_key('ckks-relinkey')
        HE.load_rotate_key('ckks-rotate-key')
    else:
        HE = Pyfhel()
        HE.contextGen(scheme="ckks", n=2**14, scale=2**30, qi_sizes=[60, 30, 30, 30, 60])
        HE.keyGen()
        HE.relinKeyGen()
        HE.rotateKeyGen()

        HE.save_context('ckks-context')
        HE.save_public_key('ckks-pubkey')
        HE.save_secret_key('ckks-seckey')
        HE.save_relin_key('ckks-relinkey')
        HE.save_rotate_key('ckks-rotate-key')
    
    return HE

In [4]:
HE = init_scheme()

In [5]:
data = pd.read_csv('data.csv')
data

Unnamed: 0,Bankrupt,ROA(C) before interest and depreciation before interest,ROA(A) before interest and % after tax,ROA(B) before interest and depreciation after tax,Operating Gross Margin,Realized Sales Gross Margin,Operating Profit Rate,Pre-tax net Interest Rate,After-tax net Interest Rate,Non-industry income and expenditure/revenue,...,Net Income to Total Assets,Total assets to GNP price,No-credit Interval,Gross Profit to Sales,Net Income to Stockholder's Equity,Liability to Equity,Degree of Financial Leverage (DFL),Interest Coverage Ratio (Interest expense to EBIT),Net Income Flag,Equity to Liability
0,1,0.370594,0.424389,0.405750,0.601457,0.601457,0.998969,0.796887,0.808809,0.302646,...,0.716845,0.009219,0.622879,0.601453,0.827890,0.290202,0.026601,0.564050,1,0.016469
1,1,0.464291,0.538214,0.516730,0.610235,0.610235,0.998946,0.797380,0.809301,0.303556,...,0.795297,0.008323,0.623652,0.610237,0.839969,0.283846,0.264577,0.570175,1,0.020794
2,1,0.426071,0.499019,0.472295,0.601450,0.601364,0.998857,0.796403,0.808388,0.302035,...,0.774670,0.040003,0.623841,0.601449,0.836774,0.290189,0.026555,0.563706,1,0.016474
3,1,0.399844,0.451265,0.457733,0.583541,0.583541,0.998700,0.796967,0.808966,0.303350,...,0.739555,0.003252,0.622929,0.583538,0.834697,0.281721,0.026697,0.564663,1,0.023982
4,1,0.465022,0.538432,0.522298,0.598783,0.598783,0.998973,0.797366,0.809304,0.303475,...,0.795016,0.003878,0.623521,0.598782,0.839973,0.278514,0.024752,0.575617,1,0.035490
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6814,0,0.493687,0.539468,0.543230,0.604455,0.604462,0.998992,0.797409,0.809331,0.303510,...,0.799927,0.000466,0.623620,0.604455,0.840359,0.279606,0.027064,0.566193,1,0.029890
6815,0,0.475162,0.538269,0.524172,0.598308,0.598308,0.998992,0.797414,0.809327,0.303520,...,0.799748,0.001959,0.623931,0.598306,0.840306,0.278132,0.027009,0.566018,1,0.038284
6816,0,0.472725,0.533744,0.520638,0.610444,0.610213,0.998984,0.797401,0.809317,0.303512,...,0.797778,0.002840,0.624156,0.610441,0.840138,0.275789,0.026791,0.565158,1,0.097649
6817,0,0.506264,0.559911,0.554045,0.607850,0.607850,0.999074,0.797500,0.809399,0.303498,...,0.811808,0.002837,0.623957,0.607846,0.841084,0.277547,0.026822,0.565302,1,0.044009


In [6]:
# Construction of encrypted dataset

enc_data = {}

for col_name in data.columns:
    hashed_col_name = sha256(col_name.encode()).hexdigest()
    col_data = data[col_name].to_numpy(copy=True)
    data_ctxt = HE.encrypt(col_data)
    enc_data[hashed_col_name] = (data_ctxt, len(col_data))

In [65]:
def cipher_sum(ctxt: PyCtxt, size: int):
    if size <= 1:
        return ctxt

    fold = size // 2
    if size % 2 == 0:
        return cipher_sum(ctxt + (ctxt << fold), fold)

    hold = ctxt.copy()
    return (hold << (size - 1)) + cipher_sum(ctxt + (ctxt << fold), fold)

def cipher_average(ctxt: PyCtxt, size: int):
    """
    returns a ciphertext, which the first element of the decrypted message is the average
    """
    avg_cipher = cipher_sum(ctxt, size) / size
    return avg_cipher

def cipher_inner_product(ctxt1: PyCtxt, ctxt2: PyCtxt, size: int):
    ctxt_mul = ~(ctxt1 * ctxt2)
    return cipher_sum(ctxt_mul, size)

def cipher_covariance(ctxt1: PyCtxt, ctxt2: PyCtxt, size: int):
    x_mean = cipher_average(ctxt1, size)
    y_mean = cipher_average(ctxt2, size)
    print(x_mean ** (0.5))
    print(x_mean.decrypt())
    print(y_mean.decrypt())

    return cipher_inner_product(ctxt1 - x_mean, ctxt2 - y_mean, size) / (size - 1)

In [80]:
def cipher_new_sum(ctxt: PyCtxt, size: int):
    d = ctxt.decrypt().round()
    print(d[:d.size])
    if size <= 1:
        return ctxt

    fold = size // 2
    if size % 2 == 0:
        return cipher_new_sum(ctxt + (ctxt << fold), fold)

    hold = ctxt.copy()
    return (hold << (size - 1)) + cipher_new_sum(ctxt + (ctxt << fold), fold)

cipher_new_sum(HE.encrypt(list(range(1, 6))), 6).decrypt().round()

[ 1.  2.  3. ... -0. -0. -0.]
[5. 7. 3. ... 1. 2. 3.]
[12. 10.  7. ...  3.  5.  8.]


array([15., 14., 12., ...,  6., 10., 15.])

In [90]:
l = HE.encrypt(list(range(10)))
print(l.decrypt().round()[:10])

l = l << 1
print(l.decrypt().round()[:10])
l = l >> 1
print(l.decrypt().round()[:10])
l = l << 1
print(l.decrypt().round()[:10])
l = l << 1
print(l.decrypt().round()[:10])
l = l << 1
print(l.decrypt().round()[:10])

[0. 1. 2. 3. 4. 5. 6. 7. 8. 9.]
[1. 2. 3. 4. 5. 6. 7. 8. 9. 0.]
[0. 1. 2. 3. 4. 5. 6. 7. 8. 9.]
[1. 2. 3. 4. 5. 6. 7. 8. 9. 0.]
[ 2.  3.  4.  5.  6.  7.  8.  9.  0. -0.]
[ 3.  4.  5.  6.  7.  8.  9.  0. -0. -0.]


In [67]:
# Test
numbers = list(range(1, 50))
avg = sum(numbers) / len(numbers)
print(sum(numbers), np.mean(numbers), np.var(numbers), np.cov(numbers, numbers)[0,1])
# cipher_average(HE.encrypt(np.array(numbers)), len(numbers)).decrypt()[0]

ctxt = HE.encrypt(np.array(numbers))

# cipher_inner_product(HE.encrypt(np.array(numbers)), HE.encrypt(np.array(numbers)), len(numbers))
cov = cipher_sum(ctxt, len(numbers))
cov.decrypt()

1225 25.0 200.0 204.16666666666666


array([1225.00058891, 1223.9987941 , 1222.00038908, ..., 1081.00068329,
       1128.0003969 , 1175.99994249])

In [39]:
np.cov(numbers, numbers)[0, 1], cov.decrypt()

(824.9999999999999,
 array([2123.832015  , 2099.56907757, 2076.29604944, ..., 1914.24088552,
        1981.22082799, 2051.06126423]))

In [9]:
def query(col_name: str, f: str | None = None):
    print(col_name)
    hashed_col_name = sha256(col_name.encode()).hexdigest()
    ctxt, size = enc_data[hashed_col_name]
    a = cipher_average(ctxt, size).decrypt()[0]
    return a

ogm_dec = query(" Operating Gross Margin")
print('decrypted', ogm_dec)
data[" Operating Gross Margin"].mean()

 Operating Gross Margin
decrypted 0.6079470865498188


0.607948038371902

In [10]:
ctxt, size = enc_data[sha256("Bankrupt".encode()).hexdigest()]

In [13]:
import sys
sys.getsizeof(ctxt)

1048705