In [6]:
pip install -r ../../requirements.txt


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


NOTE: Currently the circuit is too large. The practical approach is to request the covariance and the standard deviations and compute the correlation on the verifier side instead. The other circuits in this example run well.

In [7]:
import ezkl
import torch
from torch import nn
import json
import os
import time
import scipy
import numpy as np
import matplotlib.pyplot as plt
import statistics
import math

In [8]:
# Execute the shared helper script in this notebook's own namespace (-i).
# NOTE(review): create_dummy, get_data_commitment_maps and the verifier_* /
# prover_* helpers used below are not defined in this notebook; presumably they
# come from core.py — confirm.
%run -i ../../zkstats/core.py

In [9]:
# init path
# Two working directories are used: 'shared' and 'prover'. Each path is built
# from real components so os.path.join actually does the joining (a
# single-argument join returns its argument unchanged).
shared_dir = 'shared'
prover_dir = 'prover'
os.makedirs(shared_dir, exist_ok=True)
os.makedirs(prover_dir, exist_ok=True)

# Model definitions and their compiled counterparts.
verifier_model_path = os.path.join(shared_dir, 'verifier.onnx')
prover_model_path = os.path.join(prover_dir, 'prover.onnx')
verifier_compiled_model_path = os.path.join(shared_dir, 'verifier.compiled')
prover_compiled_model_path = os.path.join(prover_dir, 'prover.compiled')

# Keys, proof, settings and SRS.
pk_path = os.path.join(shared_dir, 'test.pk')
vk_path = os.path.join(shared_dir, 'test.vk')
proof_path = os.path.join(shared_dir, 'test.pf')
settings_path = os.path.join(shared_dir, 'settings.json')
srs_path = os.path.join(shared_dir, 'kzg.srs')
witness_path = os.path.join(prover_dir, 'witness.json')

# this is private to prover since it contains actual data
sel_data_path = os.path.join(prover_dir, 'sel_data.json')
# this is just dummy random value
sel_dummy_data_path = os.path.join(shared_dir, 'sel_dummy_data.json')

=======================  ZK-STATS FLOW =======================

This example is not strictly necessary: a data consumer can simply request cov(x, y), std(x), and std(y) and then compute the correlation themselves as $\mathrm{corr}(x, y) = \mathrm{cov}(x, y) / (\mathrm{std}(x)\,\mathrm{std}(y))$. Here we show that the code is composable enough to do it all at once.

In [10]:
data_path = 'data.json'
dummy_data_path = os.path.join('shared', 'dummy_data.json')


def _load_json(path):
    """Read and parse a JSON file, closing the handle before returning."""
    with open(path, "r") as handle:
        return json.load(handle)


# Real data: columns 'x' and 'y', each reshaped to (1, n, 1).
# NOTE(review): x is built as float64 while y and both dummy columns use torch's
# default dtype, which is why the printed x mean shows dtype=torch.float64 and
# the y mean does not. Left unchanged because these dtypes flow into the model
# parameters below — confirm whether the mix is intended.
data = _load_json(data_path)
x_vals = data['x']
y_vals = data['y']
x_vals_tensor = torch.reshape(torch.tensor(x_vals, dtype = torch.float64),(1,-1, 1))
y_vals_tensor = torch.reshape(torch.tensor(y_vals),(1,-1, 1))

# Dummy data: written by create_dummy, then loaded the same way.
create_dummy(data_path, dummy_data_path)
dummy_data = _load_json(dummy_data_path)
dummy_x_vals = dummy_data['x']
dummy_y_vals = dummy_data['y']
dummy_x_vals_tensor = torch.reshape(torch.tensor(dummy_x_vals),(1,-1, 1))
dummy_y_vals_tensor = torch.reshape(torch.tensor(dummy_y_vals),(1,-1, 1))

# Reference statistics of the real data (sample statistics: correction = 1).
real_corr = torch.tensor(statistics.correlation(x_vals, y_vals))
real_cov = torch.tensor(statistics.covariance(x_vals, y_vals))
x_mean = torch.mean(x_vals_tensor)
y_mean = torch.mean(y_vals_tensor)
x_std = torch.sqrt(torch.var(x_vals_tensor, correction = 1))
y_std = torch.sqrt(torch.var(y_vals_tensor, correction = 1))

print("corr: ",real_corr )
# Sanity check: cov / (std_x * std_y) should reproduce the correlation above.
print("check: ", real_cov/(x_std*y_std))
print("x mean: ", x_mean)
print("y mean: ", y_mean)

# The same statistics for the dummy data.
dummy_corr = torch.tensor(statistics.correlation(dummy_x_vals, dummy_y_vals))
dummy_cov = torch.tensor(statistics.covariance(dummy_x_vals, dummy_y_vals))
dummy_x_mean = torch.mean(dummy_x_vals_tensor)
dummy_y_mean = torch.mean(dummy_y_vals_tensor)
dummy_x_std = torch.sqrt(torch.var(dummy_x_vals_tensor, correction = 1))
dummy_y_std = torch.sqrt(torch.var(dummy_y_vals_tensor, correction = 1))
print("dummy corr: ",dummy_corr )
print("dummy x mean: ", dummy_x_mean)
print("dummy y mean: ", dummy_y_mean)

corr:  tensor(0.5182)
check:  tensor(0.5182, dtype=torch.float64)
x mean:  tensor(2.5000, dtype=torch.float64)
y mean:  tensor(21.6667)
dummy corr:  tensor(0.2342)
dummy x mean:  tensor(15.5000)
dummy y mean:  tensor(13.2500)


In [11]:
# Columns of the data file that take part in the computation.
selected_columns = ['x', 'y']
# Scale list; passed to the commitment helper here and to prover_gen_settings later.
scales = [3]
commitment_maps = get_data_commitment_maps(data_path, scales)

In [12]:
def covariance(X, Y, cov, x_mean, y_mean):
    """Check claimed means and a claimed sample covariance against X and Y.

    Each claim is accepted when it matches the value recomputed from the data
    within a 1% relative tolerance. Tolerances are taken in absolute value so
    that negative (and zero) claimed values can be satisfied.

    Args:
        X, Y: data tensors; the element count is read from dimension 1
            (assumes shape (1, n, 1) as built in this notebook — TODO confirm
            for other callers).
        cov: claimed sample covariance (n - 1 denominator).
        x_mean, y_mean: claimed means of X and Y.

    Returns:
        Tuple (is_valid, cov): a boolean tensor that is True only when both
        mean checks and the covariance check pass, and `cov` unchanged.
    """
    n = X.size()[1]
    x_mean_cons = torch.abs(torch.sum(X)-n*(x_mean))<=torch.abs(0.01*n*(x_mean))
    y_mean_cons = torch.abs(torch.sum(Y)-Y.size()[1]*(y_mean))<=torch.abs(0.01*Y.size()[1]*(y_mean))
    # The tolerance must be non-negative: without abs a negative cov makes the
    # right-hand side negative and the comparison can never hold.
    cov_cons = torch.abs(torch.sum((X-x_mean)*(Y-y_mean))-(n-1)*(cov))<=torch.abs(0.01*(n-1)*(cov))
    return (torch.logical_and(torch.logical_and(x_mean_cons,y_mean_cons), cov_cons), cov)

In [13]:
def stdev(X, x_std, x_mean):
    """Check a claimed mean and sample standard deviation against X.

    The mean is accepted within 1% of n * x_mean; the standard deviation is
    accepted when the sum of squared deviations is within 2% of
    x_std**2 * (n - 1).

    Args:
        X: data tensor; the element count n is read from dimension 1.
        x_std: claimed sample standard deviation.
        x_mean: claimed mean.

    Returns:
        Tuple (is_valid, x_std): a boolean tensor that is True only when both
        checks pass, and `x_std` unchanged.
    """
    n = X.size()[1]
    mean_ok = torch.abs(torch.sum(X) - n*(x_mean)) <= torch.abs(0.01*n*x_mean)
    centered = X - x_mean
    sq_dev_gap = torch.abs(torch.sum(centered*centered) - x_std*x_std*(n-1))
    sq_dev_tol = torch.abs(0.02*x_std*x_std*(n-1))
    return (torch.logical_and(sq_dev_gap <= sq_dev_tol, mean_ok), x_std)

In [14]:
# Verifier / data consumer side.
# The model is parameterised with the statistics of the dummy data set.
class verifier_model(nn.Module):
    """Checks a claimed correlation together with the covariance, means and
    standard deviations it is derived from."""

    def __init__(self):
        super().__init__()
        # Registered in this fixed order as frozen (requires_grad=False) parameters.
        claimed = (
            ("corr", dummy_corr),
            ("cov", dummy_cov),
            ("x_mean", dummy_x_mean),
            ("y_mean", dummy_y_mean),
            ("x_std", dummy_x_std),
            ("y_std", dummy_y_std),
        )
        for attr_name, value in claimed:
            setattr(self, attr_name, nn.Parameter(data = value, requires_grad = False))

    def forward(self, *x):
        """Return (all_checks_pass, corr) for the two input columns."""
        X, Y = x
        # TODO: X and Y are not yet forced to have the same length.
        cov_ok, cov = covariance(X, Y, self.cov, self.x_mean, self.y_mean)
        x_std_ok, x_std = stdev(X, self.x_std, self.x_mean)
        y_std_ok, y_std = stdev(Y, self.y_std, self.y_mean)
        # Claimed corr must satisfy cov ~= corr * std_x * std_y within 1% of cov.
        # NOTE(review): the tolerance is 0.01*cov without abs, so a negative cov
        # can never pass. prover_model contains the same expression; the two
        # must be changed together because both models must stay identical.
        corr_ok = torch.abs(cov - self.corr*x_std*y_std) <= 0.01*cov
        cov_and_x = torch.logical_and(cov_ok, x_std_ok)
        y_and_corr = torch.logical_and(y_std_ok, corr_ok)
        return (torch.logical_and(cov_and_x, y_and_corr), self.corr)

verifier_define_calculation(
    dummy_data_path,
    selected_columns,
    sel_dummy_data_path,
    verifier_model,
    verifier_model_path,
)

  return fn(g, to_cast_func(g, input, False), to_cast_func(g, other, False))


In [15]:
# Prover/ data owner side
# real_corr is already a tensor: copy it with clone().detach(), since wrapping
# it again in torch.tensor() raises a UserWarning.
theory_output = real_corr.clone().detach()
print("theory output: ", theory_output)
class prover_model(nn.Module):
    """Checks the claimed correlation of the real data together with the
    covariance, means and standard deviations it is derived from."""

    def __init__(self):
        super().__init__()
        # Statistics of the real data, stored as frozen parameters.
        self.corr = nn.Parameter(data = real_corr, requires_grad = False)
        self.cov = nn.Parameter(data = real_cov, requires_grad = False)
        self.x_mean = nn.Parameter(data = x_mean, requires_grad = False)
        self.y_mean = nn.Parameter(data = y_mean, requires_grad = False)
        self.x_std = nn.Parameter(data = x_std, requires_grad = False)
        self.y_std = nn.Parameter(data = y_std, requires_grad = False)

    def forward(self, *x):
        """Return (all_checks_pass, corr) for the two input columns."""
        X, Y = x
        #  need to enforce same length, not yet
        bool1, cov = covariance(X,Y, self.cov, self.x_mean, self.y_mean)
        bool2, x_std = stdev(X, self.x_std, self.x_mean)
        bool3, y_std = stdev(Y, self.y_std, self.y_mean)
        # Claimed corr must satisfy cov ~= corr * std_x * std_y within 1% of cov.
        # NOTE(review): the tolerance is 0.01*cov without abs, so a negative cov
        # can never pass. verifier_model contains the same expression; the two
        # must be changed together because both models must stay identical.
        bool4 = torch.abs(cov - self.corr*x_std*y_std)<=0.01*cov
        return (torch.logical_and(torch.logical_and(bool1, bool2),torch.logical_and(bool3, bool4)), self.corr )

prover_gen_settings(data_path, selected_columns, sel_data_path, prover_model,prover_model_path, scales, "resources", settings_path)

theory output:  tensor(0.5182)
==== Generate & Calibrate Setting ====
scale:  [3]
setting:  {"run_args":{"tolerance":{"val":0.0,"scale":1.0},"input_scale":3,"param_scale":3,"scale_rebase_multiplier":10,"lookup_range":[-3086,86],"logrows":12,"num_inner_cols":2,"variables":[["batch_size",1]],"input_visibility":{"Hashed":{"hash_is_public":true,"outlets":[]}},"output_visibility":"Public","param_visibility":"Private"},"num_rows":2624,"total_assignments":123,"total_const_size":16,"model_instance_shapes":[[1],[1]],"model_output_scales":[0,3],"model_input_scales":[3,3],"module_sizes":{"kzg":[],"poseidon":[2624,[2]],"elgamal":[0,[0]]},"required_lookups":["Abs",{"GreaterThan":{"a":0.0}}],"check_mode":"UNSAFE","version":"7.0.0","num_blinding_factors":null}


  theory_output = torch.tensor(real_corr)


In [16]:
# All setup inputs are public, so verifier and prover could both run setup
# concurrently to obtain pk. It is written as a verifier call to stress that the
# verifier must compute its own vk to be sure of it.
verifier_setup(
    verifier_model_path,
    verifier_compiled_model_path,
    settings_path,
    vk_path,
    pk_path,
)

print("=======================================")
# Prover side: generate the proof.
print("Theory output: ", theory_output)
prover_gen_proof(
    prover_model_path,
    sel_data_path,
    witness_path,
    prover_compiled_model_path,
    settings_path,
    proof_path,
    pk_path,
)

spawning module 0
spawning module 2


==== setting up ezkl ====


spawning module 0
spawning module 2
spawning module 0
spawning module 2


Time setup: 0.5547151565551758 seconds
Theory output:  tensor(0.5182)
==== Generating Witness ====
witness boolean:  1.0
witness result 1 : 0.5
==== Generating Proof ====
proof:  {'instances': [[[7878865789954254792, 13676651756402193216, 14598220794878025105, 2053479320262803094], [957313277933440172, 8558673717091004388, 16115511877586365498, 2713079561337169730], [12436184717236109307, 3962172157175319849, 7381016538464732718, 1011752739694698287], [7959790035488735211, 12951774245394433045, 16242874202584236123, 560012691975822483]]], 'proof': '20a2b3a65cb1b1260f55370fd47b9fce30db34bdc3911eabd86b121a8a255ed309b774babc91e067135b098dcb301970846b1d168fead22f63a8e3c535384e5e2404f7f58f7912a1517b809b38b87ac904c4c977cfd631676a2a336a068ae31a2a00ab4035d72f61caead42552612019fcbeeead6e25b873edbea960ebf88bc21037919ee4407f6790996c3930866fe75d43c4cdd260bb24942d28390cb449e6073772411cf01a8c175681fa48e6eccf6a8d3b3710d56fb5eac6975e5d3beadc23aed390e10b5be1638896d037d79d734df9523a0786f2283923b649b28f5

In [17]:
# Verifier side: check the proof using the settings, the verifier's own vk and
# the data commitments for the selected columns.
verifier_verify(
    proof_path,
    settings_path,
    vk_path,
    selected_columns,
    commitment_maps,
)

0.5