In [1]:
pip install -r ../../requirements.txt

You should consider upgrading via the '/usr/local/bin/python3 -m pip install --upgrade pip' command.
Note: you may need to restart the kernel to use updated packages.


NOTE: Currently, the circuit for this example is very large. In practice, we would instead request the covariance and the standard deviations, and compute the correlation on the verifier side. The other circuits in this example run well.

In [2]:
import ezkl
import torch
from torch import nn
import json
import os
import time
import scipy
import numpy as np
import matplotlib.pyplot as plt
import statistics
import math

In [3]:
# Execute the shared zkstats helper script. The -i flag runs it in this
# notebook's namespace, so the names it defines become available to later cells.
%run -i ../../zkstats/core.py

In [4]:
# Working directories: 'shared/' holds artifacts both parties may see,
# 'prover/' holds artifacts that stay with the data owner.
os.makedirs(os.path.dirname('shared/'), exist_ok=True)
os.makedirs(os.path.dirname('prover/'), exist_ok=True)

# ONNX models and their compiled circuits.
verifier_model_path = 'shared/verifier.onnx'
prover_model_path = 'prover/prover.onnx'
verifier_compiled_model_path = 'shared/verifier.compiled'
prover_compiled_model_path = 'prover/prover.compiled'

# Keys, proof, settings and structured reference string.
pk_path = 'shared/test.pk'
vk_path = 'shared/test.vk'
proof_path = 'shared/test.pf'
settings_path = 'shared/settings.json'
srs_path = 'shared/kzg.srs'

# Witness produced on the prover side.
witness_path = 'prover/witness.json'

# Selected real data: private to the prover because it contains actual values.
sel_data_path = 'prover/sel_data.json'
# Selected dummy data: random placeholder values only, so it can be shared.
sel_dummy_data_path = 'shared/sel_dummy_data.json'

=======================  ZK-STATS FLOW =======================

This example is not strictly necessary: a data consumer can simply request cov(x, y), std(x), and std(y), and then compute the correlation themselves as corr(x, y) = cov(x, y) / (std(x) * std(y)). Here we show that the code is composable enough to do it all at once.

In [5]:
data_path = os.path.join('data.json')
dummy_data_path = os.path.join('shared/dummy_data.json')

# Load the real data. The context manager closes the file handle promptly
# instead of leaving it open until garbage collection.
with open(data_path, "r") as data_file:
    data = json.load(data_file)
x_vals = data['x']
y_vals = data['y']
# Reshape each series to (1, n, 1). x is cast to float64 explicitly, while y
# keeps whatever dtype torch infers from the JSON values.
# NOTE(review): presumably x holds integers and needs a float dtype for
# torch.mean / torch.var -- confirm before unifying the two dtypes.
x_vals_tensor = torch.reshape(torch.tensor(x_vals, dtype = torch.float64),(1,-1, 1))
y_vals_tensor = torch.reshape(torch.tensor(y_vals),(1,-1, 1))


# create_dummy is not defined in this notebook (presumably loaded from
# zkstats/core.py); it is expected to write dummy_data_path, which is read back
# below -- confirm against its definition.
create_dummy(data_path, dummy_data_path)
with open(dummy_data_path, "r") as dummy_file:
    dummy_data = json.load(dummy_file)
dummy_x_vals = dummy_data['x']
dummy_y_vals = dummy_data['y']
dummy_x_vals_tensor = torch.reshape(torch.tensor(dummy_x_vals),(1,-1, 1))
dummy_y_vals_tensor = torch.reshape(torch.tensor(dummy_y_vals),(1,-1, 1))

# Reference statistics of the real data. The standard deviations use the
# sample (n - 1) correction.
real_corr = torch.tensor(statistics.correlation(x_vals, y_vals))
real_cov = torch.tensor(statistics.covariance(x_vals, y_vals))
x_mean = torch.mean(x_vals_tensor)
y_mean = torch.mean(y_vals_tensor)
x_std = torch.sqrt(torch.var(x_vals_tensor, correction = 1))
y_std = torch.sqrt(torch.var(y_vals_tensor, correction = 1))

print("corr: ",real_corr )
# Cross-check: cov / (std_x * std_y) should reproduce the correlation above.
print("check: ", real_cov/(x_std*y_std))
print("x mean: ", x_mean)
print("y mean: ", y_mean)

# The same statistics computed on the dummy data.
dummy_corr = torch.tensor(statistics.correlation(dummy_x_vals, dummy_y_vals))
dummy_cov = torch.tensor(statistics.covariance(dummy_x_vals, dummy_y_vals))
dummy_x_mean = torch.mean(dummy_x_vals_tensor)
dummy_y_mean = torch.mean(dummy_y_vals_tensor)
dummy_x_std = torch.sqrt(torch.var(dummy_x_vals_tensor, correction = 1))
dummy_y_std = torch.sqrt(torch.var(dummy_y_vals_tensor, correction = 1))
print("dummy corr: ",dummy_corr )
print("dummy x mean: ", dummy_x_mean)
print("dummy y mean: ", dummy_y_mean)

corr:  tensor(0.7295)
check:  tensor(0.7295, dtype=torch.float64)
x mean:  tensor(24.5000, dtype=torch.float64)
y mean:  tensor(56.2012)
dummy corr:  tensor(0.1248)
dummy x mean:  tensor(5.6500)
dummy y mean:  tensor(5.5680)


In [6]:
    def covariance(X, Y, cov, x_mean, y_mean):
        """Check that claimed means and sample covariance are consistent with X and Y.

        The number of samples is read from dimension 1 of each input.

        Args:
            X, Y: data tensors.
            cov: claimed sample covariance of X and Y (n - 1 denominator).
            x_mean, y_mean: claimed means of X and Y.

        Returns:
            A tuple (ok, cov). ok is a boolean tensor that is True only when
            both means and the covariance are within 1% relative tolerance of
            the values implied by the data; cov is passed through unchanged.
        """
        n_x = X.size()[1]
        n_y = Y.size()[1]
        # sum(X) must equal n * mean within 1% of |n * mean|.
        x_mean_cons = torch.abs(torch.sum(X)-n_x*(x_mean))<=torch.abs(0.01*n_x*(x_mean))
        y_mean_cons = torch.abs(torch.sum(Y)-n_y*(y_mean))<=torch.abs(0.01*n_y*(y_mean))
        # The sum of cross deviations must equal (n - 1) * cov within 1%.
        # The tolerance is wrapped in abs so that a negative covariance can
        # pass: without it the right-hand side is negative and the comparison
        # is always False. `<=` matches the other tolerance checks.
        cov_cons = torch.abs(torch.sum((X-x_mean)*(Y-y_mean))-(n_x-1)*(cov))<=torch.abs(0.01*(n_x-1)*(cov))
        return (torch.logical_and(torch.logical_and(x_mean_cons,y_mean_cons), cov_cons), cov)

In [7]:
    def stdev(X, x_std, x_mean):
        """Check that a claimed mean and sample standard deviation fit X.

        The number of samples is read from dimension 1 of X.

        Returns:
            A tuple (ok, x_std). ok is a boolean tensor that is True only when
            the mean is within 1% and the implied variance within 2% of the
            values computed from X; x_std is passed through unchanged.
        """
        count = X.size()[1]

        # Mean check: sum(X) against count * mean, 1% relative tolerance.
        mean_gap = torch.abs(torch.sum(X)-count*(x_mean))
        mean_tol = torch.abs(0.01*count*x_mean)
        x_mean_cons = mean_gap<=mean_tol

        # Spread check: the sum of squared deviations against
        # std^2 * (count - 1), 2% relative tolerance.
        sq_dev_total = torch.sum((X-x_mean)*(X-x_mean))
        spread_gap = torch.abs(sq_dev_total-x_std*x_std*(count-1))
        spread_tol = torch.abs(0.02*x_std*x_std*(count-1))
        x_std_cons = spread_gap<=spread_tol

        return (torch.logical_and(x_std_cons,x_mean_cons),x_std)

In [8]:
# Verifier / data consumer side.
# Exact floating-point equality is impractical inside the circuit, so every
# statistic is accepted when it lies within a small relative tolerance.
class verifier_model(nn.Module):
    """Correlation check built by the verifier from dummy statistics.

    NOTE(review): forward presumably has to stay structurally identical to
    prover_model.forward so both compile to the same circuit -- confirm before
    changing one without the other.
    """

    def __init__(self):
        super().__init__()
        # Frozen placeholder parameters taken from the dummy data.
        self.corr = nn.Parameter(dummy_corr, requires_grad=False)
        self.cov = nn.Parameter(dummy_cov, requires_grad=False)
        self.x_mean = nn.Parameter(dummy_x_mean, requires_grad=False)
        self.y_mean = nn.Parameter(dummy_y_mean, requires_grad=False)
        self.x_std = nn.Parameter(dummy_x_std, requires_grad=False)
        self.y_std = nn.Parameter(dummy_y_std, requires_grad=False)

    def forward(self, X, Y):
        # TODO: X and Y are not yet forced to have the same length.
        cov_ok, cov = covariance(X, Y, self.cov, self.x_mean, self.y_mean)
        x_std_ok, x_std = stdev(X, self.x_std, self.x_mean)
        y_std_ok, y_std = stdev(Y, self.y_std, self.y_mean)
        # corr * std(x) * std(y) must reproduce cov within 1% of cov.
        # NOTE(review): the tolerance 0.01*cov is negative whenever cov < 0, so
        # this check cannot pass for a negative covariance.
        corr_ok = torch.abs(cov - self.corr*x_std*y_std)<=0.01*cov
        first_pair_ok = torch.logical_and(cov_ok, x_std_ok)
        second_pair_ok = torch.logical_and(y_std_ok, corr_ok)
        return (torch.logical_and(first_pair_ok, second_pair_ok), self.corr)

verifier_define_calculation(
    dummy_data_path,
    ['x', 'y'],
    sel_dummy_data_path,
    verifier_model,
    verifier_model_path,
)

  return fn(g, to_cast_func(g, input, False), to_cast_func(g, other, False))


In [12]:
# Prover/ data owner side
# real_corr is already a tensor; clone().detach() copies it without the
# UserWarning that torch.tensor(<tensor>) raises.
theory_output = real_corr.clone().detach()
print("theory output: ", theory_output)
class prover_model(nn.Module):
    """Correlation check built by the prover from the real statistics.

    NOTE(review): forward presumably has to stay structurally identical to
    verifier_model.forward so both compile to the same circuit -- confirm
    before changing one without the other.
    """
    def __init__(self):
        super(prover_model, self).__init__()
        # Frozen parameters holding the statistics of the real data.
        self.corr = nn.Parameter(data = real_corr, requires_grad = False)
        self.cov = nn.Parameter(data = real_cov, requires_grad = False)
        self.x_mean = nn.Parameter(data = x_mean, requires_grad = False)
        self.y_mean = nn.Parameter(data = y_mean, requires_grad = False)
        self.x_std = nn.Parameter(data = x_std, requires_grad = False)
        self.y_std = nn.Parameter(data = y_std, requires_grad = False)
    def forward(self,X,Y):
        #  need to enforce same length, not yet
        bool1, cov = covariance(X,Y, self.cov, self.x_mean, self.y_mean)
        bool2, x_std = stdev(X, self.x_std, self.x_mean)
        bool3, y_std = stdev(Y, self.y_std, self.y_mean)
        # corr * std(x) * std(y) must reproduce cov within 1% of cov.
        # NOTE(review): the tolerance 0.01*cov is negative whenever cov < 0, so
        # this check cannot pass for a negative covariance.
        bool4 = torch.abs(cov - self.corr*x_std*y_std)<=0.01*cov
        # First output: all checks passed; second output: the claimed correlation.
        return (torch.logical_and(torch.logical_and(bool1, bool2),torch.logical_and(bool3, bool4)), self.corr )

# [3] is reported as "scale" in the recorded output of this cell.
prover_gen_settings(data_path,['x', 'y'], sel_data_path, prover_model,prover_model_path, [3], "resources", settings_path)

theory output:  tensor(0.7295)
==== Generate & Calibrate Setting ====
scale:  [3]
setting:  {"run_args":{"tolerance":{"val":0.0,"scale":1.0},"input_scale":3,"param_scale":3,"scale_rebase_multiplier":10,"lookup_range":[-64240,14942],"logrows":17,"num_inner_cols":2,"variables":[["batch_size",1]],"input_visibility":{"Hashed":{"hash_is_public":true,"outlets":[]}},"output_visibility":"Public","param_visibility":"Private"},"num_rows":7872,"total_assignments":651,"total_const_size":16,"model_instance_shapes":[[1],[1]],"model_output_scales":[0,3],"model_input_scales":[3,3],"module_sizes":{"kzg":[],"poseidon":[7872,[2]],"elgamal":[0,[0]]},"required_lookups":["Abs",{"GreaterThan":{"a":0.0}}],"check_mode":"UNSAFE","version":"7.0.0","num_blinding_factors":null}


  theory_output = torch.tensor(real_corr)


In [13]:
# Setup: every input to this step is public, so verifier and prover could run
# it concurrently to obtain pk. It is written as a verifier call to stress that
# the verifier must derive its own vk rather than trust one handed over.
verifier_setup(
    verifier_model_path,
    verifier_compiled_model_path,
    settings_path,
    vk_path,
    pk_path,
)

print("=======================================")
# Proof generation on the prover side.
print("Theory output: ", theory_output)
prover_gen_proof(
    prover_model_path,
    sel_data_path,
    witness_path,
    prover_compiled_model_path,
    settings_path,
    proof_path,
    pk_path,
)

spawning module 0
spawning module 2


==== setting up ezkl ====


spawning module 0
spawning module 2


Time setup: 11.328320980072021 seconds
Theory output:  tensor(0.7295)
!@# compiled_model exists? True
!@# compiled_model exists? True
==== Generating Witness ====
witness boolean:  1.0
witness result 1 : 0.75
==== Generating Proof ====


spawning module 0
spawning module 2


proof:  {'instances': [[[4361588021930202802, 11452771843424695699, 17406203981540687110, 449645176536645986], [17475259421703607016, 4715985628661660007, 18118935885768207713, 749760773435915941], [12436184717236109307, 3962172157175319849, 7381016538464732718, 1011752739694698287], [14385415396251402209, 2429374486035521128, 12558163205804149944, 2583518171365219058]]], 'proof': '28a5f1df34661a88b2bbd09442323bcd93183407bcda9d7ab85c5572a82d073217710cf2a5f874a391e721199ad458be9ef3a06cee2d8ff1a20fafccf3ef4bf104301350ecc5772470ee90ebdfb873d689277133c97f9d3bf2eaf79998119946237315d006f9d3b64eee027853da53a7a41e8a7674878384385f2b933de2c678022228aa5f2d74db8e3f77fe51b9855b52762692b7b6bcd346aeb0038ccea3e915de17ef2da0bb88785cabb6ce69e00d09e85231cb6276b0c09c4238b8c0c0a720e871c01ea8449f1cdc6fc5d0654bb800b0f4cc71fb38f389752cba8a1a6e8f2e310814631629e4b932c1fbb7772e75b66687bcbb7c8cb65d9df9df0c4c2f290ffcbda8b44e703b0e6547925964d1151e08b0f243c4d55c00b92421ddf4ec4a23314aaa58737a3193c5e04ea765e87d3be1028

In [14]:
# Verifier side: check the proof using the shared settings and the
# verification key.
verifier_verify(
    proof_path,
    settings_path,
    vk_path,
)

num_inputs:  2
prf instances:  [[[4361588021930202802, 11452771843424695699, 17406203981540687110, 449645176536645986], [17475259421703607016, 4715985628661660007, 18118935885768207713, 749760773435915941], [12436184717236109307, 3962172157175319849, 7381016538464732718, 1011752739694698287], [14385415396251402209, 2429374486035521128, 12558163205804149944, 2583518171365219058]]]
proof boolean:  1.0
proof result 1 : 0.75
verified
