In [1]:
import pandas as pd
import numpy as np

In [3]:
# Load the processed Cleveland file of the UCI Heart Disease dataset. The file is
# read without a header row, so the 14 column names are assigned explicitly.
# Most research treats the task as disease present vs. absent, so the target
# column is collapsed to binary: values above 0 become 1, everything else 0.

cleveland = pd.read_csv("processed.cleveland.data", header=None)
cleveland.columns = [
    "age", "sex", "cp", "trestbps", "chol", "fbs", "restecg",
    "thalach", "exang", "oldpeak", "slope", "ca", "thal", "target",
]
cleveland['target'] = cleveland['target'].gt(0).astype('int64')

In [None]:
# Display the loaded frame for a quick visual check
cleveland

In [None]:
# '?' entries are turned into NaN, every row containing a NaN is dropped,
# and all remaining columns are cast to float.

cleveland = (
    cleveland
    .replace('?', np.nan)
    .dropna()
    .astype(float)
)
cleveland

In [None]:
# Split the frame into a float feature matrix (every column except 'target')
# and the label vector.
# `columns=` is used instead of a positional axis: passing `axis` positionally to
# DataFrame.drop was deprecated in pandas 1.3 and raises TypeError from pandas 2.0.
X = np.array(cleveland.drop(columns=['target']), dtype=float)
y = np.array(cleveland['target'])

In [8]:
# Standardise the dataset column-wise: subtract each column's mean, then divide
# by the standard deviation of the centred column.
# `mean` and `std` remain defined after this cell.

mean = X.mean(axis=0)
centered = X - mean
std = centered.std(axis=0)
X = centered / std

In [9]:
import torch

# Convert features and labels to float32 tensors; the labels become a single column
X = torch.tensor(data=X, dtype=torch.float32)
y = torch.tensor(data=y, dtype=torch.float32).view(-1, 1)

In [10]:
# Deep neural network definition based on the work of Safial Islam Ayon et al.

import torch.nn as nn

class DNN(nn.Module):
    """Fully connected binary classifier.

    Layout: 13 inputs -> 14 -> 16 -> 16 -> 14 hidden units (ReLU after each
    hidden layer) -> 1 output passed through a sigmoid.
    """

    def __init__(self):
        super().__init__()
        # Each line registers a linear layer followed by its activation, so the
        # module registration order is layer1, act1, layer2, act2, ...
        self.layer1, self.act1 = nn.Linear(13, 14), nn.ReLU()
        self.layer2, self.act2 = nn.Linear(14, 16), nn.ReLU()
        self.layer3, self.act3 = nn.Linear(16, 16), nn.ReLU()
        self.layer4, self.act4 = nn.Linear(16, 14), nn.ReLU()
        self.output, self.sigmoid = nn.Linear(14, 1), nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid output of the network for the input batch ``x``."""
        hidden_stages = (
            (self.layer1, self.act1),
            (self.layer2, self.act2),
            (self.layer3, self.act3),
            (self.layer4, self.act4),
        )
        for linear, activation in hidden_stages:
            x = activation(linear(x))
        return self.sigmoid(self.output(x))

In [11]:
import copy
import torch.optim as optim
import tqdm

def model_train(model, X_train, y_train, X_val, y_val,
                n_epochs=500, batch_size=10, lr=0.0001):
    """Train ``model`` with Adam on binary cross-entropy and keep the best epoch.

    After every epoch the model is scored on ``(X_val, y_val)``. The weights of
    the epoch with the highest validation accuracy are loaded back into
    ``model`` before returning.

    Args:
        model: module whose output is a probability in [0, 1] (required by ``nn.BCELoss``).
        X_train, y_train: training inputs and targets, sliced along dim 0 into batches.
        X_val, y_val: inputs and targets used for the per-epoch accuracy.
        n_epochs: number of passes over the training data.
        batch_size: number of rows per optimisation step.
        lr: learning rate passed to Adam.

    Returns:
        The best validation accuracy observed, or ``-inf`` when no epoch
        produced an accuracy (for example ``n_epochs=0``).
    """
    loss_fn = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Start offsets of the mini-batches; the same order is used in every epoch.
    batch_start = torch.arange(0, len(X_train), batch_size)

    best_acc = -np.inf
    best_weights = None

    for epoch in range(n_epochs):
        model.train()

        with tqdm.tqdm(batch_start, unit="batch", mininterval=0, disable=False) as bar:
            bar.set_description(f"Epoch {epoch}")
            for start in bar:
                X_batch = X_train[start:start+batch_size]
                y_batch = y_train[start:start+batch_size]
                y_pred = model(X_batch)
                loss = loss_fn(y_pred, y_batch)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                acc = (y_pred.round() == y_batch).float().mean()
                bar.set_postfix(
                    loss=float(loss),
                    acc=float(acc)
                )

        # Validation needs no gradients, so skip building the autograd graph.
        model.eval()
        with torch.no_grad():
            y_pred = model(X_val)
        acc = float((y_pred.round() == y_val).float().mean())

        if acc > best_acc:
            best_acc = acc
            best_weights = copy.deepcopy(model.state_dict())

    # best_weights stays None when no epoch ran or every accuracy was NaN.
    if best_weights is not None:
        model.load_state_dict(best_weights)
    return best_acc

In [None]:
from sklearn.model_selection import StratifiedKFold

# 5-fold stratified cross-validation: a fresh DNN is trained for every fold and
# scored on that fold's held-out part.
# NOTE: the split is shuffled without a fixed random_state, so scores vary between runs.
kfold = StratifiedKFold(n_splits=5, shuffle=True)
cv_scores = []

for train, test in kfold.split(X, y):
    model = DNN()
    acc = model_train(model, X[train], y[train], X[test], y[test])
    print("Accuracy: %.2f" % acc)
    cv_scores.append(acc)

# After the loop `model` refers to the network trained on the last fold.
acc = np.mean(cv_scores)
# The product is parenthesised on purpose: `"..." % acc*100` formats first and
# then repeats the resulting string 100 times.
print("Model accuracy: %.2f%%" % (acc * 100))

In [None]:
!pip install ezkl
!pip install onnx

In [14]:
import os
import json
import ezkl

In [15]:
# EZKL related file paths, all relative to the current working directory

model_path = 'network.onnx'
compiled_model_path = 'network.ezkl'
pk_path = 'test.pk'
vk_path = 'test.vk'
settings_path = 'settings.json'
witness_path = 'witness.json'
data_path = 'input.json'
cal_data_path = 'cal_data.json'

In [16]:
# The first record of the normalised dataset is the sample input for the export
# and for the proof.
x = X[0].reshape(1, 13)

# `model` is the network left over from the last cross-validation fold.
model.eval()

torch.onnx.export(model,
                  x,
                  model_path,
                  export_params=True,
                  opset_version=10,
                  do_constant_folding=True,
                  input_names = ['input'],
                  output_names = ['output'],
                  dynamic_axes={'input' : {0 : 'batch_size'}, 'output' : {0 : 'batch_size'}}
                  )

# Proof input: the single sample, flattened. Files are written inside `with`
# blocks so the handles are flushed and closed.
data_array = ((x).detach().numpy()).reshape([-1]).tolist()
data = dict(input_data = [data_array])
with open(data_path, 'w') as data_file:
    json.dump(data, data_file)

# Calibration input: the whole dataset, flattened and wrapped in a list like the
# proof input. Previously `data` (the single sample) was written to
# cal_data_path and `cal_data` was never used.
cal_data = dict(input_data = [X.flatten().tolist()])
with open(cal_data_path, 'w') as cal_data_file:
    json.dump(cal_data, cal_data_file)

In [17]:
# Hashed visibility for the input: the raw inputs are not published; their Poseidon hash
# is what ends up among the proof's public instances
# Public visibility for the output: the model output is part of the public instances
# Fixed visibility for the parameters means that model weights are committed to and are used for all proofs

py_run_args = ezkl.PyRunArgs()
py_run_args.input_visibility = "hashed"
py_run_args.output_visibility = "public"
py_run_args.param_visibility = "fixed"

# NOTE(review): `!VAR=value` runs in a throwaway subshell, so the next line does not
# change the kernel's environment. Use `%env RUST_LOG=trace` if trace logging is wanted.
!RUST_LOG=trace
# Write the initial circuit settings for the exported model to settings_path
res = ezkl.gen_settings(model_path, settings_path, py_run_args=py_run_args)
assert res == True

# Calibrate the settings against the data in cal_data_path, with target "resources".
# NOTE(review): unlike gen_settings, this result is not asserted — confirm what is returned on failure.
res = ezkl.calibrate_settings(cal_data_path, model_path, settings_path, "resources")

In [18]:
# Compile the ONNX model with the generated settings into compiled_model_path
res = ezkl.compile_circuit(
        model_path,
        compiled_model_path,
        settings_path,
    )

assert res == True

In [19]:
# Retrieve the Structured Reference String (SRS) for the circuit described by settings_path
# NOTE(review): the result is stored but not checked — confirm what get_srs returns on failure

res = ezkl.get_srs(settings_path)

In [None]:
# Run the EZKL setup for the compiled circuit; the verification and proving keys
# are expected on disk afterwards.
res = ezkl.setup(compiled_model_path, vk_path, pk_path)

assert res == True
for required_file in (vk_path, pk_path, settings_path):
    assert os.path.isfile(required_file)

In [93]:
# Generate the witness for the proof from the sample input in data_path

witness_path = 'witness.json'

res = ezkl.gen_witness(
        data_path,
        compiled_model_path,
        witness_path,
    )
assert os.path.isfile(witness_path)

In [None]:
# Generate the proof from the witness with the proving key; the proof is
# expected at proof_path afterwards and is also returned.

proof_path = 'proof.json'

proof = ezkl.prove(witness_path, compiled_model_path, pk_path, proof_path, "single")

print(proof)
assert os.path.isfile(proof_path)

In [None]:
# Check the generated proof against the verification key with ezkl.verify()

res = ezkl.verify(proof_path, settings_path, vk_path)

assert res == True
print("verified")

In [None]:
# The Solidity compiler (solc) must be set to version 0.8.20.
# solc-select is installed with pip and then used to install and activate that
# compiler version; the last command prints the active version so it can be checked.

!pip install solc-select
!solc-select install 0.8.20
!solc-select use 0.8.20
!solc --version

In [96]:
# Create the Solidity verifier contract (plus its ABI), which can be deployed to
# EVM compatible chains

sol_code_path = 'Verifier.sol'
abi_path = 'Verifier.abi'

res = ezkl.create_evm_verifier(vk_path, settings_path, sol_code_path, abi_path)

assert res == True
assert os.path.isfile(sol_code_path)

In [None]:
# Generate the inputs needed to perform the verification using the Solidity smart contract
# This includes the hashed input tensor, the output of the model and the generated proof

# Flatten every instance group of the proof into one list of field elements
onchain_input_array = [
    ezkl.vecu64_to_felt(field_element)
    for instance_group in proof["instances"]
    for field_element in instance_group
]

# One bracketed, comma-separated list. The earlier string building appended "]"
# after every instance group while opening "[" only once and adding no separator
# between groups, which is malformed for more than one group.
formatted_output = "[" + ", ".join(str(felt) for felt in onchain_input_array) + "]"

print("proof: ", "0x" + proof["proof"])
print("instances: ", formatted_output)

In [None]:
# Generate the Poseidon hash for a specific model input tensor and compare it to the hash printed above
# This is done to assure users that the model was correctly run on their specific record data/input
# Set test_input to the desired input array containing the 13 record data values
# NOTE(review): the model input that gets hashed is taken from the standardised dataset,
# so the values presumably have to be standardised the same way to match — confirm.

test_input = []

# Fail early with a clear message instead of an opaque reshape error
if np.asarray(test_input).size != 13:
    raise ValueError(
        "test_input must contain the 13 record data values, got %d" % np.asarray(test_input).size
    )

test_tensor = torch.tensor(np.array(test_input), dtype=torch.float).reshape(1, 13)
data_array = test_tensor.detach().numpy().reshape([-1]).tolist()

# NOTE(review): 8 is hard-coded; it presumably has to equal the input scale stored in
# settings.json after calibration — confirm before relying on the comparison.
input_scale = 8
vecu64s = [ezkl.float_to_vecu64(value, input_scale) for value in data_array]

print("serialized felts array", vecu64s)

# Named so that the builtin hash() is not shadowed for the rest of the session
poseidon_digest = ezkl.poseidon_hash(vecu64s)
print("hash of serialized felts array", poseidon_digest)

print("final Poseidon hash:", ezkl.vecu64_to_felt(poseidon_digest[0]))