
## Model Architecture and Training

In [1]:
import numpy as np

import torch
import torch.nn as nn
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Load the resale transactions from the CSV.
data = pd.read_csv("resale-flat-price.csv")

# Discard only rows that are missing a value actually used below. A blanket
# dropna() at this point would also throw away rows whose only gap is in a
# column that is removed further down (e.g. 'remaining_lease').
data = data.dropna(subset=['month', 'town', 'flat_type', 'resale_price'])

# Integer-encode the categorical columns. For each column, label_encodings
# stores the {original label: integer code} mapping so that later cells can
# encode new inputs and decode predictions.
label_encodings = {}
for column in ('town', 'flat_type'):
    # One encoder per column keeps each fitted state independent.
    labelencoder = LabelEncoder()
    data[column] = labelencoder.fit_transform(data[column])
    label_encodings[column] = dict(zip(labelencoder.classes_, labelencoder.transform(labelencoder.classes_)))

# Split the 'month' timestamp into two numeric features: 'year' and 'month'.
sale_date = pd.to_datetime(data['month'])
data['year'] = sale_date.dt.year
data['month'] = sale_date.dt.month

# Remove the columns that are not used as model inputs.
data = data.drop(columns=['block', 'street_name', 'storey_range', 'flat_model', 'remaining_lease', 'floor_area_sqm', 'lease_commence_date'])

# Split data into features (X) and target (y).
y = data['resale_price'].copy()
X = data.drop(columns='resale_price').copy()

print(X.head())

   month  town  flat_type  year
0      1     0          1  2017
1      1     0          2  2017
2      1     0          2  2017
3      1     0          2  2017
4      1     0          2  2017


In [2]:
import tqdm
import copy

# Seed the split and PyTorch so that Restart & Run All reproduces the same
# partition, weight initialisation and reported losses.
SEED = 42
torch.manual_seed(SEED)

# Split first, then fit the scaler on the training rows only, so the held-out
# rows do not influence the scaling statistics (no test-set leakage).
X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(
    X, y, train_size=0.7, shuffle=True, random_state=SEED
)
scaler = StandardScaler()
X_train = torch.tensor(scaler.fit_transform(X_train_raw), dtype=torch.float32)
X_test = torch.tensor(scaler.transform(X_test_raw), dtype=torch.float32)
# Targets become column vectors so they line up with the model's (N, 1) output.
y_train = torch.tensor(y_train_raw.values, dtype=torch.float32).reshape(-1, 1)
y_test = torch.tensor(y_test_raw.values, dtype=torch.float32).reshape(-1, 1)

# Small fully connected regressor; the input width follows the feature count.
# NOTE(review): there is no activation between the last two Linear layers, so
# together they form a single affine map -- confirm this is intentional.
n_features = X_train.shape[1]
model = nn.Sequential(
    nn.Linear(n_features, 12),
    nn.ReLU(),
    nn.Linear(12, 6),
    nn.Linear(6, 1)
)

# loss function and optimizer
loss_fn = nn.MSELoss()  # mean square error
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

# training parameters
n_epochs = 10   # number of epochs to run
batch_size = 5  # size of each batch
batch_start = torch.arange(0, len(X_train), batch_size)

# Track the weights from the epoch with the lowest test MSE.
best_mse = np.inf   # init to infinity
best_weights = None
history = []

# training loop
for epoch in range(n_epochs):
    model.train()
    with tqdm.tqdm(batch_start, unit="batch", mininterval=0, disable=True) as bar:
        bar.set_description(f"Epoch {epoch}")
        for start in bar:
            # take a batch
            X_batch = X_train[start:start+batch_size]
            y_batch = y_train[start:start+batch_size]
            # forward pass
            y_pred = model(X_batch)
            loss = loss_fn(y_pred, y_batch)
            # backward pass
            optimizer.zero_grad()
            loss.backward()
            # update weights
            optimizer.step()
            # report progress on the (currently disabled) progress bar
            bar.set_postfix(mse=float(loss))
    # Evaluate on the held-out split at the end of each epoch. no_grad avoids
    # building an autograd graph over the whole test set.
    model.eval()
    with torch.no_grad():
        y_pred = model(X_test)
        mse = float(loss_fn(y_pred, y_test))
    history.append(mse)
    print(f'Epoch [{epoch+1}], Loss: {mse:.4f}')
    if mse < best_mse:
        best_mse = mse
        best_weights = copy.deepcopy(model.state_dict())

# Restore the weights from the best epoch.
model.load_state_dict(best_weights)

Epoch [1], Loss: 273080320000.0000
Epoch [2], Loss: 263882375168.0000
Epoch [3], Loss: 241921359872.0000
Epoch [4], Loss: 204132171776.0000
Epoch [5], Loss: 151773085696.0000
Epoch [6], Loss: 92637626368.0000
Epoch [7], Loss: 42275205120.0000
Epoch [8], Loss: 18843754496.0000
Epoch [9], Loss: 15562397696.0000
Epoch [10], Loss: 15113315328.0000


<All keys matched successfully>

In [3]:
# Show the best MSE recorded during training, then its square root (RMSE).
for metric in (best_mse, np.sqrt(best_mse)):
    print(metric)

15113315328.0
122936.22463700437


In [4]:
# Persist the trained network in two forms: a TorchScript module and the raw
# state dict.
model_scripted = torch.jit.script(model)
model_scripted.save('model_scripted.pt')
torch.save(model.state_dict(), 'model_state_dict')

In [5]:
# data = {
#     'month': 4,
#     'town': 'ANG MO KIO',
#     'flat_type': '3 ROOM',
#     'year': 2024
# }

# data['town'] = label_encodings['town'][data['town']]
# data['flat_type'] = label_encodings['flat_type'][data['flat_type']]
# data = pd.DataFrame([data])
# data['year'] = data['year']
# print(data)
# data = scaler.transform(data)
# data = torch.tensor(data, dtype=torch.float32)
# price = model(data)
# print(price.item())

In [6]:
# Label -> integer-code mappings produced during preprocessing.
towns = label_encodings['town']
flat_types = label_encodings['flat_type']
months = list(range(1, 13))
years = [2024]

# Placeholder; the witness cell further down rebuilds `results`.
results = []

# One already-encoded record per (town, flat_type, month, year) combination,
# with month varying fastest. Collecting plain dicts and building the frame
# once avoids creating and concatenating thousands of one-row DataFrames, and
# leaves the `data` frame from the preprocessing cell untouched.
grid_records = [
    {'month': month, 'town': town_code, 'flat_type': flat_type_code, 'year': year}
    for town_code in towns.values()
    for flat_type_code in flat_types.values()
    for month in months
    for year in years
]

# Column order is fixed explicitly so it matches the feature order the scaler
# was fitted on (month, town, flat_type, year).
all_data = pd.DataFrame(grid_records, columns=['month', 'town', 'flat_type', 'year'])

# Scale the data with the scaler fitted during training.
scaled_data = scaler.transform(all_data)

# Convert to tensor
tensor_data = torch.tensor(scaled_data, dtype=torch.float32)
print(all_data)
print(tensor_data.shape)
# Perform prediction
# prices = model(tensor_data)

# Store results
# for i, price in enumerate(prices):
#     results.append({
#         'town': all_data.at[i, 'town'],
#         'flat_type': all_data.at[i, 'flat_type'],
#         'month': all_data.at[i, 'month'],
#         'year': all_data.at[i, 'year'],
#         'price': price.item()
#     })
# Print or process results further as needed
# print(results)

      month  town  flat_type  year
0         1     0          0  2024
1         2     0          0  2024
2         3     0          0  2024
3         4     0          0  2024
4         5     0          0  2024
...     ...   ...        ...   ...
2179      8    25          6  2024
2180      9    25          6  2024
2181     10    25          6  2024
2182     11    25          6  2024
2183     12    25          6  2024

[2184 rows x 4 columns]
torch.Size([2184, 4])


In [7]:
# # Loop all the town, flat_type, and all months of 2024 to predict
# towns = label_encodings['town']
# # print(towns)
# flat_types = label_encodings['flat_type']
# # print(flat_types)
# months = list(range(1, 13))
# # print(months)
# years = [2024]

# results = []

# for town in towns:
#     for flat_type in flat_types:
#         for month in months:
#             for year in years:
#                 data = {
#                     'month': month,
#                     'town': town,
#                     'flat_type': flat_type,
#                     'year': year
#                 }
#                 data['town'] = label_encodings['town'][data['town']]
#                 data['flat_type'] = label_encodings['flat_type'][data['flat_type']]
#                 data = pd.DataFrame([data])

#                 # print(data)
#                 data = scaler.transform(data)
#                 data = torch.tensor(data, dtype=torch.float32)
#                 price = model(data)
#                 # print(price)
#                 results.append({
#                     'town': town,
#                     'flat_type': flat_type,
#                     'month': month,
#                     'year': year,
#                     'price': price.item()
#                 })

# # export the results to csv
# results = pd.DataFrame(results)
# results.to_csv('results.csv', index=False)
# print('Results exported to results.csv')

In [8]:
import os
import ezkl

# All ezkl artifacts live under one directory. It is created up front so the
# first write (the ONNX export) does not fail on a fresh checkout.
proof_dir = 'proof'
os.makedirs(proof_dir, exist_ok=True)

# Paths are joined from their components; os.path.join with one pre-joined
# string is a no-op and hard-codes the '/' separator.
model_path = os.path.join(proof_dir, 'network.onnx')
compiled_model_path = os.path.join(proof_dir, 'network.compiled')
pk_path = os.path.join(proof_dir, 'test.pk')
vk_path = os.path.join(proof_dir, 'test.vk')
settings_path = os.path.join(proof_dir, 'settings.json')

witness_path = os.path.join(proof_dir, 'witness.json')
data_path = os.path.join(proof_dir, 'input.json')
results_csv_path = os.path.join(proof_dir, 'results.csv')

In [9]:
# print(tensor_data.shape)

In [10]:
import json

# Put the network in inference mode on the CPU before exporting it.
model.eval()
model.to('cpu')

# The dummy batch takes its shape from the real input, so the exported graph
# always matches the data written to the input file instead of relying on a
# hard-coded 2184x4.
dummy_input = torch.randn(tensor_data.shape)
torch.onnx.export(model, dummy_input, model_path, export_params=True)

# Flatten the scaled prediction grid into one list, wrapped as the single
# entry of "input_data".
x = tensor_data
data_array = x.detach().numpy().reshape([-1]).tolist()
data_json = dict(input_data=[data_array])

# Serialize data into file. The context manager guarantees the file is flushed
# and closed before later cells read it back from disk.
with open(data_path, 'w') as input_file:
    json.dump(data_json, input_file)

# x = torch.randn(2, 4)
# print(x)
# # Flatten each row separately
# data_array = [row.tolist() for row in x]
# print("\nFlattened data array:")
# print(data_array)

# x = {
#     'month': 4,
#     'town': 'ANG MO KIO',
#     'flat_type': '3 ROOM',
#     'year': 2024
# }

# x['town'] = label_encodings['town'][x['town']]
# x['flat_type'] = label_encodings['flat_type'][x['flat_type']]
# x = pd.DataFrame([x])
# x['year'] = x['year']
# # print(data_array)
# x = scaler.transform(x)
# x = torch.tensor(data_array, dtype=torch.float32)

# print(tensor_data)
# x = torch.randn(2, 4)
# data_array = [row.tolist() for row in x]
# print(data_array)
# x = torch.randn(2, 4)

# print(data_array)

# print(data_json['inputs'])

In [11]:
# Run arguments for settings generation: inputs and outputs are public, the
# model parameters are private.
run_args = ezkl.PyRunArgs()
run_args.param_visibility = "private"
run_args.output_visibility = "public"
run_args.input_visibility = "public"
run_args.variables = [("batch_size", 1)]

# TODO: Dictionary outputs
res = ezkl.gen_settings(
    model_path,
    settings_path,
    py_run_args=run_args,
)
assert res == True

# res = ezkl.calibrate_settings(data_path, model_path, settings_path, "resources")
# assert res == True

In [12]:
# cal_path = os.path.join("calibration.json")

# data_array = (torch.randn(10, *shape).detach().numpy()).reshape([-1]).tolist()

# data = dict(input_data = [data_array])

# # Serialize data into file:
# json.dump(data, open(cal_path, 'w'))

# ezkl.calibrate_settings(cal_path, model_path, settings_path, "resources")

In [13]:
# Compile the exported model into a circuit using the generated settings.
res = ezkl.compile_circuit(
    model_path,
    compiled_model_path,
    settings_path,
)
assert res == True

In [14]:
# Fetch the SRS (structured reference string) for the settings file.
# NOTE(review): the result is stored but not checked here -- confirm that is fine.
res = ezkl.get_srs(settings_path)

In [15]:
# Generate the witness file for the prepared input and compiled circuit.
res = ezkl.gen_witness(data_path, compiled_model_path, witness_path)
assert os.path.isfile(witness_path)

# Load the witness JSON; the file is closed as soon as it has been parsed.
with open(witness_path, 'r') as f:
    witness = json.load(f)

# First entry of the rescaled outputs.
# NOTE(review): assumed to hold one value per row of all_data -- confirm.
prices = witness['pretty_elements']['rescaled_outputs'][0]

# Inverse (integer code -> label) lookups are built once, instead of rebuilding
# key/value lists and doing a linear search for every row.
town_names = {code: name for name, code in label_encodings['town'].items()}
flat_type_names = {code: name for name, code in label_encodings['flat_type'].items()}

# Pair each price with the decoded inputs of the grid row at the same position.
results = [
    {
        'town': town_names[all_data.at[i, 'town']],
        'flat_type': flat_type_names[all_data.at[i, 'flat_type']],
        'month': all_data.at[i, 'month'],
        'year': all_data.at[i, 'year'],
        'price': price,
    }
    for i, price in enumerate(prices)
]

# export the results to csv
results = pd.DataFrame(results)
results.to_csv(results_csv_path, index=False)
print('Results exported.')

Results exported.


In [16]:
# Run ezkl's mock check on the witness and compiled circuit; it must succeed.
res = ezkl.mock(
    witness_path,
    compiled_model_path,
)
print(res)
assert res == True

True


In [17]:
# Circuit setup: after this call the verifying key and proving key are
# expected on disk, next to the settings file.
res = ezkl.setup(compiled_model_path, vk_path, pk_path)
print(res)

assert res == True
for required_file in (vk_path, pk_path, settings_path):
    assert os.path.isfile(required_file)

True


In [18]:
# Generate a proof from the witness, the compiled circuit and the proving key.
# The proof goes to 'test.pf' in the working directory, not under 'proof/'
# like the other artifacts.
proof_path = 'test.pf'

res = ezkl.prove(witness_path, compiled_model_path, pk_path, proof_path, "single")

print(res)
assert os.path.isfile(proof_path)

{'instances': [['35ffffef93f5e1439170b97948e833285d588181b64550b829a031e1724e6430', '1fffffef93f5e1439170b97948e833285d588181b64550b829a031e1724e6430', '4dfeffef93f5e1439170b97948e833285d588181b64550b829a031e1724e6430', 'e200000000000000000000000000000000000000000000000000000000000000', '5affffef93f5e1439170b97948e833285d588181b64550b829a031e1724e6430', '1fffffef93f5e1439170b97948e833285d588181b64550b829a031e1724e6430', '4dfeffef93f5e1439170b97948e833285d588181b64550b829a031e1724e6430', 'e200000000000000000000000000000000000000000000000000000000000000', '7fffffef93f5e1439170b97948e833285d588181b64550b829a031e1724e6430', '1fffffef93f5e1439170b97948e833285d588181b64550b829a031e1724e6430', '4dfeffef93f5e1439170b97948e833285d588181b64550b829a031e1724e6430', 'e200000000000000000000000000000000000000000000000000000000000000', 'a4ffffef93f5e1439170b97948e833285d588181b64550b829a031e1724e6430', '1fffffef93f5e1439170b97948e833285d588181b64550b829a031e1724e6430', '4dfeffef93f5e1439170b97948e8332

In [19]:
# Verify the proof against the settings and the verifying key.
res = ezkl.verify(proof_path, settings_path, vk_path)

assert res == True
print("verified")

verified
