In [14]:
import json
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import mean_squared_error as mse

In [2]:
df = pd.read_csv('operators_data.csv')

In [3]:
df.head()

Unnamed: 0,operator_address,operator_id,percentage,quorum_id,total_batches,total_unsigned_batches,timestamp
0,0x00107cfdeaddc0a3160ed2f6fedd627f313e7b1a,0x57309111be2293fbcfd77e8479a4f2570f8956aae8cd...,0.0,0,144,0,1717934400
1,0x006b988f89579e5842bcd029955dfbfc334b6826,0x852f5c1ecf96472bb48fc8e9373b40c74ac3e2cdf32a...,0.0,1,144,0,1717934400
2,0x01a7c2568693d65a367fde016b48c63f6673d4dc,0x7e06688af02ac562ee27c843b7e688eb1c67b469b3e6...,0.0,1,144,0,1717934400
3,0x033bfb405e809a303df875a6e018f1a64e5dbae9,0x2e2ee953aa3c2a499640270eaaf2ba1973265f7e0d0b...,0.0625,0,144,9,1717934400
4,0x033bfb405e809a303df875a6e018f1a64e5dbae9,0x2e2ee953aa3c2a499640270eaaf2ba1973265f7e0d0b...,0.0625,1,144,9,1717934400


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13181 entries, 0 to 13180
Data columns (total 7 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   operator_address        13181 non-null  object 
 1   operator_id             13181 non-null  object 
 2   percentage              13181 non-null  float64
 3   quorum_id               13181 non-null  int64  
 4   total_batches           13181 non-null  int64  
 5   total_unsigned_batches  13181 non-null  int64  
 6   timestamp               13181 non-null  int64  
dtypes: float64(1), int64(4), object(2)
memory usage: 721.0+ KB


In [5]:
# change operator address to string
df['operator_address'] = df['operator_address'].astype(str)
df['operator_id'] = df['operator_id'].astype(str)

In [6]:
len(df), len(df['operator_address'].unique())

(13181, 263)

In [7]:
# drop rows in which operator_address and timestamp are same and quorum_id is 1. keep the one in which quoram_id is 0
df = df.drop_duplicates(subset=['operator_address', 'timestamp'], keep='first')

In [8]:
len(df), len(df['operator_address'].unique())

(9732, 263)

In [9]:
# sum of all percentage of a certain operator
df['total_percentage'] = df.groupby('operator_address')['percentage'].transform('sum')

# frequency of each operator
df['frequency'] = df.groupby('operator_address')['operator_address'].transform('count')

In [10]:
df.head()

Unnamed: 0,operator_address,operator_id,percentage,quorum_id,total_batches,total_unsigned_batches,timestamp,total_percentage,frequency
0,0x00107cfdeaddc0a3160ed2f6fedd627f313e7b1a,0x57309111be2293fbcfd77e8479a4f2570f8956aae8cd...,0.0,0,144,0,1717934400,0.019536,52
1,0x006b988f89579e5842bcd029955dfbfc334b6826,0x852f5c1ecf96472bb48fc8e9373b40c74ac3e2cdf32a...,0.0,1,144,0,1717934400,0.010856,17
2,0x01a7c2568693d65a367fde016b48c63f6673d4dc,0x7e06688af02ac562ee27c843b7e688eb1c67b469b3e6...,0.0,1,144,0,1717934400,0.007636,29
3,0x033bfb405e809a303df875a6e018f1a64e5dbae9,0x2e2ee953aa3c2a499640270eaaf2ba1973265f7e0d0b...,0.0625,0,144,9,1717934400,0.267125,20
5,0x047438c5ceaa6d47e8691b51f5b34c6d41a00e3d,0xdbf03be4c90917ec42220446cee2b1226ac96fd074c5...,0.0,1,144,0,1717934400,0.0,29


In [11]:
# order by timestamp
df = df.sort_values(by='timestamp')

In [18]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Load the CSV file
# df = pd.read_csv('operator_data.csv')

# Function to create lag features
def create_lag_features(df, lag=1):
    for i in range(1, lag + 1):
        df[f'lag_{i}'] = df['percentage'].shift(i)
    return df

# Create lag features for the past 5 days
df = df.groupby('operator_address').apply(create_lag_features, lag=7)
df = df.dropna().reset_index(drop=True)
# print(df.head())
 
# Define features and target
# features = [col for col in df.columns if 'lag_' in col]
features = ['lag_1', 'lag_2', 'lag_3', 'lag_4', 'lag_5', 'lag_6', 'lag_7', 'quorum_id', 'total_batches', 'total_unsigned_batches', 'frequency', 'total_percentage', 'timestamp']
target = 'percentage'

# Split the data into training and testing sets
train_df = df[df['timestamp'] < 1717934400]
test_df = df[df['timestamp'] >= 1717934400]

X_train = train_df[features]
Y_train = train_df[target]
X_test = test_df[features]
Y_test = test_df[target]

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  df = df.groupby('operator_address').apply(create_lag_features, lag=7)


In [20]:
def train_model(
    X_train: pd.DataFrame,
    X_test: pd.DataFrame,
    Y_train: pd.DataFrame,
    Y_test: pd.DataFrame,
):
    model = nn.Sequential(
        nn.Linear(X_train.shape[1], 128),
        nn.ReLU(),
        nn.Linear(128, 64),
        nn.ReLU(),
        nn.Linear(64, 1),
    )

    # Loss and optimizer
    criterion = nn.MSELoss()
    optimizer = optim.RMSprop(model.parameters())

    # Convert data to PyTorch tensors
    X_tensor = torch.tensor(X_train.values, dtype=torch.float32)
    y_tensor = torch.tensor(Y_train.values.reshape(-1, 1), dtype=torch.float32)
    X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)

    # Training loop
    epochs_trial = np.arange(100, 400, 4)
    batch_trial = np.arange(100, 400, 4)
    DL_pred = []
    DL_RMSE = []
    for i, j, k in zip(range(4), epochs_trial, batch_trial):
        for epoch in range(j):
            optimizer.zero_grad()
            outputs = model(X_tensor)
            loss = criterion(outputs, y_tensor)
            loss.backward()
            optimizer.step()

        with torch.no_grad():
            DL_predict = model(X_test_tensor).numpy()
            DL_RMSE.append(
                np.sqrt(mse(Y_test.values / 100, DL_predict.flatten() / 100))
            )
            DL_pred.append(DL_predict)
            print("DL_RMSE_{}:{:.6f}".format(i + 1, DL_RMSE[i]))

    return model


In [21]:
len(X_test), len(X_train), len(df)

(219, 4255, 4474)

In [22]:
def serialize_to_onnx(
    model: nn.Module, X_train: pd.DataFrame, save_path="torch_operator_model"
):
    # Ensure the model is in evaluation mode
    model.eval()

    # Dummy input matching the input size
    sample_input = torch.randn(
        1, X_train.shape[1]
    )  # Replace 1 with the batch size you'd like to use

    # Specify the path to save the ONNX model
    onnx_file_path = save_path + ".onnx"

    torch.onnx.export(
        model,
        sample_input,
        onnx_file_path,
        export_params=True,
        opset_version=10,
        do_constant_folding=True,
        input_names=["input"],
        output_names=["output"],
        dynamic_axes={
            "input": {0: "batch_size"},
            "output": {0: "batch_size"},
        },
    )
    print(f"Saved serialized ONNX model to {onnx_file_path}.")

In [23]:
model = train_model(X_train, X_test, Y_train, Y_test)
serialize_to_onnx(model, X_train)

DL_RMSE_1:0.040714
DL_RMSE_2:0.040714
DL_RMSE_3:0.058086
DL_RMSE_4:0.058086
Saved serialized ONNX model to torch_operator_model.onnx.


In [29]:
!giza transpile torch_operator_model.onnx --output-path verifiable_nn

[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m14[0m [1;92m14:10:04[0m.[1;36m029[0m[1m][0m No model id provided, checking if model exists ✅
[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m14[0m [1;92m14:10:04[0m.[1;36m031[0m[1m][0m Model name is: torch_operator_model
[2K[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m14[0m [1;92m14:10:07[0m.[1;36m631[0m[1m][0m Model Created with id -> [1;36m818[0m! ✅
[2K[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m14[0m [1;92m14:10:10[0m.[1;36m382[0m[1m][0m Version Created with id -> [1;36m1[0m! ✅
[2K[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m14[0m [1;92m14:10:10[0m.[1;36m383[0m[1m][0m Sending model for transpilation ✅ 
[2K[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m14[0m [1;92m14:10:57[0

In [38]:
!giza endpoints deploy --model-id 818 --version-id 1

[2K▰▰▰▰▰▰▰ Creating endpoint!t!
[?25h[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m14[0m [1;92m14:15:20[0m.[1;36m497[0m[1m][0m Endpoint is successful ✅
[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m14[0m [1;92m14:15:20[0m.[1;36m501[0m[1m][0m Endpoint created with id -> [1;36m371[0m ✅
[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m14[0m [1;92m14:15:20[0m.[1;36m502[0m[1m][0m Endpoint created with endpoint URL: [4;94mhttps://endpoint-ashq-818-1-7f03ffa7-7i3yxzspbq-ew.a.run.app[0m 🎉


In [41]:
input =  np.array([X_test.iloc[0]]).astype(np.float32)

In [42]:
input

array([[0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        1.4400000e+02, 0.0000000e+00, 5.3000000e+01, 7.5797118e-02,
        1.7179343e+09]], dtype=float32)

In [45]:
from giza.agents.model import GizaModel
from sklearn.metrics import mean_squared_error as MSE

MODEL_ID = 818  # Update with your model ID
VERSION_ID = 1  # Update with your version ID

def prediction(input, model_id, version_id):
    model = GizaModel(id=model_id, version=version_id)

    (result, proof_id) = model.predict(
        input_feed={'input': input}, verifiable=True
    )

    return result, proof_id

def execution():
    # The input data type should match the model's expected input
    # input = np.array([[0.980643, 0.979649, 0.975971, 0.974366, 0.970623, 0.965317, 0.979038, 875.450182]]).astype(np.float32)

    (result, proof_id) = prediction(input, MODEL_ID, VERSION_ID)

    print(
        f"Predicted value for input {input.flatten()[0]} is {result[0].flatten()[0]}")
    
    rmse = np.sqrt(MSE(Y_test, result))
    print("RMSE : % f" %(rmse))

    return result, proof_id


execution()

An error occurred in predict: 500 Server Error: Internal Server Error for url: https://endpoint-ashq-818-1-7f03ffa7-7i3yxzspbq-ew.a.run.app/cairo_run
Deployment predict error: Running the Cairo  programm failed
An error occurred in predict: 500 Server Error: Internal Server Error for url: https://endpoint-ashq-818-1-7f03ffa7-7i3yxzspbq-ew.a.run.app/cairo_run


HTTPError: 500 Server Error: Internal Server Error for url: https://endpoint-ashq-818-1-7f03ffa7-7i3yxzspbq-ew.a.run.app/cairo_run

In [44]:
!giza endpoints logs -e {371} 

[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m14[0m [1;92m14:21:22[0m.[1;36m946[0m[1m][0m Getting logs for endpoint [1;36m371[0m ✅ 
[2m2024-06-14T08:45:11.247679Z[0m [32m INFO[0m [2morion_runner[0m[2m:[0m ✅ Sierra program downloaded successfully!
[2m2024-06-14T08:45:11.247758Z[0m [32m INFO[0m [2morion_runner[0m[2m:[0m 🚀 Server running on 0.0.0.0:8080
Default STARTUP TCP probe succeeded after 1 attempt for container "orion-runner-1" on port 8080.
[2m2024-06-14T08:45:43.948874Z[0m [32m INFO[0m [2morion_runner[0m[2m:[0m 🔧 Running Sierra program with request ID: fd3e370210494a87bd26404ce59b76d6
The error: Program panicked with [Felt(FieldElement { value: UnsignedInteger { limbs: [153902630003410548, 18443949052653605432, 14829000198995122502, 11775634334075962405] } })]
[2m2024-06-14T08:45:45.236224Z[0m [31mERROR[0m [2morion_runner::handlers[0m[2m:[0m ⛔️ Failed to run Sierra program: Program panicked with [Felt(F