In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Read the validator dataset from disk into a DataFrame
DATA_PATH = 'validator_data.csv'
df = pd.read_csv(DATA_PATH)

# Function to create lag features
def create_lag_features(df, lag=1, column='effectiveness'):
    """Return a copy of ``df`` with ``lag_1`` ... ``lag_<lag>`` columns added.

    Each ``lag_i`` column holds ``df[column]`` shifted down by ``i`` rows, so
    the first ``i`` rows of ``lag_i`` are NaN. Rows are shifted in their
    existing order; this function does not sort them.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``column``.
    lag : int, default 1
        Number of lag columns to create; values below 1 add no columns.
    column : str, default 'effectiveness'
        Name of the column to shift.

    Returns
    -------
    pandas.DataFrame
        A new frame with the lag columns; the input frame is left untouched.
    """
    # Work on a copy: writing the new columns straight into the argument would
    # mutate the caller's frame (or the group handed in by groupby.apply).
    out = df.copy()
    for i in range(1, lag + 1):
        out[f'lag_{i}'] = out[column].shift(i)
    return out

# Create 7 lag features per validator id (the previous 7 rows of each id).
# NOTE(review): this equals "the past 7 days" only if each id has one row per
# day and rows are already in date order — confirm.
# group_keys=False is passed explicitly: pandas warns that the default for
# this kind of apply call is changing, and this value preserves the previous
# behavior (the group key is not added to the index).
df = df.groupby('id', group_keys=False).apply(create_lag_features, lag=7)
# Each id's first 7 rows have NaN lags; dropna() removes them, along with any
# other row that has a missing value, and the index is renumbered.
df = df.dropna().reset_index(drop=True)
# print(df.head())
 
# Model inputs: the seven lagged effectiveness columns followed by the
# reward column (same list as writing 'lag_1' ... 'lag_7' out by hand).
features = [f'lag_{k}' for k in range(1, 8)] + ['total_reward']
# Column the regression is trained to predict
target = 'effectiveness'

# Date-based hold-out: rows before the cutoff train the model, rows on or
# after it are kept for evaluation.
# NOTE(review): the cutoff is compared as a string literal, so this presumably
# relies on partition_date holding ISO-formatted text — confirm.
train_df = df.loc[df['partition_date'] < '2024-05-02']
test_df = df.loc[df['partition_date'] >= '2024-05-02']

X_train, y_train = train_df[features], train_df[target]
X_test, y_test = test_df[features], test_df[target]

# Fit a linear regression on the training split
# (fit() returns the fitted estimator, so it can be bound in one step)
model = LinearRegression().fit(X_train, y_train)

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  df = df.groupby('id').apply(create_lag_features, lag=7)


In [2]:
# Display the feature values of the first row in the test split
X_test.iloc[0]

lag_1             0.980643
lag_2             0.979649
lag_3             0.975971
lag_4             0.974366
lag_5             0.970623
lag_6             0.965317
lag_7             0.979038
total_reward    875.450182
Name: 3650, dtype: float64

In [5]:
from sklearn.metrics import mean_squared_error as MSE 
# Score the held-out rows with the fitted model
pred = model.predict(X_test)

# Root-mean-squared error between the actual and predicted targets
mse = MSE(y_test, pred)
rmse = np.sqrt(mse)
print("RMSE : % f" % rmse)

RMSE :  0.008284


In [6]:
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

# Describe the converter input: one float tensor named 'float_input' with an
# unspecified batch dimension and one column per training feature
n_features = X_train.shape[1]
initial_type = [('float_input', FloatTensorType([None, n_features]))]

# Convert the fitted scikit-learn estimator to an ONNX model
onnx_model = convert_sklearn(model, initial_types=initial_type)

# Serialize the ONNX model and write the bytes to disk
with open("linear_regression.onnx", "wb") as onnx_file:
    onnx_file.write(onnx_model.SerializeToString())

In [4]:
# Shell out to the Giza CLI: transpile the exported ONNX file, writing the output to verifiable_lr
!giza transpile linear_regression.onnx --output-path verifiable_lr

[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m10[0m [1;92m11:42:31[0m.[1;36m248[0m[1m][0m No model id provided, checking if model exists ✅
[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m10[0m [1;92m11:42:31[0m.[1;36m249[0m[1m][0m Model name is: linear_regression
[2K[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m10[0m [1;92m11:42:32[0m.[1;36m810[0m[1m][0m Model Created with id -> [1;36m782[0m! ✅
[2K[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m10[0m [1;92m11:42:34[0m.[1;36m102[0m[1m][0m Version Created with id -> [1;36m1[0m! ✅
[2K[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m10[0m [1;92m11:42:34[0m.[1;36m103[0m[1m][0m Sending model for transpilation ✅ 
[2K[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m10[0m [1;92m11:43:19[0m.

In [7]:
# Deploy an endpoint for model 782, version 1 (the ids printed by the transpile step)
!giza endpoints deploy --model-id 782 --version-id 1

[2K▰▰▰▰▰▱▱ Creating endpoint!t!
[?25h[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m10[0m [1;92m11:45:52[0m.[1;36m044[0m[1m][0m Endpoint is successful ✅
[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m10[0m [1;92m11:45:52[0m.[1;36m048[0m[1m][0m Endpoint created with id -> [1;36m349[0m ✅
[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m10[0m [1;92m11:45:52[0m.[1;36m048[0m[1m][0m Endpoint created with endpoint URL: [4;94mhttps://endpoint-ashq-782-1-16576534-7i3yxzspbq-ew.a.run.app[0m 🎉


In [10]:
from giza.agents.model import GizaModel

MODEL_ID = 782  # Giza model id passed to GizaModel; update with your model ID
VERSION_ID = 1  # Version of that model to query; update with your version ID

def prediction(input, model_id, version_id):
    """Request a verifiable prediction from a Giza model.

    Builds a ``GizaModel`` for ``model_id`` / ``version_id`` and calls its
    ``predict`` with ``input`` under the ``'input'`` feed key and
    ``verifiable=True``.

    Returns the ``(result, proof_id)`` pair produced by that call.
    """
    giza_model = GizaModel(id=model_id, version=version_id)
    feed = {'input': input}
    result, proof_id = giza_model.predict(input_feed=feed, verifiable=True)
    return result, proof_id

def execution():
    """Run one verifiable prediction on a hard-coded sample row and print it.

    The sample is a single row of eight float32 values. The printed message
    shows only the first input value and the first element of the first
    entry of the result.

    Returns the ``(result, proof_id)`` pair obtained from ``prediction``.
    """
    # The input data type should match the model's expected input
    sample_values = [[0.980643, 0.979649, 0.975971, 0.974366, 0.970623, 0.965317, 0.979038, 875.450182]]
    input = np.array(sample_values, dtype=np.float32)

    result, proof_id = prediction(input, MODEL_ID, VERSION_ID)

    first_input = input.flatten()[0]
    first_output = result[0].flatten()[0]
    print(
        f"Predicted value for input {first_input} is {first_output}")

    return result, proof_id


# Run the sample prediction; as the cell's last expression, the returned (result, proof_id) is displayed
execution()

Payload:  Tensor<FP16x16>
['[1 8] [64267 0 64202 0 63961 0 63856 0 63610 0 63263 0 64162 0 57373504 0]']
🚀 Starting deserialization process...
✅ Deserialization completed! 🎉
Predicted value for input 0.9806429743766785 is 0.97735595703125


(array([[0.97735596]]), 'c25307bdb25941c8b6301e2f65212f9b')

In [11]:
# Look up the proof on endpoint 349, using the proof id string returned by the prediction call
!giza endpoints get-proof --endpoint-id 349 --proof-id "c25307bdb25941c8b6301e2f65212f9b"

[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m10[0m [1;92m11:50:18[0m.[1;36m371[0m[1m][0m Getting proof from endpoint [1;36m349[0m ✅ 
[1m{[0m
  [1;34m"id"[0m: [1;36m1188[0m,
  [1;34m"job_id"[0m: [1;36m1361[0m,
  [1;34m"metrics"[0m: [1m{[0m
    [1;34m"proving_time"[0m: [1;36m13.399027[0m
  [1m}[0m,
  [1;34m"created_date"[0m: [32m"2024-06-10T06:19:41.190629"[0m
[1m}[0m


In [12]:
# Download the same proof from endpoint 349 and save it locally as zklr.proof
! giza endpoints download-proof --endpoint-id 349 --proof-id "c25307bdb25941c8b6301e2f65212f9b" --output-path zklr.proof

[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m10[0m [1;92m11:51:09[0m.[1;36m895[0m[1m][0m Getting proof from endpoint [1;36m349[0m ✅ 
[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m10[0m [1;92m11:51:15[0m.[1;36m665[0m[1m][0m Proof downloaded to zklr.proof ✅ 


In [13]:
!giza verify --proof-id 349

[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m10[0m [1;92m11:51:16[0m.[1;36m359[0m[1m][0m Verifying proof[33m...[0m
[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m10[0m [1;92m11:51:18[0m.[1;36m290[0m[1m][0m Verification result: [3;92mTrue[0m
[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m10[0m [1;92m11:51:18[0m.[1;36m290[0m[1m][0m Verification time: [1;36m0.485350108[0m


In [14]:
# Preview the first rows of the frame, including the lag_1 ... lag_7 columns
df.head()

Unnamed: 0,id,total_validators,p_active_slots,p_created_blocks,non_empty_block,empty_block,vanilla_blocks,a_active_epochs,a_attested_epochs,a_attested_source,...,att_effectiveness,effectiveness,partition_date,lag_1,lag_2,lag_3,lag_4,lag_5,lag_6,lag_7
0,network,907305,7200,7141,7121.0,0.0,637.0,203555453,202525961,202542456,...,0.958115,0.969707,2024-01-24,0.962516,0.94852,0.946696,0.977647,0.97868,0.977154,0.971395
1,Kukis Global,10001,85,85,85.0,0.0,2.0,2250225,2250199,2249655,...,0.979543,0.987215,2024-01-24,0.982342,0.974881,0.975008,0.98605,0.991527,0.989457,0.980635
2,ParaFi Technologies LLC,5982,39,37,37.0,0.0,0.0,1336358,1336313,1336041,...,0.980181,0.968382,2024-01-24,0.973834,0.974682,0.809424,0.990879,0.985424,0.989861,0.985903
3,Blockscape,8830,89,89,88.0,0.0,0.0,1986750,1985181,1985842,...,0.975361,0.980387,2024-01-24,0.980054,0.971271,0.970209,0.984391,0.989299,0.986823,0.983941
4,Figment,8928,62,62,62.0,0.0,0.0,2008800,2008537,2007946,...,0.976136,0.985085,2024-01-24,0.976818,0.967221,0.972273,0.989095,0.983391,0.986877,0.958156


In [25]:
# Select Figment's row for the most recent partition_date and take its feature values
latest_date = df['partition_date'].max()
is_figment_latest = (df['id'] == 'Figment') & (df['partition_date'] == latest_date)
latest_record = df[is_figment_latest]
lag_features = latest_record[features].values

# Query the deployed model through the notebook-wide MODEL_ID / VERSION_ID
# constants rather than repeating the literal ids in every cell.
model = GizaModel(id=MODEL_ID, version=VERSION_ID)
input = lag_features.astype(np.float32)
result, proof_id = model.predict(
    input_feed={'input': input}, verifiable=True, job_size='XL'
)
# Display the first element of the first row of the result
result[0][0]

Payload:  Tensor<FP16x16>
['[1 8] [63400 0 63827 0 63577 0 62155 0 62148 0 62457 0 62217 0 1815227 0]']
🚀 Starting deserialization process...
✅ Deserialization completed! 🎉


0.9665679931640625

In [104]:
# Build a two-row batch: the latest-date records for Figment and P2P.org Lido
latest_date = df['partition_date'].max()
is_latest = df['partition_date'] == latest_date
latest_record = df[(df['id'] == 'Figment') & is_latest]
latest_record2 = df[(df['id'] == 'P2P.org Lido') & is_latest]
# Stack the two selections, Figment first
latest_rec = pd.concat([latest_record, latest_record2])

lag_features = latest_rec[features].values
# Use the notebook-wide MODEL_ID / VERSION_ID constants rather than repeating
# the literal ids in every cell.
model = GizaModel(id=MODEL_ID, version=VERSION_ID)
input = lag_features.astype(np.float32)
result, proof_id = model.predict(
    input_feed={'input': input}, verifiable=True, job_size='L'
)
# Display both the predictions and the proof id
result, proof_id

Payload:  Tensor<FP16x16>
['[2 8] [63400 0 63827 0 63577 0 62155 0 62148 0 62457 0 62217 0 1815227 0 64596 0 64573 0 64333 0 64162 0 64468 0 64474 0 64395 0 1960349 0]']
🚀 Starting deserialization process...
✅ Deserialization completed! 🎉


(array([[0.96656799],
        [0.98202515]]),
 '9dcf8e4ab36041fda333e6cd96d0eef2')

In [80]:
# Select every row dated on the most recent partition_date
latest_date = df['partition_date'].max()
unique_ids = df['id'].unique()
model = GizaModel(id=782, version=1)

validators = df[df['partition_date']==latest_date]

# A per-id loop used to live here and is now disabled: for each id it ran one
# verifiable prediction with job_size='XL' on that id's latest record and
# stored result[0][0] in df's 'pred_effectiveness' column.
# NOTE(review): within this cell, unique_ids and model were only referenced by
# that disabled loop.
validators.head()

Unnamed: 0,id,total_validators,p_active_slots,p_created_blocks,non_empty_block,empty_block,vanilla_blocks,a_active_epochs,a_attested_epochs,a_attested_source,...,effectiveness,partition_date,lag_1,lag_2,lag_3,lag_4,lag_5,lag_6,lag_7,pred_effectiveness
4848,Launchnodes,3062,24,24,24.0,0.0,0.0,688950,688903,688760,...,0.984796,2024-06-02,0.98544,0.985436,0.982277,0.979982,0.984333,0.983956,0.985236,0.98201
4849,network,296087,2089,2088,2083.0,0.0,254.0,66553690,66530018,66510859,...,0.979356,2024-06-02,0.980668,0.981052,0.974743,0.972522,0.978541,0.978494,0.978385,0.977921
4850,Staking Facilities,8400,51,51,50.0,0.0,0.0,1890000,1889188,1888308,...,0.968891,2024-06-02,0.977592,0.982853,0.978015,0.974768,0.984854,0.982768,0.981518,0.977325
4851,Blockscape,8895,58,58,58.0,0.0,0.0,1998470,1998389,1998065,...,0.987108,2024-06-02,0.987899,0.987766,0.978826,0.981397,0.986767,0.973869,0.987814,0.983704
4852,RockLogic GmbH,8892,67,67,67.0,0.0,0.0,1998080,1996333,1994654,...,0.966401,2024-06-02,0.97191,0.971303,0.959734,0.958134,0.954854,0.95184,0.941334,


In [81]:
# Predict for all rows in `validators` with a single batched, verifiable request
lag_features = validators[features].values
input = lag_features.astype(np.float32)
# Use the notebook-wide MODEL_ID / VERSION_ID constants rather than repeating
# the literal ids in every cell.
model = GizaModel(id=MODEL_ID, version=VERSION_ID)
result, proof_id = model.predict(
    input_feed={'input': input}, verifiable=True, job_size='L'
)
print("Result: ", result, " Proof Id: ",  proof_id)

Payload:  Tensor<FP16x16>
['[39 8] [64581 0 64581 0 64374 0 64224 0 64509 0 64484 0 64568 0 623110 0 64269 0 64294 0 63880 0 63735 0 64129 0 64126 0 64119 0 60159384 0 64067 0 64412 0 64095 0 63882 0 64543 0 64406 0 64324 0 1732198 0 64742 0 64734 0 64148 0 64316 0 64668 0 63823 0 64737 0 1736128 0 63695 0 63655 0 62897 0 62792 0 62577 0 62379 0 61691 0 1840848 0 64207 0 64298 0 64377 0 63741 0 64121 0 64491 0 64450 0 4822903 0 64377 0 64253 0 64512 0 64249 0 64659 0 64627 0 64746 0 2093314 0 64596 0 64573 0 64333 0 64162 0 64468 0 64474 0 64395 0 1960349 0 64596 0 64573 0 64333 0 64162 0 64468 0 64474 0 64395 0 1960349 0 64126 0 64249 0 61282 0 63766 0 63583 0 64167 0 63267 0 1916147 0 64632 0 64559 0 64424 0 64243 0 64546 0 64503 0 64179 0 1971628 0 62973 0 63113 0 62384 0 61511 0 63005 0 62658 0 62754 0 1815926 0 64757 0 64670 0 64544 0 64325 0 64619 0 64626 0 64654 0 1878481 0 63339 0 63583 0 63465 0 63409 0 63484 0 63677 0 63366 0 1887895 0 64477 0 64411 0 64145 0 64053 0 63964 0 

In [87]:
# Show the predictions as a flat 1-D array
result.flatten()

array([0.98200989, 0.97792053, 0.97732544, 0.98370361, 0.96929932,
       0.97775269, 0.98013306, 0.98202515, 0.98202515, 0.97457886,
       0.98249817, 0.9616394 , 0.9838562 , 0.96824646, 0.97998047,
       0.98377991, 0.98078918, 0.97903442, 0.98161316, 0.98231506,
       0.96656799, 0.98112488, 0.98312378, 0.98165894, 0.98204041,
       0.98220825, 0.96725464, 0.98190308, 0.96723938, 0.98402405,
       0.98362732, 0.98033142, 0.98266602, 0.9730835 , 0.95770264,
       0.97967529, 0.9828186 , 0.97854614, 0.97555542])

In [90]:
# Position of the largest predicted value within the flattened result
np.argmax(result.flatten())

29

In [94]:
# Locate the row of `validators` whose prediction is the largest
flat_preds = result.flatten()
best_pos = np.argmax(flat_preds)
validator = validators.iloc[best_pos]
print(f"Validator with the highest predicted effectiveness: {validator['id']} with a predicted effectiveness of {flat_preds.max()}")

Validator with the highest predicted effectiveness: RockawayX Infra with a predicted effectiveness of 0.9840240478515625


In [106]:
# Fetch the logs of endpoint 349; {349} is IPython's {expression} interpolation applied to the literal 349
!giza endpoints logs -e {349} 

[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m10[0m [1;92m13:32:44[0m.[1;36m672[0m[1m][0m Getting logs for endpoint [1;36m349[0m ✅ 
[2m2024-06-10T06:15:36.219967Z[0m [32m INFO[0m [2morion_runner[0m[2m:[0m ✅ Sierra program downloaded successfully!
[2m2024-06-10T06:15:36.220032Z[0m [32m INFO[0m [2morion_runner[0m[2m:[0m 🚀 Server running on 0.0.0.0:8080
Default STARTUP TCP probe succeeded after 1 attempt for container "orion-runner-1" on port 8080.
[2m2024-06-10T06:17:20.195965Z[0m [32m INFO[0m [2morion_runner[0m[2m:[0m 🔧 Running Sierra program with request ID: 5716e3512f5d46389e5ac071be9233db
[2m2024-06-10T06:17:20.552201Z[0m [32m INFO[0m [2morion_runner::handlers[0m[2m:[0m ✅ Cairo program ran successfully with result: "[1 1] [64052 false]"
[2m2024-06-10T06:17:20.552262Z[0m [32m INFO[0m [2morion_runner::handlers[0m[2m:[0m 📡 Sending callback to https://model-registry-service-7i3yxzspbq-ew.a.run.app/api/v