In [3]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Load the validator dataset. Later cells read its 'id', 'effectiveness' and
# 'partition_date' columns.
df = pd.read_csv('validator_data.csv')

# Function to create lag features
def create_lag_features(df, lag=1, column='effectiveness'):
    """Return a copy of ``df`` with ``lag_1`` .. ``lag_<lag>`` columns appended.

    ``lag_i`` holds the value of ``column`` from ``i`` rows earlier in ``df``
    (``Series.shift(i)``), so the first ``i`` rows of ``lag_i`` are NaN. The
    lags follow the current row order; the caller is responsible for ordering
    the rows before calling.

    Args:
        df: Frame containing ``column``. It is not modified.
        lag: Number of lag columns to create; values below 1 add no columns.
        column: Name of the column to lag. Defaults to ``'effectiveness'``.

    Returns:
        A new DataFrame with the same index as ``df`` plus the lag columns.
    """
    # Work on a copy: the frame handed in by ``groupby(...).apply`` (or by any
    # other caller) must not be mutated as a side effect.
    lagged = df.copy()
    for i in range(1, lag + 1):
        lagged[f'lag_{i}'] = lagged[column].shift(i)
    return lagged

# Number of lagged 'effectiveness' values used as model inputs.
N_LAGS = 7
# First partition_date that belongs to the held-out test set.
TEST_START_DATE = '2024-05-02'

# Lags are positional (shift by rows), so make the chronological order explicit
# before shifting. The sort is stable and is a no-op when the CSV is already
# ordered by date. Assumes ISO-formatted date strings, as the comparisons in the
# train/test split below already do.
df = df.sort_values('partition_date', kind='stable')

# Create lag_1..lag_N per validator id. group_keys=False keeps the original row
# index/order and avoids the pandas FutureWarning about prepending group keys.
df = df.groupby('id', group_keys=False).apply(create_lag_features, lag=N_LAGS)

# Drop rows with missing values (this includes each validator's first N_LAGS rows).
# NOTE(review): dropna() also removes rows with NaN in unrelated columns — confirm
# that is intended, or restrict it to the lag/target columns.
df = df.dropna().reset_index(drop=True)

# Define features and target
features = [f'lag_{i}' for i in range(1, N_LAGS + 1)]
target = 'effectiveness'

# Time-based split: everything before TEST_START_DATE trains, the rest tests.
train_df = df[df['partition_date'] < TEST_START_DATE]
test_df = df[df['partition_date'] >= TEST_START_DATE]

X_train = train_df[features]
y_train = train_df[target]
X_test = test_df[features]
y_test = test_df[target]

# Initialize and train the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

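FutureWarning: Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)

To adopt the future behavior and silence this warning, use

	>>> .groupby(..., group_keys=True)
  df = df.groupby('id').apply(create_lag_features, lag=7)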


In [2]:
# Show the lag features of the first test row; the same values are used as the
# hard-coded sample inside execution().
X_test.iloc[0]

lag_1    0.980643
lag_2    0.979649
lag_3    0.975971
lag_4    0.974366
lag_5    0.970623
lag_6    0.965317
lag_7    0.979038
Name: 3650, dtype: float64

In [4]:
# Predict effectiveness for the held-out test rows.
pred = model.predict(X_test)

# Root-mean-squared error on the test set. mean_squared_error is already
# imported in the notebook's import cell, so no aliased re-import is needed here.
rmse = np.sqrt(mean_squared_error(y_test, pred))
print("RMSE : % f" %(rmse))

RMSE :  0.008284


In [5]:
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

# Describe the model input for the converter: one tensor named 'float_input'
# with a free batch dimension and one column per training feature.
initial_type = [
    ('float_input', FloatTensorType([None, X_train.shape[1]])),
]

# Translate the fitted scikit-learn estimator into an ONNX graph.
onnx_model = convert_sklearn(model, initial_types=initial_type)

# Serialize the graph and write the bytes to disk for the Giza CLI.
onnx_bytes = onnx_model.SerializeToString()
with open("lr_gza.onnx", "wb") as onnx_file:
    onnx_file.write(onnx_bytes)

In [6]:
# Transpile the exported ONNX file with the Giza CLI, writing the result to
# ./verifiable_lr. The model id and version id are printed in the command output.
!giza transpile lr_gza.onnx --output-path verifiable_lr

[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m14[0m [1;92m17:09:34[0m.[1;36m839[0m[1m][0m No model id provided, checking if model exists ‚úÖ
[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m14[0m [1;92m17:09:34[0m.[1;36m843[0m[1m][0m Model name is: lr_gza
[2K[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m14[0m [1;92m17:09:39[0m.[1;36m626[0m[1m][0m Model Created with id -> [1;36m822[0m! ‚úÖ
[2K[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m14[0m [1;92m17:09:43[0m.[1;36m306[0m[1m][0m Version Created with id -> [1;36m1[0m! ‚úÖ
[2K[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m14[0m [1;92m17:09:43[0m.[1;36m307[0m[1m][0m Sending model for transpilation ‚úÖ 
[2K[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m14[0m [1;92m17:10:33[0m.[1;

In [8]:
# Deploy an endpoint for the model id / version id reported by the transpile step.
!giza endpoints deploy --model-id 822 --version-id 1

[2K‚ñ∞‚ñ±‚ñ±‚ñ±‚ñ±‚ñ±‚ñ± Creating endpoint!t!
[?25h[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m14[0m [1;92m17:15:07[0m.[1;36m472[0m[1m][0m Endpoint is successful ‚úÖ
[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m14[0m [1;92m17:15:07[0m.[1;36m476[0m[1m][0m Endpoint created with id -> [1;36m373[0m ‚úÖ
[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m14[0m [1;92m17:15:07[0m.[1;36m476[0m[1m][0m Endpoint created with endpoint URL: [4;94mhttps://endpoint-ashq-822-1-cf32420a-7i3yxzspbq-ew.a.run.app[0m üéâ


In [6]:
from giza.agents.model import GizaModel

MODEL_ID = 822  # Update with your model ID
VERSION_ID = 1  # Update with your version ID

def prediction(input, model_id, version_id):
    """Run one verifiable inference against a Giza model version.

    Args:
        input: Array passed to the model under the ``'input'`` feed key.
        model_id: Giza model id.
        version_id: Giza model version id.

    Returns:
        The ``(result, proof_id)`` pair produced by ``GizaModel.predict``.
    """
    giza_model = GizaModel(id=model_id, version=version_id)
    feed = {'input': input}
    result, proof_id = giza_model.predict(input_feed=feed, verifiable=True)
    return result, proof_id

def execution():
    """Send one hard-coded sample to the deployed model and print the prediction.

    Returns:
        The ``(result, proof_id)`` pair returned by ``prediction``.
    """
    # One row of the seven lag features (the values displayed for X_test.iloc[0]).
    # The input data type should match the model's expected input, hence float32.
    # Named `sample` rather than `input` so the built-in input() is not shadowed.
    sample = np.array([[0.980643, 0.979649, 0.975971, 0.974366, 0.970623, 0.965317, 0.979038]]).astype(np.float32)

    (result, proof_id) = prediction(sample, MODEL_ID, VERSION_ID)

    # Report the whole feature vector: the prediction depends on all seven lags,
    # not only on the first one.
    print(
        f"Predicted value for input {sample.flatten()} is {result[0].flatten()[0]}")

    return result, proof_id


execution()

  from .autonotebook import tqdm as notebook_tqdm


🚀 Starting deserialization process...
✅ Deserialization completed! 🎉
Predicted value for input 0.9806429743766785 is 0.977325439453125


(array([[0.97732544]]), 'ba403e8ba0db4e1fa1b75ac3e2d3177c')

In [15]:
# NOTE(review): 373 is also the endpoint id printed by the deploy step, while
# execution() returned a hex-string proof id — confirm this is the intended proof.
!giza verify --proof-id 373

[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m14[0m [1;92m17:17:00[0m.[1;36m005[0m[1m][0m Verifying proof[33m...[0m
[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m14[0m [1;92m17:17:03[0m.[1;36m602[0m[1m][0m Verification result: [3;92mTrue[0m
[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m14[0m [1;92m17:17:03[0m.[1;36m603[0m[1m][0m Verification time: [1;36m0.451240747[0m


In [11]:
# Preview the feature frame, including the generated lag_1..lag_7 columns.
df.head()

Unnamed: 0,id,total_validators,p_active_slots,p_created_blocks,non_empty_block,empty_block,vanilla_blocks,a_active_epochs,a_attested_epochs,a_attested_source,...,att_effectiveness,effectiveness,partition_date,lag_1,lag_2,lag_3,lag_4,lag_5,lag_6,lag_7
0,network,907305,7200,7141,7121.0,0.0,637.0,203555453,202525961,202542456,...,0.958115,0.969707,2024-01-24,0.962516,0.94852,0.946696,0.977647,0.97868,0.977154,0.971395
1,Kukis Global,10001,85,85,85.0,0.0,2.0,2250225,2250199,2249655,...,0.979543,0.987215,2024-01-24,0.982342,0.974881,0.975008,0.98605,0.991527,0.989457,0.980635
2,ParaFi Technologies LLC,5982,39,37,37.0,0.0,0.0,1336358,1336313,1336041,...,0.980181,0.968382,2024-01-24,0.973834,0.974682,0.809424,0.990879,0.985424,0.989861,0.985903
3,Blockscape,8830,89,89,88.0,0.0,0.0,1986750,1985181,1985842,...,0.975361,0.980387,2024-01-24,0.980054,0.971271,0.970209,0.984391,0.989299,0.986823,0.983941
4,Figment,8928,62,62,62.0,0.0,0.0,2008800,2008537,2007946,...,0.976136,0.985085,2024-01-24,0.976818,0.967221,0.972273,0.989095,0.983391,0.986877,0.958156


In [16]:
# Run the deployed model on the lag features of Figment's most recent row.
latest_date = df['partition_date'].max()
latest_record = df[(df['id'] == 'Figment') & (df['partition_date'] == latest_date)]
lag_features = latest_record[features].values

# Dedicated names: `model` still refers to the fitted scikit-learn regressor and
# the built-in input() is not shadowed. Ids come from the constants defined with
# prediction() instead of being repeated as literals.
giza_model = GizaModel(id=MODEL_ID, version=VERSION_ID)
model_input = lag_features.astype(np.float32)
(result, proof_id) = giza_model.predict(
    input_feed={'input': model_input}, verifiable=True, job_size='XL'
)
# First (and only) prediction in the returned array.
result[0][0]

🚀 Starting deserialization process...
✅ Deserialization completed! 🎉


0.966522216796875

In [17]:
# Run the model on the most recent rows of two validators in a single batch.
latest_date = df['partition_date'].max()
latest_rows = df[df['partition_date'] == latest_date]

# Stack the two validators' rows; predictions come back in this row order.
latest_record = latest_rows[latest_rows['id'] == 'Figment']
latest_record2 = latest_rows[latest_rows['id'] == 'P2P.org Lido']
latest_rec = pd.concat([latest_record, latest_record2])

lag_features = latest_rec[features].values

# NOTE(review): model id 782 differs from the id (822) used by every other cell
# in this notebook — confirm this is intentional and not a stale id.
# Dedicated names keep the fitted scikit-learn `model` and the built-in input()
# intact.
giza_model = GizaModel(id=782, version=1)
model_input = lag_features.astype(np.float32)
(result, proof_id) = giza_model.predict(
    input_feed={'input': model_input}, verifiable=True, job_size='L'
)
result, proof_id

🚀 Starting deserialization process...
✅ Deserialization completed! 🎉


(array([[0.96656799],
        [0.98202515]]),
 '56bf410363de436eba80a1c73456c549')

In [7]:
# Select every validator's row for the most recent partition date. Inference on
# these rows is done as one batch in a later cell, so no model handle is created
# here and the abandoned per-validator loop has been removed.
latest_date = df['partition_date'].max()
unique_ids = df['id'].unique()

validators = df[df['partition_date'] == latest_date]
validators.head()

Unnamed: 0,id,total_validators,p_active_slots,p_created_blocks,non_empty_block,empty_block,vanilla_blocks,a_active_epochs,a_attested_epochs,a_attested_source,...,att_effectiveness,effectiveness,partition_date,lag_1,lag_2,lag_3,lag_4,lag_5,lag_6,lag_7
4848,Launchnodes,3062,24,24,24.0,0.0,0.0,688950,688903,688760,...,0.975673,0.984796,2024-06-02,0.98544,0.985436,0.982277,0.979982,0.984333,0.983956,0.985236
4849,network,296087,2089,2088,2083.0,0.0,254.0,66553690,66530018,66510859,...,0.968694,0.979356,2024-06-02,0.980668,0.981052,0.974743,0.972522,0.978541,0.978494,0.978385
4850,Staking Facilities,8400,51,51,50.0,0.0,0.0,1890000,1889188,1888308,...,0.96199,0.968891,2024-06-02,0.977592,0.982853,0.978015,0.974768,0.984854,0.982768,0.981518
4851,Blockscape,8895,58,58,58.0,0.0,0.0,1998470,1998389,1998065,...,0.979372,0.987108,2024-06-02,0.987899,0.987766,0.978826,0.981397,0.986767,0.973869,0.987814
4852,RockLogic GmbH,8892,67,67,67.0,0.0,0.0,1998080,1996333,1994654,...,0.946242,0.966401,2024-06-02,0.97191,0.971303,0.959734,0.958134,0.954854,0.95184,0.941334


In [9]:
# Batch inference: one input row per validator, in the same row order as
# `validators`, so result[i] corresponds to validators.iloc[i].
lag_features = validators[features].values

# Dedicated names keep the fitted scikit-learn `model` and the built-in input()
# intact. Ids come from the constants defined with prediction().
model_input = lag_features.astype(np.float32)
giza_model = GizaModel(id=MODEL_ID, version=VERSION_ID)
(result, proof_id) = giza_model.predict(
        input_feed={'input': model_input}, verifiable=True, job_size='L'
)
print("Result: ", result, " Proof Id: ",  proof_id)

🚀 Starting deserialization process...
✅ Deserialization completed! 🎉
Result:  [[0.98197937]
 [0.97787476]
 [0.97729492]
 [0.9836731 ]
 [0.9692688 ]
 [0.97772217]
 [0.98010254]
 [0.98199463]
 [0.98199463]
 [0.97454834]
 [0.98246765]
 [0.96160889]
 [0.98382568]
 [0.96821594]
 [0.97994995]
 [0.98374939]
 [0.98075867]
 [0.97900391]
 [0.98158264]
 [0.98228455]
 [0.96652222]
 [0.98109436]
 [0.98309326]
 [0.98162842]
 [0.98200989]
 [0.98217773]
 [0.96722412]
 [0.98187256]
 [0.96720886]
 [0.98397827]
 [0.9835968 ]
 [0.9803009 ]
 [0.9826355 ]
 [0.97305298]
 [0.95765686]
 [0.97964478]
 [0.98278809]
 [0.97851562]
 [0.9755249 ]]  Proof Id:  cc7a0d8752e84ff0a4e079aa44e4e174


In [13]:
# Collapse the column of predictions into a 1-D array for easier inspection.
result.flatten()

array([0.98197937, 0.97787476, 0.97729492, 0.9836731 , 0.9692688 ,
       0.97772217, 0.98010254, 0.98199463, 0.98199463, 0.97454834,
       0.98246765, 0.96160889, 0.98382568, 0.96821594, 0.97994995,
       0.98374939, 0.98075867, 0.97900391, 0.98158264, 0.98228455,
       0.96652222, 0.98109436, 0.98309326, 0.98162842, 0.98200989,
       0.98217773, 0.96722412, 0.98187256, 0.96720886, 0.98397827,
       0.9835968 , 0.9803009 , 0.9826355 , 0.97305298, 0.95765686,
       0.97964478, 0.98278809, 0.97851562, 0.9755249 ])

In [11]:
# Position (row number) of the highest predicted effectiveness.
np.argmax(result.flatten())

29

In [12]:
# Look up the validator whose row position matches the largest prediction.
# Predictions were produced from validators[features], so positions line up.
best_position = np.argmax(result.flatten())
validator = validators.iloc[best_position]
print(f"Validator with the highest predicted effectiveness: {validator['id']} with a predicted effectiveness of {result.flatten().max()}")

Validator with the highest predicted effectiveness: RockawayX Infra with a predicted effectiveness of 0.983978271484375


In [106]:
# Fetch the logs of endpoint 349.
# NOTE(review): the endpoint deployed earlier in this notebook has id 373 —
# confirm 349 is the endpoint you mean to inspect.
!giza endpoints logs -e {349} 

[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m10[0m [1;92m13:32:44[0m.[1;36m672[0m[1m][0m Getting logs for endpoint [1;36m349[0m ‚úÖ 
[2m2024-06-10T06:15:36.219967Z[0m [32m INFO[0m [2morion_runner[0m[2m:[0m ‚úÖ Sierra program downloaded successfully!
[2m2024-06-10T06:15:36.220032Z[0m [32m INFO[0m [2morion_runner[0m[2m:[0m üöÄ Server running on 0.0.0.0:8080
Default STARTUP TCP probe succeeded after 1 attempt for container "orion-runner-1" on port 8080.
[2m2024-06-10T06:17:20.195965Z[0m [32m INFO[0m [2morion_runner[0m[2m:[0m üîß Running Sierra program with request ID: 5716e3512f5d46389e5ac071be9233db
[2m2024-06-10T06:17:20.552201Z[0m [32m INFO[0m [2morion_runner::handlers[0m[2m:[0m ‚úÖ Cairo program ran successfully with result: "[1 1] [64052 false]"
[2m2024-06-10T06:17:20.552262Z[0m [32m INFO[0m [2morion_runner::handlers[0m[2m:[0m üì° Sending callback to https://model-registry-service-7i3yxzspbq-ew.