In [68]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Load the CSV file
df = pd.read_csv('validator_data.csv')

# Function to create lag features
def create_lag_features(df, lag=1):
    for i in range(1, lag + 1):
        df[f'lag_{i}'] = df['effectiveness'].shift(i)
    return df

# Create lag features for the past 5 days
df = df.groupby('id').apply(create_lag_features, lag=1)
df = df.dropna().reset_index(drop=True)
# print(df.head())

# Define features and target
features = [col for col in df.columns if 'lag_' in col]
target = 'effectiveness'

# Split the data into training and testing sets
train_df = df[df['partition_date'] < '2024-05-02']
test_df = df[df['partition_date'] >= '2024-05-02']

X_train = train_df[features]
y_train = train_df[target]
X_test = test_df[features]
y_test = test_df[target]

# Initialize and train the XGBoost model
model = XGBRegressor()
model.fit(X_train.values, y_train.values)

print(type(X_test.values), type(df), X_test.values.shape, X_train.values.shape)

# Make predictions
test_df['predicted_change'] = model.predict(X_test.values)

print(model.predict(X_test.values[:1]))

# Evaluate the model
mse = mean_squared_error(y_test, test_df['predicted_change'])
print(f'Mean Squared Error: {mse}')

# Select the stock with the highest predicted change for tomorrow
investment_stock = test_df.loc[test_df['predicted_change'].idxmax()]
print(f'Stock to invest in: {investment_stock["id"]}, Predicted Change: {investment_stock["predicted_change"]}')


<class 'numpy.ndarray'> <class 'pandas.core.frame.DataFrame'> (3561, 1) (1559, 1)
[0.9801609]
Mean Squared Error: 0.00024923739187480336
Stock to invest in: Everstake, Predicted Change: 0.9871302843093872


To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  df = df.groupby('id').apply(create_lag_features, lag=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df['predicted_change'] = model.predict(X_test.values)


In [9]:
!pip install giza-sdk

Collecting giza-sdk
  Using cached giza_sdk-0.1.2-py3-none-any.whl (2.5 kB)
Collecting giza-agents>=0.4.2
  Using cached giza_agents-0.4.2-py3-none-any.whl (18 kB)
Collecting giza-cli>=0.17.0
  Using cached giza_cli-0.17.0-py3-none-any.whl (51 kB)
Collecting giza-datasets>=0.3.0
  Using cached giza_datasets-0.3.1-py3-none-any.whl (18 kB)
Collecting giza-zkcook<0.3.0,>=0.2.1
  Using cached giza_zkcook-0.2.2-py3-none-any.whl (17 kB)
Collecting ape-etherscan<0.8.0,>=0.7.2
  Using cached ape_etherscan-0.7.4-py3-none-any.whl (25 kB)
Collecting diskcache<6.0.0,>=5.6.3
  Using cached diskcache-5.6.3-py3-none-any.whl (45 kB)
Collecting distlib<0.4.0,>=0.3.8
  Using cached distlib-0.3.8-py2.py3-none-any.whl (468 kB)
Collecting eth-ape<0.8.0,>=0.7.10
  Using cached eth_ape-0.7.23-py3-none-any.whl (333 kB)
Collecting giza-osiris<1.0.0,>=0.2.8
  Using cached giza_osiris-0.2.8-py3-none-any.whl (12 kB)
Collecting httpx<0.26.0,>=0.25.1
  Using cached httpx-0.25.2-py3-none-any.whl (74 kB)
Collecting l

In [69]:
!giza users create-api-key

[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m09[0m [1;92m11:44:02[0m.[1;36m300[0m[1m][0m Creating API Key ✅ 
[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m09[0m [1;92m11:44:03[0m.[1;36m721[0m[1m][0m Successfully created API Key. It will be used for future requests ✅


In [4]:
import sklearn
print(sklearn.__version__)

1.4.2


In [70]:
from giza.zkcook import serialize_model
serialize_model(model, "validator_data.json")

In [72]:
! giza transpile validator_data.json --output-path validator_data

[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m09[0m [1;92m11:49:02[0m.[1;36m755[0m[1m][0m No model id provided, checking if model exists ✅
[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m09[0m [1;92m11:49:02[0m.[1;36m758[0m[1m][0m Model name is: validator_data
[2K[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m09[0m [1;92m11:49:03[0m.[1;36m672[0m[1m][0m Model already exists, using existing model ✅ 
[2K[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m09[0m [1;92m11:49:03[0m.[1;36m673[0m[1m][0m Model found with id -> [1;36m761[0m! ✅
[2K[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m09[0m [1;92m11:49:04[0m.[1;36m953[0m[1m][0m Version Created with id -> [1;36m2[0m! ✅
[2K[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m09[0m [1;92m11:49:04

In [73]:
! giza endpoints deploy --size XL --model-id 761 --version-id 2

[2K▰▰▰▱▱▱▱ Creating endpoint!t!
[?25h[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m09[0m [1;92m11:50:03[0m.[1;36m437[0m[1m][0m Endpoint is successful ✅
[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m09[0m [1;92m11:50:03[0m.[1;36m441[0m[1m][0m Endpoint created with id -> [1;36m329[0m ✅
[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m09[0m [1;92m11:50:03[0m.[1;36m442[0m[1m][0m Endpoint created with endpoint URL: [4;94mhttps://endpoint-ashu-761-2-274f6734-7i3yxzspbq-ew.a.run.app[0m 🎉


In [56]:
print(X_test.values[0].shape)

(15,)


In [60]:
v = X_test.values[:1]
args = v[0]*100000
int_args = [int(arg) for arg in args]
print(int_args)
input = X_test.values[1, :]
print(X_test.head())
print(model.predict([args]))

[98759, 98082, 97765, 98141, 97745, 97501, 98390, 98056, 96446, 97040, 97703, 95983, 96340, 96707, 97315]
         lag_1     lag_2     lag_3     lag_4     lag_5     lag_6     lag_7  \
1054  0.987593  0.980825  0.977657  0.981416  0.977456  0.975018  0.983906   
1055  0.965672  0.984462  0.915772  0.984897  0.975365  0.985054  0.988435   
1056  0.983922  0.984427  0.980115  0.987708  0.984691  0.987880  0.989143   
1057  0.989389  0.983755  0.985990  0.987211  0.978822  0.987083  0.988192   
1058  0.984704  0.970356  0.977534  0.968923  0.956117  0.971122  0.956697   

         lag_8     lag_9    lag_10    lag_11    lag_12    lag_13    lag_14  \
1054  0.980564  0.964466  0.970404  0.977035  0.959832  0.963410  0.967075   
1055  0.984454  0.980887  0.975032  0.947271  0.972258  0.968034  0.971474   
1056  0.979856  0.981389  0.972380  0.963603  0.975064  0.971483  0.975633   
1057  0.985370  0.980067  0.977581  0.981469  0.962889  0.970328  0.973710   
1058  0.957808  0.962124  0.903128 

In [88]:
X_train.iloc[0]

lag_1    0.984954
Name: 0, dtype: float64

In [93]:
new_data = pd.DataFrame({
    'lag_1': [0.987593]
})
model.predict(new_data.values)

array([0.9801609], dtype=float32)

In [97]:
import xgboost as xgb
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

from giza.agents.model import GizaModel


MODEL_ID = 761  # Update with your model ID
VERSION_ID = 2  # Update with your version ID

def prediction(input, model_id, version_id):
    model = GizaModel(id=model_id, version=version_id)

    (result, proof_id) = model.predict(
        input_feed={"input": input}, verifiable=True, model_category="XGB"
    )

    return result, proof_id


def execution():
    # The input data type should match the model's expected input
    # input = X_test.values[:1]
    # print(input, input.shape)
    # print(model.predict(input))
    input = new_data.astype(np.float32).values

    (result, proof_id) = prediction(input, MODEL_ID, VERSION_ID)

    print(f"Predicted value for input {input.flatten()[0]} is {result}")

    return result, proof_id


if __name__ == "__main__":
    _, proof_id = execution()
    print(f"Proof ID: {proof_id}")

An error occurred in predict: 503 Server Error: Service Unavailable for url: https://endpoint-ashu-761-2-274f6734-7i3yxzspbq-ew.a.run.app/cairo_run
Deployment predict error: Service Unavailable
An error occurred in predict: 503 Server Error: Service Unavailable for url: https://endpoint-ashu-761-2-274f6734-7i3yxzspbq-ew.a.run.app/cairo_run


HTTPError: 503 Server Error: Service Unavailable for url: https://endpoint-ashu-761-2-274f6734-7i3yxzspbq-ew.a.run.app/cairo_run

In [96]:
!giza endpoints logs -e {329}

[1;33m[[0m[33mgiza[0m[1;33m][0m[1m[[0m[1;36m2024[0m-[1;36m06[0m-[1;36m09[0m [1;92m12:20:35[0m.[1;36m364[0m[1m][0m Getting logs for endpoint [1;36m329[0m ✅ 
[2m2024-06-09T06:19:50.134531Z[0m [32m INFO[0m [2morion_runner[0m[2m:[0m ✅ Sierra program downloaded successfully!
[2m2024-06-09T06:19:50.134607Z[0m [32m INFO[0m [2morion_runner[0m[2m:[0m 🚀 Server running on 0.0.0.0:8080
Default STARTUP TCP probe succeeded after 1 attempt for container "orion-runner-1" on port 8080.
The request failed because either the HTTP response was malformed or connection to the instance had an error. Additional troubleshooting documentation can be found at: https://cloud.google.com/run/docs/troubleshooting#malformed-response-or-connection-error
thread 'tokio-runtime-worker' panicked at crates/cairo_runner/src/utils.rs:16:78:
called `Result::unwrap()` on an `Err` value: FromStrError
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
[2m2024-06-0