## Create and Train an LR Model
### We'll start by creating a linear regression model with data from insurance ( highly used public dataset) modified.

In [3]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

from giza.datasets import DatasetsLoader
from giza.agents import GizaAgent
from giza.zkcook import serialize_model

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


  from .autonotebook import tqdm as notebook_tqdm


In [4]:
data  = pd.read_csv('insurance-short.csv')
data.head(4)

Unnamed: 0,age,sex,bmi,children,smoker,region,expenses
0,18,male,16.0,0,no,northeast,1694.8
1,18,male,17.3,2,yes,northeast,12829.46
2,18,female,20.8,0,no,southeast,1607.51
3,18,male,21.5,0,no,northeast,1702.46


In [8]:
print(data)


#df = pd.DataFrame(data_2d, columns=['Column1'])

data2 = pd.DataFrame(data, columns=['age','sex','bmi','children','smoker','region','expenses'],  )

print(data2)


      age     sex   bmi  children smoker     region  expenses
0      18    male  16.0         0     no  northeast   1694.80
1      18    male  17.3         2    yes  northeast  12829.46
2      18  female  20.8         0     no  southeast   1607.51
3      18    male  21.5         0     no  northeast   1702.46
4      18    male  21.6         0    yes  northeast  13747.87
...   ...     ...   ...       ...    ...        ...       ...
1333   64  female  39.1         3     no  southeast  16085.13
1334   64    male  39.2         1     no  southeast  14418.28
1335   64  female  39.3         0     no  northeast  14901.52
1336   64  female  39.7         0     no  southwest  14319.03
1337   64    male  40.5         0     no  southeast  13831.12

[1338 rows x 7 columns]
      age     sex   bmi  children smoker     region  expenses
0      18    male  16.0         0     no  northeast   1694.80
1      18    male  17.3         2    yes  northeast  12829.46
2      18  female  20.8         0     no  sou

In [13]:
#X, y = data.data, data.target

# Drop rows with missing values
df_cleaned = data.dropna(subset=['bmi'])

# Prepare your data
X = df_cleaned[['bmi']]
y = df_cleaned['expenses']

print(X)
print(y)

#X = df_cleaned.reshape(-1, 1)
#y = y.reshape(-1, 1)


#X2 = X.reshape(-1, 1)/99998
#y2 = y.reshape(-1, 1)

#X2 = X.reshape(1,)
#y2 = y.reshape(1,)

X2 = X([0],)
y2 = y([0],)

#df = pd.DataFrame(data_2d, columns=['Column1'])

print(X2)
print(y2)

       bmi
0     16.0
1     17.3
2     20.8
3     21.5
4     21.6
...    ...
1333  39.1
1334  39.2
1335  39.3
1336  39.7
1337  40.5

[1338 rows x 1 columns]
0        1694.80
1       12829.46
2        1607.51
3        1702.46
4       13747.87
          ...   
1333    16085.13
1334    14418.28
1335    14901.52
1336    14319.03
1337    13831.12
Name: expenses, Length: 1338, dtype: float64


TypeError: 'DataFrame' object is not callable

In [10]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a linear regression model
model = LinearRegression()

# Train the model
model.fit(X_train, y_train)

In [11]:
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

# Define the initial types for the ONNX model
#initial_type = [('float_input', FloatTensorType([None, X_train.shape[1]  ]))]

initial_type = [('float_input', FloatTensorType([None, X_train.shape[1]  ]))]

# Convert the scikit-learn model to ONNX
onnx_model = convert_sklearn(model, initial_types=initial_type)

# Save the ONNX model to a file
with open("linear_regression-ins.onnx", "wb") as f:
    f.write(onnx_model.SerializeToString())

## Save the model
### Save the model in Json format

## Transpile your model to Orion Cairo
### We will use Giza-CLI to transpile our saved model to Orion Cairo.


$ giza transpile 

## Deploy an inference endpoint
### Now that our model is transpiled to Cairo we can deploy an endpoint to run verifiable inferences. We will use Giza CLI again to run and deploy an endpoint. Ensure to replace model-id and version-id with your ids provided during transpilation.


giza endpoints deploy --model-id 665 --version-id 1

$ giza endpoints deploy --model-id XXX --version-id X


## Run a verifiable inference
##### To streamline verifiable inference, you might consider using the endpoint URL obtained after transpilation. However, this approach requires manual serialization of the input for the Cairo program and handling the deserialization process. To make this process more user-friendly and keep you within a Python environment, we've introduced a Python SDK designed to facilitate the creation of ML workflows and execution of verifiable predictions. When you initiate a prediction, our system automatically retrieves the endpoint URL you deployed earlier, converts your input into Cairo-compatible format, executes the prediction, and then converts the output back into a numpy object. 

In [13]:
from giza.agents.model import GizaModel

MODEL_ID = 719  # Update with your model ID
VERSION_ID = 3  # Update with your version ID

def prediction(input, model_id, version_id):
    model = GizaModel(id=model_id, version=version_id)

    (result, proof_id) = model.predict(
        input_feed={'input': input}, verifiable=True
    )

    return result, proof_id

def execution():
    # The input data type should match the model's expected input

    input = np.array([[30.1]]).astype(np.float32)
    #input = np.array([[31,28.1]]).astype(np.float32)
    (result, proof_id) = prediction(input, MODEL_ID, VERSION_ID)

    print(
        f"Predicted value for input {input.flatten()[0]} is {result[0].flatten()[0]}")

    return result, proof_id


execution()

🚀 Starting deserialization process...
✅ Deserialization completed! 🎉
Predicted value for input 30.100000381469727 is 13053.928756713867


(array([[13053.92875671]]), 'b7ee4fb8426644c3a6f12a7a72a95b8d')

## Download the proof
#### Initiating a verifiable inference sets off a proving job on our server, sparing you the complexities of installing and configuring the prover yourself. Upon completion, you can download your proof.

First, let's check the status of the proving job to ensure that it has been completed.

$ giza endpoints get-proof --endpoint-id XXX --proof-id "xxxxxxxxxxxxxxxxxxxxxxxxxx"
[giza][2024-05-30 00:40:39.691] Getting proof from endpoint xxx
{
  "id": 967,
  "job_id": 1121,
  "metrics": {
    "proving_time": 17.249508
  },
  "created_date": "2024-05-30T07:33:12.532659"
}

Once the proof is ready, you can download it.

$ giza endpoints download-proof --endpoint-id xxx --proof-id "xxxxxxxxxxxxxxxxx" --output-path zk_xgboost.proof
[giza][2024-05-30 00:51:52.048] Getting proof from endpoint xxx ✅
[giza][2024-05-30 00:51:53.800] Proof downloaded to zk_xgboost.proof ✅
(giza3)

## Verify the proof
#### Finally, you can verify the proof.

$ giza verify --proof-id 967
[giza][2024-05-30 00:56:05.847] Verifying proof...
[giza][2024-05-30 00:56:07.140] Verification result: True
[giza][2024-05-30 00:56:07.145] Verification time: 0.454667226
(giza3)