<a target="_blank" href="https://colab.research.google.com/github/jackma-00/house-price-prediction/blob/main/experiments/4_house_price_online_inference.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

# <span style="font-width:bold; font-size: 3rem; color:#333;">Online Inference Pipeline</span>

Installing packages

In [56]:
!pip uninstall scikit-learn -y

Found existing installation: scikit-learn 1.5.2
Uninstalling scikit-learn-1.5.2:
  Successfully uninstalled scikit-learn-1.5.2


In [57]:
!pip install scikit-learn==1.5.2

Collecting scikit-learn==1.5.2
  Using cached scikit_learn-1.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Using cached scikit_learn-1.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.3 MB)
Installing collected packages: scikit-learn
Successfully installed scikit-learn-1.5.2


In [58]:
!pip install xgboost==2.1.3



In [59]:
!pip install hopsworks[python]



### <span style='color:#ff5f27'> Imports

In [60]:
from xgboost import XGBRegressor
from datetime import datetime
import pandas as pd
import numpy as np
import hopsworks
import uuid
import os

import warnings
warnings.filterwarnings("ignore")

Hopsworks API Key

In [61]:
# If you haven't set the env variable 'HOPSWORKS_API_KEY', then uncomment the next line and enter your API key
os.environ["HOPSWORKS_API_KEY"] = "DMT7cBmSbXxvrmlm.SGi5E7zfqXqjsMJWWgiJFpiMlQep8mMiP5hAlvVCIVBXw5nCOzV67kVhGxIua122"

In [None]:
proj = hopsworks.login()
fs = proj.get_feature_store()
mr = proj.get_model_registry()


Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1158295


## <a class="anchor" id="1.5_bullet" style="color:#ff5f27">Testing Inference</a>

### <span style='color:#ff5f27'> Download the model from the model registry

In [66]:
retrieved_model = mr.get_model(
    name="house_price_xgboost_model",
    version=16,                        # Latest version
)

# Download the saved model artifacts to a local directory
saved_model_dir = retrieved_model.download()



In [None]:
# Loading the XGBoost regressor model from the saved model directory
# retrieved_xgboost_model = joblib.load(saved_model_dir + "/xgboost_regressor.pkl")
retrieved_xgboost_model = XGBRegressor()

retrieved_xgboost_model.load_model(saved_model_dir + "/model.json")

# Displaying the retrieved XGBoost regressor model
retrieved_xgboost_model

In [None]:
# Get feature view
feature_view = retrieved_model.get_feature_view()  # house_price_fv v5 attached to the model

In [None]:
feature_view.version

5

### <span style='color:#ff5f27'> Get Inference data

In [157]:
# Mock inference data
inference_data = {
    "agencyid": 169110.0,
    "bedroomsnumber": 3.0,
    "buildingyear": 2023.0,
    "codcom": 26086.0,
    "gsm": 181.0,
    "surface": 253,
    "latitude": 45.6674,
    "longitude": 12.244,
    "isluxury": 1,            # Changed to int
    "isnew": 0,               # Changed to int
    "on_the_market": 0,       # Changed to int
    "zeroenergybuilding": 0,  # Changed to int
    "airconditioning": "autonomo, freddo",
    "bathrooms": "3",
    "city": "Treviso",
    "condition": "Nuovo / In costruzione",
    "energyclass": "A2",
    "ga4heating": "Autonomo",
    "garage": "1 in box privato/box in garage",
    "heatingtype": "autonomo, a pavimento",
    "pricerange": "oltre 500.000 &euro;",
    'id_zona_omi':"F704-B11",
    "rooms": "4",
}


In [None]:
inference_data

In [158]:
# Apply Model-dependent transformations to the inference data
transformed_data = feature_view.get_feature_vector(
    entry={'id_zona_omi': inference_data['id_zona_omi']},
    passed_features=inference_data,
    return_type="list",
)

In [86]:
transformed_data

[169110.0,
 3.0,
 2023.0,
 26086.0,
 181.0,
 253,
 45.6674,
 12.244,
 1,
 0,
 0,
 0,
 1915.0,
 2663.0,
 4,
 3,
 87,
 2,
 0,
 3,
 0,
 0,
 1,
 539,
 5,
 4,
 0,
 0]

### <span style='color:#ff5f27'> Making the predictions

In [106]:
predicted_price = retrieved_xgboost_model.predict(np.asarray(transformed_data).reshape(1, -1)).tolist()

In [107]:
predicted_price

[1426200.5]

### <span style='color:#ff5f27'> Saving the predictions (for monitoring) to a feature group

In [None]:
# Update price with actual predicted price
inference_data["price"] = predicted_price

In [None]:
# Incorporate uuid
def generate_numeric_uuid():
    # Generate a UUID and extract its integer form
    raw_uuid = uuid.uuid4()
    numeric_uuid = str(raw_uuid.int)[:9]  # Take the first 9 digits
    return int(numeric_uuid)

# Generate the numeric UUID
genrated_uuid = generate_numeric_uuid()

inference_data["id"] = genrated_uuid

In [None]:
# Generate the current event timestamp
current_timestamp = datetime.today()

inference_data["timestamp"] = current_timestamp.date()

In [None]:
inference_data

In [None]:
inference_data = pd.DataFrame(inference_data, index=[0])

# Reorder columns to match properties fg order
inference_data = inference_data[[
    'id',
    'timestamp',  # In properties fg this is called scraping_date
    'agencyid',
    'bedroomsnumber',
    'buildingyear',
    'codcom',
    'gsm',
    'surface',
    'latitude',
    'longitude',
    'isluxury',
    'isnew',
    'on_the_market',
    'zeroenergybuilding',
    'airconditioning',
    'bathrooms',
    'city',
    'condition',
    'energyclass',
    'ga4heating',
    'garage',
    'heatingtype',
    'pricerange',
    'rooms',
    'id_zona_omi',
    'price',
]]

In [None]:
inference_data

Unnamed: 0,id,timestamp,agencyid,bedroomsnumber,buildingyear,codcom,gsm,surface,latitude,longitude,...,city,condition,energyclass,ga4heating,garage,heatingtype,pricerange,rooms,id_zona_omi,price
0,181222906,2024-12-20,169110.0,3.0,2023.0,26086.0,181.0,253,45.6674,12.244,...,Treviso,Nuovo / In costruzione,A2,Autonomo,1 in box privato/box in garage,"autonomo, a pavimento",oltre 500.000 &euro;,4,F704-B11,1426200.5


In [None]:
# Get or create the 'property_preds' feature group
property_preds = fs.get_or_create_feature_group(
    name='property_preds',
    version=3,
    description='Property predicted prices',
    primary_key=['id'],
    event_time='timestamp'
)

In [None]:
# Insert data into feature group
property_preds.insert(inference_data)

Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1158295/fs/1148998/fg/1393016


Uploading Dataframe: 100.00% |██████████| Rows 1/1 | Elapsed Time: 00:01 | Remaining Time: 00:00


Launching job: property_preds_3_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1158295/jobs/named/property_preds_3_offline_fg_materialization/executions


(Job('property_preds_3_offline_fg_materialization', 'SPARK'), None)

## <a class="anchor" id="1.5_bullet" style="color:#ff5f27"> 🚀 Model Deployment</a>

### <span style="color:#ff5f27;">📎 Predictor script for Python models</span>

In [177]:
%%writefile predict_house_price.py
from xgboost import XGBRegressor
import hopsworks
import numpy as np

class Predict(object):

    def __init__(self):
        """ Initializes the serving state, reads a trained model"""
        # Get feature store handle
        proj = hopsworks.login()
        self.mr = proj.get_model_registry()
        self.xgboost_regressor = XGBRegressor()

        # Retrieve the feature view from the model
        retrieved_model = self.mr.get_model(
            name="house_price_xgboost_model",
            version=16,  # Latest version
        )

        # Download the saved model artifacts to a local directory
        saved_model_dir = retrieved_model.download()

        self.feature_view = retrieved_model.get_feature_view()  # house_price_fv v4 attached to the model

        # Load the trained model
        self.xgboost_regressor.load_model(saved_model_dir + "/model.json")
        print("Initialization Complete")

    def predict(self, inputs):
        """ Serves a prediction request usign a trained model"""
        # feature_vector = self.feature_view.get_feature_vector(
            # entry={'id_zona_omi': inputs['id_zona_omi']},
            # passed_features=inputs,
            # return_type="list",
        # )

        return self.xgboost_regressor.predict(np.asarray(inputs).reshape(1, -1)).tolist()

Overwriting predict_house_price.py


In [178]:
# Get the dataset API from the project
dataset_api = proj.get_dataset_api()

# Specify the file to upload ("predict_example.py") to the "Models" directory, and allow overwriting
uploaded_file_path = dataset_api.upload("predict_house_price.py", "Models", overwrite=True)

# Construct the full path to the uploaded predictor script
predictor_script_path = os.path.join("/Projects", proj.name, uploaded_file_path)

Uploading: 0.000%|          | 0/1315 elapsed<00:00 remaining<?

### <span style="color:#ff5f27;">👩🏻‍🔬 Create the deployment</span>

Here, you fetch the model you want from the model registry and define a configuration for the deployment. For the configuration, you need to specify the serving type (default or KFserving).

In [179]:
# Deploy the fraud model
deployment = retrieved_model.deploy(
    name="house",                 # Specify the deployment name
    script_file=predictor_script_path,  # Provide the path to the predictor script
)

Deployment created, explore it at https://c.app.hopsworks.ai:443/p/1158295/deployments/353287
Before making predictions, start the deployment by using `.start()`


In [180]:
# Start the deployment and wait for it to be running, with a maximum waiting time of 180 seconds
deployment.start(await_running=180)

  0%|          | 0/5 [00:00<?, ?it/s]

Start making predictions by using `.predict()`


In [181]:
# Get the current state of the deployment and describe its details
deployment_state = deployment.get_state().describe()

{
    "available_instances": 1,
    "available_transformer_instances": 0,
    "condition": {
        "reason": "Deployment is ready",
        "status": true,
        "type": "READY"
    },
    "deployed": "2024-12-20T10:59:14.000Z",
    "hopsworks_inference_path": "/project/1158295/inference/models/house",
    "model_server_inference_path": "/v1/models/house",
    "revision": "50633510",
    "status": "Running"
}


## <span style="color:#ff5f27;"> 📡 Test your Model with an Inference Request </span>

Finally you can start making predictions with your model!

Send inference requests to the deployed model as follows:

In [182]:
transformed_data

[169110.0,
 3.0,
 2023.0,
 26086.0,
 181.0,
 253,
 45.6674,
 12.244,
 1,
 0,
 0,
 0,
 1915.0,
 2663.0,
 4,
 3,
 87,
 2,
 0,
 3,
 0,
 0,
 1,
 539,
 5,
 4,
 0,
 0]

In [183]:
# Convert NumPy int64 to native Python int
transformed_data_python = [int(x) if isinstance(x, np.int64) else x for x in transformed_data]

In [201]:
# Make predictions using the deployed model
predictions = deployment.predict(
    {"instances": [transformed_data_python]},
)
predictions

{'predictions': [1426200.5]}

In [185]:
deployment.get_logs()

Explore all the logs and filters in the Kibana logs at https://c.app.hopsworks.ai:443/p/1158295/deployments/353287

DeployableComponentLogs(instance_name: 'house-predictor-00001-deployment-848c6d8567-5w9g4', date: datetime.datetime(2024, 12, 20, 11, 0, 20, 672889)) 
2024-12-20 10:59:38.569 uvicorn.error INFO:     Application startup complete.
2024-12-20 10:59:38.569 uvicorn.error INFO:     Uvicorn running on http://0.0.0.0:8080 (Press CTRL+C to quit)
2024-12-20 10:59:57.728 uvicorn.access INFO:     127.0.0.1:56382 8 - "GET /metrics HTTP/1.1" 200 OK
2024-12-20 10:59:57.728 kserve.trace kserve.io.kserve.protocol.rest.server.metrics_handler: 0.0007534027099609375 ['http_status:200', 'http_method:GET', 'time:wall']
2024-12-20 10:59:57.728 kserve.trace kserve.io.kserve.protocol.rest.server.metrics_handler: 0.0007429999999999382 ['http_status:200', 'http_method:GET', 'time:cpu']
INFO:root:Received request via 'v1 protocol'
2024-12-20 11:00:14.774 kserve.trace requestId: 5f01f73d-0902-46ae-a1

## <span style="color:#ff5f27;"> 👾 Try out your Model Interactively </span>

We will build a user interface with Gradio to allow you to enter a credit card category and amount to see if the credit card transaction will be marked as suspected of fraud or not.

In [186]:
!pip install gradio --quiet
!pip install typing-extensions==4.3.0

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.2/57.2 MB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m320.4/320.4 kB[0m [31m19.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.8/94.8 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.2/11.2 MB[0m [31m70.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.2/73.2 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.3/62.3 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting typing-extensions==4.3.0
  Downloading typing_extensions-4.3.0-py3-none-any.whl.metadata (6.3 kB)
Downloading typing_extensions-4.3.0-py3-none-any.whl (25 kB)
Installing collected packages: typing-extensions
  Attempting uninstall: typing-extensions
    Found existing in

In [207]:
import gradio as gr
import numpy as np

def predict_price(
    agencyid,
    bedroomsnumber,
    buildingyear,
    codcom,
    gsm,
    surface,
    latitude,
    longitude,
    isluxury,
    isnew,
    on_the_market,
    zeroenergybuilding,
    airconditioning,
    bathrooms,
    city,
    condition,
    energyclass,
    ga4heating,
    garage,
    heatingtype,
    pricerange,
    id_zona_omi,
    rooms,
):
    inference_data = {
        "agencyid": agencyid,
        "bedroomsnumber": bedroomsnumber,
        "buildingyear": buildingyear,
        "codcom": codcom,
        "gsm": gsm,
        "surface": surface,
        "latitude": latitude,
        "longitude": longitude,
        "isluxury": int(isluxury),
        "isnew": int(isnew),
        "on_the_market": int(on_the_market),
        "zeroenergybuilding": int(zeroenergybuilding),
        "airconditioning": airconditioning,
        "bathrooms": bathrooms,
        "city": city,
        "condition": condition,
        "energyclass": energyclass,
        "ga4heating": ga4heating,
        "garage": garage,
        "heatingtype": heatingtype,
        "pricerange": pricerange,
        'id_zona_omi':id_zona_omi,
        "rooms": rooms,
    }

    print(inference_data)

    # Apply Model-dependent transformations to the inference data
    transformed_data = feature_view.get_feature_vector(
        entry={'id_zona_omi': inference_data['id_zona_omi']},
        passed_features=inference_data,
        return_type="list",
    )

    print(transformed_data)

    # Convert NumPy int64 to native Python int
    transformed_data_python = [int(x) if isinstance(x, np.int64) else x for x in transformed_data]

    print(transformed_data_python)
    print(type(transformed_data_python))

    # Make predictions using the deployed model
    predictions = deployment.predict(
        {"instances": [transformed_data_python]},
    )

    return predictions['predictions'][0]

demo = gr.Interface(
    fn=predict_price,
    inputs=[
        "number",
        "number",
        "number",
        "number",
        "number",
        "number",
        "number",
        "number",
        "checkbox",
        "checkbox",
        "checkbox",
        "checkbox",
        "text",
        "text",
        "text",
        "text",
        "text",
        "text",
        "text",
        "text",
        "text",
        "text",
        "text",
    ],
    outputs=[gr.Number(label="price")],
    examples=[
        [
            169110.0,
            3.0,
            2023.0,
            26086.0,
            181.0,
            253,
            45.6674,
            12.244,
            True,   # Changed to int
            False,  # Changed to int
            False,  # Changed to int
            False,  # Changed to int
            "autonomo, freddo",
            "3",
            "Treviso",
            "Nuovo / In costruzione",
            "A2",
            "Autonomo",
            "1 in box privato/box in garage",
            "autonomo, a pavimento",
            "oltre 500.000 &euro;",
            "F704-B11",
            "4",
        ],
    ],
    title="Italian House Price Predictor",
    description="Enter house details.",
)


demo.launch(debug=True)

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://d3d4b89ab1bd49b6e1.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


{'agencyid': 169110, 'bedroomsnumber': 3, 'buildingyear': 2023, 'codcom': 26086, 'gsm': 181, 'surface': 253, 'latitude': 45.6674, 'longitude': 12.244, 'isluxury': 1, 'isnew': 0, 'on_the_market': 0, 'zeroenergybuilding': 0, 'airconditioning': 'autonomo, freddo', 'bathrooms': '3', 'city': 'Treviso', 'condition': 'Nuovo / In costruzione', 'energyclass': 'A2', 'ga4heating': 'Autonomo', 'garage': '1 in box privato/box in garage', 'heatingtype': 'autonomo, a pavimento', 'pricerange': 'oltre 500.000 &euro;', 'id_zona_omi': 'F704-B11', 'rooms': '4'}
[169110, 3, 2023, 26086, 181, 253, 45.6674, 12.244, 1, 0, 0, 0, 1915.0, 2663.0, 4, 3, 87, 2, 0, 3, 0, 0, 1, 539, 5, 4, 0, 0]
[169110, 3, 2023, 26086, 181, 253, 45.6674, 12.244, 1, 0, 0, 0, 1915.0, 2663.0, 4, 3, 87, 2, 0, 3, 0, 0, 1, 539, 5, 4, 0, 0]
<class 'list'>
Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7862 <> https://d3d4b89ab1bd49b6e1.gradio.live


