<a target="_blank" href="https://colab.research.google.com/github/jackma-00/house-price-prediction/blob/main/experiments/4_house_price_online_inference.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

# <span style="font-weight:bold; font-size: 3rem; color:#333;">Online Inference Pipeline</span>

Installing packages

In [1]:
# Remove the preinstalled scikit-learn so the pinned version installed in the next cell takes its place
!pip uninstall scikit-learn -y

Found existing installation: scikit-learn 1.6.0
Uninstalling scikit-learn-1.6.0:
  Successfully uninstalled scikit-learn-1.6.0


In [2]:
# Pin scikit-learn to 1.5.2
# NOTE(review): presumably required for compatibility with the pinned xgboost release — confirm
!pip install scikit-learn==1.5.2

Collecting scikit-learn==1.5.2
  Downloading scikit_learn-1.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading scikit_learn-1.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.3/13.3 MB[0m [31m23.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: scikit-learn
Successfully installed scikit-learn-1.5.2


In [3]:
# Pin xgboost; the model artifact loaded later in this notebook is read with XGBRegressor.load_model
!pip install xgboost==2.1.3



In [4]:
!pip install hopsworks[python]

Collecting hopsworks[python]
  Downloading hopsworks-4.1.4-py3-none-any.whl.metadata (11 kB)
Collecting pyhumps==1.6.1 (from hopsworks[python])
  Downloading pyhumps-1.6.1-py3-none-any.whl.metadata (3.7 kB)
Collecting furl (from hopsworks[python])
  Downloading furl-2.1.3-py2.py3-none-any.whl.metadata (1.2 kB)
Collecting boto3 (from hopsworks[python])
  Downloading boto3-1.35.85-py3-none-any.whl.metadata (6.7 kB)
Collecting pandas<2.2.0 (from hopsworks[python])
  Downloading pandas-2.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting pyjks (from hopsworks[python])
  Downloading pyjks-20.0.0-py2.py3-none-any.whl.metadata (1.7 kB)
Collecting mock (from hopsworks[python])
  Downloading mock-5.1.0-py3-none-any.whl.metadata (3.0 kB)
Collecting avro==1.11.3 (from hopsworks[python])
  Downloading avro-1.11.3.tar.gz (90 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m90.6/90.6 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Inst

### <span style='color:#ff5f27'> Imports</span>

In [5]:
from xgboost import XGBRegressor
from datetime import datetime
import pandas as pd
import numpy as np
import hopsworks
import uuid
import os

import warnings
warnings.filterwarnings("ignore")

Hopsworks API Key

In [6]:
# The Hopsworks API key is taken from the HOPSWORKS_API_KEY environment variable.
# Do not paste the key into the notebook: export it before starting the kernel
# (or inject it through your platform's secret store).
#
# A key that is already set is left untouched. A blank value is removed instead of kept,
# so an empty placeholder is never used as the credential; with no key set,
# hopsworks.login() is expected to ask for one interactively.
if not os.environ.get("HOPSWORKS_API_KEY", "").strip():
    os.environ.pop("HOPSWORKS_API_KEY", None)

In [7]:
# Log in to the Hopsworks project
proj = hopsworks.login()
# Handles reused by the rest of the notebook:
#   fs - feature store (feature groups / feature views)
#   mr - model registry (trained model artifacts)
fs = proj.get_feature_store()
mr = proj.get_model_registry()


Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1158295


## <a class="anchor" id="1.5_bullet" style="color:#ff5f27">Testing Inference</a>

### <span style='color:#ff5f27'> Download the model from the model registry</span>

In [8]:
# Registry coordinates of the model exercised in this notebook
MODEL_NAME = "house_price_xgboost_model"
MODEL_VERSION = 16  # Latest version

# Look the model up in the registry, then pull its saved artifacts into a local directory
retrieved_model = mr.get_model(name=MODEL_NAME, version=MODEL_VERSION)
saved_model_dir = retrieved_model.download()



In [9]:
# Rebuild the XGBoost regressor from the JSON artifact in the downloaded model directory
model_path = f"{saved_model_dir}/model.json"
retrieved_xgboost_model = XGBRegressor()
retrieved_xgboost_model.load_model(model_path)

# Show the restored estimator as the cell output
retrieved_xgboost_model

In [10]:
# Get the feature view attached to the registered model; it is used below to build
# the feature vector that is passed to the model
feature_view = retrieved_model.get_feature_view()  # house_price_fv v5 attached to the model

In [11]:
feature_view.version

5

### <span style='color:#ff5f27'> Get Inference data</span>

In [41]:
# Mock inference data, written as three groups that are merged in the original key order

# Numeric attributes of the listing
numeric_features = {
    "agencyid": 169110.0,
    "bedroomsnumber": 3.0,
    "buildingyear": 2023.0,
    "codcom": 26086.0,
    "gsm": 181.0,
    "surface": 253,
    "latitude": 45.6674,
    "longitude": 12.244,
}

# Yes/no flags, stored as ints (0 or 1)
flag_features = dict(
    isluxury=1,
    isnew=0,
    on_the_market=0,
    zeroenergybuilding=0,
)

# Categorical attributes, kept as raw strings at this stage
categorical_features = {
    "airconditioning": "autonomo, freddo",
    "bathrooms": "3",
    "city": "Treviso",
    "condition": "Nuovo / In costruzione",
    "energyclass": "A2",
    "ga4heating": "Autonomo",
    "garage": "1 in box privato/box in garage",
    "heatingtype": "autonomo, a pavimento",
    "pricerange": "oltre 500.000 &euro;",
    "id_zona_omi": "F704-B11",
    "rooms": "4",
}

inference_data = {**numeric_features, **flag_features, **categorical_features}


In [None]:
inference_data

In [43]:
# Lookup key handed to the feature view: the OMI zone id of the listing
zone_entry = {'id_zona_omi': inference_data['id_zona_omi']}

# Build the feature vector for the model: the request-time values are supplied as
# passed features and the model-dependent transformations are applied by the feature view
transformed_data = feature_view.get_feature_vector(
    entry=zone_entry,
    passed_features=inference_data,
    return_type="pandas",
)

In [44]:
transformed_data

Unnamed: 0,agencyid,bedroomsnumber,buildingyear,codcom,gsm,surface,latitude,longitude,isluxury,isnew,...,label_encoder_condizione_,label_encoder_energyclass_,label_encoder_ga4heating_,label_encoder_garage_,label_encoder_heatingtype_,label_encoder_id_zona_omi_,label_encoder_pricerange_,label_encoder_rooms_,label_encoder_tipologia_,label_encoder_zona_omi_type_
0,169110.0,3.0,2023.0,26086.0,181.0,253.0,45.6674,12.244,1.0,0.0,...,0.0,3.0,0.0,0.0,1.0,539.0,5.0,4.0,0.0,0.0


### <span style='color:#ff5f27'> Making the predictions</span>

In [45]:
# Run the regressor on the transformed feature vector; the result is indexed with [0]
# in the next cell to obtain the single predicted price
predicted_price = retrieved_xgboost_model.predict(transformed_data)

In [46]:
# Keep the single prediction as a scalar.
# np.ravel makes this cell safe to re-run: once predicted_price has been rebound to a
# scalar, plain `predicted_price[0]` would raise, whereas ravel turns the scalar back
# into a one-element array first. The element keeps its numpy dtype.
predicted_price = np.ravel(predicted_price)[0]
predicted_price

1426200.5

### <span style='color:#ff5f27'> Saving the predictions (for monitoring) to a feature group</span>

In [47]:
# Store the model's predicted price on the record under "price".
# Note: this mutates inference_data in place, so the dict no longer matches the mock
# input defined earlier once this cell has run.
inference_data["price"] = predicted_price

In [48]:
# Incorporate uuid
def generate_numeric_uuid(digits=9):
    """Return a random integer identifier with exactly ``digits`` decimal digits.

    The identifier is derived from a random UUID (``uuid.uuid4``). The value is
    folded into the range of integers that have exactly ``digits`` digits, so
    every such integer is (approximately) equally likely. Slicing the leading
    decimal digits of the 128-bit UUID integer instead would favour ids that
    start with 1-3 and make collisions more likely.

    Args:
        digits: Number of decimal digits of the identifier. Defaults to 9.
            Must be between 1 and 18 so the result fits a signed 64-bit integer.

    Returns:
        int: A value ``v`` with ``10 ** (digits - 1) <= v < 10 ** digits``.

    Raises:
        ValueError: If ``digits`` is outside the range 1..18.

    Note:
        Identifiers this short are not globally unique; callers that use them as
        a primary key accept a small collision probability.
    """
    if not 1 <= digits <= 18:
        raise ValueError("digits must be between 1 and 18")

    # Smallest integer with `digits` digits; there are 9 * smallest such integers
    smallest = 10 ** (digits - 1)
    return smallest + uuid.uuid4().int % (9 * smallest)

# Generate the numeric UUID
# NOTE(review): `genrated_uuid` is a typo for `generated_uuid`; the name is kept because
# it is a notebook-level variable that other cells may reference
genrated_uuid = generate_numeric_uuid()

# Stored under "id", which is the primary key of the property_preds feature group
inference_data["id"] = genrated_uuid

In [49]:
# Generate the current event timestamp (datetime.today() is naive local time of the kernel)
current_timestamp = datetime.today()

# Only the calendar date is kept; "timestamp" is the event_time column of the
# property_preds feature group
inference_data["timestamp"] = current_timestamp.date()

In [None]:
inference_data

In [52]:
# Column layout of the prediction record, matching the properties fg order
PROPERTY_PREDS_COLUMNS = [
    # identifiers
    'id',
    'timestamp',  # In properties fg this is called scraping_date
    # numeric attributes
    'agencyid', 'bedroomsnumber', 'buildingyear', 'codcom',
    'gsm', 'surface', 'latitude', 'longitude',
    # flags
    'isluxury', 'isnew', 'on_the_market', 'zeroenergybuilding',
    # categorical attributes
    'airconditioning', 'bathrooms', 'city', 'condition', 'energyclass',
    'ga4heating', 'garage', 'heatingtype', 'pricerange', 'rooms', 'id_zona_omi',
    # model output
    'price',
]

# Turn the record into a one-row DataFrame and put its columns in the layout above
single_row = pd.DataFrame(inference_data, index=[0])
inference_data = single_row[PROPERTY_PREDS_COLUMNS]

In [53]:
inference_data

Unnamed: 0,id,timestamp,agencyid,bedroomsnumber,buildingyear,codcom,gsm,surface,latitude,longitude,...,city,condition,energyclass,ga4heating,garage,heatingtype,pricerange,rooms,id_zona_omi,price
0,181222906,2024-12-20,169110.0,3.0,2023.0,26086.0,181.0,253,45.6674,12.244,...,Treviso,Nuovo / In costruzione,A2,Autonomo,1 in box privato/box in garage,"autonomo, a pavimento",oltre 500.000 &euro;,4,F704-B11,1426200.5


In [54]:
# Definition of the 'property_preds' feature group that collects predictions for monitoring
property_preds_spec = dict(
    name='property_preds',
    version=3,
    description='Property predicted prices',
    primary_key=['id'],
    event_time='timestamp',
)

# Fetch the feature group, creating it from the definition if it does not exist yet
property_preds = fs.get_or_create_feature_group(**property_preds_spec)

In [55]:
# Insert data into feature group.
# In the recorded run this uploaded the single row and launched the
# property_preds_3_offline_fg_materialization job; the call's return value is the cell output.
property_preds.insert(inference_data)

Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1158295/fs/1148998/fg/1393016


Uploading Dataframe: 100.00% |██████████| Rows 1/1 | Elapsed Time: 00:01 | Remaining Time: 00:00


Launching job: property_preds_3_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1158295/jobs/named/property_preds_3_offline_fg_materialization/executions


(Job('property_preds_3_offline_fg_materialization', 'SPARK'), None)

## <a class="anchor" id="1.5_bullet" style="color:#ff5f27"> 🚀 Model Deployment</a>

### <span style="color:#ff5f27;">📎 Predictor script for Python models</span>

In [None]:
%%writefile predict_house_price.py

class Predict(object):
    """Predictor script for the model deployment.

    Loads the registered XGBoost regressor together with the feature view
    attached to it, and uses both to answer prediction requests.
    """

    def __init__(self):
        """ Initializes the serving state, reads a trained model"""
        # This file is written out by %%writefile and runs outside the notebook,
        # so it cannot rely on the notebook's imports and brings in its own.
        import hopsworks
        from xgboost import XGBRegressor

        # Log in to the project and get the model registry handle
        project = hopsworks.login()
        self.mr = project.get_model_registry()
        self.xgboost_regressor = XGBRegressor()

        # Retrieve the registered model
        # NOTE(review): the interactive test in this notebook loads version 16 (feature
        # view v5) while this script pins version 15 - confirm which one should be served.
        retrieved_model = self.mr.get_model(
            name="house_price_xgboost_model",
            version=15,
        )

        # Download the saved model artifacts to a local directory
        saved_model_dir = retrieved_model.download()

        self.feature_view = retrieved_model.get_feature_view()  # house_price_fv v4 attached to the model

        # Load the trained model
        self.xgboost_regressor.load_model(saved_model_dir + "/model.json")
        print("Initialization Complete")

    def predict(self, inputs):
        """ Serves a prediction request using a trained model

        Args:
            inputs: Request payload, forwarded as the ``entry`` argument of
                ``feature_view.get_feature_vector``.

        Returns:
            list: The model predictions as plain Python values (JSON serializable).
        """
        # A DataFrame is requested explicitly: the default return type of
        # get_feature_vector is a plain list, which has no .drop().
        # NOTE(review): the interactive test in this notebook also supplies
        # passed_features=...; confirm whether the deployment needs them too.
        feature_vector = self.feature_view.get_feature_vector(inputs, return_type="pandas")

        # The label must never reach the model; errors="ignore" tolerates feature
        # vectors that do not contain it in the first place.
        feature_vector = feature_vector.drop(columns=["price"], errors="ignore")

        # Use the regressor loaded in __init__ (there is no `self.model` attribute)
        return self.xgboost_regressor.predict(feature_vector).tolist() # Numpy Arrays are not JSON serializable