<a target="_blank" href="https://colab.research.google.com/github/jackma-00/house-price-prediction/blob/main/experiments/4_house_price_online_inference.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

# <span style="font-width:bold; font-size: 3rem; color:#333;">Online Inference Pipeline</span>

Installing packages

In [1]:
!pip install hopsworks[python]

Collecting hopsworks[python]
  Downloading hopsworks-4.1.4-py3-none-any.whl.metadata (11 kB)
Collecting pyhumps==1.6.1 (from hopsworks[python])
  Downloading pyhumps-1.6.1-py3-none-any.whl.metadata (3.7 kB)
Collecting furl (from hopsworks[python])
  Downloading furl-2.1.3-py2.py3-none-any.whl.metadata (1.2 kB)
Collecting boto3 (from hopsworks[python])
  Downloading boto3-1.35.83-py3-none-any.whl.metadata (6.7 kB)
Collecting pandas<2.2.0 (from hopsworks[python])
  Downloading pandas-2.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting pyjks (from hopsworks[python])
  Downloading pyjks-20.0.0-py2.py3-none-any.whl.metadata (1.7 kB)
Collecting mock (from hopsworks[python])
  Downloading mock-5.1.0-py3-none-any.whl.metadata (3.0 kB)
Collecting avro==1.11.3 (from hopsworks[python])
  Downloading avro-1.11.3.tar.gz (90 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m90.6/90.6 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Inst

### <span style='color:#ff5f27'> Imports

In [2]:
import os
from datetime import datetime, timedelta
import pandas as pd
import matplotlib.pyplot as plt
from xgboost import XGBRegressor
from xgboost import plot_importance
from sklearn.metrics import mean_squared_error, r2_score
import hopsworks
from hopsworks.hsfs.builtin_transformations import label_encoder
from hopsworks import udf
from hsml.schema import Schema
from hsml.model_schema import ModelSchema
import uuid
from datetime import datetime

import warnings
warnings.filterwarnings("ignore")

### <span style='color:#ff5f27'> Connect to Hopsworks Feature Store

In [3]:
# If you haven't set the env variable 'HOPSWORKS_API_KEY', then uncomment the next line and enter your API key
os.environ["HOPSWORKS_API_KEY"] = "DMT7cBmSbXxvrmlm.SGi5E7zfqXqjsMJWWgiJFpiMlQep8mMiP5hAlvVCIVBXw5nCOzV67kVhGxIua122"
proj = hopsworks.login()
fs = proj.get_feature_store()
mr = proj.get_model_registry()


Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1158295


### <span style='color:#ff5f27'> Download the model from the model registry

In [4]:
retrieved_model = mr.get_model(
    name="house_price_xgboost_model",
    version=1,
)

# Download the saved model artifacts to a local directory
saved_model_dir = retrieved_model.download()



In [5]:
# Loading the XGBoost regressor model from the saved model directory
# retrieved_xgboost_model = joblib.load(saved_model_dir + "/xgboost_regressor.pkl")
retrieved_xgboost_model = XGBRegressor()

retrieved_xgboost_model.load_model(saved_model_dir + "/model.json")

# Displaying the retrieved XGBoost regressor model
retrieved_xgboost_model

In [6]:
# Get feature view
#feature_view = retrieved_model.get_feature_view()  # house_price_fv v2 attached to the model

feature_view = fs.get_feature_view(name="house_price_fv", version=2)

### <span style='color:#ff5f27'> Get Inference data

In [7]:
# Mock inference data
inference_data = {
        "agencyid": [169110.0],
        "bedroomsnumber": [3.0],
        "buildingyear": [2023.0],
        "codcom": [26086.0],
        "gsm": [181.0],
        "surface": [253],
        "latitude": [45.6674],
        "longitude": [12.244],
        "isluxury": [True],
        "isnew": [False],
        "on_the_market": [False],
        "zeroenergybuilding": [False],
        "airconditioning": ["autonomo, freddo"],
        "bathrooms": ["3"],
        "city": ["Treviso"],
        "condition": ["Nuovo / In costruzione"],
        "energyclass": ["A2"],
        "ga4heating": ["Autonomo"],
        "garage": ["1 in box privato/box in garage"],
        "heatingtype": ["autonomo, a pavimento"],
        "pricerange": ["oltre 500.000 &euro;"],
        "rooms": ["4"],
        }

In [8]:
# Intialize predicted price to match feature view schema
inference_data["price"] = 0.0

In [12]:
# Incorporate uuid
def generate_numeric_uuid():
    # Generate a UUID and extract its integer form
    raw_uuid = uuid.uuid4()
    numeric_uuid = str(raw_uuid.int)[:9]  # Take the first 9 digits
    return numeric_uuid

# Generate the numeric UUID
genrated_uuid = generate_numeric_uuid()

serving_key = {"id": genrated_uuid}

# Add serving key as first key in the dictionary
inference_data.update(serving_key)

In [13]:
inference_data

{'agencyid': [169110.0],
 'bedroomsnumber': [3.0],
 'buildingyear': [2023.0],
 'codcom': [26086.0],
 'gsm': [181.0],
 'surface': [253],
 'latitude': [45.6674],
 'longitude': [12.244],
 'isluxury': [True],
 'isnew': [False],
 'on_the_market': [False],
 'zeroenergybuilding': [False],
 'airconditioning': ['autonomo, freddo'],
 'bathrooms': ['3'],
 'city': ['Treviso'],
 'condition': ['Nuovo / In costruzione'],
 'energyclass': ['A2'],
 'ga4heating': ['Autonomo'],
 'garage': ['1 in box privato/box in garage'],
 'heatingtype': ['autonomo, a pavimento'],
 'pricerange': ['oltre 500.000 &euro;'],
 'rooms': ['4'],
 'price': 0.0,
 'id': '982818902'}

In [14]:
# Apply Model-dependent transformations to the inference data
transformed_data = feature_view.get_feature_vector(inference_data)

FeatureStoreException: Provided key agencyid is not a serving key. Required serving keys: ['id'].

In [None]:
# Drop price column
transformed_data = transformed_data.drop(columns=["price"])

In [None]:
transformed_data

### <span style='color:#ff5f27'> Making the predictions

In [None]:
predicted_price = retrieved_xgboost_model.predict(transformed_data)

### <span style='color:#ff5f27'> Saving the predictions (for monitoring) to a feature group

In [None]:
# Update price with actual predicted price
inference_data["price"] = predicted_price

In [None]:
# Incorporate uuid
def generate_numeric_uuid():
    # Generate a UUID and extract its integer form
    raw_uuid = uuid.uuid4()
    numeric_uuid = str(raw_uuid.int)[:9]  # Take the first 9 digits
    return numeric_uuid

# Generate the numeric UUID
genrated_uuid = generate_numeric_uuid()

inference_data["id"] = genrated_uuid

In [None]:
# Generate the current event timestamp
current_timestamp = datetime.now()

inference_data["timestamp"] = current_timestamp

In [None]:
# Get or create the 'property_preds' feature group
property_preds = fs.get_or_create_feature_group(
    name='property_preds',
    description='Properties predicted prices',
    version=2,
    primary_key=['id'],
    online_enabled=True,
    event_time='timestamp'
)

In [None]:
# Insert data into feature group
property_preds.insert(inference_data)