In [1]:
import os
import posixpath

import hopsworks
# Log in to Hopsworks; the returned project handle is reused by the cells below
project = hopsworks.login()
# Model registry handle for the project
mr = project.get_model_registry()

# Fetch version 1 of the registered "xgboost_fraud_model" model object
model = mr.get_model("xgboost_fraud_model", version=1)

Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://pocs.cloud.hopsworks.ai/p/125
Connected. Call `.close()` to terminate connection gracefully.


In [9]:
%%writefile /tmp/predict_example.py
import os
import numpy as np
import pandas as pd
import hsfs
import joblib


class Predict:
    """Custom predictor that scores a single transaction with the fraud model.

    Combines an hsfs feature view (stored features plus on-demand features
    computed at request time) with a joblib-serialised model loaded from the
    deployment's artifact directory.
    """

    def __init__(self):
        """Connect to the feature store, prepare serving and load the model."""
        # Feature store handle obtained through an hsfs connection
        connection = hsfs.connection()
        feature_store = connection.get_feature_store()

        # Feature view "fraud_model_fv", version 1
        self.fraud_fv = feature_store.get_feature_view("fraud_model_fv", 1)

        # Prepare the view for serving; the argument is presumably the
        # training dataset version -- confirm against the hsfs documentation
        self.fraud_fv.init_serving(1)

        # The model file sits directly under the directory named by the
        # ARTIFACT_FILES_PATH environment variable. joblib.load unpickles the
        # file, so the artifact must come from a trusted source.
        artifact_dir = os.environ["ARTIFACT_FILES_PATH"]
        self.model = joblib.load(artifact_dir + "/xgboost_fraud_model.pkl")

        print("Initialization Complete")

    def predict(self, inputs):
        """Score the first entry of ``inputs`` and return the result as a list.

        Only ``inputs[0]`` is read; any further entries are ignored.
        """
        transaction = inputs[0]
        features = self.prepare_feature_vector(transaction)
        # Flatten the features into a single row before calling the model
        single_row = np.asarray(features).reshape(1, -1)
        return self.model.predict(single_row).tolist()

    def prepare_feature_vector(self, input_transaction):
        """Assemble the feature vector for one transaction.

        Fetches the inference helper columns for the transaction's account,
        derives three on-demand features from them and hands those, together
        with the raw transaction fields, to the feature view.

        Args:
            input_transaction: mapping read for ``account_id``, ``latitude``,
                ``longitude`` and ``city``; every entry of it is forwarded as
                a passed feature.

        Returns:
            Whatever ``get_feature_vector`` yields for ``return_type="pandas"``.
        """
        account_id = input_transaction["account_id"]

        # Inference helper values, needed to compute the on-demand features
        helper_frame = self.fraud_fv.get_inference_helper(
            entry={"account_id": account_id}
        )

        # Days between the stored last-transaction timestamp and now
        elapsed = pd.Timestamp.now() - helper_frame["last_transaction_datetime"]
        days_since_last = (elapsed / np.timedelta64(1, "D")).iloc[0]

        # Haversine term between the stored location and the incoming one
        location_shift = self._haversine(
            helper_frame["latitude"].iloc[0],
            helper_frame["longitude"].iloc[0],
            input_transaction["latitude"],
            input_transaction["longitude"],
        )

        # 0 when the incoming city equals the stored city, 1 otherwise
        if helper_frame["city"].iloc[0] == input_transaction["city"]:
            outside_city = 0
        else:
            outside_city = 1

        # On-demand values come first; transaction fields are merged on top
        # and win if a key appears in both
        passed_features = {
            "time_delta_t_minus_1": days_since_last,
            "loc_delta_t_minus_1": location_shift,
            "outside_city": outside_city,
            **input_transaction,
        }

        return self.fraud_fv.get_feature_vector(
            entry={"account_id": account_id},
            passed_features=passed_features,
            return_type="pandas",
        )

    def _haversine(self, lat_1, long_1, lat_2, long_2):
        """Return the haversine-formula central angle between two points.

        The arguments go straight into ``np.sin``/``np.cos``, which work in
        radians; no unit conversion is applied and the result is not scaled
        by any radius.
        NOTE(review): presumably this mirrors how the feature was computed
        for training -- confirm before changing the formula.
        """
        half_long_gap = (long_1 - long_2) / 2.0
        half_lat_gap = (lat_1 - lat_2) / 2.0

        lat_term = np.sin(half_lat_gap) ** 2
        long_term = np.cos(lat_1) * np.cos(lat_2) * np.sin(half_long_gap) ** 2
        return 2 * np.arcsin(np.sqrt(lat_term + long_term))

Overwriting /tmp/predict_example.py


In [10]:
# Get the dataset API for the current project
dataset_api = project.get_dataset_api()

# Local path of the predictor script to upload
local_script_path = "/tmp/predict_example.py"

# Upload the script into the "Models" dataset, overwriting any existing copy
uploaded_file_path = dataset_api.upload(local_script_path, "Models", overwrite=True)

# Build the script's path inside the Hopsworks file system. Those paths are
# always "/"-separated, so join with posixpath instead of os.path, which would
# insert backslashes when the notebook runs on a Windows client. As with
# os.path.join on POSIX hosts, an already-absolute uploaded_file_path is
# returned unchanged.
predictor_script_path = posixpath.join("/Projects", project.name, uploaded_file_path)

Uploading: 0.000%|          | 0/2387 elapsed<00:00 remaining<?

In [11]:
# Deploy the fraud model under the name "fraudonlinedep1", using the uploaded
# predictor script. Creating the deployment does not start it; that is done
# with a separate .start() call.
deployment = model.deploy(
    name="fraudonlinedep1",
    script_file=predictor_script_path,
)

Deployment created, explore it at https://pocs.cloud.hopsworks.ai/p/125/deployments/14
Before making predictions, start the deployment by using `.start()`


In [12]:
# Start the deployment; await_running=300 presumably caps the wait for it to
# reach the running state at 300 seconds (TODO confirm the unit)
deployment.start(await_running=300)

  0%|          | 0/5 [00:00<?, ?it/s]

Start making predictions by using `.predict()`


In [13]:
# Show which deployment is being inspected
print(f"Deployment: {deployment.name}")

# Dump the deployment's metadata
deployment.describe()

Deployment: fraudonlinedep1
{
    "artifact_version": 2,
    "batching_configuration": {
        "batching_enabled": false
    },
    "created": "2024-04-11T22:53:42.985Z",
    "creator": "Fabio Buso",
    "description": null,
    "id": 14,
    "inference_logging": "NONE",
    "model_framework": "PYTHON",
    "model_name": "xgboost_fraud_model",
    "model_path": "/Projects/real_time_fraud_python/Models/xgboost_fraud_model",
    "model_server": "PYTHON",
    "model_version": 1,
    "name": "fraudonlinedep1",
    "predictor": "predict_example.py",
    "predictor_resources": {
        "limits": {
            "cores": 0.5,
            "gpus": 0,
            "memory": 1024
        },
        "requests": {
            "cores": 0.2,
            "gpus": 0,
            "memory": 32
        }
    },
    "requested_instances": 0,
    "serving_tool": "KSERVE"
}


In [18]:
# Sample transaction to send to the deployment for scoring
sample_transaction = {
    "account_id": "3b91e417a8584cdc56544cf372b8fe3f",
    "amount": 1234,
    "category": "Cash Withdrawal",
    "city": "Collinwood",
    "latitude": 40.65538,
    "longitude": -74.38987,
}

# Kept as the cell's last expression so the response is displayed
deployment.predict(inputs=sample_transaction)

{'predictions': [0]}