In [None]:
import sys
from pathlib import Path

def is_google_colab() -> bool:
    if "google.colab" in str(get_ipython()):
        return True
    return False

def clone_repository() -> None:
    !git clone https://github.com/featurestorebook/mlfs-book.git
    %cd mlfs-book

def install_dependencies() -> None:
    !pip install --upgrade uv
    !uv pip install --all-extras --system --requirement pyproject.toml

if is_google_colab():
    clone_repository()
    install_dependencies()
    root_dir = str(Path().absolute())
    print("⛳️ Google Colab environment")
else:
    root_dir = str(Path().absolute().parent.parent)
    print("⛳️ Local environment")

# Add the root directory to the `PYTHONPATH` to use the `mlfs` Python module from the notebook.
if root_dir not in sys.path:
    print(f"Adding the following directory to the PYTHONPATH: {root_dir}")
    sys.path.append(root_dir)

In [1]:
import hopsworks
import pandas as pd
import numpy as np

In [2]:
project = hopsworks.login()
fs = project.get_feature_store()

Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://snurran.hops.works/p/15479
Connected. Call `.close()` to terminate connection gracefully.


In [10]:
input_transaction = {
    'account_id': '3e348c098166ca93f95803b943bae475',
    'amount': 123,
    'category': 'Cash Withdrawal',
    'city': 'Collinwood',
    'latitude': 40.65538,
    'longitude': -74.38987
}

In [11]:
fraud_fv = fs.get_feature_view("fraud_model_fv", 1)

In [12]:
helpers_df = fraud_fv.get_inference_helper(entry={'account_id': input_transaction['account_id']})


2024-06-26 13:16:13,737 INFO: Default Online Store Client is set to sql.
2024-06-26 13:16:14,392 INFO: Initialising Vector Server Online SQL client
2024-06-26 13:16:17,508 INFO: Default Online Store Client is set to sql.
2024-06-26 13:16:18,124 INFO: Initialising Vector Server Online SQL client


In [13]:
helpers_df

Unnamed: 0,city,last_transaction_datetime,longitude,latitude
0,Opportunity,2024-06-24 04:30:15,-111.78903,33.35283


In [6]:
def haversine(lat_1, long_1, lat_2, long_2):
    long_diff = long_1 - long_2
    lat_diff = lat_1 - lat_2

    a = np.sin(lat_diff/2.0)**2
    b = np.cos(lat_1) * np.cos(lat_2) * np.sin(long_diff/2.0)**2
    return 2*np.arcsin(np.sqrt(a + b))

In [14]:
passed_features = {
    "time_delta_t_minus_1": ((pd.Timestamp.now() - helpers_df["last_transaction_datetime"]) / np.timedelta64(1, 'D')).iloc[0],
    "loc_delta_t_minus_1": haversine(helpers_df['latitude'].iloc[0], helpers_df['longitude'].iloc[0], input_transaction['latitude'], input_transaction['longitude']),
    "outside_city": 0 if helpers_df['city'].iloc[0] == input_transaction['city'] else 1
}

In [15]:
passed_features.update(input_transaction)

In [16]:
feature_vector = fraud_fv.get_feature_vector(
                  entry={'account_id': input_transaction['account_id']},
                  passed_features=passed_features,
                  return_type="pandas",
            )


2024-06-26 13:16:29,726 INFO: get_feature_vector Online SQL client


In [17]:
feature_vector

Unnamed: 0,amount,category,time_delta_t_minus_1,loc_delta_t_minus_1,outside_city,cc_provider,cc_type,age,cc_expiration_days,count,min_amount,max_amount,mean
0,123.0,2.0,2.36542,1.037706,1.0,0.0,1.0,34.419178,1100.0,1.0,80.45,80.45,80.45
