In [1]:
import sys
from pathlib import Path
import warnings
warnings.filterwarnings("ignore", module="IPython")

root_dir = Path().absolute()
# Strip ~/notebooks/ccfraud from PYTHON_PATH if notebook started in one of these subdirectories
if root_dir.parts[-1:] == ('notebooks',):
    root_dir = Path(*root_dir.parts[:-1])
    sys.path.append(str(root_dir))
if root_dir.parts[-1:] == ('ccfraud',):
    root_dir = Path(*root_dir.parts[:-1])
    sys.path.append(str(root_dir))
root_dir = str(root_dir) 

print(f"Root dir: {root_dir}")

# Set the environment variables from the file <root_dir>/.env
from mlfs import config
settings = config.HopsworksSettings(_env_file=f"{root_dir}/.env")

Root dir: /home/jdowling/Projects/mlfs-book
HopsworksSettings initialized!


In [2]:
import hopsworks

proj = hopsworks.login()
fs = proj.get_feature_store()
mr = proj.get_model_registry()

2025-12-23 08:23:03,576 INFO: Initializing external client
2025-12-23 08:23:03,577 INFO: Base URL: https://eu-west.cloud.hopsworks.ai:443
2025-12-23 08:23:04,753 INFO: Python Engine initialized.

Logged in to project, explore it here https://eu-west.cloud.hopsworks.ai:443/p/120


In [3]:
merchant_fg = fs.get_feature_group("merchant_details", version=1)
account_fg = fs.get_feature_group("account_details", version=1)
bank_fg = fs.get_feature_group("bank_details", version=1)
card_fg = fs.get_feature_group("card_details", version=1)
cc_trans_aggs_fg = fs.get_feature_group("cc_trans_aggs_fg", version=1)
cc_trans_fg = fs.get_feature_group("cc_trans_fg", version=1)

In [4]:
	
# merchant_id,
# ip_address,
# merchant_country,
# bank_country,
# account_id
# bank_id,
# prev_ip_transaction,
# prev_ts_transaction,


In [5]:
subtree1 = cc_trans_aggs_fg.select_except(['account_id', 'bank_id', 'prev_ip_transaction', 'prev_st_transaction', 'event_time'])\
    .join(account_fg.select_features(), on="account_id")\
    .join(bank_fg.select(['credit_rating', 'days_since_bank_cr_changed', 'country']), on="bank_id")

2025-12-23 08:23:09,904 INFO: Using ['name', 'address', 'debt_end_prev_month', 'creation_date', 'end_date', 'home_country'] from feature group `account_details` as features for the query. To include primary key and event time use `select_all`.


In [6]:
selection = cc_trans_fg.select_except(['t_id', 'cc_num', 'merchant_id', 'ip_address', 'ts'])\
    .join(merchant_fg.select_features(), prefix="merchant_", on="merchant_id")\
    .join(subtree1, on="cc_num")

2025-12-23 08:23:09,917 INFO: Using ['category', 'country', 'cnt_chrgeback_prev_day', 'cnt_chrgeback_prev_week', 'cnt_chrgeback_prev_month'] from feature group `merchant_details` as features for the query. To include primary key and event time use `select_all`.


In [7]:
df = selection.show(100)

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (30.95s) 


In [8]:
df

Unnamed: 0,account_id,amount,time_since_last_trans,days_to_card_expiry,is_fraud,haversine_distance,merchant_category,merchant_country,merchant_cnt_chrgeback_prev_day,merchant_cnt_chrgeback_prev_week,...,prev_ts_transaction,name,address,debt_end_prev_month,creation_date,end_date,home_country,credit_rating,days_since_bank_cr_changed,country
0,ACC_008551,30.24,-2562085,0,False,True,Pharmacy,India,3.24,20.76,...,NaT,,,,NaT,NaT,,,,
1,ACC_000560,10.50,-2542468,0,False,True,,,,,...,NaT,,,,NaT,NaT,,,,
2,ACC_006041,135.98,-2356408,0,False,True,Restaurants,United States,1.11,7.28,...,NaT,,,,NaT,NaT,,,,
3,ACC_006041,22.87,24243,0,False,True,Jewelry,United States,0.79,4.90,...,NaT,,,,NaT,NaT,,,,
4,ACC_006041,6.81,139170,0,False,True,Entertainment,United Kingdom,4.89,31.40,...,NaT,,,,NaT,NaT,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,ACC_004281,19.97,39867,0,False,True,Entertainment,United Kingdom,4.89,31.40,...,NaT,,,,NaT,NaT,,,,
96,ACC_004281,10.75,194645,0,False,True,Sports,United Kingdom,2.28,18.36,...,NaT,,,,NaT,NaT,,,,
97,ACC_005720,56.98,-2360568,0,False,True,Books,Australia,7.04,50.56,...,NaT,,,,NaT,NaT,,,,
98,ACC_005720,5.43,37930,0,False,True,Fashion,Australia,5.77,40.51,...,NaT,,,,NaT,NaT,,,,


In [9]:
fv = fs.get_or_create_feature_view(name="cc_fraud_fv", 
                                   version=1, 
                                   description="features for a credit card fraud prediction model",
                                   query=selection,
                                   labels=['is_fraud']
                                  )

Feature view created successfully, explore it at 
https://eu-west.cloud.hopsworks.ai:443/p/120/fs/68/fv/cc_fraud_fv/version/1


In [10]:
test_start="2025-09-15 00:00"
X_train, X_test, y_train, y_test = fv.train_test_split(test_start=test_start)
X_train

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (29.63s) 
2025-12-23 08:25:00,196 INFO: Computing insert statistics
2025-12-23 08:25:01,888 INFO: Computing insert statistics



Unnamed: 0,account_id,amount,time_since_last_trans,days_to_card_expiry,haversine_distance,merchant_category,merchant_country,merchant_cnt_chrgeback_prev_day,merchant_cnt_chrgeback_prev_week,merchant_cnt_chrgeback_prev_month,...,prev_ts_transaction,name,address,debt_end_prev_month,creation_date,end_date,home_country,credit_rating,days_since_bank_cr_changed,country
0,ACC_003228,24.44,-2549820,0,True,Pharmacy,India,3.24,20.76,115.61,...,NaT,,,,NaT,NaT,,,,
1,ACC_003228,43.96,6993,0,False,Automotive,Canada,0.07,0.40,2.44,...,NaT,,,,NaT,NaT,,,,
2,ACC_003228,21.16,3324,0,False,Fashion,India,3.12,23.07,99.20,...,NaT,,,,NaT,NaT,,,,
3,ACC_003228,23.70,121777,0,True,Pet Supplies,India,2.37,15.57,79.34,...,NaT,,,,NaT,NaT,,,,
4,ACC_009242,14.67,-2455822,0,True,Home Improvement,United States,0.73,4.75,20.57,...,NaT,,,,NaT,NaT,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
499579,ACC_008978,11.21,235697,0,True,Automotive,Canada,2.15,16.04,57.10,...,2025-09-07 22:59:15,Scott Wade,"50107 Palmer Cape Apt. 490, East Madeline, CA ...",4276.09,2023-11-28 00:00:00+00:00,NaT,Canada,6.0,21.0,Brazil
499580,ACC_001127,154.06,117809,0,True,,,,,,...,2025-09-14 15:10:56,,,,NaT,NaT,,6.0,21.0,Brazil
499581,ACC_009673,35.75,261319,0,True,Groceries,China,4.05,27.64,102.57,...,2025-09-14 13:12:47,Christine Johnson,"12185 Brenda Freeway Apt. 313, East Jessica, N...",4073.50,2021-07-27 00:00:00+00:00,NaT,China,6.0,21.0,Brazil
499582,ACC_001127,30.54,6623,0,True,Department Stores,Canada,4.25,30.60,137.08,...,2025-09-14 15:10:56,,,,NaT,NaT,,6.0,21.0,Brazil
