In [None]:
import sys
from pathlib import Path
import warnings
warnings.filterwarnings("ignore", module="IPython")

def is_google_colab() -> bool:
    """Report whether this notebook is running inside Google Colab.

    Returns:
        True when the string form of the object returned by IPython's
        ``get_ipython()`` contains ``google.colab``, otherwise False.
    """
    shell_repr = str(get_ipython())
    return "google.colab" in shell_repr

def clone_repository() -> None:
    """Clone the mlfs-book repository and change into the cloned directory.

    Relies on IPython's `!` shell escape and `%cd` magic, so it can only be
    called from an IPython/Jupyter session.
    """
    !git clone https://github.com/featurestorebook/mlfs-book.git
    %cd mlfs-book

def install_dependencies() -> None:
    """Install the project's dependencies using `uv`.

    First upgrades `uv` itself through pip, then runs `uv pip install` with
    `--all-extras --system` against the `pyproject.toml` found in the current
    working directory. Uses IPython's `!` shell escape, so it can only be
    called from an IPython/Jupyter session.
    """
    !pip install --upgrade uv
    !uv pip install --all-extras --system --requirement pyproject.toml

# Work out the repository root for the current runtime.
if is_google_colab():
    # On Colab: clone the repo (which also changes into it), install its
    # dependencies, and use the resulting working directory as the root.
    clone_repository()
    install_dependencies()
    root_dir = str(Path().absolute())
    print("Google Colab environment")
else:
    root_dir = Path().absolute()
    # If the notebook was started from <root>/notebooks/ccfraud or from
    # <root>/notebooks, climb back up so that root_dir is the repository root.
    if root_dir.name == 'ccfraud':
        root_dir = root_dir.parent
    if root_dir.name == 'notebooks':
        root_dir = root_dir.parent
    root_dir = str(root_dir)
    print("Local environment")

print(f"Root dir: {root_dir}")

# Put the root directory on sys.path (only once) so its packages can be imported.
if root_dir not in sys.path:
    sys.path.append(root_dir)
    print(f"Added the following directory to the PYTHONPATH: {root_dir}")

# Build the settings object, pointing it at the env file <root_dir>/.env.
# The import sits after the sys.path update, presumably because the `mlfs`
# package is resolved through root_dir.
from mlfs import config
settings = config.HopsworksSettings(_env_file=f"{root_dir}/.env")

In [4]:
from datetime import datetime, timedelta

import hopsworks
import pandas as pd
import synth_transactions as st
from hsfs.feature import Feature
from mlfs.ccfraud.features import cc_trans_fg

# Time window for this run; last_processed_date is used below as the
# start_time when reading source data.
last_processed_date = datetime(2025, 9, 5)
current_date = datetime(2025, 10, 5)

# Log in to Hopsworks and get a handle on the project's feature store.
project = hopsworks.login()
fs = project.get_feature_store()

name = "cc_trans_fg"

# Existing feature groups that this notebook reads from.
trans = fs.get_feature_group("credit_card_transactions")
# cc_trans_aggs_fg = fs.get_feature_group("cc_trans_aggs_fg")
cc_fraud_fg = fs.get_feature_group("cc_fraud_fg")

# Note: the right-hand side is evaluated first, so
# `cc_trans_fg.time_since_last_trans` below still refers to the imported
# module; afterwards the name `cc_trans_fg` is rebound to the feature group.
cc_trans_fg = fs.get_or_create_feature_group(
    name=name,
    primary_key=["tid"],
    online_enabled=True,
    version=1,
    event_time="event_time",
    features=[
        Feature("tid", type="bigint"),
        Feature("cc_num", type="string"),
        Feature("merchant_id", type="string"),
        Feature("amount", type="double"),
        Feature("ip_address", type="string"),
        Feature("card_present", type="double"),
        # A distance is a real-valued quantity; "bool" is also not a
        # documented offline type name (booleans are spelled "boolean").
        Feature("haversine_distance", type="double"),
        Feature("time_since_last_trans", type="bigint"),
        Feature("days_to_card_expiry", type="bigint"),
        Feature("is_fraud", type="boolean"),
        Feature("event_time", type="timestamp"),
    ],
    transformations=[cc_trans_fg.time_since_last_trans]
)



2025-10-27 06:51:18,795 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-10-27 06:51:18,801 INFO: Initializing external client
2025-10-27 06:51:18,802 INFO: Base URL: https://stagingmain.devnet.hops.works:443






2025-10-27 06:51:19,531 INFO: Python Engine initialized.

Logged in to project, explore it here https://stagingmain.devnet.hops.works:443/p/119





In [None]:
# Attempt to save the feature group; if anything goes wrong, print the
# error and let the notebook continue.
try:
    cc_trans_fg.save()
except Exception as save_error:
    print(save_error)

In [None]:
# Read the source transactions feature group into a frame, passing
# last_processed_date as the start of the time range.
trans_df = trans.read(start_time=last_processed_date)

In [None]:
# Ensure timestamps are proper datetimes
trans_df["ts"] = pd.to_datetime(trans_df["ts"])

# Sort by card, then by time, so each card's transactions are contiguous
# and in chronological order
trans_df = trans_df.sort_values(["cc_num", "ts"])

# Previous timestamp per cc_num. The shift is done within each card's group:
# a plain shift over the whole sorted frame would give the first transaction
# of a card the last timestamp of the card sorted just before it. With the
# grouped shift, the first transaction of every card gets NaT instead.
trans_df["prev_ts"] = trans_df.groupby("cc_num")["ts"].shift(1)

# TODO: derive time_since_last_trans from ts / prev_ts, e.g.
# trans_df["time_since_last_trans"] = cc_trans_fg.time_since_last_trans.time_since_last_trans(trans_df['ts'], trans_df['ts'].shift(1))

In [None]:
# Check if any of the new transactions are marked as fraudulent.
# Note: there is another 'batch fraud pipeline' that also handles later-arriving fraud updates.
# The fraud reports are read in this cell so that it runs on a fresh kernel
# without depending on any other cell having defined fraud_df first.
fraud_df = cc_fraud_fg.read(start_time=last_processed_date)
trans_df["is_fraud"] = trans_df["t_id"].isin(fraud_df["t_id"])

In [None]:
# This requires updating existing rows.
# Reads the fraud feature group from last_processed_date onwards and the
# transactions feature group with no start_time, then sets is_fraud to True
# for every transaction whose t_id appears in the fraud frame.
# NOTE(review): this rebinds trans_df, replacing the frame built by the
# earlier cells (including its prev_ts column) - confirm that is intended.
fraud_df = cc_fraud_fg.read(start_time=last_processed_date)
trans_df = trans.read()
trans_df["is_fraud"] = trans_df["t_id"].isin(fraud_df["t_id"])
# Only keep the rows where is_fraud is true, and update them
# trans_df = trans_df[trans_df["is_fraud"]]

In [None]:
# This will also apply any on-demand transformations.
# Insert the transactions frame (trans_df); no variable named `df` is
# defined in this notebook.
cc_trans_fg.insert(trans_df)