In [21]:
import hopsworks
from datetime import datetime

In [22]:
# Log in to Hopsworks and grab a handle to the feature store of the project
# that the login resolves to; `fs` is used below to look up feature groups,
# transformation functions and to register the feature view.
project = hopsworks.login()
fs = project.get_feature_store()

Connection closed.
Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://pocs.cloud.hopsworks.ai/p/125
Connected. Call `.close()` to terminate connection gracefully.


In [28]:
# Look up version 1 of every feature group that feeds the training query.
# The generator yields the handles in the same order as the names, which is
# also the order of the unpacking targets on the left-hand side.
(
    transactions,
    profiles,
    profiles_activity_5m,
    profiles_last_transaction,
) = (
    fs.get_feature_group(group_name, version=1)
    for group_name in (
        "transactions",
        "profiles",
        "profiles_activity_5m",
        "profiles_last_transaction",
    )
)

In [29]:
# Build the feature-view query: the label and transaction-level features come
# from `transactions`; every other feature group is joined on `account_id`.
query = (
    transactions.select(
        [
            'fraud_label',
            'amount',
            'category',
            'time_delta_t_minus_1',
            'loc_delta_t_minus_1',
            'outside_city',
        ]
    )
    # Profile attributes, without the join key and event time columns.
    .join(
        profiles.select_all(include_primary_key=False, include_event_time=False),
        on='account_id',
    )
    # Activity aggregates, again without the join key and event time columns.
    .join(
        profiles_activity_5m.select_all(include_primary_key=False, include_event_time=False),
        on='account_id',
    )
    # Explicit left join: these columns are not needed in the training dataset,
    # they are only here to populate the inference helper columns. The event
    # time column is kept for this feature group (only the primary key is dropped).
    .join(
        profiles_last_transaction.select_all(include_primary_key=False),
        on='account_id',
        join_type="left",
    )
)

In [30]:
# Preview 5 rows of the joined query to sanity-check the selected columns.
# In the recorded run this read went through Hive and took about a minute.
query.show(5)



Finished: Reading data from Hopsworks, using Hive (61.68s) 


Unnamed: 0,fraud_label,amount,category,time_delta_t_minus_1,loc_delta_t_minus_1,outside_city,cc_provider,cc_type,city,age,cc_expiration_days,count,min_amount,max_amount,mean,last_transaction_datetime,latitude,longitude
0,0,71.23,Domestic Transport,5.864687,0.112752,1,visa,debit,Brighton Beach,45.106849,1572,1,71.23,71.23,71.23,,,
1,0,36.26,Grocery,0.759375,0.422781,1,visa,debit,Brighton Beach,45.106849,1572,1,36.26,36.26,36.26,,,
2,0,97.49,Grocery,2.077928,0.036124,1,visa,debit,Brighton Beach,45.106849,1572,1,97.49,97.49,97.49,,,
3,0,67.81,Clothing,1.909352,0.018586,1,mastercard,debit,Brenham,45.490411,-42,1,67.81,67.81,67.81,,,
4,0,51.13,Grocery,3.828102,0.31758,1,mastercard,credit,Deltona,41.879452,476,1,51.13,51.13,51.13,,,


In [32]:
# Load the registered `label_encoder` transformation function; it is attached
# to the categorical features when the feature view is created below.
label_encoder = fs.get_transformation_function(name="label_encoder")

In [33]:
# Register the feature view with the feature store.
# `fraud_label` is the prediction target; the inference helper columns are
# declared separately from the model features; each categorical column is
# mapped to the same `label_encoder` transformation function.
fraud_model_fv = fs.create_feature_view(
    name="fraud_model_fv",
    version=1,
    description="Fraud model feature view",
    query=query,
    labels=['fraud_label'],
    inference_helper_columns=[
        'city',
        'last_transaction_datetime',
        'latitude',
        'longitude',
    ],
    transformation_functions={
        categorical_column: label_encoder
        for categorical_column in ("category", "cc_provider", "cc_type")
    },
)

Feature view created successfully, explore it at 
https://pocs.cloud.hopsworks.ai/p/125/fs/73/fv/fraud_model_fv/version/1


In [34]:
# Create the training dataset for the model, split into train and test by time.
#
# The train window runs through the last second of 2024-03-31. It previously
# stopped at 2024-03-31 00:00:00 while the test window starts at
# 2024-04-01 00:00:00, so every transaction made during March 31 fell into
# neither split and was silently dropped from the dataset.
train_start = datetime(year=2023, month=8, day=1)
train_end = datetime(year=2024, month=3, day=31, hour=23, minute=59, second=59)
test_start = datetime(year=2024, month=4, day=1)
# NOTE(review): the test window ends at 2024-04-10 00:00:00, so events later on
# April 10 are outside it -- confirm this is the intended cut-off.
test_end = datetime(year=2024, month=4, day=10)

# Kept as the last expression so the cell still displays the returned tuple
# (an integer and the materialization job handle in the recorded output).
fraud_model_fv.create_train_test_split(
    train_start=train_start,
    train_end=train_end,
    test_start=test_start,
    test_end=test_end,
    data_format="csv",
    # presumably writes a single file per split -- TODO confirm against the hsfs docs
    coalesce=True,
    # Also compute histograms and correlations for the dataset statistics.
    statistics_config={'histograms': True, 'correlations': True},
)

Training dataset job started successfully, you can follow the progress at 
https://pocs.cloud.hopsworks.ai/p/125/jobs/named/fraud_model_fv_1_create_fv_td_11042024222110/executions




(1, <hsfs.core.job.Job at 0x7f3a8d976470>)