In [4]:
import time

# Wall-clock timestamp taken at the start; the last cell subtracts it from the end time to report total runtime.
notebook_start_time = time.time()

In [5]:
import sys
from pathlib import Path


def is_google_colab() -> bool:
    """Return True when the notebook is running inside Google Colab.

    Detection checks whether the string form of the active IPython shell
    (as returned by `get_ipython()`) contains `google.colab`.

    Returns:
        True if the shell's string representation mentions `google.colab`,
        False otherwise.
    """
    # The membership test already yields a bool, so return it directly.
    return "google.colab" in str(get_ipython())


def clone_repository() -> None:
    """Clone the `smart-fit-recs` GitHub repository and change into its directory.

    Relies on the IPython `!` shell escape and the `%cd` magic, so it only
    works inside an IPython/Jupyter session. As a side effect the kernel's
    working directory becomes `smart-fit-recs/`.
    """
    !git clone https://github.com/XayHanmonty/smart-fit-recs.git
    %cd smart-fit-recs/


def install_dependencies() -> None:
    """Install the project's dependencies using `uv`.

    First installs/upgrades `uv` via pip, then runs `uv pip install` against
    `pyproject.toml` (with all extras) targeting the system Python
    environment (`--system`). `pyproject.toml` is resolved relative to the
    current working directory.
    """
    !pip install --upgrade uv
    !uv pip install --all-extras --system --requirement pyproject.toml


if not is_google_colab():
    # Local run: the project root is taken to be the parent of the current working directory.
    root_dir = str(Path().absolute().parent)
    print("⛳️ Local environment")
else:
    # Colab run: fetch the code and its dependencies first. `clone_repository()`
    # changes into the cloned repo, so the working directory itself is the root.
    clone_repository()
    install_dependencies()

    root_dir = str(Path().absolute())
    print("⛳️ Google Colab environment")

# Put the project root on `sys.path` (only once) so the `recsys` package can be imported from the notebook.
if root_dir not in sys.path:
    print(f"Adding the following directory to the PYTHONPATH: {root_dir}")
    sys.path.append(root_dir)

⛳️ Local environment
Adding the following directory to the PYTHONPATH: /Users/xayhanmonty/Desktop/smart-fit-recs


# Import Libraries

In [45]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before running each cell so edits to local packages are picked up.
%load_ext autoreload
%autoreload 2

import warnings
from pprint import pprint

import polars as pl
import torch
from loguru import logger
from sentence_transformers import SentenceTransformer

# Suppress every Python warning from this point on; this runs before the
# `recsys` imports below, so warnings raised while importing them are hidden too.
warnings.filterwarnings("ignore")

from recsys import hopsworks_integration
from recsys.config import settings
from recsys.features.articles import (
    compute_features_articles,
    generate_embeddings_for_dataframe
)

from recsys.features.customers import DatasetSampler, compute_features_customers
from recsys.features.transactions import compute_features_transactions
from recsys.features.interaction import generate_interaction_data
from recsys.features.ranking import compute_ranking_dataset
from recsys.hopsworks_integration import feature_store
from recsys.raw_data_sources import h_m as h_and_m_raw_data

# Pretty-print the loaded settings as a plain dict for reference.
pprint(dict(settings))

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
{'CUSTOMER_DATA_SIZE': <CustomerDatasetSize.SMALL: 'SMALL'>,
 'CUSTOM_HOPSWORKS_INFERENCE_ENV': 'custom_env_name',
 'FEATURES_EMBEDDING_MODEL_ID': 'all-MiniLM-L6-v2',
 'HOPSWORKS_API_KEY': SecretStr('**********'),
 'OPENAI_API_KEY': SecretStr('**********'),
 'OPENAI_MODEL_ID': 'gpt-4o-mini',
 'RANKING_EARLY_STOPPING_ROUNDS': 5,
 'RANKING_ITERATIONS': 100,
 'RANKING_LEARNING_RATE': 0.2,
 'RANKING_MODEL_TYPE': 'ranking',
 'RANKING_SCALE_POS_WEIGHT': 10,
 'RECSYS_DIR': PosixPath('/Users/xayhanmonty/Desktop/smart-fit-recs/recsys'),
 'TWO_TOWER_DATASET_TEST_SPLIT_SIZE': 0.1,
 'TWO_TOWER_DATASET_VALIDATON_SPLIT_SIZE': 0.1,
 'TWO_TOWER_LEARNING_RATE': 0.01,
 'TWO_TOWER_MODEL_BATCH_SIZE': 2048,
 'TWO_TOWER_MODEL_EMBEDDING_SIZE': 16,
 'TWO_TOWER_NUM_EPOCHS': 10,
 'TWO_TOWER_WEIGHT_DECAY': 0.001}


In [7]:
# Display the dataset sizes the sampler supports (shown as the cell result).
DatasetSampler.get_supported_sizes()

{<CustomerDatasetSize.LARGE: 'LARGE'>: 50000,
 <CustomerDatasetSize.MEDIUM: 'MEDIUM'>: 5000,
 <CustomerDatasetSize.SMALL: 'SMALL'>: 1000}

# Connect to Hopsworks Feature Store

In [8]:
# Connect to Hopsworks; `fs` is the feature-store handle passed to the feature-group helpers further down.
project, fs = hopsworks_integration.get_feature_store()

[32m2025-07-27 19:09:50.454[0m | [1mINFO    [0m | [36mrecsys.hopsworks_integration.feature_store[0m:[36mget_feature_store[0m:[36m10[0m - [1mLoging to Hopsworks using HOPSWORKS_API_KEY env var.[0m


2025-07-27 19:09:50,455 INFO: Initializing external client
2025-07-27 19:09:50,455 INFO: Base URL: https://c.app.hopsworks.ai:443






2025-07-27 19:09:51,915 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1239220


# Articles data

In [9]:
# Load the raw H&M articles table and show its (rows, columns) shape.
articles_df = h_and_m_raw_data.extract_articles_df()
articles_df.shape

(105542, 25)

In [10]:
# Preview the first three raw article rows.
articles_df.head(3)

article_id,product_code,prod_name,product_type_no,product_type_name,product_group_name,graphical_appearance_no,graphical_appearance_name,colour_group_code,colour_group_name,perceived_colour_value_id,perceived_colour_value_name,perceived_colour_master_id,perceived_colour_master_name,department_no,department_name,index_code,index_name,index_group_no,index_group_name,section_no,section_name,garment_group_no,garment_group_name,detail_desc
i64,i64,str,i64,str,str,i64,str,i64,str,i64,str,i64,str,i64,str,str,str,i64,str,i64,str,i64,str,str
108775015,108775,"""Strap top""",253,"""Vest top""","""Garment Upper body""",1010016,"""Solid""",9,"""Black""",4,"""Dark""",5,"""Black""",1676,"""Jersey Basic""","""A""","""Ladieswear""",1,"""Ladieswear""",16,"""Womens Everyday Basics""",1002,"""Jersey Basic""","""Jersey top with narrow shoulde…"
108775044,108775,"""Strap top""",253,"""Vest top""","""Garment Upper body""",1010016,"""Solid""",10,"""White""",3,"""Light""",9,"""White""",1676,"""Jersey Basic""","""A""","""Ladieswear""",1,"""Ladieswear""",16,"""Womens Everyday Basics""",1002,"""Jersey Basic""","""Jersey top with narrow shoulde…"
108775051,108775,"""Strap top (1)""",253,"""Vest top""","""Garment Upper body""",1010017,"""Stripe""",11,"""Off White""",1,"""Dusty Light""",9,"""White""",1676,"""Jersey Basic""","""A""","""Ladieswear""",1,"""Ladieswear""",16,"""Womens Everyday Basics""",1002,"""Jersey Basic""","""Jersey top with narrow shoulde…"


In [11]:
# Count missing values per column of the raw articles table.
articles_df.null_count()

article_id,product_code,prod_name,product_type_no,product_type_name,product_group_name,graphical_appearance_no,graphical_appearance_name,colour_group_code,colour_group_name,perceived_colour_value_id,perceived_colour_value_name,perceived_colour_master_id,perceived_colour_master_name,department_no,department_name,index_code,index_name,index_group_no,index_group_name,section_no,section_name,garment_group_no,garment_group_name,detail_desc
u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,416


# Articles feature engineering

In [12]:
# Run the articles feature-engineering step and show the resulting shape.
# NOTE: `articles_df` is rebound to the processed frame, so re-running this cell
# alone feeds already-processed data back into `compute_features_articles`.
articles_df = compute_features_articles(articles_df)
articles_df.shape

(105542, 27)

In [13]:
# Preview the first three rows after feature engineering.
articles_df.head(3)

article_id,product_code,prod_name,product_type_no,product_type_name,product_group_name,graphical_appearance_no,graphical_appearance_name,colour_group_code,colour_group_name,perceived_colour_value_id,perceived_colour_value_name,perceived_colour_master_id,perceived_colour_master_name,department_no,department_name,index_code,index_name,index_group_no,index_group_name,section_no,section_name,garment_group_no,garment_group_name,prod_name_length,article_description,image_url
str,i64,str,i64,str,str,i64,str,i64,str,i64,str,i64,str,i64,str,str,str,i64,str,i64,str,i64,str,u32,str,str
"""108775015""",108775,"""Strap top""",253,"""Vest top""","""Garment Upper body""",1010016,"""Solid""",9,"""Black""",4,"""Dark""",5,"""Black""",1676,"""Jersey Basic""","""A""","""Ladieswear""",1,"""Ladieswear""",16,"""Womens Everyday Basics""",1002,"""Jersey Basic""",9,"""Strap top - Vest top in Garmen…","""https://repo.hops.works/dev/jd…"
"""108775044""",108775,"""Strap top""",253,"""Vest top""","""Garment Upper body""",1010016,"""Solid""",10,"""White""",3,"""Light""",9,"""White""",1676,"""Jersey Basic""","""A""","""Ladieswear""",1,"""Ladieswear""",16,"""Womens Everyday Basics""",1002,"""Jersey Basic""",9,"""Strap top - Vest top in Garmen…","""https://repo.hops.works/dev/jd…"
"""108775051""",108775,"""Strap top (1)""",253,"""Vest top""","""Garment Upper body""",1010017,"""Stripe""",11,"""Off White""",1,"""Dusty Light""",9,"""White""",1676,"""Jersey Basic""","""A""","""Ladieswear""",1,"""Ladieswear""",16,"""Womens Everyday Basics""",1002,"""Jersey Basic""",13,"""Strap top (1) - Vest top in Ga…","""https://repo.hops.works/dev/jd…"


# Create embeddings from the articles description

In [14]:
# Log the first three generated article descriptions (numbered from 1) to see the text that gets embedded.
for i, desc in enumerate(articles_df["article_description"].head(n=3)):
    logger.info(f"Item {i+1}:\n{desc}")

[32m2025-07-27 19:09:58.158[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1mItem 1:
Strap top - Vest top in Garment Upper body
Apperance:Solid
Color: Dark Black (Black)
Category: Ladieswear - Womens Everyday Basics - Jersey Basic
Details:Jersey top with narrow shoulder straps.[0m
[32m2025-07-27 19:09:58.158[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1mItem 2:
Strap top - Vest top in Garment Upper body
Apperance:Solid
Color: Light White (White)
Category: Ladieswear - Womens Everyday Basics - Jersey Basic
Details:Jersey top with narrow shoulder straps.[0m
[32m2025-07-27 19:09:58.158[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1mItem 3:
Strap top (1) - Vest top in Garment Upper body
Apperance:Stripe
Color: Dusty Light White (Off White)
Category: Ladieswear - Womens Everyday Basics - Jersey Basic
Details:Jersey top with narrow shoulder straps.[0m


In [15]:
# Choose the compute backend: prefer CUDA, then MPS, and fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

logger.info(
    f"Loading '{settings.FEATURES_EMBEDDING_MODEL_ID}' embedding model to {device=}"
)

# Instantiate the sentence-embedding model named in the settings on the chosen device.
model = SentenceTransformer(settings.FEATURES_EMBEDDING_MODEL_ID, device=device)

[32m2025-07-27 19:09:58.195[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m8[0m - [1mLoading 'all-MiniLM-L6-v2' embedding model to device='mps'[0m


2025-07-27 19:09:58,196 INFO: Load pretrained SentenceTransformer: all-MiniLM-L6-v2


In [16]:
# Embed the `article_description` column with the loaded model, processing 128 rows per batch.
# The returned frame (rebound to `articles_df`) carries the `embeddings` column inspected in the next cell.
articles_df = generate_embeddings_for_dataframe(
    articles_df, "article_description", model, batch_size=128
)  # Reduce batch size if getting OOM errors.

Generating embedding:   0%|          | 0/105542 [00:00<?, ?it/s]

In [17]:
# Show each description next to its embedding for the first three articles.
articles_df[["article_description", "embeddings"]].head(3)

article_description,embeddings
str,list[f64]
"""Strap top - Vest top in Garmen…","[-0.029346, 0.080771, … 0.027139]"
"""Strap top - Vest top in Garmen…","[-0.012484, 0.088007, … 0.027018]"
"""Strap top (1) - Vest top in Ga…","[-0.028718, 0.08914, … 0.022598]"


# Looking at image links

In [18]:
# Inspect the image URL of the first article.
articles_df["image_url"][0]

'https://repo.hops.works/dev/jdowling/h-and-m/images/010/0108775015.jpg'

In [19]:
from IPython.display import HTML, display

# Image URLs of the last 12 articles, rendered below as a 6-column CSS grid.
image_urls = articles_df["image_url"].tail(12).to_list()

# Assemble the markup in one pass: opening container, one <img> per URL, closing tag.
grid_html = "".join(
    [
        '<div style="display: grid; grid-template-columns: repeat(6, 1fr); gap: 10px; max-width: 900px;">',
        *(f'<img src="{url}" style="width: 100%; height: auto;">' for url in image_urls),
        "</div>",
    ]
)

display(HTML(grid_html))

# Customer Data

In [20]:
# Load the raw H&M customers table and show its (rows, columns) shape.
customers_df = h_and_m_raw_data.extract_customers_df()
customers_df.shape

(1371980, 7)

In [21]:
# Preview the first three raw customer rows.
customers_df.head(3)

customer_id,FN,Active,club_member_status,fashion_news_frequency,age,postal_code
str,f64,f64,str,str,i64,str
"""00000dbacae5abe5e23885899a1fa4…",,,"""ACTIVE""","""NONE""",49,"""52043ee2162cf5aa7ee79974281641…"
"""0000423b00ade91418cceaf3b26c6a…",,,"""ACTIVE""","""NONE""",25,"""2973abc54daa8a5f8ccfe9362140c6…"
"""000058a12d5b43e67d225668fa1f8d…",,,"""ACTIVE""","""NONE""",24,"""64f17e6a330a85798e4998f62d0930…"


In [22]:
# Count missing values per column of the raw customers table.
customers_df.null_count()

customer_id,FN,Active,club_member_status,fashion_news_frequency,age,postal_code
u32,u32,u32,u32,u32,u32,u32
0,895050,907576,6062,16009,15861,0


# Customers feature engineering

In [23]:
# Run the customers feature-engineering step with `drop_null_age=True`
# (presumably drops customers whose age is missing — confirm in `recsys.features.customers`).
# NOTE: `customers_df` is rebound to the processed frame, so this cell is not idempotent on its own.
customers_df = compute_features_customers(customers_df, drop_null_age=True)
customers_df.shape

(1356119, 5)

In [24]:
# Preview the first three customer rows after feature engineering.
customers_df.head(3)

customer_id,club_member_status,age,postal_code,age_group
str,str,f64,str,str
"""00000dbacae5abe5e23885899a1fa4…","""ACTIVE""",49.0,"""52043ee2162cf5aa7ee79974281641…","""46-55"""
"""0000423b00ade91418cceaf3b26c6a…","""ACTIVE""",25.0,"""2973abc54daa8a5f8ccfe9362140c6…","""19-25"""
"""000058a12d5b43e67d225668fa1f8d…","""ACTIVE""",24.0,"""64f17e6a330a85798e4998f62d0930…","""19-25"""


# Transactional Data

In [25]:
# Load the raw H&M transactions table and show its (rows, columns) shape.
transactions_df = h_and_m_raw_data.extract_transactions_df()
transactions_df.shape

(31788324, 5)

In [26]:
# Preview the first three raw transaction rows.
transactions_df.head(3)

t_dat,customer_id,article_id,price,sales_channel_id
date,str,i64,f64,i64
2018-09-20,"""000058a12d5b43e67d225668fa1f8d…",663713001,0.050831,2
2018-09-20,"""000058a12d5b43e67d225668fa1f8d…",541518023,0.030492,2
2018-09-20,"""00007d2de826758b65a93dd24ce629…",505221004,0.015237,2


# Transactions feature engineering

In [27]:
# Run the transactions feature-engineering step and show the resulting shape.
# NOTE: `transactions_df` is rebound to the processed frame, so this cell is not idempotent on its own.
transactions_df = compute_features_transactions(transactions_df)
transactions_df.shape

(31788324, 9)

In [28]:
# Preview the first three transaction rows after feature engineering.
transactions_df.head(3)

t_dat,customer_id,article_id,price,sales_channel_id,year,month,day,day_of_week
i64,str,str,f64,i64,i32,i8,i8,i8
0,"""000058a12d5b43e67d225668fa1f8d…","""663713001""",0.050831,2,2018,9,20,4
0,"""000058a12d5b43e67d225668fa1f8d…","""541518023""",0.030492,2,2018,9,20,4
0,"""00007d2de826758b65a93dd24ce629…","""505221004""",0.015237,2,2018,9,20,4


In [29]:
# Down-sample the data to the customer subset size configured in `settings.CUSTOMER_DATA_SIZE`.
sampler = DatasetSampler(size=settings.CUSTOMER_DATA_SIZE)
# NOTE(review): the keyword is spelled `transations_df` (sic); presumably this matches the
# parameter name in `DatasetSampler.sample` — do not correct it here without changing the library.
dataset_subset = sampler.sample(
    customers_df=customers_df, transations_df=transactions_df
)
# From here on both frames refer to the sampled subset, replacing the full tables.
customers_df = dataset_subset["customers"]
transactions_df = dataset_subset["transactions"]

[32m2025-07-27 19:19:08.064[0m | [1mINFO    [0m | [36mrecsys.features.customers[0m:[36msample[0m:[36m46[0m - [1mSampling 1000 customers.[0m
[32m2025-07-27 19:19:08.098[0m | [1mINFO    [0m | [36mrecsys.features.customers[0m:[36msample[0m:[36m49[0m - [1mNumber of transactions for all the customers: 31788324[0m
[32m2025-07-27 19:19:09.087[0m | [1mINFO    [0m | [36mrecsys.features.customers[0m:[36msample[0m:[36m55[0m - [1mNumber of transactions for the 1000 sampled customers: 23799[0m


In [30]:
# Shape of the transactions table after sampling.
transactions_df.shape

(23799, 9)

In [31]:
# Log up to ten distinct customer IDs that appear in the sampled transactions.
for customer_id in transactions_df["customer_id"].unique().head(10):
    logger.info(f"Logging customer ID: {customer_id}")

[32m2025-07-27 19:19:09.267[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1mLogging customer ID: 4314540251fce1cb1dbbecae592f959ec774759dbfd13d73ab6230348c4513ab[0m
[32m2025-07-27 19:19:09.267[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1mLogging customer ID: 67c81441a40094537ef614cad873120c6b6bc19d6c308539f236633551772ac3[0m
[32m2025-07-27 19:19:09.267[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1mLogging customer ID: 144aec58715a58d1ec4b2ed531c69f931f8159e83f1068ebd6d670eb1bdfc58a[0m
[32m2025-07-27 19:19:09.268[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1mLogging customer ID: 42e0cd39fc3ce1bc1551a944e2cfcd5a36dc8a9385d5a9d2487fef53fe3152c6[0m
[32m2025-07-27 19:19:09.268[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1mLogging customer ID: 4a19e05770e51462c3b0bfeb09c0fae2020086130b5c798c14ac4d2d3f82e537[0m
[32m2025-07-27

# Interaction Data

In [32]:
# Derive the interaction dataset from the sampled transactions and show its shape.
interaction_df = generate_interaction_data(transactions_df)
interaction_df.shape

Processing customer chunks: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.98s/it]


(135300, 5)

In [33]:
# Preview the first rows of the interaction dataset.
interaction_df.head()

t_dat,customer_id,article_id,interaction_score,prev_article_id
i64,str,str,i64,str
-381600000,"""00b203a32faa3d007dba198ef27c15…","""887937001""",0,"""START"""
-370800000,"""00b203a32faa3d007dba198ef27c15…","""721546001""",0,"""887937001"""
-367200000,"""00b203a32faa3d007dba198ef27c15…","""817198001""",0,"""721546001"""
-367200000,"""00b203a32faa3d007dba198ef27c15…","""820613003""",0,"""817198001"""
-360000000,"""00b203a32faa3d007dba198ef27c15…","""680553001""",0,"""820613003"""


In [34]:
# Tally how many interaction rows carry each `interaction_score` value.
interaction_df.group_by("interaction_score").agg(
    pl.col("interaction_score").count().alias("total_interactions")
)

interaction_score,total_interactions
i64,u32
0,73239
1,38262
2,23799


# Hopsworks Feature Groups

In [36]:
# Create the `customers` feature group in Hopsworks from the sampled customers.
# `online_enabled=True` is passed through to the helper (presumably enabling the online store — see Hopsworks docs).
logger.info("Uploading 'customers' Feature Group to Hopsworks.")
customers_fg = feature_store.create_customers_feature_group(
    fs, df=customers_df, online_enabled=True
)

logger.info("✅ Uploaded 'customers' Feature Group to Hopsworks!")

[32m2025-07-27 19:22:11.409[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m1[0m - [1mUploading 'customers' Feature Group to Hopsworks.[0m
Uploading Dataframe: 100.00% |██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| Rows 1000/1000 | Elapsed Time: 00:01 | Remaining Time: 00:00


Launching job: customers_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1239220/jobs/named/customers_1_offline_fg_materialization/executions
2025-07-27 19:22:26,163 INFO: Waiting for execution to finish. Current state: SUBMITTED. Final status: UNDEFINED
2025-07-27 19:22:29,313 INFO: Waiting for execution to finish. Current state: RUNNING. Final status: UNDEFINED
2025-07-27 19:23:51,129 INFO: Waiting for execution to finish. Current state: AGGREGATING_LOGS. Final status: SUCCEEDED
2025-07-27 19:23:51,250 INFO: Waiting for log aggregation to finish.
2025-07-27 19:23:59,799 INFO: Execution finished successfully.


Online data ingestion progress: 0.00% |          | Rows 0/1000

[32m2025-07-27 19:24:03.042[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m6[0m - [1m✅ Uploaded 'customers' Feature Group to Hopsworks![0m


In [40]:
# Create the `articles` feature group in Hopsworks from the full articles frame (including embeddings).
# The embedding dimension is read from the loaded SentenceTransformer model rather than hard-coded.
logger.info("Uploading 'articles' Feature Group to Hopsworks.")
articles_fg = feature_store.create_articles_feature_group(
    fs,
    df=articles_df,
    articles_description_embedding_dim=model.get_sentence_embedding_dimension(),
    online_enabled=True,
)
logger.info("✅ Uploaded 'articles' Feature Group to Hopsworks!")

[32m2025-07-27 19:25:44.926[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m1[0m - [1mUploading 'articles' Feature Group to Hopsworks.[0m


Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1239220/fs/1222754/fg/1495735


Uploading Dataframe: 100.00% |██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| Rows 105542/105542 | Elapsed Time: 00:52 | Remaining Time: 00:00


Launching job: articles_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1239220/jobs/named/articles_1_offline_fg_materialization/executions
2025-07-27 19:26:54,141 INFO: Waiting for execution to finish. Current state: SUBMITTED. Final status: UNDEFINED
2025-07-27 19:26:57,293 INFO: Waiting for execution to finish. Current state: RUNNING. Final status: UNDEFINED
2025-07-27 19:30:02,750 INFO: Waiting for execution to finish. Current state: AGGREGATING_LOGS. Final status: SUCCEEDED
2025-07-27 19:30:02,880 INFO: Waiting for log aggregation to finish.
2025-07-27 19:30:11,358 INFO: Execution finished successfully.


Online data ingestion progress: 0.00% |          | Rows 0/105542

[32m2025-07-27 19:30:11.564[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m8[0m - [1m✅ Uploaded 'articles' Feature Group to Hopsworks![0m


In [41]:
# Create the `transactions` feature group in Hopsworks from the sampled transactions.
logger.info("Uploading 'transactions' Feature Group to Hopsworks.")
trans_fg = feature_store.create_transactions_feature_group(
    fs=fs, df=transactions_df, online_enabled=True
)
logger.info("✅ Uploaded 'transactions' Feature Group to Hopsworks!")

[32m2025-07-27 19:30:14.056[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m1[0m - [1mUploading 'transactions' Feature Group to Hopsworks.[0m


Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1239220/fs/1222754/fg/1495736


Uploading Dataframe: 100.00% |████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| Rows 23799/23799 | Elapsed Time: 00:02 | Remaining Time: 00:00


Launching job: transactions_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1239220/jobs/named/transactions_1_offline_fg_materialization/executions
2025-07-27 19:30:33,443 INFO: Waiting for execution to finish. Current state: INITIALIZING. Final status: UNDEFINED
2025-07-27 19:30:36,585 INFO: Waiting for execution to finish. Current state: RUNNING. Final status: UNDEFINED
2025-07-27 19:32:13,973 INFO: Waiting for execution to finish. Current state: AGGREGATING_LOGS. Final status: SUCCEEDED
2025-07-27 19:32:14,106 INFO: Waiting for log aggregation to finish.
2025-07-27 19:32:22,587 INFO: Execution finished successfully.


Online data ingestion progress: 0.00% |          | Rows 0/23799

[32m2025-07-27 19:32:29.404[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m5[0m - [1m✅ Uploaded 'transactions' Feature Group to Hopsworks![0m


In [42]:
# Create the `interactions` feature group in Hopsworks from the generated interaction data.
logger.info("Uploading 'interactions' Feature Group to Hopsworks.")
interactions_fg = feature_store.create_interactions_feature_group(
    fs=fs, df=interaction_df, online_enabled=True
)
logger.info("✅ Uploaded 'interactions' Feature Group to Hopsworks!!")

[32m2025-07-27 19:32:29.647[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m1[0m - [1mUploading 'interactions' Feature Group to Hopsworks.[0m


Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1239220/fs/1222754/fg/1495737


Uploading Dataframe: 100.00% |██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| Rows 135300/135300 | Elapsed Time: 00:07 | Remaining Time: 00:00


Launching job: interactions_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1239220/jobs/named/interactions_1_offline_fg_materialization/executions
2025-07-27 19:32:51,936 INFO: Waiting for execution to finish. Current state: SUBMITTED. Final status: UNDEFINED
2025-07-27 19:32:55,083 INFO: Waiting for execution to finish. Current state: RUNNING. Final status: UNDEFINED
2025-07-27 19:34:41,985 INFO: Waiting for execution to finish. Current state: AGGREGATING_LOGS. Final status: SUCCEEDED
2025-07-27 19:34:42,110 INFO: Waiting for log aggregation to finish.
2025-07-27 19:34:53,826 INFO: Execution finished successfully.


Online data ingestion progress: 0.00% |          | Rows 0/135300

[32m2025-07-27 19:34:57.240[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m5[0m - [1m✅ Uploaded 'interactions' Feature Group to Hopsworks!![0m


In [47]:
# Build the ranking dataset from the transactions, articles and customers feature groups
# created above, then show its shape.
ranking_df = compute_ranking_dataset(
    trans_fg,
    articles_fg,
    customers_fg,
)
ranking_df.shape

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.19s) 
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (6.19s) 
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.59s) 
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (22.14s) 


(224136, 15)

In [48]:
# Preview the first three rows of the ranking dataset.
ranking_df.head(3)

customer_id,age,article_id,label,product_type_name,product_group_name,graphical_appearance_name,colour_group_name,perceived_colour_value_name,perceived_colour_master_name,department_name,index_name,index_group_name,section_name,garment_group_name
str,f64,str,i32,str,str,str,str,str,str,str,str,str,str,str
"""34d30dcece38ac652e9fd05285d94d…",64.0,"""839496003""",1,"""Top""","""Garment Upper body""","""All over pattern""","""Beige""","""Dark""","""Beige""","""Jersey fancy""","""Ladieswear""","""Ladieswear""","""Womens Everyday Collection""","""Jersey Fancy"""
"""91a30617b9a5b5ff3927779204a176…",49.0,"""777093001""",1,"""Jumpsuit/Playsuit""","""Garment Full body""","""All over pattern""","""Dark Blue""","""Dark""","""Blue""","""Young Girl Dresses""","""Children Sizes 134-170""","""Baby/Children""","""Young Girl""","""Dresses/Skirts girls"""
"""23eeb5e9595c9409031f21a9c01fa3…",25.0,"""875719003""",1,"""Trousers""","""Garment Lower body""","""Solid""","""Beige""","""Dusty Light""","""Mole""","""Trousers DS""","""Divided""","""Divided""","""Divided Selected""","""Trousers"""


In [49]:
# Check the class balance of the `label` column in the ranking dataset.
ranking_df.get_column("label").value_counts()

label,count
i32,u32
0,203760
1,20376


In [51]:
# Create the `ranking` feature group in Hopsworks from the ranking dataset.
# The three source feature groups are passed as `parents` (presumably recorded as lineage — confirm in
# `feature_store.create_ranking_feature_group`). Unlike the other feature groups in this notebook,
# this one is created with `online_enabled=False`.
logger.info("Uploading 'ranking' Feature Group to Hopsworks.")
rank_fg = feature_store.create_ranking_feature_group(
    fs,
    df=ranking_df,
    parents=[articles_fg, customers_fg, trans_fg],
    online_enabled=False
)
logger.info("✅ Uploaded 'ranking' Feature Group to Hopsworks!!")

[32m2025-07-27 22:13:10.067[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m1[0m - [1mUploading 'ranking' Feature Group to Hopsworks.[0m


Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1239220/fs/1222754/fg/1497463


Uploading Dataframe: 100.00% |██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| Rows 224136/224136 | Elapsed Time: 00:15 | Remaining Time: 00:00


Launching job: ranking_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1239220/jobs/named/ranking_1_offline_fg_materialization/executions
2025-07-27 22:13:41,897 INFO: Waiting for execution to finish. Current state: SUBMITTED. Final status: UNDEFINED
2025-07-27 22:13:48,198 INFO: Waiting for execution to finish. Current state: RUNNING. Final status: UNDEFINED
2025-07-27 22:15:47,799 INFO: Waiting for execution to finish. Current state: SUCCEEDING. Final status: UNDEFINED
2025-07-27 22:15:50,952 INFO: Waiting for execution to finish. Current state: AGGREGATING_LOGS. Final status: SUCCEEDED
2025-07-27 22:15:51,269 INFO: Waiting for log aggregation to finish.
2025-07-27 22:15:51,271 INFO: Execution finished successfully.


[32m2025-07-27 22:16:00.125[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m8[0m - [1m✅ Uploaded 'ranking' Feature Group to Hopsworks!![0m


In [52]:
# Elapsed wall-clock time since `notebook_start_time` was recorded in the first cell.
# This includes any idle time between cell executions, not just compute time.
notebook_end_time = time.time()
notebook_execution_time = notebook_end_time - notebook_start_time

# Report the duration in both seconds and minutes.
logger.info(
    f"⌛️ Notebook Execution time: {notebook_execution_time:.2f} seconds ~ {notebook_execution_time / 60:.2f} minutes"
)

[32m2025-07-27 22:16:00.429[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1m⌛️ Notebook Execution time: 11170.66 seconds ~ 186.18 minutes[0m
