In [2]:
import time 
# Wall-clock timestamp taken when this first cell runs; the last cell subtracts it
# from its own timestamp to report the total notebook execution time.
notebook_start_time = time.time()

In [3]:
import sys
from pathlib import Path


def is_google_colab() -> bool:
    """Report whether the notebook is running inside Google Colab.

    The check looks for the substring ``google.colab`` in the string form of the
    object returned by ``get_ipython()``.

    Returns:
        ``True`` when the substring is present, ``False`` otherwise.
    """
    return "google.colab" in str(get_ipython())


def clone_repository() -> None:
    !git clone https://github.com/XayHanmonty/smart-fit-recs.git
    %cd smart-fit-recs/


def install_dependencies() -> None:
    """Install the project's dependencies using ``uv``.

    Relies on IPython shell magics (``!``), so it only works inside an
    IPython/Jupyter kernel. It reads ``pyproject.toml`` from the current working
    directory, so it is expected to run after changing into the repository.
    """
    # Install (or upgrade) the `uv` installer itself with pip.
    !pip install --upgrade uv
    # Install the requirements declared in pyproject.toml, including all extras, into
    # the system Python environment (`--system`) rather than a virtual environment.
    !uv pip install --all-extras --system --requirement pyproject.toml


# Resolve the repository root for the current runtime.
# - Locally, the root is taken to be the parent of the working directory (the
#   notebook runs from a sub-folder of the repository).
# - On Colab, the repository is cloned and its dependencies installed first, which
#   leaves the working directory at the repository root.
if not is_google_colab():
    root_dir = str(Path.cwd().parent)
    print("⛳️ Local environment")
else:
    clone_repository()
    install_dependencies()

    root_dir = str(Path.cwd())
    print("⛳️ Google Colab environment")

# Put the repository root on `sys.path` (once) so the `recsys` package can be
# imported from the notebook.
if root_dir not in sys.path:
    print(f"Adding the following directory to the PYTHONPATH: {root_dir}")
    sys.path.append(root_dir)

⛳️ Local environment
Adding the following directory to the PYTHONPATH: /Users/xayhanmonty/Desktop/smart-fit-recs


In [15]:
# Enable the autoreload extension in mode 2, which reloads modules before each cell
# is executed, so edits to the `recsys` package are picked up without a kernel restart.
# NOTE(review): re-running this cell prints "The autoreload extension is already
# loaded"; `%reload_ext autoreload` would avoid that notice.
%load_ext autoreload
%autoreload 2

import warnings

# Silence every Python warning for the rest of the session. This also hides
# deprecation notices coming from third-party libraries.
warnings.filterwarnings("ignore")

from loguru import logger

from recsys import hopsworks_integration, training
from recsys.config import settings

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [16]:
from pprint import pprint

# Show the active configuration for reference. The API keys are `SecretStr` values,
# so they are rendered masked ('**********') rather than in clear text.
pprint(dict(settings))


{'CUSTOMER_DATA_SIZE': <CustomerDatasetSize.SMALL: 'SMALL'>,
 'CUSTOM_HOPSWORKS_INFERENCE_ENV': 'custom_env_name',
 'FEATURES_EMBEDDING_MODEL_ID': 'all-MiniLM-L6-v2',
 'HOPSWORKS_API_KEY': SecretStr('**********'),
 'OPENAI_API_KEY': SecretStr('**********'),
 'OPENAI_MODEL_ID': 'gpt-4o-mini',
 'RANKING_DATASET_VALIDATON_SPLIT_SIZE': 0.1,
 'RANKING_EARLY_STOPPING_ROUNDS': 5,
 'RANKING_ITERATIONS': 100,
 'RANKING_LEARNING_RATE': 0.2,
 'RANKING_MODEL_TYPE': 'ranking',
 'RANKING_SCALE_POS_WEIGHT': 10,
 'RECSYS_DIR': PosixPath('/Users/xayhanmonty/Desktop/smart-fit-recs/recsys'),
 'TWO_TOWER_DATASET_TEST_SPLIT_SIZE': 0.1,
 'TWO_TOWER_DATASET_VALIDATON_SPLIT_SIZE': 0.1,
 'TWO_TOWER_LEARNING_RATE': 0.01,
 'TWO_TOWER_MODEL_BATCH_SIZE': 2048,
 'TWO_TOWER_MODEL_EMBEDDING_SIZE': 16,
 'TWO_TOWER_NUM_EPOCHS': 10,
 'TWO_TOWER_WEIGHT_DECAY': 0.001}


In [6]:
# Log in to Hopsworks and keep both handles: `project` is used later to reach the
# model registry, `fs` is passed to the feature-view helper in the next section.
project, fs = hopsworks_integration.get_feature_store()

[32m2025-08-05 12:11:28.718[0m | [1mINFO    [0m | [36mrecsys.hopsworks_integration.feature_store[0m:[36mget_feature_store[0m:[36m13[0m - [1mLoging to Hopsworks using HOPSWORKS_API_KEY env var.[0m


2025-08-05 12:11:28,719 INFO: Initializing external client
2025-08-05 12:11:28,719 INFO: Base URL: https://c.app.hopsworks.ai:443






2025-08-05 12:11:29,956 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1239220


# Get Training Data

In [7]:
# Obtain the ranking feature view from the feature store. It is the source of the
# train/validation split below and is passed to the model registration at the end.
feature_view_ranking = hopsworks_integration.feature_store.create_ranking_feature_views(
    fs
)

In [8]:
# Split the feature view into a training part and a held-out part. The held-out
# fraction comes from the settings; the held-out part is named `*_val` because it is
# handed to the trainer as its evaluation dataset.
X_train, X_val, y_train, y_val = feature_view_ranking.train_test_split(
    test_size=settings.RANKING_DATASET_VALIDATON_SPLIT_SIZE,
    description="Ranking training dataset",
)
# Preview the first rows of the training features.
X_train.head(3)

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (5.99s) 




Unnamed: 0,age,product_type_name,product_group_name,graphical_appearance_name,colour_group_name,perceived_colour_value_name,perceived_colour_master_name,department_name,index_name,index_group_name,section_name,garment_group_name,month_sin,month_cos
0,21.0,Leggings/Tights,Garment Lower body,Colour blocking,Grey,Dark,Grey,Ladies Sport Bottoms,Sport,Sport,Ladies H&M Sport,Jersey Fancy,,
1,29.0,Bra,Underwear,Solid,Light Beige,Dusty Light,Beige,Casual Lingerie,Lingeries/Tights,Ladieswear,Womens Lingerie,"Under-, Nightwear",,
2,21.0,Sunglasses,Accessories,Solid,Silver,Light,Metal,Small Accessories,Menswear,Menswear,Men Accessories,Accessories,,


In [9]:
# Preview the first training labels (a single `label` column).
y_train.head(3)

Unnamed: 0,label
0,0
1,0
2,0


# Training the ranking model

In [17]:
# Build the ranking model through the project's factory, then wrap it in a trainer
# that holds the training pair and the held-out pair used for evaluation.
model = training.ranking.RankingModelFactory.build()
trainer = training.ranking.RankingModelTrainer(
    model=model, train_dataset=(X_train, y_train), eval_dataset=(X_val, y_val)
)

In [19]:
# Train the model. The call's return value (the fitted estimator) is displayed as the
# cell output after the per-iteration training log.
trainer.fit()

0:	learn: 0.5148530	test: 0.5148711	best: 0.5148711 (0)	total: 16.9ms	remaining: 1.67s
1:	learn: 0.3950779	test: 0.3951065	best: 0.3951065 (1)	total: 28.5ms	remaining: 1.4s
2:	learn: 0.3093823	test: 0.3094223	best: 0.3094223 (2)	total: 49ms	remaining: 1.58s
3:	learn: 0.2456432	test: 0.2456895	best: 0.2456895 (3)	total: 64.9ms	remaining: 1.56s
4:	learn: 0.1971382	test: 0.1971903	best: 0.1971903 (4)	total: 82.3ms	remaining: 1.56s
5:	learn: 0.1596274	test: 0.1596844	best: 0.1596844 (5)	total: 105ms	remaining: 1.64s
6:	learn: 0.1302530	test: 0.1303122	best: 0.1303122 (6)	total: 125ms	remaining: 1.66s
7:	learn: 0.1070684	test: 0.1071299	best: 0.1071299 (7)	total: 145ms	remaining: 1.67s
8:	learn: 0.0886812	test: 0.0887485	best: 0.0887485 (8)	total: 167ms	remaining: 1.69s
9:	learn: 0.0740011	test: 0.0740713	best: 0.0740713 (9)	total: 177ms	remaining: 1.6s
10:	learn: 0.0622745	test: 0.0623477	best: 0.0623477 (10)	total: 196ms	remaining: 1.58s
11:	learn: 0.0528742	test: 0.0529501	best: 0.052950

<catboost.core.CatBoostClassifier at 0x33c597450>

# Evaluating the ranking model

In [22]:
# Evaluate the trained model and keep the returned metrics; they are attached to the
# model when it is registered below. With `log=True` the classification report is
# also written to the log.
metrics = trainer.evaluate(log=True)

[32m2025-08-05 17:40:55.331[0m | [1mINFO    [0m | [36mrecsys.training.ranking[0m:[36mevaluate[0m:[36m61[0m - [1m              precision    recall  f1-score   support

           0       1.00      1.00      1.00     19845
           1       0.95      1.00      0.98      1984

    accuracy                           1.00     21829
   macro avg       0.98      1.00      0.99     21829
weighted avg       1.00      1.00      1.00     21829
[0m


In [24]:
# Display the per-feature importance reported by the trainer.
# NOTE(review): `month_sin` and `month_cos` together account for ~97.5% of the
# importance, and both are empty in the label-0 rows previewed from `X_train`.
# This looks like possible label leakage (e.g. missing month features on negative
# samples) and may explain the near-perfect evaluation scores — TODO confirm.
trainer.get_feature_importance()


{'month_sin': 55.32445368657355,
 'month_cos': 42.22071705284765,
 'section_name': 0.6085413330357645,
 'age': 0.3601973770296773,
 'product_type_name': 0.3239149136319295,
 'perceived_colour_master_name': 0.2971606501498949,
 'perceived_colour_value_name': 0.20539742674513603,
 'index_group_name': 0.18372159929978635,
 'index_name': 0.12657824005608703,
 'colour_group_name': 0.1101035720130017,
 'product_group_name': 0.10390734458091867,
 'garment_group_name': 0.09575954475299199,
 'graphical_appearance_name': 0.039547259283647315,
 'department_name': 0.0}

In [26]:
# Handle to the project's model registry, where the trained model is stored below.
mr = project.get_model_registry()

In [28]:
# Wrap the trained model in the project's Hopsworks helper and register it in the
# model registry together with the feature view and the evaluation metrics.
# `X_train` is presumably used to derive the uploaded input example and model schema
# — TODO confirm against `HopsworksRankingModel.register`.
ranking_module = hopsworks_integration.ranking_serving.HopsworksRankingModel(
    model=model
)
ranking_module.register(mr, feature_view_ranking, X_train, metrics)

  0%|          | 0/6 [00:00<?, ?it/s]

Uploading /Users/xayhanmonty/Desktop/smart-fit-recs/notebooks/ranking_model.pkl: 0.000%|          | 0/325736 e…

Uploading /Users/xayhanmonty/Desktop/smart-fit-recs/notebooks/input_example.json: 0.000%|          | 0/466 ela…

Uploading /Users/xayhanmonty/Desktop/smart-fit-recs/notebooks/model_schema.json: 0.000%|          | 0/1269 ela…

Model created, explore it at https://c.app.hopsworks.ai:443/p/1239220/models/ranking_model/1


In [29]:
# Report how long the notebook took, in seconds and minutes.
# This is the wall-clock difference from the timestamp taken in the first cell, so
# any idle time between cell executions is included; the figure is only a true
# run time when the notebook is executed top to bottom in one go.
notebook_end_time = time.time()
notebook_execution_time = notebook_end_time - notebook_start_time

logger.info(
    f"⌛️ Notebook Execution time: {notebook_execution_time:.2f} seconds ~ {notebook_execution_time / 60:.2f} minutes"
)

[32m2025-08-05 17:42:33.990[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1m⌛️ Notebook Execution time: 19871.35 seconds ~ 331.19 minutes[0m
