In [1]:
import time

# Wall-clock timestamp taken when the notebook starts; the last code cell
# subtracts it from the end timestamp to report the total execution time.
notebook_start_time = time.time()

In [None]:
import sys
from pathlib import Path

def is_google_colab() -> bool:
    """Tell whether the notebook is running inside Google Colab.

    Returns:
        True when the string form of the active IPython shell (as returned by
        `get_ipython()`) contains "google.colab", False otherwise.
    """
    shell_repr = str(get_ipython())
    return "google.colab" in shell_repr

def clone_repository() -> None:
    """Clone the `hands-on-recommender-system` repository and change into it."""
    # Shell escape: clones into `hands-on-recommender-system/` under the current working directory.
    !git clone https://github.com/decodingml/hands-on-recommender-system.git
    # `%cd` changes the working directory of the kernel itself, so it stays in effect for later cells.
    %cd hands-on-recommender-system/

def install_dependencies() -> None:
    """Install the project's dependencies from `pyproject.toml` using `uv`.

    The requirement file is given as a relative path, so the current working
    directory must contain `pyproject.toml` when this is called.
    """
    # Install (or upgrade) the `uv` package manager itself first.
    !pip install --upgrade uv
    # `--system` installs into the system Python environment instead of a virtual environment;
    # `--all-extras` also installs every optional dependency group.
    !uv pip install --all-extras --system --requirement pyproject.toml

# Resolve the repository root depending on where the notebook is running.
if not is_google_colab():
    # Locally, the parent of the current working directory is used as the root.
    root_dir = str(Path.cwd().parent)
    print("⛳️ Local environment")
else:
    # In Colab the repository is fetched and its dependencies installed first;
    # cloning also changes into the repository, so the working directory is the root.
    clone_repository()
    install_dependencies()

    root_dir = str(Path.cwd())
    print("⛳️ Google Colab environment")

# Make the `recsys` Python module importable from the notebook by putting the
# root directory on the module search path (only once).
if root_dir not in sys.path:
    sys.path.append(root_dir)

## <span style="color:#ff5f27">👨🏻‍🏫 Train Ranking Model </span>

In this notebook, you will train a ranking model using gradient-boosted trees (CatBoost).

In [2]:
import sys
from pathlib import Path

# Use the parent of the current working directory as the project root and put
# it on the module search path unless it is already there.
# NOTE(review): this repeats the local-environment branch of the setup cell
# above and reassigns `root_dir` unconditionally — confirm both cells are needed.
root_dir = str(Path.cwd().parent)
if root_dir not in sys.path:
    sys.path.append(root_dir)

## <span style="color:#ff5f27">📝 Imports </span>

In [4]:
# Reload imported modules automatically before each cell runs, so edits to the
# project's source files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

import warnings

# NOTE(review): this silences every warning for the rest of the notebook,
# including deprecation warnings raised by dependencies.
warnings.filterwarnings("ignore")

# Project modules; importable because the root directory was added to `sys.path` above.
from recsys import utils
from recsys.data import ranking as ranking_data
from recsys.models import ranking as ranking_model

## <span style="color:#ff5f27">🔮 Connect to Hopsworks Feature Store </span>

In [5]:
# Log in to Hopsworks and keep handles to the project and its feature store.
# Per the log output, the helper authenticates with the `HOPSWORKS_API_KEY` environment variable.
project, fs = utils.get_hopsworks_feature_store()

[32m2024-11-09 19:54:15.506[0m | [1mINFO    [0m | [36mrecsys.utils[0m:[36mget_hopsworks_feature_store[0m:[36m10[0m - [1mLoging to Hopsworks using HOPSWORKS_API_KEY env var.[0m


Connected. Call `.close()` to terminate connection gracefully.



Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/15551
Connected. Call `.close()` to terminate connection gracefully.


## <span style="color:#ff5f27">⚙️ Feature View Creation </span>

In [6]:
# Create the feature views in the feature store (per the output: customers,
# articles and ranking). The returned object is used below as the ranking feature view.
feature_view_ranking = ranking_data.create_feature_views(fs)

Feature view created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/15551/fs/15471/fv/customers/version/1
Feature view created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/15551/fs/15471/fv/articles/version/1
Feature view created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/15551/fs/15471/fv/ranking/version/1


## <span style="color:#ff5f27">🗄️ Training Data Loading </span>

In [7]:
# Read the ranking feature view and split it into training and validation
# sets, holding out 10% of the rows (`test_size=0.1`) for validation.
# NOTE(review): no seed is passed, so the split may differ between runs —
# confirm whether the API allows fixing one for reproducibility.
X_train, X_val, y_train, y_val = feature_view_ranking.train_test_split(
    test_size=0.1,
    description="Ranking training dataset",
)

# Preview the first rows of the training features.
X_train.head(3)

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (96.91s) 



Unnamed: 0,age,month_sin,month_cos,product_type_name,product_group_name,graphical_appearance_name,colour_group_name,perceived_colour_value_name,perceived_colour_master_name,department_name,index_name,index_group_name,section_name,garment_group_name
1,21.0,-0.866025,-0.5,Trousers,Garment Lower body,Solid,Black,Dark,Black,Trousers,Divided,Divided,Divided Collection,Trousers
2,23.0,-0.5,-0.866025,Leggings/Tights,Garment Lower body,Check,Grey,Medium Dusty,Grey,Jersey fancy,Ladieswear,Ladieswear,Womens Everyday Collection,Jersey Fancy
3,40.0,0.5,-0.866025,Top,Garment Upper body,Front print,Light Beige,Dusty Light,Beige,Jersey Fancy DS,Divided,Divided,Divided Selected,Jersey Fancy


In [8]:
# Preview the first rows of the training labels (a single `label` column, per the output).
y_train.head(3)

Unnamed: 0,label
1,0
2,1
3,0


## <span style="color:#ff5f27">🏃🏻‍♂️ Model Training </span>

Let's train a model.

In [9]:
# Build the ranking model (a CatBoost classifier, per the output of `fit()`
# in the next cell) and hand it to a trainer together with the training and
# validation splits loaded above.
model = ranking_model.RankingModelFactory.build()
trainer = ranking_model.RankingModelTrainer(
    model=model,
    train_dataset=(X_train, y_train),
    eval_dataset=(X_val, y_val)
)

In [10]:
# Fit the model. The log below reports one line per iteration with the loss on
# the training (`learn`) and evaluation (`test`) datasets; the cell's value is the fitted model.
trainer.fit()

0:	learn: 0.6863383	test: 0.6870174	best: 0.6870174 (0)	total: 87.2ms	remaining: 8.63s
1:	learn: 0.6801507	test: 0.6816104	best: 0.6816104 (1)	total: 113ms	remaining: 5.53s
2:	learn: 0.6760633	test: 0.6783908	best: 0.6783908 (2)	total: 136ms	remaining: 4.4s
3:	learn: 0.6739152	test: 0.6761718	best: 0.6761718 (3)	total: 160ms	remaining: 3.85s
4:	learn: 0.6706339	test: 0.6733373	best: 0.6733373 (4)	total: 183ms	remaining: 3.47s
5:	learn: 0.6671781	test: 0.6709504	best: 0.6709504 (5)	total: 206ms	remaining: 3.22s
6:	learn: 0.6640554	test: 0.6680397	best: 0.6680397 (6)	total: 229ms	remaining: 3.04s
7:	learn: 0.6617355	test: 0.6664765	best: 0.6664765 (7)	total: 255ms	remaining: 2.94s
8:	learn: 0.6606841	test: 0.6659752	best: 0.6659752 (8)	total: 279ms	remaining: 2.82s
9:	learn: 0.6580258	test: 0.6641489	best: 0.6641489 (9)	total: 341ms	remaining: 3.07s
10:	learn: 0.6560304	test: 0.6628030	best: 0.6628030 (10)	total: 366ms	remaining: 2.96s
11:	learn: 0.6554997	test: 0.6624109	best: 0.6624109

<catboost.core.CatBoostClassifier at 0x31be9ef90>

## <span style="color:#ff5f27">👮🏻‍♂️ Model Validation </span>

Next, you'll evaluate how well the model performs on the validation data.

In [11]:
# Evaluate the trained model; with `log=True` the classification report is
# also written to the log (shown below). The returned metrics are passed along
# when the model is saved to the registry at the end of the notebook.
metrics = trainer.evaluate(log=True)

[32m2024-11-09 19:56:09.526[0m | [1mINFO    [0m | [36mrecsys.models.ranking[0m:[36mevaluate[0m:[36m63[0m - [1m              precision    recall  f1-score   support

           0       0.95      0.68      0.79     20881
           1       0.14      0.57      0.23      1928

    accuracy                           0.67     22809
   macro avg       0.54      0.63      0.51     22809
weighted avg       0.88      0.67      0.74     22809
[0m


The model has a low F1-score on the positive class (0.23; higher is better), driven mainly by its low precision (0.14) on this minority class. The performance could potentially be improved by adding more features to the dataset, e.g. image embeddings.

Let's see which features your model considers important.

In [12]:
# Importance score per feature; in the output below they are listed from most to least important.
trainer.get_feature_importance()

{'month_cos': 15.264649738767623,
 'age': 14.465886561230931,
 'product_group_name': 8.726719856948513,
 'garment_group_name': 8.452724737526188,
 'section_name': 8.060233962820636,
 'index_name': 7.251644870475382,
 'product_type_name': 7.0335310334814976,
 'month_sin': 6.883970179700253,
 'department_name': 5.9185956236219415,
 'graphical_appearance_name': 4.525367093615201,
 'perceived_colour_master_name': 3.723373381430584,
 'perceived_colour_value_name': 3.634311803645231,
 'index_group_name': 3.15608161196144,
 'colour_group_name': 2.9029095447745243}

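It can be seen that the model places the highest importance on `month_cos` and `age`, followed by the article category features such as `product_group_name` and `garment_group_name`. Note that the training data contains no user or item embedding features; adding well-trained embeddings as features could yield a better ranking model.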

Finally, you'll save your model.

### <span style="color:#ff5f27">💾  Upload Model to Model Registry </span>

You'll upload the model to the Hopsworks Model Registry.

In [13]:
# Get a handle to the Model Registry of the Hopsworks project connected earlier in the notebook
mr = project.get_model_registry()

Connected. Call `.close()` to terminate connection gracefully.


In [14]:
# Wrap the trained model and upload it to the model registry. The training
# features and labels are passed along — presumably to derive the model's
# input/output schema (TODO confirm) — together with the evaluation metrics.
ranking_module = ranking_model.RankingModelModule(model=model)
ranking_module.save_to_hopsworks(mr, X_train, y_train, metrics)

  0%|          | 0/6 [00:00<?, ?it/s]

Uploading: 0.000%|          | 0/1628962 elapsed<00:00 remaining<?

Uploading: 0.000%|          | 0/459 elapsed<00:00 remaining<?

Uploading: 0.000%|          | 0/1274 elapsed<00:00 remaining<?

Model created, explore it at https://c.app.hopsworks.ai:443/p/15551/models/ranking_model/1


---

In [15]:
# End the timer (wall-clock time, like `notebook_start_time` taken in the first cell)
notebook_end_time = time.time()

# Elapsed seconds since the first cell ran; this includes any idle time
# between cell executions, not only time spent computing
notebook_execution_time = notebook_end_time - notebook_start_time
print(f"⌛️ Notebook Execution time: {notebook_execution_time:.2f} seconds")

⌛️ Notebook Execution time: 132.17 seconds


---
## <span style="color:#ff5f27">⏩️ Next Steps </span>

Now you have trained both a retrieval and a ranking model, which will allow you to generate recommendations for users. In the next notebook, you'll take a look at how you can deploy these models with the `HSML` library.