# Baseline Popular Recommender
As with any ML project, we start with a non-ML approach as our baseline. It's very common in RecSys project to have popular recommender as not only a benchmark but also an actual component (retrieval stage).

# Set up

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys
from typing import Literal

import pandas as pd
from dotenv import load_dotenv
from loguru import logger
from pydantic import BaseModel

import mlflow

sys.path.insert(0, "..")

from src.eval import (create_label_df, log_classification_metrics,
                      log_ranking_metrics, merge_recs_with_target)
from src.viz import custom_style_plotly

load_dotenv()
custom_style_plotly()

# Controller

In [3]:
class Args(BaseModel):
    testing: bool = False
    log_to_mlflow: bool = True
    experiment_name: str = "Retrieve - Binary"
    run_name: str = "001-baseline-popular"
    notebook_persist_dp: str = None
    random_seed: int = 41

    user_col: str = "user_id"
    item_col: str = "parent_asin"
    rating_col: str = "rating"
    timestamp_col: str = "timestamp"

    top_k_retrieve: int = 100
    top_k_rerank: int = 10

    label_format: Literal["binary", "rating"] = "binary"
    batch_size: int = 128

    def init(self):
        self.notebook_persist_dp = os.path.abspath(f"data/{self.run_name}")

        if not os.environ.get("MLFLOW_TRACKING_URI"):
            logger.warning(
                "Environment variable MLFLOW_TRACKING_URI is not set. Setting self.log_to_mlflow to false."
            )
            self.log_to_mlflow = False

        if self.log_to_mlflow:
            logger.info(
                f"Setting up MLflow experiment {self.experiment_name} - run {self.run_name}..."
            )

            mlflow.set_experiment(self.experiment_name)
            mlflow.start_run(run_name=self.run_name)

        return self


args = Args().init()

print(args.model_dump_json(indent=2))

[32m2025-03-08 21:10:32.568[0m | [1mINFO    [0m | [36m__main__[0m:[36minit[0m:[36m30[0m - [1mSetting up MLflow experiment Retrieve - Binary - run 001-baseline-popular...[0m


{
  "testing": false,
  "log_to_mlflow": true,
  "experiment_name": "Retrieve - Binary",
  "run_name": "001-baseline-popular",
  "notebook_persist_dp": "/home/dvq/frostmourne/recsys-blog/1-seq-model/notebooks/data/001-baseline-popular",
  "random_seed": 41,
  "user_col": "user_id",
  "item_col": "parent_asin",
  "rating_col": "rating",
  "timestamp_col": "timestamp",
  "top_k_retrieve": 100,
  "top_k_rerank": 10,
  "label_format": "binary",
  "batch_size": 128
}


# Prep data

In [4]:
train_df = pd.read_parquet("../data/train_features_neg_df.parquet")
val_df = pd.read_parquet("../data/val_features_neg_df.parquet")

# Implement

In [5]:
popular_items_df = (
    train_df.groupby(args.item_col, as_index=False)
    .size()
    .assign(
        score=lambda df: df["size"] / df["size"].max(),
        rec_ranking=lambda df: df["score"]
        .rank(method="first", ascending=False)
        .astype(int),
    )
    .sort_values(["rec_ranking"], ascending=[True])
)

top_popular_items_df = popular_items_df.head(args.top_k_retrieve)

top_popular_items_df

Unnamed: 0,parent_asin,size,score,rec_ranking
5342,B00L9B7IKE,2401,1.000000,1
5260,B00JO8PEN2,1634,0.680550,2
4340,B006LSZECO,1499,0.624323,3
4924,B00DPM7TIG,1205,0.501874,4
4855,B00CNQ7HAU,1151,0.479384,5
...,...,...,...,...
4670,B00AESRRQS,293,0.122032,96
2933,1476746583,291,0.121200,97
3733,B001NHNG64,290,0.120783,98
6882,B07FTBNVGK,290,0.120783,99


In [6]:
recommendations_df = (
    val_df[[args.user_col]]
    .drop_duplicates()
    .assign(tmp=1)
    .pipe(
        lambda df: pd.merge(
            df, top_popular_items_df.assign(tmp=1), on="tmp", how="left"
        )
    )[[args.user_col, args.item_col, "score", "rec_ranking"]]
)

recommendations_df

Unnamed: 0,user_id,parent_asin,score,rec_ranking
0,AE23RLRV25THT7OZM4T4ZJ4BMYCA,B00L9B7IKE,1.000000,1
1,AE23RLRV25THT7OZM4T4ZJ4BMYCA,B00JO8PEN2,0.680550,2
2,AE23RLRV25THT7OZM4T4ZJ4BMYCA,B006LSZECO,0.624323,3
3,AE23RLRV25THT7OZM4T4ZJ4BMYCA,B00DPM7TIG,0.501874,4
4,AE23RLRV25THT7OZM4T4ZJ4BMYCA,B00CNQ7HAU,0.479384,5
...,...,...,...,...
173595,AGHGJ3YAYSJFOKVHV4WMBFLYRQNA,B00AESRRQS,0.122032,96
173596,AGHGJ3YAYSJFOKVHV4WMBFLYRQNA,1476746583,0.121200,97
173597,AGHGJ3YAYSJFOKVHV4WMBFLYRQNA,B001NHNG64,0.120783,98
173598,AGHGJ3YAYSJFOKVHV4WMBFLYRQNA,B07FTBNVGK,0.120783,99


# Evaluate

## Ranking metrics

In [7]:
# In case we wonder what if we measure Popular Recommender ranking metrics on the basis that the label is binary
# to have a fair comparison with our models
if args.label_format == "binary":
    val_df = val_df.assign(
        **{args.rating_col: lambda df: df[args.rating_col].gt(0).astype(float)}
    )

In [8]:
label_df = create_label_df(
    val_df,
    user_col=args.user_col,
    item_col=args.item_col,
    rating_col=args.rating_col,
    timestamp_col=args.timestamp_col,
)
label_df

Unnamed: 0,user_id,parent_asin,rating,rating_rank
2768,AHZ7HC4ESN2UIVTRF7Y6JEAV3YLA,B07NXPN3B8,1.0,1.0
449,AGJDLDBDZIFH4AP7LOU6EWOBGCLA,B005OCYRGW,1.0,1.0
6758,AERWHGDBA6B3XDADBXDKSBQZOW4A,B01HSER828,1.0,1.0
5010,AH3O64L723RANWIFRFMXSVYBRANQ,B00DPM90C4,1.0,1.0
3645,AENZPBHRYWAM7YWB6JPBPSYIHCPA,B07QYY1NN5,1.0,1.0
...,...,...,...,...
4100,AGJMFI2X7BIY4WXPW6T76SKYGCKQ,B000SEGKM0,0.0,66.0
2765,AGJMFI2X7BIY4WXPW6T76SKYGCKQ,B00DPM90C4,0.0,67.0
3436,AGJMFI2X7BIY4WXPW6T76SKYGCKQ,B075HYJF6X,0.0,68.0
5036,AGJMFI2X7BIY4WXPW6T76SKYGCKQ,B000W939IO,0.0,69.0


In [9]:
eval_df = merge_recs_with_target(
    recommendations_df,
    label_df,
    k=args.top_k_retrieve,
    user_col=args.user_col,
    item_col=args.item_col,
    rating_col=args.rating_col,
)
eval_df

Unnamed: 0,user_id,parent_asin,score,rec_ranking,rating,rating_rank
44,AE23RLRV25THT7OZM4T4ZJ4BMYCA,B00L9B7IKE,1.000000,1,0,
42,AE23RLRV25THT7OZM4T4ZJ4BMYCA,B00JO8PEN2,0.680550,2,0,
11,AE23RLRV25THT7OZM4T4ZJ4BMYCA,B006LSZECO,0.624323,3,0,
29,AE23RLRV25THT7OZM4T4ZJ4BMYCA,B00DPM7TIG,0.501874,4,0,
27,AE23RLRV25THT7OZM4T4ZJ4BMYCA,B00CNQ7HAU,0.479384,5,0,
...,...,...,...,...,...,...
179994,AHZXAMLRISP275TBMPIGCRWUQFYA,B001NHNG64,0.120783,98,0,
180086,AHZXAMLRISP275TBMPIGCRWUQFYA,B07FTBNVGK,0.120783,99,0,
180008,AHZXAMLRISP275TBMPIGCRWUQFYA,B009KP9VIS,0.119950,100,0,
179990,AHZXAMLRISP275TBMPIGCRWUQFYA,1423152883,,101,1,1.0


In [10]:
ranking_report = log_ranking_metrics(args, eval_df)

  return (1 + beta_sqr) * precision_arr * recall_arr / (beta_sqr * precision_arr + recall_arr)


## Classification metrics

In [11]:
eval_classification_df = pd.merge(
    val_df,
    popular_items_df[[args.item_col, "score"]],
    on=[args.item_col],
    how="left",
    validate="m:1",
).assign(label=lambda df: df[args.rating_col].gt(0).astype(int))
eval_classification_df

Unnamed: 0,user_id,parent_asin,rating,timestamp,user_indice,item_indice,item_sequence,score,label
0,AE23RLRV25THT7OZM4T4ZJ4BMYCA,0062409212,1.0,1646772001708,7581,6271,"[-1, -1, -1, -1, -1, 7353, 4162, 2974, 6055, 3...",0.019992,1
1,AEKQREM4SYKU6HK2CSMWOYXNMCWQ,B00EA8EO00,1.0,1650134360640,7563,1049,"[-1, -1, -1, -1, 5434, 4443, 6758, 3012, 638, ...",0.009163,1
2,AFGTSYTWRYDCEA7RIDXTBXVOOFFQ,B0141B48W4,0.0,1635269638002,1449,3058,"[4666, 466, 1728, 4963, 4653, 3053, 4442, 3927...",0.010412,0
3,AE4YFHNIXVBWI2V4DRU5UFSTY4RQ,B08ZM7BQ5J,1.0,1630002488344,674,3068,"[7119, 6518, 1870, 346, 6276, 3151, 1624, 42, ...",0.014577,1
4,AGILJFVEHJX5W3Q7QYJZ7MRVU6ZQ,B093GVNHQV,1.0,1641012367793,14381,1286,"[6042, 1683, 6986, 1244, 4861, 6332, 4637, 768...",0.014577,1
...,...,...,...,...,...,...,...,...,...
7135,AEFQQOMB5AYGR2FVV6X5OP5Y7VTQ,B00KIZQG96,1.0,1657081631273,19644,1537,"[6287, 6371, 184, 465, 7223, 5882, 3145, 6059,...",0.022907,1
7136,AH4ARLONPDSCFEVCUQZK6Z5EF72Q,0763655988,0.0,1657136765901,18673,4934,"[3925, 6168, 2698, 4219, 2720, 7090, 2984, 659...",0.014994,0
7137,AGKILHN37242OQLPSMAMMZJA6IAQ,B07CRC52VH,1.0,1653587733450,3136,1407,"[733, 6782, 1923, 4932, 2369, 4831, 5227, 4573...",0.015827,1
7138,AHVI6MSMQ543OPJLYDQEHCUTGAEA,1594484465,0.0,1634219127106,16414,4627,"[4836, 3373, 536, 6701, 3047, 2679, 5048, 5620...",0.013744,0


In [12]:
eval_classification_df["score"].describe().T

count    7140.000000
mean        0.052832
std         0.086923
min         0.005414
25%         0.014994
50%         0.025823
75%         0.054561
max         1.000000
Name: score, dtype: float64

In [13]:
classification_report = log_classification_metrics(
    args, eval_classification_df, target_col="label", prediction_col="score"
)

## Loss
We can estimate what kinds of MSE loss should be the upper bound model training based on measuring the loss when asking the model to naively predict the mean rating for every item.

In [14]:
naive_prediction = train_df[args.rating_col].mean()

naive_mse = (
    (
        val_df[args.rating_col]
        - val_df.assign(naive_prediction=naive_prediction)["naive_prediction"]
    )
    .apply(lambda s: s * s)
    .mean()
)
logger.info(
    f"Val MSE = {naive_mse:,.2f} given naive_prediction={naive_prediction:,.1f}"
)

[32m2025-03-08 21:10:37.702[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m11[0m - [1mVal MSE = 3.01 given naive_prediction=2.2[0m


# Clean up

In [15]:
all_params = [args]

if args.log_to_mlflow:
    for params in all_params:
        params_dict = params.model_dump()
        params_ = {f"{params.__repr_name__()}.{k}": v for k, v in params_dict.items()}
        mlflow.log_params(params_)

    mlflow.end_run()

🏃 View run 001-baseline-popular at: http://localhost:5002/#/experiments/3/runs/30366fb0e78e46c2964e724222d27cac
🧪 View experiment at: http://localhost:5002/#/experiments/3
