In [1]:
import pandas as pd
import sys
from pathlib import Path
from calibrationUtils import preprocess_genres
import os
import pickle





In [2]:
# Make the sibling ``../bprMf/`` directory importable. The project-local
# imports in the next cell (evaluation, bpr_mf, utils) presumably live there.
# NOTE: the path is resolved relative to the kernel's working directory.
path_to_add = str(Path("../bprMf/").resolve())
# Guard against duplicates so re-running this cell keeps sys.path unchanged.
if path_to_add not in sys.path:
    sys.path.append(path_to_add)

In [3]:

from evaluation import Evaluate

from bpr_mf import bprMFLClickDebiasingDataloader, bprMF, bpr_loss_with_reg_with_debiased_click, bpr_train_with_debiasing
from utils import generate_bpr_dataset_with_click_data

from torch.utils.data import random_split, DataLoader
from torch.optim import Adam

from torch import device, cuda, tensor, Generator


In [4]:
# Pick the compute device once: the GPU when CUDA is usable, otherwise the CPU.
if cuda.is_available():
    dev = device('cuda')
else:
    dev = device('cpu')
# Echo the choice as the cell output.
dev

device(type='cuda')

# Dynamic calibration


In this notebook, we explore ideas that model how user preferences evolve over time in calibration, namely the work proposed by D. C. da Silva et al. (2025), *Considering Time and Feature Entropy in Calibrated Recommendations*.

## Read Data and Preprocess

In [5]:

# Both MovieLens-1M files share the same layout: "::"-separated fields (a
# multi-character separator, hence the python engine), no header row, and
# ISO-8859-1 text.
_ML1M_READ_OPTIONS = dict(sep="::", engine="python", encoding="ISO-8859-1")

movies = pd.read_csv(
    "./data/ml-1m/movies.dat",
    names=["itemID", "title", "genres"],
    **_ML1M_READ_OPTIONS,
)
ratings = pd.read_csv(
    "./data/ml-1m/ratings.dat",
    names=["userID", "itemID", "rating", "timestamp"],
    **_ML1M_READ_OPTIONS,
)

# Attach title/genres to every rating row (default inner join on itemID).
data_raw = pd.merge(ratings, movies, on="itemID")

In [6]:

# Re-index users and items to contiguous zero-based ids (in order of first
# appearance) so they can be used directly as indices by the PyTorch model.
# NOTE: data_raw is modified in place. After a re-run of this cell the two
# dictionaries map the already re-indexed ids, not the raw MovieLens ids.
zero_based_indexing_item = {raw_id: idx for idx, raw_id in enumerate(data_raw["itemID"].unique())}
zero_based_indexing_user = {raw_id: idx for idx, raw_id in enumerate(data_raw["userID"].unique())}

data_raw["itemID"] = data_raw["itemID"].map(zero_based_indexing_item)
data_raw["userID"] = data_raw["userID"].map(zero_based_indexing_user)

# A rating at or above this value is treated as a click / relevant interaction.
RELEVANCE_THRESHOLD = 4

df = preprocess_genres(data_raw)
df = df.rename(columns={"userID": "user", "itemID": "item"})
# Vectorised comparison instead of a Python-level lambda per row.
df["click"] = (df["rating"] >= RELEVANCE_THRESHOLD).astype(int)

# In this notebook relevance is defined to be the same signal as the click.
df["relevant"] = df["click"]


In [7]:
# Preview the preprocessed interactions (pandas truncates the rendered frame).
df

Unnamed: 0,user,item,rating,timestamp,title,genres,click,relevant
0,0,0,5,978300760,One Flew Over the Cuckoo's Nest (1975),[Drama],1,1
1,0,1,3,978302109,James and the Giant Peach (1996),"[Animation, Children's, Musical]",0,0
2,0,2,3,978301968,My Fair Lady (1964),"[Musical, Romance]",0,0
3,0,3,4,978300275,Erin Brockovich (2000),[Drama],1,1
4,0,4,5,978824291,"Bug's Life, A (1998)","[Animation, Children's, Comedy]",1,1
...,...,...,...,...,...,...,...,...
1000204,6039,772,1,956716541,Weekend at Bernie's (1989),[Comedy],0,0
1000205,6039,1106,5,956704887,"Crying Game, The (1992)","[Drama, Romance, War]",1,1
1000206,6039,365,5,956704746,Welcome to the Dollhouse (1995),"[Comedy, Drama]",1,1
1000207,6039,152,4,956715648,Sophie's Choice (1982),[Drama],1,1


In [8]:


# Build the BPR training table from the interactions. The rendered result has
# the columns user, pos_item, click_position and neg_item.
# NOTE(review): how neg_item is sampled is defined in utils, not here - confirm.
bpr_df= generate_bpr_dataset_with_click_data(df)

## Training a model

In [9]:
# Wrap the BPR table in the project's dataset class so torch can batch it.
data_bpr = bprMFLClickDebiasingDataloader(bpr_df)

# Split / batching configuration.
TRAIN_FRACTION = 0.7
BATCH_SIZE = 256
# Fixed seed so the split is identical on every run. The trained model is
# cached on disk by a later cell; with an unseeded split a reloaded model
# would be evaluated on a "test" set that overlaps the rows it was trained on.
# NOTE: a model cached before this seed was introduced was trained on a
# different split - delete the artifact and retrain to get a clean evaluation.
SPLIT_SEED = 42

# Calculate split lengths (the test set takes the remainder, so they always sum
# to len(data_bpr)).
train_len = int(TRAIN_FRACTION * len(data_bpr))
test_len = len(data_bpr) - train_len

train_data, test_data = random_split(
    data_bpr,
    [train_len, test_len],
    generator=Generator().manual_seed(SPLIT_SEED),
)

dataloader_bpr_train = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
dataloader_bpr_test = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=True)

# Ids are zero-based, so the count is the largest id plus one.
n_users = int(bpr_df.user.max()) + 1
# Cover negative items too: an id that only ever appears as neg_item must still
# be a valid index (presumably into the model's item table - confirm in bprMF).
n_items = int(max(bpr_df.pos_item.max(), bpr_df.neg_item.max())) + 1


In [10]:
# Preview the BPR table (pandas truncates the rendered frame).
bpr_df

Unnamed: 0,user,pos_item,click_position,neg_item
0,0,0,1,969
1,0,0,1,2733
2,0,0,1,418
3,0,1,0,2376
4,0,1,0,1890
...,...,...,...,...
3000622,6039,152,1,435
3000623,6039,152,1,2366
3000624,6039,26,1,2704
3000625,6039,26,1,3365


In [11]:

# Location of the cached, trained model.
model_artifact_path = "artifacts/models/bpr_mf_click_debias_movielens_1m.pkl"
# Backward-compatible alias for the original (misspelled) variable name, in
# case other cells still reference it.
model_artifact_poath = model_artifact_path

if os.path.exists(model_artifact_path):
    # Reuse the cached model instead of retraining.
    # SECURITY: pickle.load can execute arbitrary code stored in the file; only
    # load artifacts produced by this notebook.
    with open(model_artifact_path, "rb") as f:
        model = pickle.load(f)
    # Make sure the loaded model is on the device chosen for this session.
    model = model.to(dev)
else:
    # Create the artifact directory before training, so a missing folder cannot
    # make the final dump fail after the whole training run has finished.
    os.makedirs(os.path.dirname(model_artifact_path), exist_ok=True)

    model = bprMF(num_users=n_users, num_items=n_items, factors=30).to(dev)
    # Baseline metric of the untrained model. It is only defined on this branch,
    # i.e. not when the model comes from the cache.
    evaluator = Evaluate(model, test_data, df, k=20)
    map_k_before = evaluator.MAP_at_k()
    optimizer = Adam(model.parameters(), lr=1e-3)
    train_loss, test_loss = bpr_train_with_debiasing(
        dataloader_bpr_train, dataloader_bpr_test, model,
        bpr_loss_with_reg_with_debiased_click, optimizer, reg_lambda=5e-4, debug=True
    )
    with open(model_artifact_path, "wb") as f:
        pickle.dump(model, f)



In [12]:
# Build the evaluator for the loaded or freshly trained model, with cutoff k=20.
# The warning lines in this cell's output point at a `torch.tensor(user, ...)`
# call inside the model code, not at this notebook.
evaluator = Evaluate(model, test_data, df, k=20)

  output = self.forward(torch.tensor(user, device=device), items_list)
  output = self.forward(torch.tensor(user, device=device), items_list)
  output = self.forward(torch.tensor(user, device=device), items_list)
  output = self.forward(torch.tensor(user, device=device), items_list)
  output = self.forward(torch.tensor(user, device=device), items_list)


In [13]:
# Metric of the trained model at the evaluator's cutoff (k=20).
# NOTE(review): MAP_at_k is presumably mean average precision at k - confirm in evaluation.py.
map_k_after = evaluator.MAP_at_k()

In [14]:
# Show the value computed in the previous cell.
map_k_after

0.10673276935978548

In [15]:
# One row per distinct user id.
# NOTE(review): `rec_df` is rebuilt from `users.copy()` in a later cell before
# it is used, so this assignment looks redundant.
rec_df = pd.DataFrame({'user': df["user"].unique()})
# Every distinct item id in the interactions; passed as the candidate set when
# recommendations are generated for each user.
candidates = df["item"].unique()

In [16]:
from torch import tensor

def get_top_k_recommendations_for_user(row, candidates, model, k=100):
    """Ask ``model`` for the ``k`` best candidates for the user in ``row``.

    Args:
        row: Mapping-like record; only ``row["user"]`` is read.
        candidates: Item ids to rank. They are converted to a tensor on the
            notebook-level device ``dev`` on every call.
        model: Object exposing ``predict(user=..., candidates=..., k=...)``.
        k: Number of recommendations requested (default 100).

    Returns:
        Whatever ``model.predict`` returns, unchanged. Presumably an
        ``(item ids, scores)`` pair, since the caller splits the result into
        two columns - confirm against ``predict``.
    """
    user_t = tensor(data=row["user"], device=dev)
    item_pool = tensor(candidates, device=dev)
    return model.predict(user=user_t, candidates=item_pool, k=k)



In [17]:
# One row per distinct user id; the recommendation table is built from a copy of this frame.
users = pd.DataFrame({'user': df['user'].unique()})

In [18]:
# Start from the one-row-per-user frame so `users` itself stays untouched.
rec_df = users.copy()

# Score every user against the full candidate set. Each result is wrapped in a
# Series so that `apply` yields one column per element of the returned value.
per_user_top_k = rec_df.apply(
    lambda row: pd.Series(get_top_k_recommendations_for_user(row, candidates, model)),
    axis=1,
)
rec_df[["top_k_rec_id", "top_k_rec_score"]] = pd.DataFrame(per_user_top_k)

# Unnest the two parallel list columns: one row per (user, recommended item).
rec_df = rec_df.explode(["top_k_rec_id", "top_k_rec_score"])

  output = self.forward(torch.tensor(user, device=device), items_list)


In [19]:
rec_df

Unnamed: 0,user,top_k_rec_id,top_k_rec_score
0,0,167,9.083803
0,0,390,8.723235
0,0,23,8.68331
0,0,104,8.268551
0,0,9,7.843223
...,...,...,...
6039,6039,357,4.970627
6039,6039,1618,4.963928
6039,6039,201,4.954138
6039,6039,509,4.949831


## Calibrating

In [20]:
from calibration import Calibration
from calibrationIO import instantiate_calibrator

In [21]:
# Flag forwarded to instantiate_calibrator in the Steck calibration cells.
# NOTE(review): presumably True loads previously saved results instead of recomputing - confirm in calibrationIO.
READ_LOCALLY = False

In [22]:
# Build a calibrator with the 'linear_time' and 'steck' options and _lambda=0.1,
# over the interactions and the model's recommendations.
# The recorded run took about 24 minutes for the 6040 users.
calibrator_linear_time = instantiate_calibrator(df, 'linear_time', 'steck', rec_df, READ_LOCALLY=READ_LOCALLY, _lambda=0.1)

100%|██████████| 6040/6040 [24:00<00:00,  4.19it/s]


In [23]:
# Metric for the _lambda=0.1 calibrator.
# NOTE(review): this calls the underscore-prefixed `_mace`; a later cell calls
# the public `mace()` and gets a very different value - confirm which is intended.
calibrator_linear_time._mace()

100%|██████████| 6040/6040 [00:19<00:00, 314.66it/s]


0.6032388332284145

In [None]:
# Same call without `_lambda`, so the helper's default applies (value not visible here).
# This rebinds `calibrator_linear_time`, replacing the _lambda=0.1 instance.
calibrator_linear_time = instantiate_calibrator(df, 'linear_time', 'steck', rec_df, READ_LOCALLY=READ_LOCALLY)

In [None]:
# Underscore-prefixed metric on the default-lambda calibrator.
# NOTE(review): the difference between `_mace` and `mace` is not visible here - confirm in calibration.py.
calibrator_linear_time._mace()

100%|██████████| 6040/6040 [00:19<00:00, 303.26it/s]


0.26077297049877646

In [None]:
# Public metric on the same calibrator. The recorded value (about 0.013) differs
# widely from the `_mace()` result in the previous cell (about 0.26).
calibrator_linear_time.mace()

100%|██████████| 6040/6040 [00:18<00:00, 318.32it/s]


0.013189115790694199

## GLEB Based calibration


GLEB (Global Local Entropy Based) calibration is a breakthrough approach proposed by D. C. da Silva et al. (2025). It rewards items that mix several genres instead of penalizing them, as Steck's method indirectly does. It calibrates using both the proportion of genres in the user's history and the proportion of genres within each item.

In [20]:
# Build the GLEB variant with the same 'linear_time' option.
# Unlike the Steck calls, READ_LOCALLY and _lambda are not passed, so the helper's defaults apply.
# The recorded run took about 24 minutes for the 6040 users.
calibrator_linear_time_gleb = instantiate_calibrator(df, 'linear_time', 'gleb', rec_df)

100%|██████████| 6040/6040 [24:03<00:00,  4.19it/s]


In [21]:
# Public metric for the GLEB calibrator, comparable with the `mace()` value recorded for the Steck calibrator.
calibrator_linear_time_gleb.mace()

100%|██████████| 6040/6040 [00:18<00:00, 330.54it/s]


0.011589026786283185