In [2]:
import os
import pickle
import sys
from pathlib import Path

import pandas as pd
from torch import Generator, cuda, device, tensor
from torch.optim import Adam
from torch.utils.data import DataLoader, random_split

# Add the relative path to sys.path (must run before the project-local imports below)
sys.path.append(str(Path("../bpr-mf").resolve()))

from bpr_mf import bprMF, bprMFDataloader, bpr_loss_with_reg, bpr_train
from calibrationUtils import preprocess_genres
from evaluation import Evaluate
from utils import generate_bpr_dataset


In [3]:
# Select the compute device: CUDA when torch reports one as available, CPU otherwise.
dev = device('cuda') if cuda.is_available() else device('cpu')
dev

device(type='cuda')

# Dynamic calibration


In this notebook, we explore ideas for modelling the evolution of user preferences in calibrated recommendation, namely the approach proposed by D. C. da Silva et al. (2025) in *Considering Time and Feature Entropy in Calibrated Recommendations*.

## Read Data and Preprocess

In [4]:

# Both .dat files are "::"-separated and have no header row, so the column names are
# supplied explicitly; the multi-character separator requires the python parsing engine.
movies = pd.read_csv(
    "./data/ml-1m/movies.dat",
    sep="::",
    engine="python",
    names=["itemID", "title", "genres"],
    encoding="ISO-8859-1",
)
ratings = pd.read_csv(
    "./data/ml-1m/ratings.dat",
    sep="::",
    engine="python",
    names=["userID", "itemID", "rating", "timestamp"],
    encoding="ISO-8859-1",
)

# Attach title and genres to every rating row by joining on the shared itemID column.
data_raw = pd.merge(ratings, movies, on="itemID")

In [5]:

# Re-index users and items to contiguous zero-based IDs (in order of first appearance).
# This makes index handling easier, especially with our pytorch implementation.
zero_based_indexing_item = {
    raw_id: new_id for new_id, raw_id in enumerate(data_raw["itemID"].unique())
}
zero_based_indexing_user = {
    raw_id: new_id for new_id, raw_id in enumerate(data_raw["userID"].unique())
}

data_raw["itemID"] = data_raw["itemID"].map(zero_based_indexing_item)
data_raw["userID"] = data_raw["userID"].map(zero_based_indexing_user)

# Run the project's genre preprocessing, then switch to the shorter column names
# used by the rest of the notebook.
df = preprocess_genres(data_raw).rename(columns={"userID": "user", "itemID": "item"})

# Binary relevance flag: 1 when the rating is at least 4, otherwise 0.
df["relevant"] = (df["rating"] >= 4).astype("int64")


In [6]:
# Inspect the preprocessed frame; the rendered output shows only the first and last rows.
df

Unnamed: 0,user,item,rating,timestamp,title,genres,relevant
0,0,0,5,978300760,One Flew Over the Cuckoo's Nest (1975),[Drama],1
1,0,1,3,978302109,James and the Giant Peach (1996),"[Animation, Children's, Musical]",0
2,0,2,3,978301968,My Fair Lady (1964),"[Musical, Romance]",0
3,0,3,4,978300275,Erin Brockovich (2000),[Drama],1
4,0,4,5,978824291,"Bug's Life, A (1998)","[Animation, Children's, Comedy]",1
...,...,...,...,...,...,...,...
1000204,6039,772,1,956716541,Weekend at Bernie's (1989),[Comedy],0
1000205,6039,1106,5,956704887,"Crying Game, The (1992)","[Drama, Romance, War]",1
1000206,6039,365,5,956704746,Welcome to the Dollhouse (1995),"[Comedy, Drama]",1
1000207,6039,152,4,956715648,Sophie's Choice (1982),[Drama],1


In [7]:
# Convert the ratings frame into the dataset format consumed by the BPR training code.
bpr_df = generate_bpr_dataset(df)

## Training a model

In [8]:
# Wrap bpr_df in the project's dataset class so torch can split and batch it.
data_bpr = bprMFDataloader(bpr_df)

# Fixed seed for the train/test partition. Without it every kernel restart draws a
# different split, so a model loaded from the on-disk cache could be evaluated on rows
# it was trained on.
# NOTE(review): an artifact cached before this seed was introduced was trained on an
# unknown split — delete it and retrain once so the evaluation is leak-free.
SPLIT_SEED = 42

# 70% of the samples go to training; the remainder is held out for testing.
train_len = int(0.7 * len(data_bpr))
test_len = len(data_bpr) - train_len

train_data, test_data = random_split(
    data_bpr,
    [train_len, test_len],
    generator=Generator().manual_seed(SPLIT_SEED),
)

dataloader_bpr_train = DataLoader(train_data, batch_size=256, shuffle=True)
dataloader_bpr_test = DataLoader(test_data, batch_size=256, shuffle=True)

# IDs were remapped to be zero-based earlier in the notebook, so max + 1 is the number
# of slots needed to index every ID.
# NOTE(review): n_items only looks at pos_item — confirm no other item column in bpr_df
# can hold a larger ID.
n_users = bpr_df.user.max() + 1
n_items = bpr_df.pos_item.max() + 1


In [9]:

# Load the cached model when the artifact exists; otherwise train a new one and cache it.
model_artifact_poath = "artifacts/models/bpr_mf_movielens_1m.pkl"
if os.path.exists(model_artifact_poath):
    # NOTE(review): pickle.load can execute arbitrary code from the file — only load
    # artifacts produced by this notebook, never files from an untrusted source.
    with open(model_artifact_poath, "rb") as f:
        model = pickle.load(f)
else:
    # The model must exist before its parameters can be handed to the optimizer; the
    # original cell referenced `model` here without ever creating it (NameError on a
    # fresh kernel with no cached artifact).
    # NOTE(review): the constructor arguments are assumed to be (n_users, n_items) —
    # confirm against bpr_mf.bprMF and add any required hyper-parameters.
    model = bprMF(n_users, n_items).to(dev)
    optimizer = Adam(model.parameters(), lr=1e-3)
    train_loss, test_loss = bpr_train(
        dataloader_bpr_train, dataloader_bpr_test, model,
        bpr_loss_with_reg, optimizer, reg_lambda=5e-4, debug=True
    )
    # Create the artifact directory on first use so the dump below cannot fail with
    # FileNotFoundError.
    os.makedirs(os.path.dirname(model_artifact_poath), exist_ok=True)
    with open(model_artifact_poath, "wb") as f:
        pickle.dump(model, f)



In [10]:
# Build the evaluator from the model, the held-out split and the full ratings frame.
# k=20 is presumably the ranking cut-off used by the @k metrics — confirm in evaluation.py.
evaluator = Evaluate(
    model,
    test_data,
    df,
    k=20,
)

  output = self.forward(torch.tensor(user, device=device), items_list)
  output = self.forward(torch.tensor(user, device=device), items_list)
  output = self.forward(torch.tensor(user, device=device), items_list)
  output = self.forward(torch.tensor(user, device=device), items_list)
  output = self.forward(torch.tensor(user, device=device), items_list)


In [11]:
# Report MAP@k for the model; k is presumably the value given when the evaluator was
# constructed — confirm in evaluation.py.
evaluator.MAP_at_k()

0.1026123588473709

In [65]:
# NOTE(review): `Calibration` is not imported in the notebook's import cell, and this
# cell's execution count jumps well past the previous one, so the class presumably came
# from a cell that is no longer present. Import it at the top of the notebook so that
# Restart & Run All works.
calibrator = Calibration(
    df,
    model,
    weight="linear_time",
)

O weight function é : w_twb


  output = self.forward(torch.tensor(user, device=device), items_list)


In [68]:
# Run the calibration step. Long-running: the recorded run made 6040 iterations in
# roughly 23 minutes.
calibrator.calibrate_for_users()

100%|██████████| 6040/6040 [23:29<00:00,  4.29it/s]


In [70]:
# Report the calibrator's MACE score.
# NOTE(review): presumably "mean average calibration error" — confirm the definition.
calibrator.mace()

100%|██████████| 6040/6040 [01:36<00:00, 62.27it/s]


0.0239256812194949