In [101]:
%load_ext autoreload
%autoreload 2

import torch
from torchmetrics import MeanSquaredError
import numpy as np
import pandas as pd
from dynaconf import Dynaconf
import lightning as L

from src.models.mf_with_bias import MatrixFactorizationWithBias
from src.lit_models.base import LightningModel

from src.datasets.movielens import MovielensDataModule

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Load config with dynaconf

In [102]:
# Read the experiment settings from config_mf.yaml, resolved against the "configs" root path.
cfg = Dynaconf(root_path="configs", settings_files=["config_mf.yaml"])

Load model inputs from the config file

In [103]:
# Fetch the model's constructor arguments from the config node once, then unpack them.
init_args = cfg.model.pytorch_model.init_args
n_users, n_items, n_factors = (
    init_args.n_users,
    init_args.n_items,
    init_args.n_factors,
)
print(f"{n_users=}, {n_items=}, {n_factors=}")

n_users=943, n_items=1625, n_factors=128


## Load model

In [104]:
# Build the network from the values read out of the config. The embedding size
# comes from n_factors rather than a hard-coded 128, so the architecture cannot
# silently drift away from the configuration.
pytorch_model = MatrixFactorizationWithBias(n_users, n_items, n_factors)
pytorch_model

MatrixFactorizationWithBias(
  (user_emb): Embedding(943, 128)
  (user_bias): Embedding(943, 1)
  (item_emb): Embedding(1625, 128)
  (item_bias): Embedding(1625, 1)
)

In [105]:
# Path of the checkpoint to restore (relative to the current working directory).
checkpoint_file = "lightning_logs/embedding_dim/version_1/checkpoints/best_model.ckpt"

# Load the weights; the freshly built pytorch_model is handed to LightningModel.
model = LightningModel.load_from_checkpoint(
    pytorch_model=pytorch_model,
    checkpoint_path=checkpoint_file,
)

FileNotFoundError: [Errno 2] No such file or directory: '/Users/cespeleta/Projects/recsys-movielens/lightning_logs/embedding_dim/version_1/checkpoints/best_model.ckpt'

In [None]:
# Device the restored model lives on; input tensors are moved to it before inference.
device = model.device
print(device)

In [None]:
# Data module for the "ml-100k" dataset with "rating" as the target, in batches of 32.
dm = MovielensDataModule(dataset="ml-100k", target="rating", batch_size=32)
# Only the test stage is prepared in this notebook.
dm.setup(stage="test")

## Predict on new data

Here we use the test set as new data but, of course, we could use any other new dataset.

In [None]:
# Dataloader over the test split, used in this notebook as stand-in "new" data.
test_dataloader = dm.test_dataloader()

In [None]:
# Get the first batch of data and move its tensors to the model's device.
# next(iter(...)) fetches exactly one batch and fails loudly on an empty loader,
# instead of silently leaving users/items/ratings undefined.
batch_data = next(iter(test_dataloader))
users = batch_data["user"].to(device)
items = batch_data["item"].to(device)
ratings = batch_data["rating"].to(device)

### Pytorch

We can make our predictions with plain PyTorch or by using the Lightning Trainer.

Docs: https://lightning.ai/docs/pytorch/stable/deploy/production_intermediate.html

In [None]:
model.eval()  # switch the model to evaluation mode before predicting
with torch.inference_mode():  # gradients are not needed for inference
    y_hat = model(users, items)  # optional rescaling (currently disabled): * (5.5 - 1) + 1

y_hat

This is equivalent to calling the `forward` of the PyTorch model directly.

In [None]:
model.eval()
with torch.inference_mode():
    # Call the PyTorch module held in model.pytorch_model directly instead of going through the LightningModel.
    print(model.pytorch_model(users, items))

In [None]:
# mse = MeanSquaredError().to(device)
# pred_list = []
# for batch_data in test_dataloader:
#     users = batch_data["user"].to(device)
#     items = batch_data["item"].to(device)
#     ratings = batch_data["rating"].to(device)
#     with torch.inference_mode():
#         y_hat = model.predict_step(batch_data) * (5.5 - 1) + 1
#         pred_list.append(y_hat.cpu().detach().numpy().squeeze())
    
#     mse(y_hat, ratings)

### Using Lightning Trainer

In [None]:
# This Trainer is only used for prediction, so checkpointing is switched off.
trainer = L.Trainer(enable_checkpointing=False)
# Run prediction over the test dataloader; the result is indexed per batch in the cells that follow.
batched_predictions = trainer.predict(model, dataloaders=[test_dataloader])

For comparison, show the first batch of predictions. Why are they different from the pure PyTorch predictions? That's because in the LightningModule the predictions are transformed using `Sigmoid` to normalize the outputs.

In [None]:
# First element of the Trainer's prediction output, i.e. the predictions for the first batch.
batched_predictions[0]

Concatenate all predictions in a single vector

In [None]:
# Join the per-batch prediction tensors into one tensor (torch.cat concatenates along dim 0 by default).
predictions = torch.cat(batched_predictions)
predictions

Get true ratings

In [None]:
# Walk the test dataloader ONCE and reuse the materialized batches for all three
# fields. Iterating it three separate times triples the loading work and, if the
# loader ever reorders samples between passes, would leave users/items/ratings
# misaligned row-wise.
test_batches = list(test_dataloader)
users = torch.cat([batch["user"] for batch in test_batches], dim=0)
items = torch.cat([batch["item"] for batch in test_batches], dim=0)
ratings = torch.cat([batch["rating"] for batch in test_batches], dim=0)
ratings

In [None]:
def scale_predictions(x, range=(1, 5.5)):
    """Linearly rescale ``x`` so that 0 maps to the lower bound and 1 to the upper bound.

    Args:
        x: Value(s) to rescale; anything supporting ``*`` and ``+`` with numbers.
        range: ``(min, max)`` pair describing the target interval. The name
            shadows the builtin inside this function; it is kept so existing
            keyword callers keep working.

    Returns:
        ``x * (max - min) + min``.
    """
    low, high = range
    span = high - low
    return low + x * span

In [None]:
# Two torchmetrics objects: plain MSE, and the same metric with squared=False,
# which makes it return the root of the mean squared error (RMSE) directly.
mse = MeanSquaredError()
rmse = MeanSquaredError(squared=False)

# Rescale the predictions with scale_predictions' default range before comparing them with the ratings.
scaled_predictions = scale_predictions(predictions)

test_mse = mse(scaled_predictions, ratings)
test_rmse = rmse(scaled_predictions, ratings)

print(f"Test MSE: {test_mse:.4f}")
# test_rmse is already a root-mean-squared error; applying torch.sqrt to it
# again would report the fourth root of the MSE instead of the RMSE.
print(f"Test RMSE: {test_rmse:.3f}")

In [None]:
# mse.compute() returns the MSE accumulated by the `mse` metric object, so store
# it under the name the prints below actually use (it was previously assigned
# to an unused, misleadingly named `test_mae`).
test_mse = mse.compute()
print(f"Test MSE: {test_mse:.4f}")
print(f"Test RMSE: {torch.sqrt(test_mse):.3f}")

In [None]:
# Sanity check: shape of the collected true ratings (compare with the predictions' shape).
ratings.shape

In [None]:
# Sanity check: shape of the rescaled predictions (compare with the ratings' shape).
scaled_predictions.shape

In [None]:
# Load the item mapping from disk (presumably title -> integer code; verify against the encoder).
# NOTE(review): joblib.load unpickles the file, so only load artifacts you trust.
import joblib

item2int = joblib.load("output/encoders/ml-100k/title_encoder.joblib")

# Reverse lookup: encoded value -> original key.
int2item = dict((encoded, original) for original, encoded in item2int.items())

In [None]:
def rmse(y_true, y_hat):
    """Root-mean-squared error between targets and predictions.

    NOTE: binding this function to ``rmse`` replaces the torchmetrics object of
    the same name created in an earlier cell.

    Args:
        y_true: Ground-truth values (array-like supporting element-wise ``-``).
        y_hat: Predicted values, compatible in shape with ``y_true``.

    Returns:
        ``sqrt(mean((y_true - y_hat) ** 2))`` computed with numpy.
    """
    residuals = y_true - y_hat
    mean_squared_error = np.mean(residuals ** 2)
    return np.sqrt(mean_squared_error)

In [None]:
# Let's look at the differences between true ratings and predictions.
stacked = torch.vstack([users, items, ratings, scaled_predictions])
# Move to CPU / numpy and transpose so each row reads (user, item, rating, prediction).
data = stacked.detach().cpu().numpy().T

# Convert to a DataFrame, put the title in front, and add the absolute error per row.
df = pd.DataFrame.from_records(
    data,
    columns=["users_enc", "items_enc", "ratings", "predictions"],
)
df.insert(loc=0, column="title", value=df["items_enc"].map(int2item))
df["error"] = (df["ratings"] - df["predictions"]).abs()
df.head()

In [None]:
# Per-title mean of the "error" column and number of rows for that title.
errors_df = df.groupby(["title"]).agg(error=("error", "mean"), count=("title", "count"))

# Movies with lower error. sort_values returns a NEW frame, so .head() must be
# chained on its result; calling errors_df.head() afterwards would show the
# unsorted groupby output instead.
errors_df.sort_values(by="error", ascending=True).head()

In [None]:
# Movies with biggest errors
errors_df.sort_values(by="error", ascending=False).head()