In [1]:

import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [None]:
import sys

sys.path.append("/home/caio/dev/")


import pandas as pd

from bprMf.bpr_mf import bprMf

from torch.optim import Adam

from torch import device, cuda

import torch

## Reading data

In [3]:
path = "/home/caio/dev/dynamicTasteDistortion/data/movielens-1m/ml_1m.pkl"

df = pd.read_pickle(path)
df["relevant"] = df["binarized_rating"]

## Cross validating a model



In [4]:
from sklearn.model_selection import KFold
import numpy as np

RANDOM = 42
K = 5

usuarios = df.user.unique()
kf = KFold(n_splits=K, shuffle=True, random_state=RANDOM)
n_users = df.user.max() + 1
n_items = df.item.max() + 1


In [5]:
params = {
    "factors": [10, 30, 50],
    "reg_lambda": [1e-5, 1e-4, 1e-3,],
    "lr": [1e-3, 1e-4, 1e-5]
}

In [6]:
dev = device('cuda' if cuda.is_available() else 'cpu')
dev

device(type='cuda')

In [7]:

model = bprMf(num_users=n_users, num_items=n_items, factors=30, reg_lambda=1e-4, n_epochs=1, dev=dev)

In [8]:

def average_precision_at_k(ranked_items, relevant_items, k):
    score = 0.0
    hits = 0

    for i, item in enumerate(ranked_items[:k], start=1):
        if item in relevant_items:
            hits += 1
            score += hits / i

    if len(relevant_items) == 0:
        return 0.0

    return score / min(len(relevant_items), k)


In [9]:
model = bprMf(
    num_users=n_users,
    num_items=n_items,
    factors=30,
    reg_lambda=1e-5,
    n_epochs=1,
    dev=dev,
)

model.fit(df, lr=1e-4)

Epochs: 100%|██████████| 1/1 [00:23<00:00, 23.69s/it]


[0.508389032429604]

In [10]:
k=20

tensor([   0,    1,    2,  ..., 3703, 3704, 3705], device='cuda:0')

In [35]:
candidates = torch.tensor(df["item"].unique(), device=model.device)

for user_id, user_df in df.groupby("user"):
    user = torch.tensor(user_id, device=model.device)
    hist = torch.tensor(list(set(user_df.item.values)), device=model.device)
    break

In [24]:
user

tensor(0, device='cuda:0')

In [25]:
hist

tensor([   0,  513, 1025, 1154,  517, 1421,  144, 2710, 2969, 1178, 2586, 2205,
        1439, 2592, 1574, 2599, 1195, 1838,   47,  689, 2483, 2102, 1848, 2488,
         957,  574, 1727, 2557,  963,  580,  581,  708,  964, 2889,  970, 1104,
         593, 2128, 1107, 2898,  853,  858, 1117, 2147,  740, 3177,  877, 2162,
        1781, 1782, 1658,  253,  639], device='cuda:0')

In [40]:
scores = model.forward(user, candidates)
scores[list(hist)] = -float("inf")

In [41]:
scores

tensor([   -inf,  0.4573, -0.0119,  ..., -0.4416, -0.6168,  0.3421],
       device='cuda:0', grad_fn=<IndexPutBackward0>)

In [42]:
torch.topk(scores, k=k)

torch.return_types.topk(
values=tensor([2.1992, 2.1098, 2.0945, 2.0756, 2.0483, 2.0340, 2.0076, 2.0061, 1.9789,
        1.9632, 1.9598, 1.9463, 1.9328, 1.8871, 1.8846, 1.8653, 1.8512, 1.8147,
        1.8113, 1.8070], device='cuda:0', grad_fn=<TopkBackward0>),
indices=tensor([2646, 1106, 1843,  579, 2369, 2552, 1118,  575,  801, 1176,  309, 1105,
         106,  287, 1482, 1146, 2198, 1171, 2780, 1023], device='cuda:0'))

In [43]:
model.eval()
map_scores = []
candidates = torch.tensor(df["item"].unique(), device=model.device)
with torch.no_grad():
    for user_id, user_df in df.groupby("user"):

        user_id = torch.tensor([user_id], device=model.device)
        hist = torch.tensor(
            list(set(user_df.item.values)), device=model.device
        )

        scores = model.forward(user_id, candidates)
        scores[list(hist)] = -float("inf")

        top_k_items = torch.topk(scores, k=k).indices.cpu().numpy()

        ap = average_precision_at_k(
            ranked_items=top_k_items, relevant_items=hist, k=k
        )

        map_scores.append(ap)

model.train()

  scores[list(hist)] = -float("inf")


IndexError: too many indices for tensor of dimension 1

In [None]:
def evaluate(self, test_df, k=20):
        model.eval()
        map_scores = []
        with torch.no_grad():
            for user_id, user_df in test_df.groupby("user"):

                user_id = torch.tensor([user_id], device=model.device)
                relevant_items = torch.tensor(
                    list(set(user_df.item.values)), device=model.device
                )

                scores = model.forward(user_id, relevant_items)

                scores[list(relevant_items)] = -float("inf")

                top_k_items = torch.topk(scores, k=k).indices.cpu().numpy()

                ap = average_precision_at_k(
                    ranked_items=top_k_items, relevant_items=relevant_items, k=k
                )

                map_scores.append(ap)

        model.train()
        return float(np.mean(map_scores))

In [9]:
best_score = -float("inf")
best_params = None

for factors in params["factors"]:
    for reg_lambda in params["reg_lambda"]:
        for lr in params["lr"]:

            scores_folds = []

            for train_users_idx, val_users_idx in kf.split(usuarios):

                train_users = usuarios[train_users_idx]
                val_users = usuarios[val_users_idx]

                train_df = df[df.user.isin(train_users)]
                val_df = df[df.user.isin(val_users)]

                model = bprMf(
                    num_users=n_users,
                    num_items=n_items,
                    factors=factors,
                    reg_lambda=reg_lambda,
                    n_epochs=1,
                    dev=dev,
                )

                model.fit(train_df, lr=lr)

                score = model.evaluate(val_df)
                scores_folds.append(score)

            avg_score = np.mean(scores_folds)

            if avg_score > best_score:
                best_score = avg_score
                best_params = {"factors": factors, "reg_lambda": reg_lambda, "lr": lr}

print("Melhor score médio:", best_score)
print("Melhores parâmetros:", best_params)

Epochs: 100%|██████████| 1/1 [00:18<00:00, 18.93s/it]
/pytorch/aten/src/ATen/native/cuda/IndexKernel.cu:113: operator(): block: [0,0,0], thread: [65,0,0] Assertion `-sizes[i] <= index && index < sizes[i] && "index out of bounds"` failed.
/pytorch/aten/src/ATen/native/cuda/IndexKernel.cu:113: operator(): block: [0,0,0], thread: [68,0,0] Assertion `-sizes[i] <= index && index < sizes[i] && "index out of bounds"` failed.
/pytorch/aten/src/ATen/native/cuda/IndexKernel.cu:113: operator(): block: [0,0,0], thread: [69,0,0] Assertion `-sizes[i] <= index && index < sizes[i] && "index out of bounds"` failed.
/pytorch/aten/src/ATen/native/cuda/IndexKernel.cu:113: operator(): block: [0,0,0], thread: [72,0,0] Assertion `-sizes[i] <= index && index < sizes[i] && "index out of bounds"` failed.
/pytorch/aten/src/ATen/native/cuda/IndexKernel.cu:113: operator(): block: [0,0,0], thread: [75,0,0] Assertion `-sizes[i] <= index && index < sizes[i] && "index out of bounds"` failed.
/pytorch/aten/src/ATen/nat

AcceleratorError: CUDA error: device-side assert triggered
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


## Calibrating the best model.

In [None]:
lambdas = [0.1, 0.3, 0.5, 0.7, 0.9, 0.99, 1]