In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules
# (such as the local `recsys` package) are reloaded before each cell executes.
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import scipy.sparse as scs

from recsys.utils import col
from recsys.metrics import ndcg_score, get_ndcg, get_apak

### Last one

In [None]:
# Read the "last one" split: X is used below to build the interaction matrix,
# y supplies the evaluation targets.
X, y = (
    pd.read_parquet(split_path)
    for split_path in (
        "../data/ml-1m/split/X_last_one.parquet",
        "../data/ml-1m/split/y_last_one.parquet",
    )
)

In [None]:
# Movie codes ordered by how often they occur in X, most frequent first.
# NOTE(review): most_popular and user_ids are not referenced by the SVD cells
# of this section.
most_popular = X[col.movie_code].value_counts().index.tolist()
user_ids = X[col.user_code].unique()

# Prediction rows are indexed by user_code (the sparse matrix is built with
# user_code as the row index), so order the targets by user_code as well instead
# of relying on the parquet row order.
# Assumes y holds exactly one row per user and a user_code column — TODO confirm
# against the split script.
y_true = (
    y.sort_values(col.user_code, kind="stable")[col.movie_code]
    .to_numpy()
    .reshape(-1, 1)
)

In [None]:
# Sparse ratings matrix in CSR format: row index = user_code,
# column index = movie_code, stored value = rating.
user_movie_matrix = scs.csr_matrix(
    (
        X[col.rating],
        (X[col.user_code], X[col.movie_code]),
    )
)

In [None]:
# Truncated SVD of the ratings matrix with 128 singular triplets:
# ratings ≈ U · diag(s) · Vᵀ.
user_factors, singular_values, movie_factors = scs.linalg.svds(
    user_movie_matrix.astype(float),
    k=128,
)
# Fold the singular values into the user factors so that
# user_factors @ movie_factors.T is the actual rank-128 reconstruction.
# Dropping them would give every latent dimension the same weight.
user_factors = user_factors * singular_values
# svds returns Vᵀ (components × movies); transpose to one row per movie.
movie_factors = movie_factors.T

In [None]:
TOP_K = 10  # number of recommendations kept per user

# Dense score matrix: entry [u, m] is the dot product of user u's and movie m's
# factor vectors.
retrieval = user_factors.dot(movie_factors.T)
# Exclude movies with a stored (non-zero) rating for the user. -inf sorts below
# every real score whatever the score scale, which a finite sentinel such as -1
# cannot guarantee.
retrieval[user_movie_matrix.nonzero()] = -np.inf
# Per user, movie codes ordered from highest to lowest score.
retrieval = np.argsort(retrieval, axis=1)[:, ::-1]

y_pred = retrieval[:, :TOP_K]

In [None]:
# Evaluate the 10 predictions per user (y_pred) against the targets (y_true)
# with ndcg_score from recsys.metrics; as the cell's last expression the result
# is displayed.
ndcg_score(y_true, y_pred)

### Last five

In [None]:
# Read the "last five" split: X is used below to build the interaction matrix,
# y supplies the evaluation targets.
X, y = (
    pd.read_parquet(split_path)
    for split_path in (
        "../data/ml-1m/split/X_last_five.parquet",
        "../data/ml-1m/split/y_last_five.parquet",
    )
)

In [None]:
# Movie codes ordered by how often they occur in X, most frequent first.
most_popular = X[col.movie_code].value_counts().index.to_list()
# Distinct user codes, in order of first appearance in X.
user_ids = pd.unique(X[col.user_code])
# One row per user (groupby orders the groups by user_code), holding that
# user's distinct target movie codes.
y_true = np.array(
    [
        list(target_movies)
        for target_movies in y.groupby(col.user_code)[col.movie_code].unique()
    ]
)

In [None]:
# Sparse ratings matrix in CSR format: row index = user_code,
# column index = movie_code, stored value = rating.
user_movie_matrix = scs.csr_matrix(
    (
        X[col.rating],
        (X[col.user_code], X[col.movie_code]),
    )
)

In [None]:
# Truncated SVD of the ratings matrix with 128 singular triplets:
# ratings ≈ U · diag(s) · Vᵀ.
user_factors, singular_values, movie_factors = scs.linalg.svds(
    user_movie_matrix.astype(float),
    k=128,
)
# Fold the singular values into the user factors so that
# user_factors @ movie_factors.T is the actual rank-128 reconstruction.
# Dropping them would give every latent dimension the same weight.
user_factors = user_factors * singular_values
# svds returns Vᵀ (components × movies); transpose to one row per movie.
movie_factors = movie_factors.T

In [None]:
TOP_K = 10  # number of recommendations kept per user

# Dense score matrix: entry [u, m] is the dot product of user u's and movie m's
# factor vectors.
retrieval = user_factors.dot(movie_factors.T)
# Exclude movies with a stored (non-zero) rating for the user. -inf sorts below
# every real score whatever the score scale, which a finite sentinel such as -1
# cannot guarantee.
retrieval[user_movie_matrix.nonzero()] = -np.inf
# Per user, movie codes ordered from highest to lowest score.
retrieval = np.argsort(retrieval, axis=1)[:, ::-1]

y_pred = retrieval[:, :TOP_K]

In [None]:
# Evaluate the 10 predictions per user (y_pred) against each user's target
# movies (y_true) with ndcg_score from recsys.metrics; as the cell's last
# expression the result is displayed.
ndcg_score(y_true, y_pred)