In [None]:
# Enable IPython's autoreload extension. Mode 2 reloads all imported modules
# before each cell executes, so edits to the local project package are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import implicit
import scipy.sparse as scs

from recsys.utils import col
from recsys.metrics import ndcg_score, hr_score

### Last one

In [None]:
# Load the "last one" split from parquet. X supplies the interactions the
# model is fitted on below; y supplies the held-out evaluation rows.
X = pd.read_parquet("../data/ml-1m/split/X_last_one.parquet")
y = pd.read_parquet("../data/ml-1m/split/y_last_one.parquet")

In [None]:
# Held-out movie code for every row of `y`. The double brackets keep the
# result two-dimensional (n_rows, 1) so it can be stacked with the negatives.
y_true = y[[col.movie_code]].to_numpy()

# Candidate items to rank per row: the held-out movie in column 0 followed by
# the entries of that row's `col.negative` list.
# NOTE(review): presumably these are sampled negatives of equal length per
# row -- confirm against the split-generation code.
negatives = np.array(y[col.negative].apply(list).tolist())
test_array = np.hstack([y_true, negatives])

# Give the matrix an explicit shape instead of letting scipy infer it from
# the largest code present in X. A movie that occurs only among the held-out
# or negative candidates, or a trailing user without training rows, would
# otherwise fall outside the matrix: the model would have no factor for it
# and the per-user evaluation loop would not line up with `test_array`.
# NOTE(review): assumes row i of `y` belongs to user code i -- confirm.
n_users = max(int(X[col.user_code].max()) + 1, len(test_array))
n_items = int(max(X[col.movie_code].max(), test_array.max())) + 1

user_movie_matrix = scs.csr_matrix(
    (X[col.rating], (X[col.user_code], X[col.movie_code])),
    shape=(n_users, n_items),
)

In [None]:
# ALS factorisation of the user x movie rating matrix.
model = implicit.als.AlternatingLeastSquares(
    factors=128,
    iterations=100,
    # Use the GPU implementation only when implicit was built with CUDA
    # support; hard-coding True makes this cell fail on CPU-only machines.
    use_gpu=implicit.gpu.HAS_CUDA,
    calculate_training_loss=True,
    # Factors are randomly initialised; fix the seed so that re-running the
    # notebook starts from the same initialisation.
    random_state=42,
)

model.fit(user_movie_matrix)

In [None]:
# Rank each user's candidate row with the fitted model. Passing `items`
# restricts scoring to that user's candidates from `test_array`; no `N` is
# given, so the library's default list length applies. Only the returned item
# ids are kept -- the scores are discarded.
num_rows = user_movie_matrix.shape[0]
ranked_per_user = [
    model.recommend(
        uid,
        user_items=user_movie_matrix[uid],
        items=test_array[uid],
    )[0]
    for uid in tqdm(np.arange(num_rows))
]

y_pred = np.array(ranked_per_user)

In [None]:
# Evaluate the rankings in y_pred against the held-out items in y_true with
# ndcg_score from recsys.metrics; the value is displayed as the cell output.
ndcg_score(y_true, y_pred)

In [None]:
# Evaluate the same rankings with hr_score from recsys.metrics; the value is
# displayed as the cell output.
hr_score(y_true, y_pred)

### Last five

In [None]:
# Load the "last five" split from parquet. This rebinds X and y, replacing
# the frames loaded for the "last one" experiment above.
X = pd.read_parquet("../data/ml-1m/split/X_last_five.parquet")
y = pd.read_parquet("../data/ml-1m/split/y_last_five.parquet")

In [None]:
# Movie codes ordered by how often they occur in X (value_counts sorts by
# descending frequency).
movie_frequency = X[col.movie_code].value_counts()
most_popular = movie_frequency.index.tolist()

# Distinct user codes, in order of first appearance in X.
user_ids = X[col.user_code].unique()

# One row per user holding that user's distinct held-out movie codes. groupby
# orders the users by ascending user code.
held_out_by_user = y.groupby(col.user_code)[col.movie_code].unique()
y_true = np.array([list(movies) for movies in held_out_by_user])

In [None]:
# Assemble the (user, movie) -> rating triplets in COO form first, then
# convert to CSR for row-wise access.
rating_triplets = (X[col.rating], (X[col.user_code], X[col.movie_code]))
user_movie_matrix = scs.coo_matrix(rating_triplets).tocsr()

In [None]:
# Size of the latent user/item factors.
embedding_dim = 128

# ALS factorisation for the "last five" split. `use_gpu` is left at the
# library default here.
model = implicit.als.AlternatingLeastSquares(
    factors=embedding_dim,
    iterations=50,
    # Factors are randomly initialised; fix the seed so that re-running the
    # notebook starts from the same initialisation.
    random_state=42,
)

model.fit(user_movie_matrix)

In [None]:
# Request recommendations in ascending user-code order. `y_true` is built
# with groupby(col.user_code), which sorts users the same way, so row i of
# y_pred and row i of y_true then refer to the same user even when X itself
# is not sorted by user.
# NOTE(review): assumes X and y contain the same set of users -- confirm.
eval_user_ids = np.unique(X[col.user_code].to_numpy())

# In batch mode implicit expects `user_items` to contain exactly one row per
# requested user, in the same order as the ids. Slicing the matrix by the ids
# guarantees that; passing the full matrix only works when the ids happen to
# be 0..n-1 in order, and otherwise filters seen items for the wrong user.
y_pred, y_score = model.recommend(
    eval_user_ids, user_items=user_movie_matrix[eval_user_ids]
)

In [None]:
# Evaluate the "last five" rankings in y_pred against the held-out items in
# y_true with ndcg_score from recsys.metrics; the value is displayed as the
# cell output.
ndcg_score(y_true, y_pred)