In [1]:
import numpy as np
import pandas as pd
from implicit.gpu.als import AlternatingLeastSquares
from scipy.sparse import csr_matrix
from sklearn.preprocessing import LabelEncoder

from src.datasets.base_dataset import BaseDataset

In [2]:
# Dataset options handed to BaseDataset. How each threshold is applied is
# decided inside BaseDataset, which is not visible from this notebook.
data = BaseDataset(
    args=dict(
        dataset_shortname="ml_1m",
        min_rating=3.5,
        min_user_count=5,  # default 5
        min_item_count=0,
    )
)

In [3]:
# Load the dataset; the result is indexed below by "train", "val" and "test".
ratings = data.load_dataset()

In [4]:
# Wrap each of the three splits in a pandas Series, unpacked in
# train / val / test order.
train, val, test = (
    pd.Series(ratings[split_name]) for split_name in ("train", "val", "test")
)

In [5]:
def make_sparse(grouped_ratings, item_encoder=None):
    """Build a user-item CSR interaction matrix from per-user item collections.

    Parameters
    ----------
    grouped_ratings : pd.Series
        One entry per user holding that user's item ids (a list-like, or a
        single id). The Series index is used directly as the matrix row
        index, so it must contain integers in ``[0, len(grouped_ratings))``.
    item_encoder : LabelEncoder, optional
        An already fitted encoder to reuse, e.g. the one returned for the
        training split, so that another split shares the same column space.
        When omitted, a new encoder is fitted on the items found in
        ``grouped_ratings``.

    Returns
    -------
    sparse_matrix : csr_matrix
        Matrix of shape ``(len(grouped_ratings), n_items)`` with a 1 for every
        (user, item) pair; repeated pairs are summed by the CSR constructor.
    item_encoder : LabelEncoder
        The encoder mapping original item ids to column indices.

    Raises
    ------
    ValueError
        If a user index falls outside the matrix shape, or if a supplied
        ``item_encoder`` meets an item it was not fitted on.
    """
    # explode() turns an empty per-user list into NaN; drop those rows so NaN
    # is not encoded as a phantom item. Such users simply get an empty row.
    ratings = grouped_ratings.explode().dropna()
    user_index = ratings.index.to_numpy()
    item_values = ratings.to_numpy()

    if item_encoder is None:
        item_encoder = LabelEncoder()
        item_index = item_encoder.fit_transform(item_values)
    else:
        item_index = item_encoder.transform(item_values)

    user_num = len(grouped_ratings)
    # The encoder's vocabulary defines the column count; this also keeps the
    # width stable when a fitted encoder is reused on a split that does not
    # contain every item.
    item_num = len(item_encoder.classes_)

    sparse_matrix = csr_matrix(
        (np.ones(len(user_index)), (user_index, item_index)),
        shape=(user_num, item_num),
    )

    return sparse_matrix, item_encoder

In [8]:
# Build the training user-item matrix and keep the fitted item encoder, which
# maps original item ids to the matrix column indices.
train_csr, item_train_encoder = make_sparse(train)

In [9]:
# GPU ALS model with 1024 latent factors; random_state=0 pins the seed.
# NOTE(review): 1024 factors is a large embedding size for this dataset --
# confirm it is intentional and fits in GPU memory.
als = AlternatingLeastSquares(factors=1024, random_state=0)
# Fit on the training user-item matrix built above.
als.fit(train_csr)

  0%|          | 0/15 [00:00<?, ?it/s]

In [10]:
# Request the top 50 items for user ids 0..len(train)-1 in one batch, passing
# train_csr so items each user already interacted with are filtered out.
# NOTE(review): the returned ids are presumably encoded column indices; map
# them back with item_train_encoder.inverse_transform before comparing with
# raw item ids -- confirm against the implicit documentation.
rec_matrix, rec_scores = als.recommend(
    range(0, len(train)), train_csr, N=50, filter_already_liked_items=True
)