In [1]:
from src.algorithms.alternating_least_squares import AlternatingLeastSquares
from src.helpers.dataset_indexer import DatasetIndexer
from src.helpers.checkpoint_manager import CheckpointManager
from src.recommenders import CollaborativeFilteringRecommenderBuilder
from src.backends import Backend
from src.helpers._logging import logger  # noqa
from src.settings import settings

In [2]:
# Smoke-test the logging setup: an INFO line should be printed below this cell.
logger.info("Log test")

2025-01-05 00:37:40,039 - INFO - Log test


In [3]:
# Location of the ratings CSV, relative to the notebook's working directory.
ratings_csv_path = "./ml-32m/ratings.csv"

# Tell the indexer which CSV columns carry the user, the item and the rating,
# and cap the number of lines read with the project-wide setting.
dataset_indexer = DatasetIndexer(
    file_path=ratings_csv_path,
    user_header="userId",
    item_header="movieId",
    rating_header="rating",
    limit=settings.general.LINES_COUNT_TO_READ,
)

# Index the file. The ratio comes from the settings; presumably it controls
# the approximate share of ratings assigned to training (confirm in DatasetIndexer).
train_ratio = settings.general.APPROXIMATE_TRAIN_RATIO
indexed_data = dataset_indexer.index(approximate_train_ratio=train_ratio)

2025-01-05 00:37:51,656 - INFO - Successfully indexed 1000000 lines from ./ml-32m/ratings.csv


In [4]:
# Gather every ALS hyperparameter from the settings in one place.
als_hyperparameters = dict(
    hyper_lambda=settings.als.HYPER_LAMBDA,
    hyper_gamma=settings.als.HYPER_GAMMA,
    hyper_tau=settings.als.HYPER_TAU,
    hyper_n_epochs=settings.als.HYPER_N_EPOCH,
    hyper_n_factors=settings.als.HYPER_N_FACTOR,
)
als_instance = AlternatingLeastSquares(**als_hyperparameters)

# Checkpoints are stored under the ALS checkpoint folder, in a sub-folder
# named after the number of lines read from the dataset.
checkpoint_manager = CheckpointManager(
    checkpoint_folder=settings.als.CHECKPOINT_FOLDER,
    sub_folder=str(settings.general.LINES_COUNT_TO_READ),
)

# The backend ties the algorithm to its checkpoint manager.
# `resume=True` asks it to continue from the last algorithm state found
# in the checkpoint manager's folder instead of starting from scratch.
als_backend = Backend(
    algorithm=als_instance,
    checkpoint_manager=checkpoint_manager,
    resume=True,
)

In [None]:
# Hand the ALS backend to the collaborative-filtering recommender builder.
recommender_builder = CollaborativeFilteringRecommenderBuilder(backend=als_backend)

# Building triggers a model fit on the indexed data, so this cell
# may take a while to finish.
recommender = recommender_builder.build(data=indexed_data)

2025-01-05 00:37:51,840 - INFO - Starting the build of the recommender using AlternatingLeastSquares...
2025-01-05 00:37:51,844 - INFO - Starting a model fitting using the backend AlternatingLeastSquares...
2025-01-05 00:37:51,853 - INFO - Checkpoint ./artifacts/checkpoints/als/1000000/20250105-003032_lambda0.1_gamma0.01_tau1_n_epochs4_n_factors10.pkl loaded with success
2025-01-05 00:37:51,856 - INFO - All factors and biases are already provided, so no initialization is needed.


Epochs:   0%|          | 0/4 [00:00<?, ?epoch/s]

In [6]:
# A single known rating used as the input for the recommendation request.
known_rating = {"rating": "4", "movieId": "17", "userId": "1"}
prediction_input = [known_rating]

# Last expression of the cell, so the returned value is displayed as its output.
recommender.recommend(prediction_input)

predictions => [3.96395126 3.55824498 3.766012   ... 4.28324854 3.83026638 4.28324854]
items_ids => [17521 19437 23836 ... 20761 21008 20759]
predictions => [3.96395126 3.55824498 3.766012   ... 4.28324854 3.83026638 4.28324854]
items_ids => [17521 19437 23836 ... 20761 21008 20759]
Recommending [17521 19437 23836 ... 20761 21008 20759]


array([3.96395126, 3.55824498, 3.766012  , ..., 4.28324854, 3.83026638,
       4.28324854], shape=(26505,))