In [1]:
from src.algorithms.alternating_least_squares import AlternatingLeastSquares
from src.helpers.dataset_indexer import DatasetIndexer
from src.helpers.checkpoint_manager import CheckpointManager
from src.recommenders import CollaborativeFilteringRecommenderBuilder
from src.backends import Backend
from src.helpers._logging import logger  # noqa
from src.settings import settings

In [2]:
# Keyword arguments for the indexer: the ratings CSV to read, the headers of
# its user / item / rating columns, and the line limit taken from the settings.
indexer_options = {
    "file_path": "./ml-32m/ratings.csv",
    "user_header": "userId",
    "item_header": "movieId",
    "rating_header": "rating",
    "limit": settings.general.LINES_COUNT_TO_READ,
}
dataset_indexer = DatasetIndexer(**indexer_options)

# Index the ratings file. The ratio comes from the settings; presumably it
# drives the train/test split -- confirm against DatasetIndexer.index.
train_ratio = settings.general.APPROXIMATE_TRAIN_RATIO
indexed_data = dataset_indexer.index(approximate_train_ratio=train_ratio)

2025-01-05 04:11:09,419 - INFO - Successfully indexed 1000000 lines from ./ml-32m/ratings.csv


In [3]:
# Gather every ALS hyperparameter from the project settings in one mapping.
als_settings = settings.als
als_hyperparameters = {
    "hyper_lambda": als_settings.HYPER_LAMBDA,
    "hyper_gamma": als_settings.HYPER_GAMMA,
    "hyper_tau": als_settings.HYPER_TAU,
    "hyper_n_epochs": als_settings.HYPER_N_EPOCH,
    "hyper_n_factors": als_settings.HYPER_N_FACTOR,
}
als_instance = AlternatingLeastSquares(**als_hyperparameters)

# Checkpoints go under the ALS checkpoint folder, in a sub-folder named after
# the configured number of lines to read.
als_checkpoint_manager = CheckpointManager(
    checkpoint_folder=als_settings.CHECKPOINT_FOLDER,
    sub_folder=str(settings.general.LINES_COUNT_TO_READ),
)

als_backend = Backend(
    # The algorithm the backend will run.
    algorithm=als_instance,
    checkpoint_manager=als_checkpoint_manager,
    # Whether to resume from the last state of the algorithm saved in the
    # checkpoint manager folder, or not.
    resume=True,
)

In [4]:
# Hand the ALS backend to the collaborative-filtering recommender builder.
recommender_builder = CollaborativeFilteringRecommenderBuilder(backend=als_backend)

# Building fits the model on the indexed data, so this call can take a while
# (roughly a minute for 4 epochs in the recorded run).
# NOTE(review): in the recorded output the test RMSE (~3.67) is far above the
# train RMSE (~0.70) and the reported losses are negative -- worth confirming
# how the test-set metrics are computed.
recommender = recommender_builder.build(data=indexed_data)

2025-01-05 04:11:09,689 - INFO - Starting the build of the recommender using AlternatingLeastSquares...
2025-01-05 04:11:09,691 - INFO - Starting a model fitting using the backend AlternatingLeastSquares...
2025-01-05 04:11:09,696 - INFO - Checkpoint ./artifacts/checkpoints/als/1000000/20250105-040230_lambda0.1_gamma0.01_tau1_n_epochs4_n_factors10.pkl loaded with success
2025-01-05 04:11:09,697 - INFO - All factors and biases are already provided, so no initialization is needed.


Epochs:  25%|██▌       | 1/4 [00:16<00:49, 16.44s/epoch]

Epoch 1/4: Loss (Train/Test) = -25420.5533/-106068.9212, RMSE (Train/Test) = 0.7086/3.6755


Epochs:  50%|█████     | 2/4 [00:31<00:31, 15.69s/epoch]

Epoch 2/4: Loss (Train/Test) = -25317.7613/-106117.6600, RMSE (Train/Test) = 0.7059/3.6753


Epochs:  75%|███████▌  | 3/4 [00:46<00:15, 15.53s/epoch]

Epoch 3/4: Loss (Train/Test) = -25291.1418/-106067.9397, RMSE (Train/Test) = 0.7052/3.6741


Epochs: 100%|██████████| 4/4 [01:01<00:00, 15.39s/epoch]

Epoch 4/4: Loss (Train/Test) = -25280.0733/-105981.7293, RMSE (Train/Test) = 0.7048/3.6724
2025-01-05 04:12:11,279 - INFO - Checkpoint successfully saved at 20250105-041211_lambda0.1_gamma0.01_tau1_n_epochs4_n_factors10
2025-01-05 04:12:11,280 - INFO - Successfully built the recommender using AlternatingLeastSquares





In [5]:
# Query the fitted recommender with a single (id, rating) pair.
# NOTE(review): "17" is presumably a raw movieId and 4 the rating given to it
# -- confirm against the signature of `recommend`.
prediction_input = [("17", 4)]
# In the recorded run this call also prints its intermediate predictions and
# item ids, and the cell displays the returned array of shape (26505,).
recommender.recommend(prediction_input)

predictions => [3.86701541 3.44239732 3.68528639 ... 4.19107632 3.7385815  4.19107632]
items_ids => [17512 17521 19437 ... 21607 23931 13676]
predictions => [3.86701541 3.44239732 3.68528639 ... 4.19107632 3.7385815  4.19107632]
items_ids => [17512 17521 19437 ... 21607 23931 13676]
Recommending [17512 17521 19437 ... 21607 23931 13676]


array([3.86701541, 3.44239732, 3.68528639, ..., 4.19107632, 3.7385815 ,
       4.19107632], shape=(26505,))