In [1]:
from src.algorithms.alternating_least_squares import AlternatingLeastSquares
from src.helpers.dataset_indexer import DatasetIndexer
from src.helpers.checkpoint_manager import CheckpointManager
from src.recommenders import CollaborativeFilteringRecommenderBuilder
from src.backends import Backend
from src.helpers._logging import logger  # noqa
from src.settings import settings

In [3]:
# Index the ratings file. The three header names tell the indexer which
# columns hold the user, the item and the rating; `limit` is taken from the
# general settings (LINES_COUNT_TO_READ).
ratings_limit = settings.general.LINES_COUNT_TO_READ
dataset_indexer = DatasetIndexer(
    file_path="./ml-32m/ratings.csv",
    user_header="userId",
    item_header="movieId",
    rating_header="rating",
    limit=ratings_limit,
)

# Run the indexing with the approximate train ratio configured in the settings.
train_ratio = settings.general.APPROXIMATE_TRAIN_RATIO
indexed_data = dataset_indexer.index(approximate_train_ratio=train_ratio)

2025-01-05 00:37:51,656 - INFO - Successfully indexed 1000000 lines from ./ml-32m/ratings.csv


In [4]:
# Every ALS hyper-parameter is read from the `als` section of the settings.
als_settings = settings.als
als_instance = AlternatingLeastSquares(
    hyper_lambda=als_settings.HYPER_LAMBDA,
    hyper_gamma=als_settings.HYPER_GAMMA,
    hyper_tau=als_settings.HYPER_TAU,
    hyper_n_epochs=als_settings.HYPER_N_EPOCH,
    hyper_n_factors=als_settings.HYPER_N_FACTOR,
)

# Checkpoints go under the configured ALS checkpoint folder, in a sub-folder
# named after the number of lines read from the dataset.
checkpoint_manager = CheckpointManager(
    checkpoint_folder=als_settings.CHECKPOINT_FOLDER,
    sub_folder=str(settings.general.LINES_COUNT_TO_READ),
)

als_backend = Backend(
    # The algorithm this backend runs.
    algorithm=als_instance,
    checkpoint_manager=checkpoint_manager,
    # Whether to resume from the last state of the algorithm saved in the
    # checkpoint manager's folder, rather than starting from scratch.
    resume=True,
)

In [5]:
# Wrap the ALS backend in a collaborative-filtering recommender builder.
recommender_builder = CollaborativeFilteringRecommenderBuilder(backend=als_backend)

# Building fits the model on the indexed data, so this cell may take a few
# minutes to complete.
# NOTE(review): the recorded run reports a test RMSE (~3.69) far above the
# train RMSE (~0.70); worth investigating before trusting the recommendations.
recommender = recommender_builder.build(data=indexed_data)

2025-01-05 00:37:51,840 - INFO - Starting the build of the recommender using AlternatingLeastSquares...
2025-01-05 00:37:51,844 - INFO - Starting a model fitting using the backend AlternatingLeastSquares...
2025-01-05 00:37:51,853 - INFO - Checkpoint ./artifacts/checkpoints/als/1000000/20250105-003032_lambda0.1_gamma0.01_tau1_n_epochs4_n_factors10.pkl loaded with success
2025-01-05 00:37:51,856 - INFO - All factors and biases are already provided, so no initialization is needed.


Epochs:  25%|██▌       | 1/4 [00:38<01:54, 38.19s/epoch]

Epoch 1/4: Loss (Train/Test) = -25513.5652/-105038.5219, RMSE (Train/Test) = 0.7090/3.6938


Epochs:  50%|█████     | 2/4 [01:16<01:15, 37.98s/epoch]

Epoch 2/4: Loss (Train/Test) = -25379.1176/-105099.8658, RMSE (Train/Test) = 0.7055/3.6935


Epochs:  75%|███████▌  | 3/4 [01:46<00:34, 34.33s/epoch]

Epoch 3/4: Loss (Train/Test) = -25336.7063/-105064.5408, RMSE (Train/Test) = 0.7044/3.6924


Epochs: 100%|██████████| 4/4 [02:11<00:00, 32.90s/epoch]

Epoch 4/4: Loss (Train/Test) = -25319.1658/-104983.7697, RMSE (Train/Test) = 0.7040/3.6907
2025-01-05 00:40:03,490 - INFO - Checkpoint successfully saved at 20250105-004003_lambda0.1_gamma0.01_tau1_n_epochs4_n_factors10
2025-01-05 00:40:03,492 - INFO - Successfully built the recommender using AlternatingLeastSquares





In [6]:
# One rating record used as input to the recommender; the keys match the
# column headers given to the dataset indexer, and the values are strings.
prediction_input = [
    {
        "rating": "4",
        "movieId": "17",
        "userId": "1",
    },
]

# Kept as the last expression so the notebook displays the returned value.
recommender.recommend(prediction_input)

predictions => [3.6598907  3.25480278 3.44922819 ... 3.9736498  3.52068668 3.9736498 ]
items_ids => [17521 23079 21595 ... 20759 20761 13676]
predictions => [3.6598907  3.25480278 3.44922819 ... 3.9736498  3.52068668 3.9736498 ]
items_ids => [17521 23079 21595 ... 20759 20761 13676]
Recommending [17521 23079 21595 ... 20759 20761 13676]


array([3.6598907 , 3.25480278, 3.44922819, ..., 3.9736498 , 3.52068668,
       3.9736498 ], shape=(26505,))