In [31]:
import pandas as pd

from src.algorithms.alternating_least_squares import AlternatingLeastSquares
from src.helpers.dataset_indexer import DatasetIndexer
from src.helpers.checkpoint_manager import CheckpointManager
from src.recommenders import CollaborativeFilteringRecommenderBuilder
from src.backends import Backend
from src.helpers._logging import logger  # noqa
from src.settings import settings

from src.helpers.graphing import (
    plot_als_train_test_loss_evolution,
    plot_als_train_test_rmse_evolution,
    # plot_error_evolution,
    plot_power_low_distribution,
    plot_data_item_distribution_as_hist,
)

In [32]:
# The general settings section is read twice below, so grab it once.
general_cfg = settings.general

# Point the indexer at the ratings CSV and name the columns that carry the
# user id, the item id and the rating. `limit` comes from the settings
# (presumably the maximum number of lines to read from the file).
dataset_indexer = DatasetIndexer(
    file_path="./ml-32m/ratings.csv",
    user_header="userId",
    item_header="movieId",
    rating_header="rating",
    limit=general_cfg.LINES_COUNT_TO_READ,
)

# Index the ratings, passing the approximate train ratio from the settings.
indexed_data = dataset_indexer.index(approximate_train_ratio=general_cfg.APPROXIMATE_TRAIN_RATIO)

2025-01-07 00:12:00,531 [INFO] Successfully indexed 1000000 lines from ./ml-32m/ratings.csv


In [33]:
# Load the movies CSV and turn it into a lookup table keyed by movie id.
# The ids are read as strings, and every id maps to a dict holding the
# remaining columns of its row. The backend needs this mapping to query
# the movies.
movies_frame = pd.read_csv("./ml-32m/movies.csv", dtype={"movieId": str})
item_database = movies_frame.set_index("movieId").to_dict(orient="index")

In [34]:
# plot_data_item_distribution_as_hist(indexed_data)

In [35]:
# plot_power_low_distribution(indexed_data,)

In [36]:
# Every ALS hyperparameter is taken from the `als` section of the settings.
als_cfg = settings.als

als_instance = AlternatingLeastSquares(
    hyper_lambda=als_cfg.HYPER_LAMBDA,
    hyper_gamma=als_cfg.HYPER_GAMMA,
    hyper_tau=als_cfg.HYPER_TAU,
    hyper_n_epochs=als_cfg.HYPER_N_EPOCH,
    hyper_n_factors=als_cfg.HYPER_N_FACTOR,
)

# Checkpoints go under the ALS checkpoint folder, in a sub-folder named
# after the configured number of dataset lines to read.
als_checkpoint_manager = CheckpointManager(
    checkpoint_folder=settings.als.CHECKPOINT_FOLDER,
    sub_folder=str(settings.general.LINES_COUNT_TO_READ),
)

als_backend = Backend(
    # The algorithm driven by this backend
    algorithm=als_instance,
    checkpoint_manager=als_checkpoint_manager,
    # The predictor needs this to render the names of the items
    item_database=item_database,
    # Whether or not to resume from the last state of the algorithm
    # found in the checkpoint manager's folder.
    resume=True,
)

In [37]:
# Hand the ALS backend to the collaborative filtering recommender builder.
recommender_builder = CollaborativeFilteringRecommenderBuilder(backend=als_backend)

# Building the recommender may take a while to finish.
recommender = recommender_builder.build(data=indexed_data)

2025-01-07 00:12:01,337 [INFO] Starting the build of the recommender using AlternatingLeastSquares...
2025-01-07 00:12:01,340 [INFO] Starting a model fitting using the backend AlternatingLeastSquares...
2025-01-07 00:12:01,345 [INFO] Checkpoint ./artifacts/checkpoints/als/1000000/20250107-001150_lambda0.1_gamma0.01_tau1_n_epochs2_n_factors10.pkl loaded with success
2025-01-07 00:12:01,347 [INFO] All factors and biases are already provided, so no initialization is needed.
2025-01-07 00:12:01,348 [ERROR] Cannot train the model more because hyperparameter 'hyper_n_epochs' (2) is already greater or equal to the final number of epochs wanted which is 2. Please check the value of 'hyper_n_epochs' and adjust accordingly. Exiting...
2025-01-07 00:12:01,360 [INFO] Checkpoint successfully saved at 20250107-001201_lambda0.1_gamma0.01_tau1_n_epochs2_n_factors10
2025-01-07 00:12:01,361 [INFO] Successfully built the recommender using AlternatingLeastSquares


In [38]:
# plot_als_train_test_rmse_evolution(als_backend.algorithm)

In [39]:
# plot_als_train_test_loss_evolution(als_backend.algorithm)

In [40]:
# Request recommendations from a single input entry. Each tuple is
# presumably an (item id, rating) pair, with the id given as a string --
# TODO confirm against `recommend`.
prediction_input = [
    ("17", 4),
]
recommender.recommend(prediction_input)

[{'title': 'Jeffrey Dahmer Files, The (2012)', 'genres': 'Crime|Documentary'},
 {'title': 'The Last House on the Beach (1978)',
  'genres': 'Crime|Drama|Horror|Thriller'},
 {'title': 'Afflicted, The (2010)', 'genres': 'Horror|Thriller'},
 {'title': 'Paradise (1982)', 'genres': 'Adventure|Romance'},
 {'title': 'In Old California (1942)', 'genres': 'Western'},
 {'title': 'Love in Bloom (1935)', 'genres': 'Romance'},
 {'title': 'Story of G.I. Joe (1945)', 'genres': 'War'},
 {'title': 'On Our Merry Way (1948)', 'genres': 'Comedy'},
 {'title': 'Cyrus: Mind of a Serial Killer (2010)',
  'genres': 'Crime|Horror|Mystery|Thriller'},
 {'title': 'Town That Dreaded Sundown, The (1976)',
  'genres': 'Crime|Drama|Horror|Mystery|Thriller'}]