Just use the Google Colab notebook for now; it is faster.

LightFM Test

Source: https://github.com/recommenders-team/recommenders/blob/main/examples/02_model_collaborative_filtering/lightfm_deep_dive.ipynb

Install libraries

In [None]:
!pip install lightfm
!pip install recommenders

Import libraries

In [None]:
import os
import sys
import itertools
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import lightfm
from lightfm import LightFM
from lightfm.data import Dataset
from lightfm import cross_validation
from lightfm.evaluation import precision_at_k as lightfm_prec_at_k
from lightfm.evaluation import recall_at_k as lightfm_recall_at_k

from recommenders.evaluation.python_evaluation import precision_at_k, recall_at_k
from recommenders.utils.timer import Timer
from recommenders.datasets import movielens
from recommenders.models.lightfm.lightfm_utils import (
    track_model_metrics,
    prepare_test_df,
    prepare_all_predictions,
    compare_metric,
    similar_users,
    similar_items,
)
from recommenders.utils.notebook_utils import store_metadata

# Print the Python interpreter version and the installed LightFM version so
# that results can be tied to a specific environment.
print("System version: {}".format(sys.version))
print("LightFM version: {}".format(lightfm.__version__))

Defining variables

In [None]:
# Select MovieLens data size
# NOTE(review): not referenced by any cell shown here (the data is read from a
# CSV file instead) — confirm whether this is still needed.
MOVIELENS_DATA_SIZE = '100k'

# default number of recommendations (the k used for precision@k / recall@k)
K = 10
# fraction of the interactions held out for testing
TEST_PERCENTAGE = 0.25
# model learning rate
LEARNING_RATE = 0.25
# number of latent factors
NO_COMPONENTS = 20
# number of epochs to fit the model
NO_EPOCHS = 20
# number of threads; passed as num_threads to prepare_all_predictions below
NO_THREADS = 32
# regularisation for both user and item features
# NOTE(review): neither value is passed to the model in the cells shown here — confirm.
ITEM_ALPHA = 1e-6
USER_ALPHA = 1e-6

# seed for pseudo-random number generation (sampling, splitting, model init)
SEED = 42

Retrieve data

In [None]:
# Load the interaction data from a CSV file; the path is relative, so it is
# resolved against the notebook's current working directory.
# Later cells read the 'userID' and 'itemID' columns of this frame.
data = pd.read_csv("exported_data/data.csv")
# quick look at the data: five rows, sampled reproducibly with SEED
data.sample(5, random_state=SEED)

Prepare data

In [None]:
# Create an empty LightFM Dataset; it is populated by dataset.fit(...) below.
dataset = Dataset()

In [None]:
# Register every user ID and item ID that appears in the data with the dataset.
dataset.fit(users=data['userID'],
            items=data['itemID'])

# quick check to determine the number of unique users and items in the data
# (the item count is stored in, and printed as, num_topics)
num_users, num_topics = dataset.interactions_shape()
print(f'Num users: {num_users}, num_topics: {num_topics}.')

In [None]:
# Build the interactions and weights matrices from the first three columns of `data`.
# NOTE(review): columns are selected by position, not by name — presumably they
# are (userID, itemID, weight/rating) in that order; confirm against the CSV.
(interactions, weights) = dataset.build_interactions(data.iloc[:, 0:3].values)

In [None]:
# Randomly hold out TEST_PERCENTAGE of the interactions for testing.
# A RandomState seeded with SEED is passed so the split is repeatable.
train_interactions, test_interactions = cross_validation.random_train_test_split(
    interactions, test_percentage=TEST_PERCENTAGE,
    random_state=np.random.RandomState(SEED))

In [None]:
# Report the shapes of the train and test interaction matrices.
print(f"Shape of train interactions: {train_interactions.shape}")
print(f"Shape of test interactions: {test_interactions.shape}")

Fit the LightFM model

In [None]:
# Configure a LightFM model with the 'warp' loss, NO_COMPONENTS latent factors
# and a seeded RandomState for repeatable initialisation.
model1 = LightFM(loss='warp', no_components=NO_COMPONENTS,
                 learning_rate=LEARNING_RATE,
                 random_state=np.random.RandomState(SEED))

In [None]:
# Train on the training interactions only, for NO_EPOCHS epochs.
# num_threads is not passed here, so the library default applies (NO_THREADS is
# only used for prediction further down).
# The trailing semicolon suppresses the cell's output.
model1.fit(interactions=train_interactions,
          epochs=NO_EPOCHS);

Prepare model evaluation data

In [None]:
# Shuffle the (row, col, data) triples of the full interaction matrix with a
# RandomState seeded identically to the one used for the train/test split.
# NOTE(review): this calls the private helper cross_validation._shuffle,
# presumably to reproduce the ordering used inside random_train_test_split so
# the test rows can be identified — verify against the installed lightfm version.
uids, iids, interaction_data = cross_validation._shuffle(
    interactions.row, interactions.col, interactions.data,
    random_state=np.random.RandomState(SEED))

# Everything from the cutoff index onwards (the last TEST_PERCENTAGE of the
# shuffled entries) is treated as the test portion.
cutoff = int((1.0 - TEST_PERCENTAGE) * len(uids))
test_idx = slice(cutoff, None)

In [None]:
# Retrieve the dataset's ID mappings (user IDs, user features, item IDs,
# item features); uid_map and iid_map are used by the cells below.
uid_map, ufeature_map, iid_map, ifeature_map = dataset.mapping()

In [None]:
# Build the test DataFrame from the held-out slice of shuffled indices, timing the call.
with Timer() as test_time:
    test_df = prepare_test_df(test_idx, uids, iids, uid_map, iid_map, weights)
print(f"Took {test_time.interval:.1f} seconds for prepare and predict test data.")
# Keep the elapsed time for this step.
time_reco1 = test_time.interval

In [None]:
# Show five reproducibly sampled rows of the test DataFrame.
test_df.sample(5, random_state=SEED)

In [None]:
# Generate predictions with the trained model, timing the call.
# NOTE(review): train_interactions is passed in, presumably so that pairs
# already seen in training can be excluded — confirm in prepare_all_predictions.
with Timer() as test_time:
    all_predictions = prepare_all_predictions(data, uid_map, iid_map,
                                              interactions=train_interactions,
                                              model=model1,
                                              num_threads=NO_THREADS)
print(f"Took {test_time.interval:.1f} seconds for prepare and predict all data.")
# Keep the elapsed time for this step.
time_reco2 = test_time.interval

In [None]:
# Show five reproducibly sampled rows of the predictions DataFrame.
all_predictions.sample(5, random_state=SEED)

Export all predictions

In [None]:
# File path the predictions are written to (relative to the working directory)
predictions_file_path = 'model_predictions.csv'

# Export all predictions to a CSV file; index=False omits the DataFrame index column
all_predictions.to_csv(predictions_file_path, index=False)
print(f"All model predictions exported successfully to {predictions_file_path}")

Model evaluation

In [None]:
# Evaluate with the recommenders-package metrics, which take the test and
# prediction DataFrames, and record how long this takes.
with Timer() as test_time:
    eval_precision = precision_at_k(rating_true=test_df,
                                rating_pred=all_predictions, k=K)
    eval_recall = recall_at_k(test_df, all_predictions, k=K)
time_reco3 = test_time.interval

# Evaluate with LightFM's own metrics, which take the model and the interaction
# matrices; .mean() reduces the returned values to a single score.
with Timer() as test_time:
    eval_precision_lfm = lightfm_prec_at_k(model1, test_interactions,
                                           train_interactions, k=K).mean()
    eval_recall_lfm = lightfm_recall_at_k(model1, test_interactions,
                                          train_interactions, k=K).mean()
time_lfm = test_time.interval

# Print both sets of results, one item per line, for comparison.
print(
    "------ Using Repo's evaluation methods ------",
    f"Precision@K:\t{eval_precision:.6f}",
    f"Recall@K:\t{eval_recall:.6f}",
    "\n------ Using LightFM evaluation methods ------",
    f"Precision@K:\t{eval_precision_lfm:.6f}",
    f"Recall@K:\t{eval_recall_lfm:.6f}",
    sep='\n')

Testing the model

In [None]:
def get_top_predictions_for_user(user_id, all_predictions_df, k=10):
    """Return the highest-scoring predictions for a single user.

    Args:
        user_id: Value compared against the 'userID' column.
        all_predictions_df (pd.DataFrame): Frame containing at least the
            'userID' and 'prediction' columns.
        k (int): Number of rows to keep, as passed to ``DataFrame.head``.
            Defaults to 10.

    Returns:
        pd.DataFrame: The rows belonging to ``user_id``, ordered by
        'prediction' from highest to lowest and cut to the first ``k`` rows.
        Empty when the user has no rows in ``all_predictions_df``.
    """
    belongs_to_user = all_predictions_df['userID'] == user_id
    ranked = all_predictions_df[belongs_to_user].sort_values(
        by='prediction', ascending=False
    )
    return ranked.head(k)

# Example usage: show the top-K predictions for a single user.
user_id = 626
# Pass k=K explicitly: the function's own default is 10, so without it the
# number of rows returned would stop matching the "Top {K}" header below as
# soon as K is changed in the configuration cell.
top_predictions_for_user = get_top_predictions_for_user(user_id, all_predictions, k=K)
print(f"Top {K} predictions for user {user_id}:")
print(top_predictions_for_user)

Export model

In [None]:
import joblib

# File path the trained model is written to (relative to the working directory)
model_file_path = 'lightfm_model.pkl'

# Serialise the trained model to disk with joblib
joblib.dump(model1, model_file_path)
print(f"Model exported successfully to {model_file_path}")

Import model

In [None]:
# Load the model back from model_file_path (defined in the export cell above).
# NOTE: joblib.load is pickle-based and can execute arbitrary code while
# loading — only load model files from a trusted source.
loaded_model = joblib.load(model_file_path)

# Check if the loaded object is an instance of the LightFM model class.
# This only prints a message; execution continues even if the check fails.
if isinstance(loaded_model, LightFM):
    print("Model loaded successfully!")
else:
    print("Failed to load the model. Please check the file path or the model file.")


Prepare predictions with imported model

In [None]:
# Repeat the prediction step with the model loaded from disk, using the same
# data, mappings and training interactions as for model1, and time the call.
with Timer() as test_time:
    loaded_model_predictions = prepare_all_predictions(data, uid_map, iid_map,
                                                       interactions=train_interactions,
                                                       model=loaded_model,
                                                       num_threads=NO_THREADS)
print(f"Took {test_time.interval:.1f} seconds for prepare and predict all data.")

Output prediction for specific user with imported model

In [None]:
# Show the top-K predictions for a single user, using the reloaded model's output.
user_id = 626
# Pass k=K explicitly: the function's own default is 10, so without it the
# number of rows returned would stop matching the "Top {K}" header below as
# soon as K is changed in the configuration cell.
top_predictions_for_user = get_top_predictions_for_user(user_id, loaded_model_predictions, k=K)
print(f"Top {K} predictions for user {user_id}:")
print(top_predictions_for_user)