In [1]:
from preprocessing import preprocess_all
from lightfm_functions import (
    get_clean_dataframes,
    create_lightfm_dataset,
    train_lightfm_model,
    train_test_split_data,
    hyperparameters_tuning,
)
import pandas as pd
import warnings
import pickle
import numpy as np
import itertools


# Suppress every warning category for the rest of the kernel session
# (presumably to keep cell output readable).
# NOTE(review): this blanket filter also hides deprecation / future warnings
# emitted by libraries, so API removals will not be announced before they break.
warnings.filterwarnings("ignore")



In [2]:
def _read_raw_csv(csv_path):
    """Read one raw ``;``-separated, latin-1 encoded CSV, skipping malformed rows.

    The original code passed ``error_bad_lines=False``; that keyword was
    deprecated in pandas 1.3 and removed in pandas 2.0. ``on_bad_lines="warn"``
    is its replacement with the same skip-and-warn behaviour.
    """
    read_options = {"sep": ";", "encoding": "latin-1"}
    try:
        return pd.read_csv(csv_path, on_bad_lines="warn", **read_options)
    except TypeError:
        # pandas < 1.3 rejects the unknown `on_bad_lines` keyword with a
        # TypeError; fall back to the legacy spelling there.
        return pd.read_csv(csv_path, error_bad_lines=False, **read_options)


def get_dataframes(min_count_author, min_count_user_ints, path="data/", to_save=True):
    """Load the raw CSV files, run ``preprocess_all`` on them and optionally save the result.

    Reads ``interactions.csv``, ``users.csv`` and ``items.csv`` from ``path``
    (``;``-separated, latin-1 encoded, malformed rows skipped), passes the three
    frames to ``preprocess_all`` and returns what it returns.

    Parameters
    ----------
    min_count_author, min_count_user_ints
        Forwarded unchanged to ``preprocess_all`` as its 4th and 5th arguments.
        NOTE(review): presumably minimum-occurrence filtering thresholds —
        confirm against the ``preprocessing`` module.
    path : str or path-like, default "data/"
        Directory holding the raw files. A trailing separator is optional.
    to_save : bool, default True
        When true, write ``interactions_clean.csv``, ``items_clean.csv`` and
        ``users_clean.csv`` (comma-separated, no index column) into ``path``.

    Returns
    -------
    tuple
        ``(interactions_df, items_df, users_df)`` as returned by ``preprocess_all``.
    """
    # Local import: the notebook's import cell does not bring `os` into scope.
    import os

    # os.path.join instead of `path + name`, so "data" and "data/" both work.
    interactions_df = _read_raw_csv(os.path.join(path, "interactions.csv"))
    users_df = _read_raw_csv(os.path.join(path, "users.csv"))
    items_df = _read_raw_csv(os.path.join(path, "items.csv"))

    interactions_df, items_df, users_df = preprocess_all(
        interactions_df, items_df, users_df, min_count_author, min_count_user_ints
    )

    if to_save:
        interactions_df.to_csv(os.path.join(path, "interactions_clean.csv"), index=False)
        items_df.to_csv(os.path.join(path, "items_clean.csv"), index=False)
        users_df.to_csv(os.path.join(path, "users_clean.csv"), index=False)

    return interactions_df, items_df, users_df

# Importing datasets

In [3]:
# One-off preprocessing step, kept commented out: uncomment to rebuild the
# *_clean.csv files from the raw CSVs via get_dataframes (both thresholds = 5).
# interactions_df, items_df, users_df = get_dataframes(5, 5)
# print('Datasets are saved')

# Load the three frames through the project helper `get_clean_dataframes`
# (imported from lightfm_functions).
# NOTE(review): presumably it reads the *_clean.csv files written by
# get_dataframes — confirm in lightfm_functions.
interactions_df, items_df, users_df = get_clean_dataframes()
print("Clean datasets are imported")
# Shapes are printed in the order interactions, users, items.
print(interactions_df.shape, users_df.shape, items_df.shape)
# `create_lightfm_dataset` returns a 5-tuple built from the interactions frame only.
# `weights` is the element the later cells use (split, tuning and final training).
(
    dataset,
    user_ids_buffered,
    item_ids_buffered,
    interactions,
    weights,
) = create_lightfm_dataset(interactions_df)
print("LightFM dataset is created")

Clean datasets are imported
(192228, 3) (49437, 3) (13787, 6)
LightFM dataset is created


# Random search hyperparameter tuning

In [9]:
# Split `weights` into train and test parts with the project helper.
# NOTE(review): 0.1 is presumably the held-out test fraction — confirm in lightfm_functions.
train_weights, test_weights = train_test_split_data(weights, 0.1)

In [10]:
# Run the project's hyperparameter search; it prints the sampled hyperparameters
# plus train/test AUC and Precision@10 for each candidate model.
# NOTE(review): 20 is presumably the number of sampled configurations — confirm.
# NOTE(review): the printed hyperparameters show 'random_state': [42] (a list,
# not an int) — check how the sampler in hyperparameters_tuning builds that value.
hyperparameters_tuning(train_weights, test_weights, 20)

Model #1:

Hyperparameters: {'no_components': 50, 'learning_schedule': 'adagrad', 'loss': 'warp', 'learning_rate': 0.2, 'max_sampled': 12, 'random_state': [42]}

Train AUC 0.85417, Test AUC 0.56293
Train Precision@10 0.00384, Test Precision@10 0.00105


Model #2:

Hyperparameters: {'no_components': 98, 'learning_schedule': 'adadelta', 'loss': 'bpr', 'learning_rate': 0.02, 'max_sampled': 14, 'random_state': [42]}

Train AUC 0.95202, Test AUC 0.62347
Train Precision@10 0.08574, Test Precision@10 0.00491


Model #3:

Hyperparameters: {'no_components': 19, 'learning_schedule': 'adagrad', 'loss': 'bpr', 'learning_rate': 0.1, 'max_sampled': 10, 'random_state': [42]}

Train AUC 0.95702, Test AUC 0.60264
Train Precision@10 0.07619, Test Precision@10 0.00454


Model #4:

Hyperparameters: {'no_components': 94, 'learning_schedule': 'adadelta', 'loss': 'warp', 'learning_rate': 0.04, 'max_sampled': 9, 'random_state': [42]}

Train AUC 0.98745, Test AUC 0.76911
Train Precision@10 0.07855, Test Precis

# Final model

In [6]:
# No hyperparameters are passed in this call, so `train_lightfm_model` uses
# whatever is set inside it. If the tuning step selected different
# hyperparameters, edit that function in lightfm_functions before running this cell.

# Train on the full `weights` object (not the train/test split).
# NOTE(review): the second argument `True` is presumably a "save the model" flag,
# given the message printed below — confirm in lightfm_functions.
model = train_lightfm_model(weights, True)
print("The model is trained and saved")

Epoch: 100%|██████████| 15/15 [00:10<00:00,  1.41it/s]

The model is trained and saved



