In [1]:
import pandas as pd
from recommenders.utils.constants import SEED
from recommenders.utils.timer import Timer
from recommenders.datasets.download_utils import maybe_download, unzip_file
from recommenders.tuning.parameter_sweep import generate_param_grid
from recommenders.datasets.pandas_df_utils import LibffmConverter
from recommenders.datasets.python_splitters import python_stratified_split
from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k
import xlearn as xl
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error

XLearnLibraryNotFound: Cannot find xlearn Library in the candidate path

In [None]:
# Read the two input tables from the working directory
# (presumably ratings in 'rats.csv' and item categories in 'icat.csv' -- confirm)
df1 = pd.read_csv('rats.csv')
df2 = pd.read_csv('icat.csv')

# Join the two tables on their shared 'itemId' column (default inner join)
merged_df = df1.merge(df2, on='itemId')

# Working frame used by every later cell
df3 = pd.DataFrame(merged_df)

# Optional step, currently disabled: integer-encode the Category column
# df3['Category'] = df3['Category'].astype('category').cat.codes

# Snap ratings to the nearest whole number and store them as integers
df3['rating'] = df3['rating'].round().astype(int)

df3.head()

In [None]:
# Fit a libffm converter on the working frame; every column except 'rating'
# becomes a field.
converter = LibffmConverter().fit(df3, col_rating='rating')

# Hand transform() a copy: the converter is understood to overwrite the field
# columns of the frame it is given with "field:index:value" strings, which
# would silently corrupt df3 for the split / evaluation cells further down.
df_out = converter.transform(df3.copy())

# Preview only -- avoid dumping the whole encoded frame into the notebook
df_out.head()

In [None]:
# Report how many fields / features the fitted converter counted
summary_template = 'There are in total {0} fields and {1} features.'
n_fields, n_features = converter.field_count, converter.feature_count
print(summary_template.format(n_fields, n_features))

In [None]:
# --- Training hyper-parameters ---
LEARNING_RATE, LAMBDA, EPOCH = 0.2, 0.002, 10
OPT_METHOD = "sgd"  # one of "sgd", "adagrad", "ftrl"

# --- Evaluation metric ---
#   binary classification: "acc", "prec", "f1", "auc"
#   regression:            "rmse", "mae", "mape"
# NOTE(review): the ratings are rounded integers, so "auc" only makes sense if
# the task is really binary -- confirm.
METRIC = "auc"

# --- Column-name mapping shared by the split / evaluation cells ---
# NOTE(review): "col_timestamp" points at the "Category" column, which does not
# look like a timestamp -- confirm this is intentional.
header = dict(
    col_user="userId",
    col_item="itemId",
    col_rating="rating",
    col_timestamp="Category",
    col_prediction="Prediction",
)

In [None]:
# Stratified 75/25 split of the working frame, keyed on the user and item
# columns named in `header`.
# NOTE(review): the seed is the literal 42 here while SEED is imported at the
# top of the notebook -- consider using one source of randomness.
train, test = python_stratified_split(
    df3,
    ratio=0.75,
    seed=42,
    col_item=header["col_item"],
    col_user=header["col_user"],
)

In [None]:
# Training task
#
# xLearn reads its data from libffm-formatted text files, so it must be given
# file paths -- not the pandas frames returned by the splitter. The encoded
# rows already exist in `df_out`; pick the ones belonging to each split by
# index label and write them to disk.
TRAIN_LIBFFM_PATH = "train.libffm"
VALID_LIBFFM_PATH = "valid.libffm"

# Guard the index-based lookup: it is only valid if the splitter kept the
# original row labels and those labels identify rows uniquely.
assert df_out.index.is_unique, "df_out needs a unique index to look up split rows"
assert train.index.isin(df_out.index).all() and test.index.isin(df_out.index).all(), \
    "split rows must keep the index labels of the encoded frame"

# One line per row, space separated, rating first (the column order of df_out)
np.savetxt(TRAIN_LIBFFM_PATH, df_out.loc[train.index].values, delimiter=" ", fmt="%s")
np.savetxt(VALID_LIBFFM_PATH, df_out.loc[test.index].values, delimiter=" ", fmt="%s")

ffm_model = xl.create_ffm()              # Use field-aware factorization machine (ffm)
ffm_model.setTrain(TRAIN_LIBFFM_PATH)    # Set the path of training dataset
ffm_model.setValidate(VALID_LIBFFM_PATH) # Set the path of validation dataset

In [None]:
# Hyper-parameter sweep for the FFM model.
#
# Steps: hold out 20% of the rows, encode everything to libffm once, write the
# two splits to disk (xLearn works on files), then train / predict / evaluate
# one model per parameter combination and remember the best one by MAP.

col_user, col_item = header["col_user"], header["col_item"]
col_rating, col_pred = header["col_rating"], header["col_prediction"]

# Split the dataset into train and test sets. The split keeps the row labels
# of df3, which are used below to pick the matching encoded rows.
assert df3.index.is_unique, "df3 needs a unique index to look up split rows"
train_df, test_df = train_test_split(df3, test_size=0.2, random_state=SEED)

# Convert to libffm format ONCE for the whole frame and slice afterwards.
# Encoding train and test with separate transform() calls would rebuild the
# feature index for each call, so the same feature could get different ids in
# the two files. User / item ids are cast to strings so that they are encoded
# as categorical (one-hot) features instead of numeric magnitudes; the cast
# also yields a fresh frame, so transform() cannot overwrite df3's columns.
ffm_input = df3.astype({col_user: str, col_item: str})
libffm_converter = LibffmConverter().fit(ffm_input, col_rating=col_rating)
df_libffm = libffm_converter.transform(ffm_input)
train_libffm = df_libffm.loc[train_df.index]
test_libffm = df_libffm.loc[test_df.index]

# xLearn takes file paths, not DataFrames
SWEEP_TRAIN_PATH = "sweep_train.libffm"
SWEEP_TEST_PATH = "sweep_test.libffm"
np.savetxt(SWEEP_TRAIN_PATH, train_libffm.values, delimiter=" ", fmt="%s")
np.savetxt(SWEEP_TEST_PATH, test_libffm.values, delimiter=" ", fmt="%s")

# Specify the FFM model parameters to sweep over
param_dict = {
    "lr": [0.0001, 0.001, 0.01],
    "lambda": [0.001, 0.01, 0.1]
}

# Generate parameter grid for tuning
params_list = generate_param_grid(param_dict)

# Settings shared by every run. The ratings are integer scores that are
# compared with RMSE / MAE below, so the model is trained as a regression.
# NOTE(review): switch to task "binary" with METRIC if the labels are 0/1.
fixed_params = {"task": "reg", "metric": "rmse", "epoch": EPOCH, "opt": OPT_METHOD}

# Train and evaluate FFM models with different parameter settings
best_score = 0.0
best_model = None
best_params = None
best_model_path = None

for run_id, params in enumerate(params_list):
    print(f'Training model with parameters: {params}')
    model_path = f"sweep_ffm_model_{run_id}.out"
    pred_path = f"sweep_ffm_pred_{run_id}.txt"

    # Train the FFM model; the fitted weights are written to model_path.
    # NOTE(review): the held-out set doubles as the validation set here, as in
    # the original cell -- the reported scores are therefore optimistic.
    with Timer() as timer:
        ffm_model = xl.create_ffm()
        ffm_model.setTrain(SWEEP_TRAIN_PATH)
        ffm_model.setValidate(SWEEP_TEST_PATH)
        ffm_model.fit({**fixed_params, **params}, model_path)

    # Predict ratings on the test set (one score per line of the test file)
    ffm_model.setTest(SWEEP_TEST_PATH)
    ffm_model.predict(model_path, pred_path)
    pred_scores = np.loadtxt(pred_path, ndmin=1)
    assert len(pred_scores) == len(test_df), "expected one prediction per test row"

    # The ranking metrics need a frame with user, item and prediction columns;
    # the rows are in the same order as the test file that was scored.
    pred_df = test_df[[col_user, col_item]].copy()
    pred_df[col_pred] = pred_scores
    ranking_cols = dict(col_user=col_user, col_item=col_item, col_prediction=col_pred)

    # Evaluate the model
    map_score = map_at_k(test_df, pred_df, **ranking_cols)
    ndcg_score = ndcg_at_k(test_df, pred_df, **ranking_cols)
    precision_score = precision_at_k(test_df, pred_df, **ranking_cols)
    recall_score = recall_at_k(test_df, pred_df, **ranking_cols)
    rmse_score = mean_squared_error(test_df[col_rating], pred_scores) ** 0.5
    mae_score = mean_absolute_error(test_df[col_rating], pred_scores)

    print('Evaluation results:')
    print(f'MAP: {map_score}')
    print(f'nDCG: {ndcg_score}')
    print(f'Precision: {precision_score}')
    print(f'Recall: {recall_score}')
    print(f'RMSE: {rmse_score}')
    print(f'MAE: {mae_score}')

    # Keep track of the best model based on MAP score. The first run always
    # becomes the incumbent so a sweep whose scores are all 0 still reports one.
    if best_model is None or map_score > best_score:
        best_score = map_score
        best_model = ffm_model
        best_params = params
        best_model_path = model_path

    print(f'Training time: {timer.interval}')
    print('--------------------------------------')

# The xLearn model object does not expose its parameters, so report the grid
# entry (and the weight file) recorded for the best run instead.
print('Best model parameters:')
print(best_params)