# Bayesian Personalized Ranking (BPR)

## Global Settings and Imports

In [1]:
import sys
import os
import cornac
import pandas as pd
import numpy as np
from recommenders.datasets.python_splitters import python_random_split
from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k
from recommenders.models.cornac.cornac_utils import predict_ranking
from recommenders.utils.timer import Timer

# Record the interpreter and Cornac versions so the run can be reproduced.
print(f"System version: {sys.version}")
print(f"Cornac version: {cornac.__version__}")

  from .autonotebook import tqdm as notebook_tqdm


FM model is only supported on Linux.
Windows executable can be found at http://www.libfm.org.
System version: 3.8.17 (default, Jul  5 2023, 16:18:40) 
[Clang 14.0.6 ]
Cornac version: 1.15.4


In [2]:
# --- Data source --------------------------------------------------------------

# CSV file with the sales events that is read by the engine cell.
DATA_FILE_NAME = "../Data/20230721T041206_sales_2023_basic_single_events_removed.csv"
# Alternative extract (presumably a 24-month window, judging by the file name):
#DATA_FILE_NAME = "../Data/20230721T235400_sales_24mo_basic_single_events_removed.csv"

# Only rows whose "country" column equals this value are kept.
COUNTRY = "nigeria"

# --- Experiment design --------------------------------------------------------

# One complete split/train/evaluate run is executed per seed.
SEEDS = range(5)

# Fraction of location_skus to include in the training dataset.
TRAIN_FRAC = 0.75

# Number of top-ranked items per location used by the @K metrics.
TOP_K = 10

# --- Model parameters (forwarded to cornac.models.BPR) ------------------------

NUM_FACTORS = 200     # passed as `k`
NUM_EPOCHS = 100      # passed as `max_iter`
LEARNING_RATE = 1e-2  # passed as `learning_rate`
LAMBDA_REG = 1e-3     # passed as `lambda_reg`

## Engine

In [3]:
# Per-seed metric stores, keyed by seed and filled in by the run loop below.
eval_maps, eval_ndcgs, eval_precisions, eval_recalls = {}, {}, {}, {}

## 0 Data

# Read the full sales extract, then keep only the rows for the configured country.
data_all_cols = pd.read_csv(DATA_FILE_NAME)
data_all_cols = data_all_cols[data_all_cols["country"] == COUNTRY]

# Fail fast with a clear message: a COUNTRY value that matches no rows (typo,
# different casing, wrong file) would otherwise surface much later as an
# unrelated error inside the splitter or the model.
if data_all_cols.empty:
    raise ValueError(
        "No rows with country == {!r} found in {!r}".format(COUNTRY, DATA_FILE_NAME)
    )

# Keep the three columns used downstream, in (user, item, rating) order:
# location_id is used as the user, product as the item, sl_sold as the rating.
data = data_all_cols[["location_id", "product", "sl_sold"]]


# Run the whole split -> train -> predict -> evaluate cycle once per seed and
# store the rounded metrics of each run under that seed.
for run_number, SEED in enumerate(SEEDS, start=1):
    print(f"Run {run_number} of {len(SEEDS)}")

    ## 1 Data Splitting

    # The same seed drives both the random split and the Cornac dataset.
    train, test = python_random_split(data, TRAIN_FRAC, seed=SEED)
    train_set = cornac.data.Dataset.from_uir(
        train.itertuples(index=False),
        seed=SEED,
    )

    ## 2 Training

    bpr_params = dict(
        k=NUM_FACTORS,
        max_iter=NUM_EPOCHS,
        learning_rate=LEARNING_RATE,
        lambda_reg=LAMBDA_REG,
        verbose=True,
        seed=SEED,
    )
    bpr = cornac.models.BPR(**bpr_params)

    with Timer() as t:
        bpr.fit(train_set)
    print("Took {} seconds for training.".format(t))

    ## 3 Prediction

    # Scores are requested relative to the training frame; remove_seen=True is
    # passed so that pairs already present in `train` are left out of the ranking.
    with Timer() as t:
        all_predictions = predict_ranking(
            bpr,
            train,
            usercol='location_id',
            itemcol='product',
            remove_seen=True,
        )
    print("Took {} seconds for prediction.".format(t))

    ## 4 Evaluation / Testing

    k = TOP_K
    # Column mapping and cut-off shared by all four ranking metrics.
    metric_kwargs = dict(
        col_user='location_id',
        col_item='product',
        col_rating='sl_sold',
        col_prediction='prediction',
        k=k,
    )
    # Each metric is evaluated against the held-out test rows, rounded to
    # 4 decimals and stored in its own per-seed dictionary.
    for metric_fn, scores in (
        (map_at_k, eval_maps),
        (ndcg_at_k, eval_ndcgs),
        (precision_at_k, eval_precisions),
        (recall_at_k, eval_recalls),
    ):
        scores[SEED] = round(metric_fn(test, all_predictions, **metric_kwargs), 4)


# Label / per-seed-dictionary pairs, in the order they are reported.
summary_rows = (
    ("MAP", eval_maps),
    ("NDCG", eval_ndcgs),
    ("Precision@K", eval_precisions),
    ("Recall@K", eval_recalls),
)

# Per-seed values (already rounded to 4 decimals when they were stored).
print()
for label, per_seed in summary_rows:
    print(label + ": " + str(per_seed))

# Averages over all seeds. They are rounded to the same 4 decimals as the
# per-seed values so binary floating-point artefacts such as
# 0.30060000000000003 do not leak into the report.
print()
for label, per_seed in summary_rows:
    average = round(float(np.mean(list(per_seed.values()))), 4)
    print("Average " + label + ": " + str(average))



Run 1 of 5


100%|██████████| 100/100 [00:00<00:00, 132.44it/s, correct=92.73%, skipped=9.18%]


Optimization finished!
Took 0.7819 seconds for training.
Took 0.3814 seconds for prediction.




Run 2 of 5


100%|██████████| 100/100 [00:00<00:00, 124.46it/s, correct=92.94%, skipped=9.43%]


Optimization finished!
Took 0.8120 seconds for training.
Took 0.3302 seconds for prediction.




Run 3 of 5


100%|██████████| 100/100 [00:00<00:00, 132.28it/s, correct=92.60%, skipped=8.87%]


Optimization finished!
Took 0.7632 seconds for training.
Took 0.3130 seconds for prediction.




Run 4 of 5


100%|██████████| 100/100 [00:00<00:00, 137.72it/s, correct=92.74%, skipped=9.13%]


Optimization finished!
Took 0.7333 seconds for training.
Took 0.3001 seconds for prediction.




Run 5 of 5


100%|██████████| 100/100 [00:00<00:00, 131.96it/s, correct=92.67%, skipped=9.28%]


Optimization finished!
Took 0.7669 seconds for training.
Took 0.3128 seconds for prediction.

MAP: {0: 0.0546, 1: 0.0535, 2: 0.0529, 3: 0.0562, 4: 0.0544}
NDCG: {0: 0.306, 1: 0.2987, 2: 0.2954, 3: 0.3047, 4: 0.2982}
Precision@K: {0: 0.2926, 1: 0.2843, 2: 0.273, 3: 0.2832, 4: 0.286}
Recall@K: {0: 0.1038, 1: 0.0987, 2: 0.0947, 3: 0.0989, 4: 0.0995}

Average MAP: 0.05432
Average NDCG: 0.30060000000000003
Average Precision@K: 0.28382
Average Recall@K: 0.09912000000000001
