# Bayesian Personalized Ranking (BPR)

## Imports

In [20]:
import os
import sys

# Root of the local checkout that must be importable for the cells below.
# Override with the RECSYS_REPO_ROOT environment variable on other machines;
# the default keeps the path this notebook was originally run with.
REPO_ROOT = os.environ.get(
    'RECSYS_REPO_ROOT',
    '/Users/bongkyun/Desktop/TIL/RecsysPaper_Code_Review/',
)

# Guard the append so re-running this cell does not add duplicate entries.
if REPO_ROOT not in sys.path:
    sys.path.append(REPO_ROOT)

In [21]:
import os
import cornac
import papermill as pm
import scrapbook as sb
import pandas as pd
# from recommenders.datasets import movielens
import recommenders
from recommenders.datasets.python_splitters import python_random_split
from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k
from recommenders.models.cornac.cornac_utils import predict_ranking
from recommenders.utils.timer import Timer
from recommenders.utils.constants import SEED

# Report the interpreter and Cornac versions this notebook was executed with.
version_report = (
    ("System version: {}", sys.version),
    ("Cornac version: {}", cornac.__version__),
)
for template, version in version_report:
    print(template.format(version))

System version: 3.10.9 | packaged by Anaconda, Inc. | (main, Mar  1 2023, 18:18:15) [MSC v.1916 64 bit (AMD64)]
Cornac version: 1.15.4


In [22]:
# MovieLens size selector (100k, 1m, 10m, or 20m) for the `movielens.load_pandas_df` loader.
# Left disabled: this notebook reads the ratings from a local CSV file instead.
# MOVIELENS_DATA_SIZE = '100k'

# Number of top-ranked items to recommend per user
TOP_K = 10

# Model parameters
NUM_FACTORS = 200  # passed to BPR as `k` (latent dimension)
NUM_EPOCHS = 100  # passed to BPR as `max_iter` (number of SGD iterations)

In [23]:
# Directory holding the MovieLens "ml-latest-small" files.
# Override with the MOVIELENS_DATA_DIR environment variable on other machines;
# the default keeps the path this notebook was originally run with.
data_dir = os.environ.get(
    'MOVIELENS_DATA_DIR',
    '/Users/bongkyun/Desktop/TIL/RecsysPaper_Code_Review/recommenders/datasets/ml-latest-small/',
)

# Keep the raw frame untouched and derive `data` from it (no in-place mutation),
# so re-running this cell always starts from the file contents.
ratings_raw = pd.read_csv(os.path.join(data_dir, 'ratings.csv'))

# Rename the first three columns (by position) to the names the downstream
# splitter/evaluation cells expect; any remaining columns keep their names.
data = ratings_raw.rename(
    columns=dict(zip(ratings_raw.columns[:3], ['userID', 'itemID', 'rating']))
)

In [24]:
# Disabled alternative: load the ratings through `movielens.load_pandas_df`
# instead of the local CSV read above.
# data = movielens.load_pandas_df(
#     size=MOVIELENS_DATA_SIZE,
#     header=["userID", "itemID", "rating"]
# )

# Preview the first rows of the ratings frame
data.head()

Unnamed: 0,userID,itemID,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [25]:
# Random 75% / 25% train/test split. The seed is passed explicitly (like the
# other seeded calls in this notebook) so the split is visibly reproducible.
train, test = python_random_split(data, ratio=0.75, seed=SEED)

### Cornac Dataset

In [26]:
# Build the Cornac training set from (user, item, rating) triples.
# `train` still carries the extra `timestamp` column from ratings.csv, so pick
# the three columns by name rather than relying on column order and on the
# loader tolerating extra tuple fields.
uir_columns = ['userID', 'itemID', 'rating']
train_set = cornac.data.Dataset.from_uir(
    train[uir_columns].itertuples(index=False),
    seed=SEED,
)

print('Number of users: {}'.format(train_set.num_users))
print('Number of items: {}'.format(train_set.num_items))

Number of users: 610
Number of items: 8787


### Train the BPR model

- `k`: 잠재 공간의 차원(즉, 잠재 벡터의 크기)
- `max_iter`: SGD 반복 횟수
- `learning_rate`: 그레이디언트 업데이트 규칙의 스텝 크기(학습률)
- `lambda_reg`: 목적 함수의 L2 정규화 계수

In [27]:
# BPR hyperparameters: latent dimension, number of SGD iterations,
# gradient step size, and L2 regularization weight.
bpr_hyperparams = {
    'k': NUM_FACTORS,
    'max_iter': NUM_EPOCHS,
    'learning_rate': 0.01,
    'lambda_reg': 0.001,
}

# Seeded and verbose so training is reproducible and shows progress.
bpr = cornac.models.BPR(verbose=True, seed=SEED, **bpr_hyperparams)

In [28]:
# Fit BPR on the Cornac training set and report the wall-clock time.
with Timer() as train_timer:
    bpr.fit(train_set)

print("Took {} seconds for training.".format(train_timer))

  0%|          | 0/100 [00:00<?, ?it/s]

Optimization finished!
Took 54.7340 seconds for training.


### Prediction and Evaluation

Cornac의 모든 추천 모델은 주어진 사용자에 대해 항목의 평점 값을 예측하는 `rate()` 메서드와 항목 순위 목록을 생성하는 `rank()` 메서드를 제공합니다. 여기서는 기존 평가 체계를 그대로 활용하기 위해 `cornac_utils`의 `predict()` 및 `predict_ranking()` 함수로 예측을 생성합니다. BPR 모델은 항목 순위 예측에 맞게 설계되었으므로, 순위 평가 지표만 사용하여 성능을 측정합니다.

In [29]:
# Score items for every user in `train` and report the wall-clock time.
# remove_seen=True asks the helper to leave out (user, item) pairs that
# already appear in `train`.
with Timer() as predict_timer:
    all_predictions = predict_ranking(
        bpr,
        train,
        usercol='userID',
        itemcol='itemID',
        remove_seen=True,
    )

print("Took {} seconds for prediction.".format(predict_timer))

Took 26.6514 seconds for prediction.


In [30]:
# Preview the first rows of the per-user ranking scores
all_predictions.head()

Unnamed: 0,userID,itemID,prediction
75627,606,32029,-2.489636
75628,606,785,0.313546
75629,606,2141,1.902507
75630,606,127096,-1.079002
75631,606,913,2.19909


In [31]:
# Cut-off for all ranking metrics: reuse the notebook-level TOP_K setting
# instead of a second hard-coded 10 that could drift out of sync.
k = TOP_K

# Arguments shared by every metric call below.
metric_kwargs = dict(col_prediction='prediction', k=k)

# Ranking metrics of the predictions against the held-out test split.
eval_map = map_at_k(test, all_predictions, **metric_kwargs)
eval_ndcg = ndcg_at_k(test, all_predictions, **metric_kwargs)
eval_precision = precision_at_k(test, all_predictions, **metric_kwargs)
eval_recall = recall_at_k(test, all_predictions, **metric_kwargs)

print("MAP:\t%f" % eval_map,
      "NDCG:\t%f" % eval_ndcg,
      "Precision@K:\t%f" % eval_precision,
      "Recall@K:\t%f" % eval_recall, sep='\n')

MAP:	0.046172
NDCG:	0.244140
Precision@K:	0.224754
Recall@K:	0.087891


In [32]:
# Record results with scrapbook so papermill-driven tests can read them (currently disabled)
# sb.glue("map", eval_map)
# sb.glue("ndcg", eval_ndcg)
# sb.glue("precision", eval_precision)
# sb.glue("recall", eval_recall)

## References

1. Rendle, S., Freudenthaler, C., Gantner, Z., & Schmidt-Thieme, L. (2009, June). BPR: Bayesian personalized ranking from implicit feedback. https://arxiv.org/ftp/arxiv/papers/1205/1205.2618.pdf
2. Pan, R., Zhou, Y., Cao, B., Liu, N. N., Lukose, R., Scholz, M., & Yang, Q. (2008, December). One-class collaborative filtering. https://cseweb.ucsd.edu/classes/fa17/cse291-b/reading/04781145.pdf
3. **Cornac** - A Comparative Framework for Multimodal Recommender Systems. https://cornac.preferred.ai/