In [1]:
import os
import sys

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
sys.path.append('../')

In [2]:
import gc
from collections import Counter
from typing import Callable, List, Tuple

from implicit.bpr import BayesianPersonalizedRanking
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.model_selection import ParameterGrid
from sklearn.model_selection import train_test_split
from scipy.sparse import csr_matrix, diags
from scipy.sparse.linalg import svds
import time
from tqdm import tqdm

from src.preprocess import add_time_idx
from src.prepare_data import train_val_test_split, get_users_history
from src.SVD import SVD
from src.unbiased_metrics import get_metrics, hr, mrr, ndcg

## Load Data

In [3]:
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
val_1 = pd.read_csv('val_1.csv')
val_2 = pd.read_csv('val_2.csv')
test_users_history = pd.read_csv('test_users_history.csv')
val_users_history_1 = pd.read_csv('val_users_history_1.csv')
val_users_history_2 = pd.read_csv('val_users_history_2.csv')

# Hyper parameters selection 

In [58]:
def get_mapping(X: pd.DataFrame):
    item_ids = X['item_id'].unique().tolist()
    n_items = len(item_ids)
    item_idx = range(n_items)

    user_ids = X['user_id'].unique().tolist()
    n_users = len(user_ids)
    user_idx = range(n_users)
    
    return dict(zip(item_ids, item_idx)), dict(zip(user_ids, user_idx))

In [60]:
item_mapping_, user_mapping_ = get_mapping(train)
train['item_id'] = train['item_id'].map(item_mapping_)
train['user_id'] = train['user_id'].map(user_mapping_)

In [72]:
def generate_interactions_matrix(X: pd.DataFrame, user_col: str = 'user_id'):
    item_idx = X['item_id'].values
    user_idx = X[user_col].values
    ratings = X['rating'].values
    return csr_matrix(
        (ratings, (user_idx, item_idx)),
        shape=(X[user_col].max() + 1, X['item_id'].max() + 1),
        dtype=np.int8
        )

In [81]:
def predict_folding_in(X: pd.DataFrame, item_factors: np.array, item_mapping_: dict):
    X = X.copy() 
    X['item_id'] = X['item_id'].map(item_mapping_)
    
    prediction_matrix = generate_interactions_matrix(X, user_col='test_user_idx')
    return predict(prediction_matrix, item_factors)

def predict(matrix: csr_matrix, item_factors: np.array):
    # обнуление скоров для айтемов из обучающей выборки, чтобы они не попали в рекомендации
    # учтено, что среди скоров будет достаточное количество положительных
    return np.multiply(
        matrix.dot(item_factors) @ item_factors.T,
        np.invert(matrix.astype(bool).toarray())
        )

def get_top_k(scores: np.array, item_mapping_: dict, user_col: str = 'test_user_idx', batch_size: int = 70000, k: int = 10):
    id2item = {v: k for k, v in item_mapping_.items()}
    preds = pd.DataFrame()

    for batch_ind_left in tqdm(range(0, scores.shape[0], batch_size)):
        batch_ind_right = min(batch_ind_left + batch_size, scores.shape[0])
        ind_part = np.argpartition(
            scores[batch_ind_left: batch_ind_right],
            -k + 1
            )[:, -k:].copy()
        scores_not_sorted = np.take_along_axis(
            scores[batch_ind_left: batch_ind_right],
            ind_part,
            axis=1
            )
        ind_sorted = np.argsort(scores_not_sorted, axis=1)
        indices = np.take_along_axis(ind_part, ind_sorted, axis=1)
        preds = pd.concat([preds, pd.DataFrame({
            user_col: range(batch_ind_left, batch_ind_right),
            'pred_items': indices.tolist()
            })])

        gc.collect()

    preds['pred_items'] = preds['pred_items'].map(lambda inds: [id2item[i] for i in inds])
    return preds

In [97]:
def grid_search(param_list: list, 
                train_sparse: csr_matrix, 
                val_history: pd.DataFrame, 
                val: pd.DataFrame) -> Tuple[list, float]:
    best_params = param_list[0]
    max_ndcg = -1
    for params in param_list:
        bpr = BayesianPersonalizedRanking(**params)
        bpr.fit(train_sparse)
        scores = predict_folding_in(val_history, bpr.item_factors, item_mapping_)
        preds = get_top_k(scores, item_mapping_, user_col='test_user_idx')      
        
        preds = preds.merge(val, on='test_user_idx', how='inner') 
        preds = preds[preds['rating'] >= 3.5]
        
        cur_ndcg = ndcg(preds)
        print(f'params = {params}')
        print(f'ndcg = {cur_ndcg}')
        
        if cur_ndcg > max_ndcg:
            max_ndcg = cur_ndcg
            best_params = params
    return best_params, max_ndcg

In [91]:
train_sparse = generate_interactions_matrix(train)

In [94]:
param_grid = {'learning_rate': [0.1, 0.05, 0.01, 0.005],
              'regularization': [0.05, 0.01],
              'factors': np.linspace(50, 500, 10).astype(int),
              'use_gpu': [False]}
param_list = list(ParameterGrid(param_grid))

In [98]:
best_params, max_ndcg = grid_search(param_list, train_sparse, val_users_history_1, val_1)

  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.04s/it]


params = {'factors': 50, 'learning_rate': 0.1, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.0117


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.01s/it]


params = {'factors': 50, 'learning_rate': 0.1, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.0176


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.02it/s]


params = {'factors': 50, 'learning_rate': 0.05, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.011436


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.03it/s]


params = {'factors': 50, 'learning_rate': 0.05, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.017956


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.01s/it]


params = {'factors': 50, 'learning_rate': 0.01, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.012468


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.06it/s]


params = {'factors': 50, 'learning_rate': 0.01, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.019959


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.07s/it]


params = {'factors': 50, 'learning_rate': 0.005, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.01306


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.02it/s]


params = {'factors': 50, 'learning_rate': 0.005, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.021999


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.01it/s]


params = {'factors': 100, 'learning_rate': 0.1, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.010744


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.04s/it]


params = {'factors': 100, 'learning_rate': 0.1, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.017319


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.02it/s]


params = {'factors': 100, 'learning_rate': 0.05, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.011114


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.02it/s]


params = {'factors': 100, 'learning_rate': 0.05, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.018522


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.01s/it]


params = {'factors': 100, 'learning_rate': 0.01, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.012646


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.04it/s]


params = {'factors': 100, 'learning_rate': 0.01, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.020698


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.04s/it]


params = {'factors': 100, 'learning_rate': 0.005, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.013981


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.01it/s]


params = {'factors': 100, 'learning_rate': 0.005, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.022201


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.00it/s]


params = {'factors': 150, 'learning_rate': 0.1, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.01117


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.03s/it]


params = {'factors': 150, 'learning_rate': 0.1, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.017989


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.04it/s]


params = {'factors': 150, 'learning_rate': 0.05, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.011434


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.02s/it]


params = {'factors': 150, 'learning_rate': 0.05, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.018587


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.01s/it]


params = {'factors': 150, 'learning_rate': 0.01, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.012407


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.03it/s]


params = {'factors': 150, 'learning_rate': 0.01, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.019675


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.02s/it]


params = {'factors': 150, 'learning_rate': 0.005, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.013202


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.02it/s]


params = {'factors': 150, 'learning_rate': 0.005, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.020959


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.03it/s]


params = {'factors': 200, 'learning_rate': 0.1, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.011073


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.02s/it]


params = {'factors': 200, 'learning_rate': 0.1, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.018092


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.02it/s]


params = {'factors': 200, 'learning_rate': 0.05, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.011677


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.03it/s]


params = {'factors': 200, 'learning_rate': 0.05, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.01937


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.03s/it]


params = {'factors': 200, 'learning_rate': 0.01, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.012918


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.06it/s]


params = {'factors': 200, 'learning_rate': 0.01, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.020704


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.04s/it]


params = {'factors': 200, 'learning_rate': 0.005, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.014666


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.03it/s]


params = {'factors': 200, 'learning_rate': 0.005, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.021003


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.00s/it]


params = {'factors': 250, 'learning_rate': 0.1, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.010001


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.00s/it]


params = {'factors': 250, 'learning_rate': 0.1, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.020138


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.01it/s]


params = {'factors': 250, 'learning_rate': 0.05, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.011462


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.02it/s]


params = {'factors': 250, 'learning_rate': 0.05, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.01943


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.01s/it]


params = {'factors': 250, 'learning_rate': 0.01, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.012339


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.04it/s]


params = {'factors': 250, 'learning_rate': 0.01, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.019709


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.02s/it]


params = {'factors': 250, 'learning_rate': 0.005, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.013268


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.02it/s]


params = {'factors': 250, 'learning_rate': 0.005, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.021836


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.00s/it]


params = {'factors': 300, 'learning_rate': 0.1, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.009411


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.00s/it]


params = {'factors': 300, 'learning_rate': 0.1, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.019495


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.03it/s]


params = {'factors': 300, 'learning_rate': 0.05, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.011365


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.03it/s]


params = {'factors': 300, 'learning_rate': 0.05, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.019729


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.02s/it]


params = {'factors': 300, 'learning_rate': 0.01, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.013278


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.04it/s]


params = {'factors': 300, 'learning_rate': 0.01, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.019995


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.04s/it]


params = {'factors': 300, 'learning_rate': 0.005, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.013211


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.03it/s]


params = {'factors': 300, 'learning_rate': 0.005, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.021443


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.00s/it]


params = {'factors': 350, 'learning_rate': 0.1, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.009538


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.01it/s]


params = {'factors': 350, 'learning_rate': 0.1, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.019604


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.01it/s]


params = {'factors': 350, 'learning_rate': 0.05, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.011357


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.04it/s]


params = {'factors': 350, 'learning_rate': 0.05, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.020234


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.02s/it]


params = {'factors': 350, 'learning_rate': 0.01, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.012397


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.04it/s]


params = {'factors': 350, 'learning_rate': 0.01, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.019503


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.02s/it]


params = {'factors': 350, 'learning_rate': 0.005, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.013218


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.01it/s]


params = {'factors': 350, 'learning_rate': 0.005, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.021914


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.02it/s]


params = {'factors': 400, 'learning_rate': 0.1, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.010449


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.00s/it]


params = {'factors': 400, 'learning_rate': 0.1, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.019743


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.02it/s]


params = {'factors': 400, 'learning_rate': 0.05, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.011287


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.04it/s]


params = {'factors': 400, 'learning_rate': 0.05, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.019


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.01s/it]


params = {'factors': 400, 'learning_rate': 0.01, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.013705


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.05it/s]


params = {'factors': 400, 'learning_rate': 0.01, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.019723


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.04s/it]


params = {'factors': 400, 'learning_rate': 0.005, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.012614


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.02it/s]


params = {'factors': 400, 'learning_rate': 0.005, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.021059


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.02s/it]


params = {'factors': 450, 'learning_rate': 0.1, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.010353


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.02it/s]


params = {'factors': 450, 'learning_rate': 0.1, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.01923


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.02it/s]


params = {'factors': 450, 'learning_rate': 0.05, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.011295


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.03it/s]


params = {'factors': 450, 'learning_rate': 0.05, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.017438


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.01s/it]


params = {'factors': 450, 'learning_rate': 0.01, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.013752


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.03it/s]


params = {'factors': 450, 'learning_rate': 0.01, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.020763


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.03s/it]


params = {'factors': 450, 'learning_rate': 0.005, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.013112


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.03it/s]


params = {'factors': 450, 'learning_rate': 0.005, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.022142


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.01it/s]


params = {'factors': 500, 'learning_rate': 0.1, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.010959


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.02s/it]


params = {'factors': 500, 'learning_rate': 0.1, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.018411


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.02it/s]


params = {'factors': 500, 'learning_rate': 0.05, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.012192


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.00s/it]


params = {'factors': 500, 'learning_rate': 0.05, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.019516


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.02s/it]


params = {'factors': 500, 'learning_rate': 0.01, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.013704


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.03it/s]


params = {'factors': 500, 'learning_rate': 0.01, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.019123


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.03s/it]


params = {'factors': 500, 'learning_rate': 0.005, 'regularization': 0.05, 'use_gpu': False}
ndcg = 0.012095


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.00it/s]


params = {'factors': 500, 'learning_rate': 0.005, 'regularization': 0.01, 'use_gpu': False}
ndcg = 0.021968


In [99]:
best_params, max_ndcg

({'factors': 100,
  'learning_rate': 0.005,
  'regularization': 0.01,
  'use_gpu': False},
 0.022201)

In [100]:
bpr = BayesianPersonalizedRanking(**best_params)
bpr.fit(train_sparse)
scores_val = predict_folding_in(val_users_history_2, bpr.item_factors, item_mapping_)
scores_test = predict_folding_in(test_users_history, bpr.item_factors, item_mapping_)

preds_val = get_top_k(scores_val, item_mapping_, user_col='test_user_idx') 
preds_test = get_top_k(scores_test, item_mapping_, user_col='test_user_idx')

preds_val = preds_val.merge(val_2, on='test_user_idx', how='inner')
preds_test = preds_test.merge(test, on='test_user_idx', how='inner') 

metrics_df, beta = get_metrics(preds_test, preds_val)

  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.11s/it]
100%|██████████| 1/1 [00:01<00:00,  1.85s/it]


In [101]:
metrics_df

Unnamed: 0,type,HR,MRR,nDCG
0,Biased,0.056212,0.013935,0.023457
1,Unbiased,0.090606,0.048329,0.047601
2,Unbiased_feedback_sampling,0.348832,0.109274,0.027003


In [102]:
beta

0.03644258104106743

In [103]:
preds_test_pos = preds_test[preds_test['rating'] >= 3.5]

In [104]:
hr(preds_test_pos, beta=beta, sample_feedback=True, return_confidence_interval=True)

(0.348832, 0.001150057123658177)

In [105]:
mrr(preds_test_pos, beta=beta, sample_feedback=True, return_confidence_interval=True)

(0.109274, 0.00045300593498073805)

In [106]:
ndcg(preds_test_pos, beta=beta, sample_feedback=True, return_confidence_interval=True)

(0.027003, 1.2871707838028879e-05)