In [1]:
import os
import sys

# Configure CUDA device selection through environment variables
# (enumerate by PCI bus id, expose only device "0").
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
})
# Put the parent directory on the import path; presumably this is what makes
# the `src.*` imports in the next cell resolve.
sys.path.append("../")

In [2]:
import gc
from collections import Counter
from typing import Callable, List, Tuple

import pandas as pd
import numpy as np
from sklearn.model_selection import ParameterGrid
from sklearn.model_selection import train_test_split
from scipy.sparse import csr_matrix, diags
from scipy.sparse.linalg import svds
import time
from tqdm import tqdm

from src.preprocess import add_time_idx
from src.prepare_data import train_val_test_split, get_users_history
from src.SVD import SVD
from src.unbiased_metrics import get_metrics, hr, mrr, ndcg

## Load Data

In [3]:
# Load the four input tables from CSV files in the working directory,
# in the same order as they are unpacked.
train, test, val_2, test_users_history = (
    pd.read_csv(csv_path)
    for csv_path in ('train.csv', 'test.csv', 'val_2.csv', 'test_users_history.csv')
)

# Popular model: recommend the k most frequent training items to every row

In [4]:
# Length of every recommendation list: used below as the top-k cut-off for the
# popularity ranking and as the sample size for the random baseline.
k = 10

In [5]:
# (item_id, count) pairs for the training interactions, ordered from the most
# to the least frequent item; ties keep first-seen order.
count_items = Counter(train.item_id).most_common()

In [6]:
# Take the first k entries of the frequency ranking and keep only the item ids.
pred_items = [item_id for item_id, _count in count_items[:k]]

In [7]:
# Attach the same top-k list to every row of copies of the validation and test
# frames (all rows reference one shared list object, as before).
preds_val = val_2.assign(pred_items=[pred_items] * len(val_2))
preds_test = test.assign(pred_items=[pred_items] * len(test))

In [8]:
# Score the current test predictions. get_metrics returns a pair that is
# unpacked into a metrics table and a scalar `beta`, both shown in the next cells.
# NOTE(review): the validation predictions are passed as the second argument --
# presumably they are used to estimate beta; confirm in src.unbiased_metrics.
metrics_df, beta = get_metrics(preds_test, preds_val)

In [9]:
# Display the metrics table produced by get_metrics for the popularity predictions.
metrics_df

Unnamed: 0,type,HR,MRR,nDCG
0,Biased,0.019709,0.005747,0.008945
1,Unbiased,0.033836,0.019874,0.018786
2,Unbiased_feedback_sampling,0.152038,0.046215,0.009559


In [10]:
# Display the beta value returned by get_metrics for the popularity predictions.
beta

0.014411184835429408

In [11]:
# Keep only the test rows rated 3.5 or higher; this subset feeds the
# per-metric calls in the following cells.
preds_test_pos = preds_test.loc[preds_test['rating'].ge(3.5)]

In [12]:
# Evaluate `hr` (reported as HR in the metrics table) on the rating >= 3.5 subset,
# reusing the beta returned by get_metrics. The recorded output is a 2-tuple.
# NOTE(review): the second element is presumably a confidence-interval width --
# confirm in src.unbiased_metrics.
hr(preds_test_pos, beta=beta, sample_feedback=True, return_confidence_interval=True)

(0.152038, 0.0008839120318355209)

In [13]:
# Evaluate `mrr` (reported as MRR in the metrics table) on the rating >= 3.5 subset,
# reusing the beta returned by get_metrics. The recorded output is a 2-tuple.
# NOTE(review): the second element is presumably a confidence-interval width --
# confirm in src.unbiased_metrics.
mrr(preds_test_pos, beta=beta, sample_feedback=True, return_confidence_interval=True)

(0.046215, 0.0004461437024730966)

In [14]:
# Evaluate `ndcg` (reported as nDCG in the metrics table) on the rating >= 3.5 subset,
# reusing the beta returned by get_metrics. The recorded output is a 2-tuple.
# NOTE(review): the second element is presumably a confidence-interval width --
# confirm in src.unbiased_metrics.
ndcg(preds_test_pos, beta=beta, sample_feedback=True, return_confidence_interval=True)

(0.009559, 5.2964975389667435e-06)

# Random model: recommend k items sampled without replacement from the training items

In [15]:
# Random baseline: every row gets k distinct items drawn uniformly from the
# set of items that occur in the training data.

# The candidate pool does not depend on the row, so compute it once instead of
# calling `.unique()` again for every sampled row.
train_item_ids = train.item_id.unique()

# Seeded local generator: makes the baseline (and the metrics derived from it)
# reproducible under Restart & Run All without touching NumPy's global RNG state.
random_model_rng = np.random.default_rng(42)

preds_val = val_2.copy()
preds_val['pred_items'] = [
    random_model_rng.choice(train_item_ids, k, replace=False).tolist()
    for _ in range(len(preds_val))
]
preds_test = test.copy()
preds_test['pred_items'] = [
    random_model_rng.choice(train_item_ids, k, replace=False).tolist()
    for _ in range(len(preds_test))
]

In [16]:
# Score the current (random-baseline) test predictions. get_metrics returns a pair
# that is unpacked into a metrics table and a scalar `beta`; both names are
# rebound here, replacing the values from the earlier evaluation.
# NOTE(review): the validation predictions are passed as the second argument --
# presumably they are used to estimate beta; confirm in src.unbiased_metrics.
metrics_df, beta = get_metrics(preds_test, preds_val)

In [17]:
# Display the metrics table produced by get_metrics for the random predictions.
metrics_df

Unnamed: 0,type,HR,MRR,nDCG
0,Biased,0.002914,0.001033,0.001467
1,Unbiased,0.003981,0.0021,0.002207
2,Unbiased_feedback_sampling,0.013537,0.004185,0.001471


In [18]:
# Display the beta value returned by get_metrics for the random predictions.
beta

0.0010698619878035734

In [19]:
# Keep only the test rows rated 3.5 or higher; this subset feeds the
# per-metric calls in the following cells.
preds_test_pos = preds_test.loc[preds_test['rating'].ge(3.5)]

In [20]:
# Evaluate `hr` (reported as HR in the metrics table) on the rating >= 3.5 subset,
# reusing the beta returned by get_metrics. The recorded output is a 2-tuple.
# NOTE(review): the second element is presumably a confidence-interval width --
# confirm in src.unbiased_metrics.
hr(preds_test_pos, beta=beta, sample_feedback=True, return_confidence_interval=True)

(0.013537, 0.00020365139148034207)

In [21]:
# Evaluate `mrr` (reported as MRR in the metrics table) on the rating >= 3.5 subset,
# reusing the beta returned by get_metrics. The recorded output is a 2-tuple.
# NOTE(review): the second element is presumably a confidence-interval width --
# confirm in src.unbiased_metrics.
mrr(preds_test_pos, beta=beta, sample_feedback=True, return_confidence_interval=True)

(0.004185, 7.764567995535016e-05)

In [22]:
# Evaluate `ndcg` (reported as nDCG in the metrics table) on the rating >= 3.5 subset,
# reusing the beta returned by get_metrics. The recorded output is a 2-tuple.
# NOTE(review): the second element is presumably a confidence-interval width --
# confirm in src.unbiased_metrics.
ndcg(preds_test_pos, beta=beta, sample_feedback=True, return_confidence_interval=True)

(0.001471, 6.91378551744351e-08)