In [3]:
%load_ext autoreload
%autoreload 2

import numpy as np
import sys
import pandas as pd

import run_utils

sys.path.append('../') 
import reclab

from reclab.recommenders import SLIM, EASE
from reclab import data_utils

sys.path.append('../tests') 
import utils
import collections




In [4]:
def compute_hr_ndcg(N, users, recs, test_ratings):
    """Compute the hit rate and discounted gain at cutoff ``N``, averaged over users.

    Parameters
    ----------
    N : int
        Number of leading recommendations per user to evaluate.
    users : sized iterable
        User ids, aligned row by row with ``recs``.
    recs : array with at least ``N`` columns
        One row of recommended item ids per user, best first.
    test_ratings : mapping
        Keyed by ``(user_id, item_id)``; element 0 of each value is used as
        the rating of that held-out pair.

    Returns
    -------
    tuple
        ``(hit_rate, discounted_gain)``, each divided by ``len(users)``.
        No ideal-DCG normalization is applied, so the second value equals
        NDCG only when every user's ideal DCG is 1 (e.g. a single held-out
        item with rating 1).

    Raises
    ------
    AssertionError
        If ``recs`` has fewer than ``N`` columns.
    """
    assert recs.shape[1] >= N
    hit_total = 0
    gain_total = 0
    for user_id, ranked_items in zip(users, recs):
        for rank, item_id in enumerate(ranked_items[:N]):
            key = (user_id, item_id)
            if key not in test_ratings:
                continue
            rating = test_ratings[key][0]
            # Rank 0 gets a discount of log(2)/log(2) = 1; later ranks decay logarithmically.
            gain_total += rating * np.log(2) / np.log(rank + 2)
            hit_total += rating
    num_users = len(users)
    return hit_total / num_users, gain_total / num_users

# SLIM

In "A troubling analysis" (https://arxiv.org/pdf/1911.07698.pdf), SLIM achieves the following results on ML 1M

| HR@1   | NDCG@1   | HR@5   |      NDCG@5      |  HR@10 | NDCG@10|
|----------|:-------------:|------:|------:|------:|------:|
| 0.2207 | 0.2207 | 0.5576 |  0.3953 | 0.7162 | 0.4468 |


In this paper, the dataset is converted into an implicit-feedback dataset, so ratings are either 1 or 0. The test data consists of each user's most recent rating, and the training data consists of everything else.
 The [hyperparameters](https://github.com/MaurizioFD/RecSys2019_DeepLearning_Evaluation/blob/861eafeaba2943458adec22469b147ec492784b6/DL_Evaluation_TOIS_Additional_material.pdf) are set as `l1_ratio=1.89e-5` and `alpha=0.049`.

In [103]:
# Load ML-1M through the time-based split helper with ratings binarized
# (per the description above, the test split should hold each user's most recent rating).
users, items, train_ratings, test_ratings = data_utils.get_time_split_dataset('ml-1m', binarize=True)

In [104]:
# Pair every user id with an empty context vector, keeping the iteration order of `users`.
all_contexts = collections.OrderedDict([(user_id, np.zeros(0)) for user_id in users])

In [105]:
# SLIM configured with the hyperparameters quoted above; seed fixed to 0.
recommender = SLIM(alpha=0.049, l1_ratio=1.89e-5, seed=0)

In [106]:
# Hand the training split to the recommender; presumably this is where the model
# is fit -- TODO confirm. The recorded run of this cell ended in a KeyboardInterrupt.
recommender.reset(users, items, train_ratings)

KeyboardInterrupt: 

In [None]:
# for each user, leave only 100 random unrated items (plus the held-out test item) with zero ratings
# TODO

In [None]:
# Request 10 recommendations for the given contexts; the second return value is discarded.
recs, _ = recommender.recommend(all_contexts, 10)

In [None]:
# Report hit rate and NDCG at the cutoffs used in the paper's table (1, 5, 10).
# The printed label is spelled "NDCG" to match the metric name used in the tables above.
for N in [1, 5, 10]:
    hr, ndcg = compute_hr_ndcg(N, users, recs, test_ratings)
    print('HR@{}: {}, NDCG@{}: {}'.format(N, hr, N, ndcg))

# EASE

In "A troubling analysis" (https://arxiv.org/pdf/1911.07698.pdf), EASE achieves the following results on ML 1M

| HR@1   | NDCG@1   | HR@5   |      NDCG@5      |  HR@10 | NDCG@10|
|----------|:-------------:|------:|------:|------:|------:|
| 0.2119 | 0.2119 | 0.5502 |  0.3857 | 0.7098 | 0.4374 |

 
In this paper, the dataset is converted into an implicit-feedback dataset, so ratings are either 1 or 0. The test data consists of each user's most recent rating, and the training data consists of everything else.

The [hyperparameters](https://github.com/MaurizioFD/RecSys2019_DeepLearning_Evaluation/blob/861eafeaba2943458adec22469b147ec492784b6/DL_Evaluation_TOIS_Additional_material.pdf) are set as `lam=1.25e3`

In [5]:
# Load ML-1M through the time-based split helper with ratings binarized
# (per the description above, the test split should hold each user's most recent rating).
users, items, train_ratings, test_ratings = data_utils.get_time_split_dataset('ml-1m', binarize=True)
# Pair every user id with an empty context vector, keeping the iteration order of `users`.
all_contexts = collections.OrderedDict([(user_id, np.zeros(0)) for user_id in users])

In [6]:
# EASE configured with the `lam` value quoted above.
recommender = EASE(lam=1.25e3)

In [7]:
# Hand the training split to the recommender; presumably this is where the model
# is fit -- TODO confirm.
recommender.reset(users, items, train_ratings)

  self._set_arrayXarray(i, j, x)


In [111]:
# for each user, leave only 100 random unrated items (plus the held-out test item) with zero ratings;
# every other unrated item is given a tiny fake rating
# this is a hack to mimic the reproduction method
# TODO

In [25]:
# Debugging aid: peek at entry (0, 5) of the recommender's private rating matrix.
recommender._ratings[0,5]

0.0

In [28]:
# For each held-out (user, item) pair, keep only NUM_NEGATIVES randomly chosen
# unrated items (plus the held-out item) at zero and give every other unrated
# item a tiny fake rating. Presumably the recommender skips already-rated items,
# so only those negatives and the test item stay recommendable -- TODO confirm.
NUM_NEGATIVES = 100
rng = np.random.RandomState(0)  # fixed seed so the sampled negatives are reproducible
all_item_idx = np.arange(len(items))  # loop-invariant, so built once
for user_id, item_id in test_ratings.keys():
    print(user_id)
    uid = recommender._outer_to_inner_uid[user_id]
    iid = recommender._outer_to_inner_iid[item_id]
    # Look up the user's rated columns once rather than once per candidate item.
    rated_item_idx = recommender._ratings[uid].nonzero()[1]
    excluded_item_idx = np.append(rated_item_idx, iid)
    unrated_item_idx = np.setdiff1d(all_item_idx, excluded_item_idx)
    # Clamp at zero so a user with fewer than NUM_NEGATIVES unrated items does not
    # produce a negative sample size.
    num_fake = max(len(unrated_item_idx) - NUM_NEGATIVES, 0)
    # replace=False: sampling with replacement draws duplicates, which would leave
    # more than NUM_NEGATIVES items unrated.
    fake_rating_idx = rng.choice(unrated_item_idx, size=num_fake, replace=False)
    for item in fake_rating_idx:
        recommender._ratings[uid, item] = 1e-10


1
2
3
4


KeyboardInterrupt: 

In [22]:
# Inspect the held-out ratings; the recorded output shows keys of the form
# (user_id, item_id) and values of the form (rating, context array).
test_ratings

{(1, 48): (1, array([], dtype=float64)),
 (2, 1687): (1, array([], dtype=float64)),
 (3, 2081): (1, array([], dtype=float64)),
 (4, 2951): (1, array([], dtype=float64)),
 (5, 288): (1, array([], dtype=float64)),
 (6, 597): (1, array([], dtype=float64)),
 (7, 3107): (1, array([], dtype=float64)),
 (8, 3257): (1, array([], dtype=float64)),
 (9, 367): (1, array([], dtype=float64)),
 (10, 2252): (1, array([], dtype=float64)),
 (11, 3182): (1, array([], dtype=float64)),
 (12, 3362): (1, array([], dtype=float64)),
 (13, 2822): (1, array([], dtype=float64)),
 (14, 2731): (1, array([], dtype=float64)),
 (15, 3510): (1, array([], dtype=float64)),
 (16, 2987): (1, array([], dtype=float64)),
 (17, 164): (1, array([], dtype=float64)),
 (18, 1683): (1, array([], dtype=float64)),
 (19, 1234): (1, array([], dtype=float64)),
 (20, 1371): (1, array([], dtype=float64)),
 (21, 2800): (1, array([], dtype=float64)),
 (22, 910): (1, array([], dtype=float64)),
 (23, 2643): (1, array([], dtype=float64)),
 (24

In [101]:
# Request 10 recommendations for the given contexts; the second return value is discarded.
recs, _ = recommender.recommend(all_contexts, 10)

In [102]:
# Report hit rate and NDCG at the cutoffs used in the paper's table (1, 5, 10).
# The printed label is spelled "NDCG" to match the metric name used in the tables above.
for N in [1, 5, 10]:
    hr, ndcg = compute_hr_ndcg(N, users, recs, test_ratings)
    print('HR@{}: {}, NDCG@{}: {}'.format(N, hr, N, ndcg))


HR@1: 0.008774834437086093, NCDG@1: 0.008774834437086093
HR@5: 0.041721854304635764, NCDG@5: 0.02491564548163808
HR@10: 0.10430463576158941, NCDG@10: 0.04469509568469407
