In [1]:
import numpy as np
import sys


import run_utils

sys.path.append('../') 
import reclab

from reclab.recommenders import SLIM, EASE
from reclab import data_utils

sys.path.append('../tests') 
import utils
import collections




In [13]:
def compute_hr_ndcg(N, users, recs, test_ratings):
    """Score the top-N recommendations against held-out ratings.

    For every user, the first ``N`` recommended items are looked up in
    ``test_ratings``. Each item that is found adds its rating value to the
    hit total, and the same value discounted by ``1 / log2(rank + 2)``
    (``rank`` is the 0-based position in the list) to the gain total.

    Both totals are divided by ``len(users) * N``.
    NOTE(review): the gain is therefore not normalised by an ideal DCG and
    the hit total is averaged per recommendation slot rather than per user,
    so for ``N > 1`` these numbers may not be directly comparable with
    published HR@N / NDCG@N figures -- confirm this is intended.

    Parameters
    ----------
    N : int
        Number of leading recommendations to score per user; must be positive.
    users : sized iterable
        User ids, iterated in the same order as the rows of ``recs``.
    recs : array-like with a ``shape`` attribute
        One row of recommended item ids per user, with at least ``N`` columns.
    test_ratings : mapping
        Maps ``(user_id, item_id)`` to an indexable entry whose first element
        is the rating value.

    Returns
    -------
    tuple
        ``(hit_score, gain_score)`` as described above, or ``(0.0, 0.0)`` when
        ``users`` is empty.

    Raises
    ------
    ValueError
        If ``N`` is not positive.
    AssertionError
        If ``recs`` has fewer than ``N`` columns.
    """
    # A non-positive N would either divide by zero or silently score rec[:N]
    # with a negative slice bound, so reject it up front.
    if N <= 0:
        raise ValueError('N must be a positive integer, got {}'.format(N))
    assert recs.shape[1] >= N

    num_users = len(users)
    if num_users == 0:
        # Nothing to average over; avoid a ZeroDivisionError.
        return 0.0, 0.0

    # The positional discount depends only on the rank, so compute it once.
    discounts = np.log(2) / np.log(np.arange(2, N + 2))

    num_hits = 0
    dcg = 0
    for user_id, rec in zip(users, recs):
        for rank, item_id in enumerate(rec[:N]):
            key = (user_id, item_id)
            if key in test_ratings:
                value = test_ratings[key][0]
                dcg += value * discounts[rank]
                num_hits += value
    return num_hits / num_users / N, dcg / num_users / N

# SLIM

In "A troubling analysis" (https://arxiv.org/pdf/1911.07698.pdf), SLIM achieves the following results on ML 1M

| HR@1   | NDCG@1   | HR@5   |      NDCG@5      |  HR@10 | NDCG@10|
|----------|:-------------:|------:|------:|------:|------:|
| 0.2207 | 0.2207 | 0.5576 |  0.3953 | 0.7162 | 0.4468 |


In this paper, the dataset is converted into an implicit-feedback dataset, so every rating is either 1 or 0.

In [33]:
# Load the 'ml-1m' dataset via reclab's data_utils; the call returns the users,
# the items and the ratings as three separate objects.
users, items, ratings = data_utils.read_dataset('ml-1m')

In [36]:
# Binarize in place: every observed rating becomes 1, while the second element
# of each entry is carried over unchanged.
for user_item, entry in ratings.items():
    ratings[user_item] = (1, entry[1])

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [38]:
# Give every user its own empty context array, keeping the iteration order of `users`.
all_contexts = collections.OrderedDict((uid, np.zeros(0)) for uid in users)

In [39]:
# Shuffled, seeded train/test split of the binarized ratings.
# The 0.8 is presumably the training fraction -- TODO confirm against data_utils.split_ratings.
train_ratings, test_ratings = data_utils.split_ratings(ratings, 0.8, shuffle=True, seed=0)

In [40]:
# Build the SLIM recommender with a fixed seed.
# TODO: alpha and l1_ratio are untuned placeholders; ask about the best
# hyperparameters (or run a search to find them).
recommender = SLIM(alpha=0.1, l1_ratio=1e-3, seed=0)

In [41]:
# Hand the users, items and training ratings to the recommender.
# Presumably this is where the model is fit (the recorded output reports fit timings) -- TODO confirm.
recommender.reset(users, items, train_ratings)

total: 772.2034709453583, super: 13.336290121078491, tolil: 0.32517457008361816, toarray(x3706): 0.006170749664306641, fit(x3706):0.17826056480407715, loop time:757.6870028972626, csr:0.854973554611206


In [42]:
# Ask for 10 recommendations for the users in all_contexts; only the first
# return value is used below, the second is discarded.
recs, _ = recommender.recommend(all_contexts, 10)

In [43]:
# Report both scores at the cutoffs listed in the paper's results table.
for N in [1, 5, 10]:
    hr, ndcg = compute_hr_ndcg(N, users, recs, test_ratings)
    # The metric label is NDCG; it was previously misspelled in the output.
    print('HR@{}: {}, NDCG@{}: {}'.format(N, hr, N, ndcg))

HR@1: 0.3675496688741722
HR@5: 0.4388079470198676
NCDG@5: 0.2477166317962122


## Timing SLIM

In [2]:
# Fresh SLIM instance for the timing run, built with the same arguments as in the ML 1M section.
recommender = SLIM(alpha=0.1, l1_ratio=1e-3, seed=0)


In [4]:
# Run the helper from ../tests/utils on this recommender.
# The meaning of the second argument (0.1) is defined by that helper -- TODO confirm.
utils.test_binary_recommend_ml100k(recommender, 0.1)

1
total: 28.832359313964844, super: 0.8893601894378662, tolil: 0.017225980758666992, toarray(x1682): 0.0008440017700195312, fit(x1682):0.012163877487182617, loop time:27.833446264266968, csr:0.09229564666748047
2
3
total: 29.638110160827637, super: 0.6489026546478271, tolil: 0.018927812576293945, toarray(x1682): 0.0006875991821289062, fit(x1682):0.010415792465209961, loop time:28.88001012802124, csr:0.09023499488830566
4
total: 42.47570466995239, super: 0.585723876953125, tolil: 0.0322873592376709, toarray(x1682): 0.0009162425994873047, fit(x1682):0.022894620895385742, loop time:41.744932651519775, csr:0.11272811889648438
5


# EASE

In "A troubling analysis" (https://arxiv.org/pdf/1911.07698.pdf), EASE achieves the following results on ML 1M

| HR@1   | NDCG@1   | HR@5   |      NDCG@5      |  HR@10 | NDCG@10|
|----------|:-------------:|------:|------:|------:|------:|
| 0.2119 | 0.2119 | 0.5502 |  0.3857 | 0.7098 | 0.4374 |


In this paper, the dataset is converted into an implicit-feedback dataset, so every rating is either 1 or 0.

In [2]:
# Load 'ml-1m', binarize the ratings in place, and build an empty context per user.
users, items, ratings = data_utils.read_dataset('ml-1m')
for user_item, entry in ratings.items():
    # Every observed rating becomes 1; the second element of the entry is kept.
    ratings[user_item] = (1, entry[1])
all_contexts = collections.OrderedDict((uid, np.zeros(0)) for uid in users)

In [3]:
# Shuffled, seeded train/test split of the binarized ratings.
# The 0.8 is presumably the training fraction -- TODO confirm against data_utils.split_ratings.
train_ratings, test_ratings = data_utils.split_ratings(ratings, 0.8, shuffle=True, seed=0)
# TODO: lam=500 is an untuned placeholder; ask about the best hyperparameters
# (or run a search to find them).
recommender = EASE(lam=500)

In [4]:
# Hand the users, items and training ratings to the recommender.
# Presumably this is where the model is fit -- TODO confirm.
recommender.reset(users, items, train_ratings)

  self._set_arrayXarray(i, j, x)


In [5]:
# Ask for 10 recommendations for the users in all_contexts; only the first
# return value is used below, the second is discarded.
recs, _ = recommender.recommend(all_contexts, 10)

In [14]:
# Report both scores at the cutoffs listed in the paper's results table.
for N in [1, 5, 10]:
    hr, ndcg = compute_hr_ndcg(N, users, recs, test_ratings)
    # The metric label is NDCG; it was previously misspelled in the output.
    print('HR@{}: {}, NDCG@{}: {}'.format(N, hr, N, ndcg))


HR@1: 0.29205298013245035, NCDG@1: 0.29205298013245035
HR@5: 0.31728476821192053, NCDG@5: 0.18321577048841534
HR@10: 0.380182119205298, NCDG@10: 0.1616521340642658
