In [1]:
import numpy as np
import sys


import run_utils

sys.path.append('../') 
import reclab

from reclab.recommenders import SLIM, EASE
from reclab import data_utils

sys.path.append('../tests') 
import utils
import collections




In [23]:
def compute_hr_ndcg(N, users, recs, test_ratings):
    assert recs.shape[1] >= N
    num_hits = 0
    cdg = 0
    for user_id, rec in zip(users, recs):
        for i,r in enumerate(rec[:N]):
            if (user_id, r) in test_ratings:
                value = test_ratings[(user_id, r)][0]
                cdg += value * np.log(2) / np.log(i+2)
                num_hits += value
    return num_hits / len(users), cdg / len(users)

In [2]:
data = data_utils.get_data('ml-1m')

In [18]:
last_rating_time = data[data['user_id']==1]['timestamp'].max()
last_rating_idx = data[data['user_id']==1]['timestamp'].idxmax()

In [34]:
print(data.loc[data['user_id']==1]['timestamp'])

0     978300760
1     978302109
2     978301968
3     978300275
4     978824291
5     978302268
6     978302039
7     978300719
8     978302268
9     978301368
10    978824268
11    978301752
12    978302281
13    978302124
14    978301753
15    978302188
16    978824268
17    978301777
18    978301713
19    978302039
20    978302205
21    978300760
22    978300055
23    978824195
24    978300103
25    978824351
26    978301953
27    978300055
28    978824139
29    978824268
30    978824291
31    978300019
32    978824330
33    978824268
34    978824330
35    978824291
36    978300172
37    978300055
38    978302091
39    978301777
40    978824268
41    978301590
42    978301753
43    978301570
44    978300760
45    978301777
46    978302205
47    978300719
48    978301619
49    978302149
50    978302174
51    978301398
52    978302091
Name: timestamp, dtype: int64


In [26]:
data[data['user_id']==1]


Unnamed: 0,user_id,item_id,rating,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291
5,1,1197,3,978302268
6,1,1287,5,978302039
7,1,2804,5,978300719
8,1,594,4,978302268
9,1,919,4,978301368


In [19]:
last_rating_time

978824351

# SLIM

In "A troubling analysis" (https://arxiv.org/pdf/1911.07698.pdf), SLIM achieves the following results on ML 1M

| HR@1   | NDCG@1   | HR@5   |      NDCG@5      |  HR@10 | NDCG@10|
|----------|:-------------:|------:|------:|------:|------:|
| 0.2207 | 0.2207 | 0.5576 |  0.3953 | 0.7162 | 0.4468 |


In this paper, the dataset is converted into a implicit dataset, so ratings are either 1 or 0. The [hyperparameters](https://github.com/MaurizioFD/RecSys2019_DeepLearning_Evaluation/blob/861eafeaba2943458adec22469b147ec492784b6/DL_Evaluation_TOIS_Additional_material.pdf) are set as `l1_ratio=1.89e-5` and `alpha=0.049`.

In [15]:
# TODO: split with time!
users, items, ratings = data_utils.read_dataset('ml-1m')

In [16]:
for key in ratings.keys():
    ratings[key] = (1, ratings[key][1])

In [17]:
all_contexts = collections.OrderedDict([(user_id, np.zeros(0)) for user_id in users])

In [18]:
train_ratings, test_ratings = data_utils.split_ratings(ratings, 0.8, shuffle=True, seed=0)

In [19]:
recommender = SLIM(alpha=0.049, l1_ratio=1.89e-5, seed=0)

In [20]:
recommender.reset(users, items, train_ratings)

total: 817.4129316806793, super: 13.620255708694458, tolil: 0.3354213237762451, toarray(x3706): 0.00567936897277832, fit(x3706):0.17802858352661133, loop time:802.5900127887726, csr:0.8672127723693848


In [21]:
recs, _ = recommender.recommend(all_contexts, 10)

In [24]:
for N in [1, 5, 10]:
    hr, ndcg = compute_hr_ndcg(N, users, recs, test_ratings)
    print('HR@{}: {}, NCDG@{}: {}'.format(N, hr, N, ndcg))

HR@1: 0.30149006622516555, NCDG@1: 0.30149006622516555
HR@5: 1.6208609271523178, NCDG@5: 0.937859361000297
HR@10: 3.8142384105960265, NCDG@10: 1.630645723191781


## Timing SLIM

In [2]:
recommender = SLIM(alpha=0.1, l1_ratio=1e-3, seed=0)


In [4]:
utils.test_binary_recommend_ml100k(recommender, 0.1)

1
total: 28.832359313964844, super: 0.8893601894378662, tolil: 0.017225980758666992, toarray(x1682): 0.0008440017700195312, fit(x1682):0.012163877487182617, loop time:27.833446264266968, csr:0.09229564666748047
2
3
total: 29.638110160827637, super: 0.6489026546478271, tolil: 0.018927812576293945, toarray(x1682): 0.0006875991821289062, fit(x1682):0.010415792465209961, loop time:28.88001012802124, csr:0.09023499488830566
4
total: 42.47570466995239, super: 0.585723876953125, tolil: 0.0322873592376709, toarray(x1682): 0.0009162425994873047, fit(x1682):0.022894620895385742, loop time:41.744932651519775, csr:0.11272811889648438
5


# EASE

In "A troubling analysis" (https://arxiv.org/pdf/1911.07698.pdf), EASE achieves the following results on ML 1M

| HR@1   | NDCG@1   | HR@5   |      NDCG@5      |  HR@10 | NDCG@10|
|----------|:-------------:|------:|------:|------:|------:|
| 0.2119 | 0.2119 | 0.5502 |  0.3857 | 0.7098 | 0.4374 |

 
In this paper, the dataset is converted into a implicit dataset, so ratings are either 1 or 0. The [hyperparameters](https://github.com/MaurizioFD/RecSys2019_DeepLearning_Evaluation/blob/861eafeaba2943458adec22469b147ec492784b6/DL_Evaluation_TOIS_Additional_material.pdf) are set as `lam=1.25e3`

In [2]:
users, items, ratings = data_utils.read_dataset('ml-1m')
for key in ratings.keys():
    ratings[key] = (1, ratings[key][1])
all_contexts = collections.OrderedDict([(user_id, np.zeros(0)) for user_id in users])

In [3]:
train_ratings, test_ratings = data_utils.split_ratings(ratings, 0.8, shuffle=True, seed=0)
# TODO: need to ask about best hyperparameters! (or run code to find)
recommender = EASE(lam=1.25e3)

In [4]:
recommender.reset(users, items, train_ratings)

  self._set_arrayXarray(i, j, x)


In [5]:
recs, _ = recommender.recommend(all_contexts, 10)

In [14]:
for N in [1, 5, 10]:
    hr, ndcg = compute_hr_ndcg(N, users, recs, test_ratings)
    print('HR@{}: {}, NCDG@{}: {}'.format(N, hr, N, ndcg))


HR@1: 0.29205298013245035, NCDG@1: 0.29205298013245035
HR@5: 0.31728476821192053, NCDG@5: 0.18321577048841534
HR@10: 0.380182119205298, NCDG@10: 0.1616521340642658
