In [11]:
import os
# Switch to the RippleNet `src` directory; the `from data_loader import ...` that
# follows presumably relies on this working directory.
# Set the RIPPLENET_SRC environment variable to use a different checkout; the
# default is the original machine-specific path.
os.chdir(os.environ.get('RIPPLENET_SRC',
                        '/home/daniilstrunov/nogcn/RippleNet-PyTorch-master/src'))

In [52]:
from data_loader import load_data
from sklearn.metrics import roc_auc_score, accuracy_score
from tqdm.notebook import tqdm
import scipy as sp
import numpy as np
import pandas as pd

In [95]:
class Args:
    """Settings object handed to `load_data`.

    Every setting is a keyword-only argument whose default is the value that
    used to be hard-coded here, so `Args()` builds exactly the same object as
    before while e.g. `Args(dataset='book')` overrides a single field.
    """

    def __init__(self, *, dataset='movie', dim=16, n_hop=2, kge_weight=0.01,
                 l2_weight=1e-7, lr=0.02, batch_size=1024, n_epoch=10,
                 n_memory=32, item_update_mode='plus_transform',
                 using_all_hops=True):
        self.dataset = dataset
        self.dim = dim
        self.n_hop = n_hop
        self.kge_weight = kge_weight
        self.l2_weight = l2_weight
        self.lr = lr
        self.batch_size = batch_size
        self.n_epoch = n_epoch
        self.n_memory = n_memory
        self.item_update_mode = item_update_mode
        self.using_all_hops = using_all_hops

args = Args()

In [96]:
# Load everything for the dataset selected in `args`; the loader prints its
# progress (rating file, split, KG, ripple sets) while it runs.
data_info = load_data(args)

reading rating file ...
splitting dataset ...
reading KG file ...
constructing knowledge graph ...
constructing ripple set ...


In [97]:
# The first three entries of the loader result are used as the
# train / validation / test splits.
train_data, val_data, test_data = data_info[0], data_info[1], data_info[2]

In [35]:
class RP3beta():
    """Item-to-item scoring model built from normalised interaction matrices.

    `data` must support `data[:, k]` indexing. Columns 0, 1 and 2 are used as
    the row (user) index, column (item) index and value of a user-item matrix.
    """

    def __init__(self, data):
        self.data = data

    def fit(self, alpha=1., beta=0.):
        """Compute `self.UI` (dense user-item matrix) and `self.W` (item-item weights).

        With A the user-item matrix, element-wise division and powers, and
        eps = 1e-10 guarding against empty rows:

            W = (A.T / (rowsum(A.T) ** (1 + beta) + eps)) ** alpha
                @ (A / (rowsum(A) + eps)) ** alpha

        Args:
            alpha: exponent applied element-wise to both normalised matrices.
            beta: extra exponent on the row sums of A.T (0 gives plain row
                normalisation).
        """
        rows = self.data[:, 0]
        cols = self.data[:, 1]
        vals = self.data[:, 2]
        interactions = sp.sparse.coo_matrix((vals, (rows, cols)))

        # Kept as np.matrix so predict() keeps returning 2-D results
        # (1x1 for scalar ids), which callers unwrap with `.item()`.
        self.UI = interactions.todense()

        # Normalise on explicit dense float arrays. The result type of
        # `sparse / dense` is not the same across SciPy versions, and np.power
        # cannot be applied element-wise to a sparse matrix.
        user_item = interactions.astype(float).toarray()
        item_user = user_item.T
        eps = 1e-10

        user_norm = user_item.sum(axis=1, keepdims=True) + eps
        # NOTE(review): beta scales the rows of the item-user factor, i.e. the
        # items on the "history" side of W, not the item being scored.
        # Confirm this is the intended popularity penalty.
        item_norm = np.power(item_user.sum(axis=1, keepdims=True), 1 + beta) + eps

        p_ui = np.power(user_item / user_norm, alpha)
        p_iu = np.power(item_user / item_norm, alpha)

        self.W = np.asmatrix(p_iu @ p_ui)

    def predict(self, users, items):
        """Return `UI[users] @ W[:, items]`: scores of `items` for `users`.

        `fit` must have been called first. For scalar ids the result is a
        1x1 matrix.
        """
        return self.UI[users] @ self.W[:, items]

In [36]:
class EASE():
    """Closed-form item-item model.

    With A the user-item matrix and P = inv(A.T @ A + reg * I), the weights
    are W[i, j] = -P[i, j] / P[j, j] with a zero diagonal.

    `data` must be a 2-D array. Columns 0, 1 and 2 are used as the row (user)
    index, column (item) index and value of A.
    """

    def __init__(self, data):
        self.data = data

        # Number of item columns: largest item index plus one.
        self.item_num = data[:, 1].max() + 1

    def fit(self, reg=250):
        """Compute `self.UI` (dense user-item matrix) and `self.W` (item-item weights).

        Args:
            reg: value added to the diagonal of the item-item Gram matrix
                before inversion. May be an int or a float.
        """
        rows = self.data[:, 0]
        cols = self.data[:, 1]
        vals = self.data[:, 2]
        interactions = sp.sparse.coo_matrix((vals, (rows, cols)))

        # Kept as np.matrix so predict() keeps returning 2-D results
        # (1x1 for scalar ids), which callers unwrap with `.item()`.
        self.UI = interactions.todense()

        # Build the Gram matrix in float. With integer-valued data an integer
        # Gram matrix cannot take a non-integer `reg` in place.
        gram = (interactions.T @ interactions).astype(float).toarray()
        diag = np.diag_indices_from(gram)
        gram[diag] += reg

        precision = np.linalg.inv(gram)

        # Broadcasting over the last axis divides column j by precision[j, j].
        weights = -precision / precision[diag]
        weights[diag] = 0

        self.W = np.asmatrix(weights)

    def predict(self, users, items):
        """Return `UI[users] @ W[:, items]`: scores of `items` for `users`.

        `fit` must have been called first. For scalar ids the result is a
        1x1 matrix.
        """
        return self.UI[users] @ self.W[:, items]

In [227]:
# Fit the EASE model on the training split, adding 2000 to the Gram-matrix diagonal.
model = EASE(train_data)
model.fit(reg=2000)

In [230]:
# Fit RP3beta on the training split. This rebinds `model`, so any evaluation
# run after this cell scores RP3beta and not the EASE instance created earlier.
model = RP3beta(train_data)
model.fit(alpha=.5, beta=0.2)

In [231]:
def test(model, test_data):
    """Return the ROC AUC of `model` over `test_data` as a single float.

    Args:
        model: object whose `predict(user, item)` result exposes `.item()`
            (e.g. the 1x1 matrix returned by the matrix-based models in this
            notebook).
        test_data: iterable of `(user, item, target)` rows.

    Returns:
        The value of `roc_auc_score` computed from the collected targets and
        scores (a bare score, not a tuple).
    """
    targets = []
    scores = []
    for user, item, target in tqdm(test_data):
        scores.append(model.predict(user, item).item())
        targets.append(target)
    return roc_auc_score(y_true=targets, y_score=scores)

In [232]:
# Score the current `model` on the test split; the cell output is the returned metric.
test(model, test_data)

  0%|          | 0/24134 [00:00<?, ?it/s]

0.706729073016769

In [98]:
import surprise

In [99]:
# Wrap the training array in a Surprise dataset with ratings on a 0-1 scale.
# NOTE(review): assumes train_data has exactly three columns ordered
# user, item, rating, as `load_from_df` expects; confirm against the loader.
# NOTE(review): per the Surprise docs, `load_from_df` only uses the reader's
# `rating_scale`, so `line_format` is probably redundant here; verify.
reader = surprise.reader.Reader(line_format='user item rating', rating_scale=(0, 1))
surprise_train_data = surprise.Dataset.load_from_df(pd.DataFrame(train_data), reader)

In [121]:
# Settings recorded for "book and movie" (the same values are passed below):
#   n_factors=110, n_epochs=200, init_low=1, init_high=20,
#   reg_pu=0.06, reg_qi=0.06

model = surprise.NMF(n_factors=110,
                     n_epochs=200,
                     init_low=1,
                     init_high=20,
                     reg_pu=0.06,
                     reg_qi=0.06,
                     # Fixed seed so the random factor initialisation, and
                     # with it the reported metrics, can be reproduced.
                     random_state=0,
                     verbose=True)
model.fit(surprise_train_data.build_full_trainset())

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing epoch 11
Processing epoch 12
Processing epoch 13
Processing epoch 14
Processing epoch 15
Processing epoch 16
Processing epoch 17
Processing epoch 18
Processing epoch 19
Processing epoch 20
Processing epoch 21
Processing epoch 22
Processing epoch 23
Processing epoch 24
Processing epoch 25
Processing epoch 26
Processing epoch 27
Processing epoch 28
Processing epoch 29
Processing epoch 30
Processing epoch 31
Processing epoch 32
Processing epoch 33
Processing epoch 34
Processing epoch 35
Processing epoch 36
Processing epoch 37
Processing epoch 38
Processing epoch 39
Processing epoch 40
Processing epoch 41
Processing epoch 42
Processing epoch 43
Processing epoch 44
Processing epoch 45
Processing epoch 46
Processing epoch 47
Processing epoch 48
Processing epoch 49
Processing

<surprise.prediction_algorithms.matrix_factorization.NMF at 0x7f1c547317e0>

In [122]:
def test(model, test_data):
    """Evaluate `model` on `test_data` and return `(ROC AUC, accuracy)`.

    Each `(user, item, target)` row is scored with
    `model.predict(user, item).est`. For the accuracy, an estimate strictly
    greater than 0.5 counts as class 1 and anything else as class 0.
    """
    y_true, y_score, y_pred = [], [], []
    for user, item, target in tqdm(test_data):
        estimate = model.predict(user, item).est
        y_score.append(estimate)
        y_pred.append(int(estimate > 0.5))
        y_true.append(target)
    auc = roc_auc_score(y_true=y_true, y_score=y_score)
    accuracy = accuracy_score(y_true=y_true, y_pred=y_pred)
    return auc, accuracy

In [123]:
# Score the fitted NMF model on the test split; the cell output is (ROC AUC, accuracy).
test(model, test_data)

  0%|          | 0/150746 [00:00<?, ?it/s]

(0.923406226199106, 0.8465962612606637)