In [1]:
from algorithms import BPR
import pandas as pd
import numpy as np
import os,sys
from scipy.sparse import *

In [2]:
def load_data(csv_file, shape):
    """Read a rating CSV and return it as a sparse matrix plus an index table.

    Parameters
    ----------
    csv_file : path to a CSV file with (at least) the columns
        ``uid``, ``sid`` and ``rating``.
    shape : ``(n_rows, n_cols)`` tuple forwarded to ``csr_matrix``.

    Returns
    -------
    (matrix, triples)
        ``matrix`` is a float32 ``csr_matrix`` holding ``rating`` at
        position ``(uid, sid)``.
        ``triples`` is a 2-D integer array with one row per CSV record:
        ``[uid, sid, 1]`` (the last column is a constant 1, not the rating).
    """
    frame = pd.read_csv(csv_file)
    user_idx = np.array(frame['uid'])
    item_idx = np.array(frame['sid'])
    values = np.array(frame['rating'])

    # Constant column of ones appended to every (uid, sid) pair.
    unit_col = np.ones((user_idx.size, 1), dtype='int')
    triples = np.hstack((user_idx[:, None], item_idx[:, None], unit_col))

    matrix = csr_matrix((values, (user_idx, item_idx)), dtype=np.float32, shape=shape)
    return matrix, triples

In [3]:
# Location and file names of the preprocessed data set.
DATA_DIR = 'data/ml-100k/pro'
uid_fname = 'unique_uid.txt'
sid_fname = 'unique_sid.txt'
rating_fname = 'train.csv'
test_fname ='test.csv'

# One id per line; surrounding whitespace (including the newline) is stripped.
with open(os.path.join(DATA_DIR, uid_fname), 'r') as f:
    unique_uid = [line.strip() for line in f]

with open(os.path.join(DATA_DIR, sid_fname), 'r') as f:
    unique_sid = [line.strip() for line in f]

# The id lists define the dimensions of the rating matrices.
n_users = len(unique_uid)
n_items = len(unique_sid)
matrix_shape = (n_users, n_items)

train_data, train_raw = load_data(os.path.join(DATA_DIR, rating_fname), matrix_shape)
test_data, test_raw = load_data(os.path.join(DATA_DIR, test_fname), matrix_shape)


In [4]:
def bi_mat(mat,neg=True):
    """Binarize a sparse rating matrix around the neutral rating 3.

    Stored ratings are mapped as follows (all tests use the original
    values, so the rules never cascade):

    * rating == 3          -> 0 (dropped from the result)
    * rating  > 3          -> 1
    * ``neg=True``:  rating == 1 or rating == 2 -> -1
    * ``neg=False``: rating <= 2               -> 0 (dropped)

    Any stored value not covered by a rule is kept as is.

    The mapping is applied to the stored entries only.  Comparing the whole
    sparse matrix with ``2 >= mat`` is also true for every empty cell, which
    produces a near-dense mask, raises ``SparseEfficiencyWarning`` and
    temporarily inserts an explicit zero into every empty cell; working on
    ``.data`` gives the same result in O(nnz).

    Parameters
    ----------
    mat : scipy.sparse csr/csc matrix of ratings.  It is not modified.
    neg : bool, default True.  Whether low ratings become -1 (True) or are
        removed (False).

    Returns
    -------
    A new sparse matrix of the same format, shape and dtype as ``mat`` with
    zero entries eliminated.
    """
    r_mat = mat.copy()
    # Merge duplicate (row, col) entries so each stored value is the cell's
    # actual rating before thresholding.
    r_mat.sum_duplicates()

    ratings = r_mat.data.copy()   # untouched reference values for every mask
    signal = r_mat.data           # written in place; r_mat owns this array

    signal[ratings == 3] = 0
    if neg:
        signal[(ratings == 1) | (ratings == 2)] = -1
    else:
        signal[ratings <= 2] = 0
    signal[ratings > 3] = 1

    r_mat.eliminate_zeros()
    return r_mat

In [5]:
# Signed training matrix: ratings 1-2 become -1, ratings above 3 become +1.
train_data_with_neg = bi_mat(train_data, neg=True)
# Positive-only training matrix: only ratings above 3 are kept (as 1).
train_data_without_neg = bi_mat(train_data, neg=False)
# NOTE(review): this overwrites the raw test ratings. bi_mat(..., neg=False)
# maps every value <= 2 to 0, so running this cell a second time on the
# already-binarized matrix (all ones) would empty test_data.
test_data = bi_mat(test_data, neg=False)



In [6]:
# Fraction of stored entries in each matrix. At this point test_data has been
# binarized (positives only) while train_data still holds the raw ratings.
for matrix in (test_data, train_data):
    print(matrix.nnz / np.prod(matrix.shape))

0.008265731166470523
0.033164739668023205


In [7]:
import importlib

# Pick up any edits made to the BPR module since it was first imported.
importlib.reload(BPR)

# Hyper-parameters handed to the BPR constructor.
bpr_params = dict(
    dtype=np.float32,
    latent_dim=30,
    learning_rate=0.05,
    lamb_theta=0.03,
    lamb_beta=0.03,
)
model = BPR.BPR(**bpr_params)

In [8]:
# Train on the signed matrix (negatives = -1) for 15 iterations.
# vad_data=None: presumably disables validation during training -- confirm in BPR.
model.train_model(train_data_with_neg,vad_data=None,n_iter=15)

100%|██████████| 15/15 [01:50<00:00,  7.34s/it]


In [9]:
# Factor matrices learned by the model, passed to the eval functions below.
# Presumably theta holds user factors and beta item factors -- confirm in BPR.
u, v = model.theta, model.beta

In [10]:
# NOTE(review): the project module is named `eval`, so this import shadows the
# builtin eval() for the rest of the session.
import eval

# Every metric receives the same positive-only train matrix, the binarized
# test matrix and the two factor matrices.
eval_args = (train_data_without_neg, test_data, u, v)
pr_5 = eval.prec_at_k(*eval_args, k=5)
ndcg = eval.normalized_dcg(*eval_args)
ndcg_100 = eval.normalized_dcg_at_k(*eval_args, k=100)

report = "[pr@5 : %0.4f]\t[ndcg : %0.4f]\t[ndcg@100 : %0.4f ]" % (pr_5,ndcg, ndcg_100)
print(report)

[pr@5 : 0.2746]	[ndcg : 0.5113]	[ndcg@100 : 0.4218 ]


In [11]:
import importlib

# Second run with the same hyper-parameters as the earlier model cell: reload
# the module and start again from a fresh BPR instance.
importlib.reload(BPR)

bpr_params = dict(
    dtype=np.float32,
    latent_dim=30,
    learning_rate=0.05,
    lamb_theta=0.03,
    lamb_beta=0.03,
)
model = BPR.BPR(**bpr_params)

In [12]:
# Repeat training of the fresh model on the signed matrix for 15 iterations.
# vad_data=None: presumably disables validation during training -- confirm in BPR.
model.train_model(train_data_with_neg,vad_data=None,n_iter=15)

100%|██████████| 15/15 [01:45<00:00,  7.06s/it]


In [13]:
# Factor matrices of the re-trained model (overwrites the earlier u and v).
# Presumably theta holds user factors and beta item factors -- confirm in BPR.
u, v = model.theta, model.beta

In [14]:
# NOTE(review): the project module is named `eval`, so this import shadows the
# builtin eval() for the rest of the session.
import eval

# Same metrics as the earlier evaluation cell, computed for the re-trained
# model. No random seed is set in this notebook, which may be why repeated
# runs report slightly different scores -- confirm how BPR initialises/samples.
eval_args = (train_data_without_neg, test_data, u, v)
pr_5 = eval.prec_at_k(*eval_args, k=5)
ndcg = eval.normalized_dcg(*eval_args)
ndcg_100 = eval.normalized_dcg_at_k(*eval_args, k=100)

report = "[pr@5 : %0.4f]\t[ndcg : %0.4f]\t[ndcg@100 : %0.4f ]" % (pr_5,ndcg, ndcg_100)
print(report)

[pr@5 : 0.2729]	[ndcg : 0.5113]	[ndcg@100 : 0.4216 ]
