In [1]:
from algorithms import BPR
import pandas as pd
import numpy as np
import os,sys
from scipy.sparse import *

In [2]:
def load_data(csv_file, shape):
    """Read a rating CSV into a sparse matrix plus an interaction list.

    Parameters
    ----------
    csv_file : path or file-like object accepted by ``pandas.read_csv``;
        it must provide the columns ``uid``, ``sid`` and ``rating``.
    shape : tuple ``(n_rows, n_cols)`` used for the sparse matrix.

    Returns
    -------
    data : ``csr_matrix`` of dtype float32 with ``data[uid, sid] = rating``.
    seq : integer array with one row ``[uid, sid, 1]`` per CSV record.
    """
    frame = pd.read_csv(csv_file)
    rows, cols, ratings = (np.array(frame[column]) for column in ('uid', 'sid', 'rating'))

    # Constant third column of ones, one per record.
    ones = np.ones((rows.size, 1), dtype='int')
    seq = np.hstack((rows[:, None], cols[:, None], ones))

    data = csr_matrix((ratings, (rows, cols)), dtype=np.float32, shape=shape)
    return data, seq

In [9]:
# Input directory and the file names of the id lists and the two rating splits.
DATA_DIR = 'data/ml-100k/pro'
uid_fname, sid_fname = 'unique_uid.txt', 'unique_sid.txt'
rating_fname, test_fname = 'train.csv', 'test.csv'

# One id per line; surrounding whitespace (including the newline) is stripped.
with open(os.path.join(DATA_DIR, uid_fname), 'r') as f:
    unique_uid = [line.strip() for line in f]

with open(os.path.join(DATA_DIR, sid_fname), 'r') as f:
    unique_sid = [line.strip() for line in f]

# The matrix dimensions are the number of listed user and item ids.
n_users, n_items = len(unique_uid), len(unique_sid)

# Each split yields (sparse rating matrix, [uid, sid, 1] interaction rows).
train_data, train_raw = load_data(os.path.join(DATA_DIR, rating_fname), (n_users, n_items))
test_data, test_raw = load_data(os.path.join(DATA_DIR, test_fname), (n_users, n_items))


In [10]:
def bi_mat(mat,neg=True):
    """Relabel the stored ratings of a sparse matrix as feedback labels.

    Rules applied to every stored value:

    * value > 3   ->  1
    * value == 3  ->  0 (entry is removed from the sparsity structure)
    * ``neg`` true:  value == 1 or value == 2  -> -1
    * ``neg`` false: value <= 2                ->  0 (entry is removed)

    A stored value matched by no rule is kept as it is.

    The relabelling works on the array of stored values only. Comparing the
    whole sparse matrix with ``<= 2`` also matches every implicit zero, which
    makes SciPy build a mask as large as the full matrix and insert explicit
    zeros nearly everywhere before they are eliminated again.

    Parameters
    ----------
    mat : scipy sparse matrix of ratings. It is not modified.
    neg : bool, default True. Selects the rule used for low ratings.

    Returns
    -------
    A new sparse matrix in the same format as ``mat`` with explicit zeros
    removed.
    """
    # Work on a private CSR copy so the caller's matrix stays untouched and a
    # flat ``.data`` array of stored values is available.
    relabeled = mat.tocsr(copy=True)
    # Merge repeated (row, col) entries so each cell is judged by its total.
    relabeled.sum_duplicates()

    ratings = relabeled.data
    labels = ratings.copy()

    # Every mask is computed from the original ratings, never from the
    # partially relabelled values, so the rules cannot cascade.
    labels[ratings == 3] = 0
    if neg:
        labels[(ratings == 1) | (ratings == 2)] = -1
    else:
        labels[ratings <= 2] = 0
    labels[ratings > 3] = 1

    relabeled.data = labels
    relabeled.eliminate_zeros()
    return relabeled.asformat(mat.format)

In [11]:
# Two relabelled views of the training ratings: one keeps ratings 1 and 2 as
# explicit -1 labels, the other drops them.
train_data_with_neg = bi_mat(train_data, neg=True)
train_data_without_neg = bi_mat(train_data, neg=False)

# NOTE(review): test_data is overwritten by its relabelled form, so running
# this cell a second time feeds the labels back into bi_mat, where 1 <= 2
# turns every remaining entry into 0.
test_data = bi_mat(test_data, neg=False)



In [12]:
# Share of matrix cells that hold a stored entry, test split first.
# test_data has already been relabelled by bi_mat at this point, while
# train_data is still the raw rating matrix.
for matrix in (test_data, train_data):
    print(matrix.nnz / np.prod(matrix.shape))

0.008265731166470523
0.033164739668023205


In [23]:
# Re-execute the BPR module so edits made to it since the first import take
# effect, then build a new model instance.
import importlib

importlib.reload(BPR)
model = BPR.BPR(
    dtype=np.float32,
    latent_dim=30,
    learning_rate=0.05,
    lamb_theta=0.03,
    lamb_beta=0.03,
)

In [24]:
# Train on the matrix that keeps the -1 labels; no validation data is passed.
model.train_model(
    train_data_with_neg,
    vad_data=None,
    n_iter=15,
)


  0%|          | 0/15 [00:00<?, ?it/s][A
  7%|▋         | 1/15 [00:07<01:39,  7.09s/it][A
 13%|█▎        | 2/15 [00:13<01:30,  6.97s/it][A
 20%|██        | 3/15 [00:21<01:24,  7.03s/it][A
 27%|██▋       | 4/15 [00:28<01:17,  7.07s/it][A
 33%|███▎      | 5/15 [00:35<01:10,  7.05s/it][A
 40%|████      | 6/15 [00:42<01:03,  7.06s/it][A
 47%|████▋     | 7/15 [00:49<00:56,  7.09s/it][A
 53%|█████▎    | 8/15 [00:56<00:49,  7.09s/it][A
 60%|██████    | 9/15 [01:03<00:42,  7.08s/it][A
 67%|██████▋   | 10/15 [01:11<00:35,  7.11s/it][A
 73%|███████▎  | 11/15 [01:18<00:28,  7.12s/it][A
 80%|████████  | 12/15 [01:25<00:21,  7.14s/it][A
 87%|████████▋ | 13/15 [01:32<00:14,  7.14s/it][A
 93%|█████████▎| 14/15 [01:40<00:07,  7.15s/it][A
100%|██████████| 15/15 [01:47<00:00,  7.15s/it][A
[A

In [25]:
# Keep short handles on the two parameter arrays exposed by the model;
# the evaluation calls take them as separate arguments.
u, v = model.theta, model.beta

In [35]:
# NOTE(review): this binds the name `eval` to a module, hiding the builtin
# eval() for the rest of the session.
import eval

# Both metrics receive the training matrix without -1 labels, the relabelled
# test matrix and the two parameter arrays.
pr_5 = eval.prec_at_k(train_data_without_neg, test_data, u, v, k=5)
ndcg = eval.normalized_dcg(train_data_without_neg, test_data, u, v)
print("[pr@5 : %0.4f]\t [ndcg : %0.4f]" % (pr_5, ndcg))

[pr@5 : 0.2733]	 [ndcg : 0.5116]


In [27]:
# Reload the BPR module again and rebuild the model from scratch, using the
# same constructor arguments as the model created earlier in this notebook.
import importlib

importlib.reload(BPR)
model = BPR.BPR(
    dtype=np.float32,
    latent_dim=30,
    learning_rate=0.05,
    lamb_theta=0.03,
    lamb_beta=0.03,
)

In [28]:
# Second training run: same input matrix and iteration count as the first.
model.train_model(
    train_data_with_neg,
    vad_data=None,
    n_iter=15,
)


  0%|          | 0/15 [00:00<?, ?it/s][A
  7%|▋         | 1/15 [00:07<01:38,  7.00s/it][A
 13%|█▎        | 2/15 [00:14<01:33,  7.18s/it][A
 20%|██        | 3/15 [00:21<01:26,  7.18s/it][A
 27%|██▋       | 4/15 [00:28<01:19,  7.21s/it][A
 33%|███▎      | 5/15 [00:35<01:11,  7.20s/it][A
 40%|████      | 6/15 [00:43<01:04,  7.18s/it][A
 47%|████▋     | 7/15 [00:49<00:57,  7.14s/it][A
 53%|█████▎    | 8/15 [00:57<00:50,  7.15s/it][A
 60%|██████    | 9/15 [01:04<00:42,  7.16s/it][A
 67%|██████▋   | 10/15 [01:11<00:35,  7.16s/it][A
 73%|███████▎  | 11/15 [01:18<00:28,  7.17s/it][A
 80%|████████  | 12/15 [01:25<00:21,  7.16s/it][A
 87%|████████▋ | 13/15 [01:33<00:14,  7.19s/it][A
 93%|█████████▎| 14/15 [01:40<00:07,  7.20s/it][A
100%|██████████| 15/15 [01:47<00:00,  7.19s/it][A
[A

In [29]:
# Rebind u and v to the parameter arrays of the newly trained model.
u, v = model.theta, model.beta

In [30]:
import eval

# Left as a bare last expression so the notebook echoes the value returned by
# prec_at_k (k=5) as the cell output.
eval.prec_at_k(
    train_data_without_neg,
    test_data,
    u,
    v,
    k=5,
)

0.27330786026200876