In [5]:
import numpy as np
from scipy import sparse

In [18]:
from DAN.dataloader import BasicDataset, Loader
from DAN.model import LAE_DAN
from SaplingSimilarity.similarities import sapling
from SaplingSimilarity.utils import scores, read_data_validation

In [16]:
config = {
    "reg_p": 1e-2,
    "alpha": 0.5,
    "beta" : 0.5,
    "drop_p": 0.0
}
gamma = 0.5

In [19]:
dataset = Loader(path="DAN/abook")
X = dataset.UserItemNet

[0;30;43mloading [DAN/abook][0m
42643 training users, 5000 valid users, 5000 test users, 91599 items
2413689 interactions for training
54411 interactions for validation
55899 interactions for testing
abook is ready to go


In [20]:
train_matrix = X.astype(np.float32)
user_counts = np.array(train_matrix.sum(axis=1)).ravel()
item_counts = np.array(train_matrix.sum(axis=0)).ravel()

In [21]:
eps = 1e-8
user_counts_safe = user_counts + eps
item_counts_safe = item_counts + eps

beta = config["beta"]
alpha_final = 1 - config["alpha"]

In [22]:
user_power = np.power(user_counts_safe, -beta)             
item_power = np.power(item_counts_safe, -(1 - alpha_final))

In [23]:
X_user_norm = X.multiply(user_power.reshape(-1, 1))       

In [24]:
X_norm = X_user_norm.multiply(item_power.reshape(1, -1))   

In [25]:
M_norm = X_norm.toarray().astype(np.float32)

MemoryError: Unable to allocate 29.1 GiB for an array with shape (42643, 91599) and data type float64

In [None]:
B_user = sapling(M_norm, projection=0)
B_item = sapling(M_norm, projection=1)

In [None]:
rec_u = np.nan_to_num( np.dot(B_user, M_norm).T / np.sum(np.abs(B_user), axis=1) ).T
rec_i = np.nan_to_num( np.dot(M_norm, B_item) / np.sum(np.abs(B_item), axis=0) )

rec_final = (1 - gamma)*rec_u + gamma*rec_i

In [None]:
N_users, N_items, _, train_list, valid_list = read_data_validation("DAN/abook")

In [None]:
scores_u     = scores(train_list, valid_list, rec_u,     N_users, N_items, K=20)
scores_i     = scores(train_list, valid_list, rec_i,     N_users, N_items, K=20)
scores_final = scores(train_list, valid_list, rec_final, N_users, N_items, K=20)

In [None]:
print("=== Resultados Sapling usando X_norm de DAN ===")
print("User-based \t prec@20: {:.4f}, recall@20: {:.4f}, ndcg@20: {:.4f}".format(
      scores_u[0], scores_u[1], scores_u[2]))
print("Item-based \t prec@20: {:.4f}, recall@20: {:.4f}, ndcg@20: {:.4f}".format(
      scores_i[0], scores_i[1], scores_i[2]))
print("Final (γ={:.2f})\t prec@20: {:.4f}, recall@20: {:.4f}, ndcg@20: {:.4f}".format(
      gamma, scores_final[0], scores_final[1], scores_final[2]))