In [1]:
import yaml
import os
import argparse
import json

import numpy as np
from recbole.config import Config
from recbole.utils import init_seed, get_model, get_trainer
from recbole.data import create_dataset, data_preparation


from collections import defaultdict


from recbole.data.interaction import Interaction

In [2]:
from src.baselines.baseline import *

In [44]:
config_file = "config/Pop.yaml"

In [45]:
model_name, config = load_config(config_file)
init_seed(config["seed"], config["reproducibility"])

dataset = create_dataset(config)
print(dataset)
train_data, valid_data, test_data = data_preparation(config, dataset)
all_valid_results = dict()
all_test_results = dict()

[1;35mrecbolemoocube[0m
[1;34mThe number of users[0m: 6508
[1;34mAverage actions of users[0m: 14.941293991086523
[1;34mThe number of items[0m: 688
[1;34mAverage actions of items[0m: 141.51819505094613
[1;34mThe number of inters[0m: 97223
[1;34mThe sparsity of the dataset[0m: 97.8286339889367%
[1;34mRemain Fields[0m: ['user_id', 'item_id']


In [46]:
model = get_model(model_name)(config, train_data._dataset).to(config["device"])
trainer = get_trainer(config["MODEL_TYPE"], config["model"])(config, model)
print(config["valid_metric"])
best_valid_score, best_valid_result = trainer.fit(train_data, valid_data, verbose=False)

Hit@10


In [67]:
def get_topk_and_interactions(model, test_data):
    num_items = test_data._dataset.item_num
    num_users = test_data._dataset.user_num
    topk_matches = defaultdict(list)
    interactions = defaultdict(list)
    model.to("cpu")
    for user_id in range(1, num_users):
        row = test_data._dataset.inter_matrix().getrow(user_id).nonzero()[1]
        interactions[user_id] = row.tolist()
        tmp = Interaction({"user_id":[user_id for i in range(num_items)],"item_id":[i for i in range(num_items)]})
        prediction = model.predict(tmp).cpu().detach().numpy().argsort()[-10:][::-1].tolist()
        topk_matches[user_id] = prediction
    return topk_matches, interactions

In [66]:
row.tolist()

[14]

In [68]:
topk_matches, interactions = get_topk_and_interactions(model, test_data)

In [69]:
evaluate(topk_matches, interactions)

NDCG=2.711 |  Recall=5.073 | HR=9.728 | Precision=0.993 | HR@1=0.830 | HR@3=2.905 | HR@5=4.549 | Computed for all users.



(0.9927770093745196, 5.073329352374995, 2.710929954486078, 9.727985246657447)

In [56]:
evaluate(topk_matches, test_user_products)

NDCG=2.711 |  Recall=5.073 | HR=9.728 | Precision=0.993 | HR@1=0.830 | HR@3=2.905 | HR@5=4.549 | Computed for all users.



(0.9927770093745196, 5.073329352374995, 2.710929954486078, 9.727985246657447)

In [54]:
def evaluate(topk_matches, test_user_products):
    """Compute metrics for predicted recommendations.
    Args:
        topk_matches: a list or dict of product ids in ascending order.
    """
    # Compute metrics
    precisions_all, recalls_all, ndcgs_all, hits_all, hits_at_1_all, hits_at_3_all, hits_at_5_all = [], [], [], [], [], [], []
    test_user_idxs = list(test_user_products.keys())
    for uid in test_user_idxs:
        pred_list, rel_set = topk_matches.get(uid, [])[::-1], test_user_products[uid]
        if len(pred_list) == 0:
            ndcgs_all.append(0.0)
            recalls_all.append(0.0)
            precisions_all.append(0.0)
            hits_all.append(0.0)
            hits_at_1_all.append(0.0)
            hits_at_3_all.append(0.0)
            hits_at_5_all.append(0.0)
            continue

        dcg_all = 0.0
        hit_num_all = 0.0
        hit_at_1_all = 0.0
        hit_at_3_all = 0.0
        hit_at_5_all = 0.0
        for i in range(len(pred_list)):
            if pred_list[i] in rel_set:
                dcg_all += 1. / (np.log(i + 2) / np.log(2))
                hit_num_all += 1
                if i < 1:
                    hit_at_1_all += 1
                if i < 3:
                    hit_at_3_all += 1
                if i < 5:
                    hit_at_5_all += 1
        # idcg
        idcg_all = 0.0
        for i in range(min(len(rel_set), len(pred_list))):
            idcg_all += 1. / (np.log(i + 2) / np.log(2))
        ndcg_all = dcg_all / idcg_all
        recall_all = hit_num_all / len(rel_set)
        precision_all = hit_num_all / len(pred_list)
        hit_all = 1.0 if hit_num_all > 0.0 else 0.0
        hit_at_1_all = 1.0 if hit_at_1_all > 0.0 else 0.0
        hit_at_3_all = 1.0 if hit_at_3_all > 0.0 else 0.0
        hit_at_5_all = 1.0 if hit_at_5_all > 0.0 else 0.0
        ndcgs_all.append(ndcg_all)
        recalls_all.append(recall_all)
        precisions_all.append(precision_all)
        hits_all.append(hit_all)
        hits_at_1_all.append(hit_at_1_all)
        hits_at_3_all.append(hit_at_3_all)
        hits_at_5_all.append(hit_at_5_all)

    avg_precision_all = np.mean(precisions_all) * 100
    avg_recall_all = np.mean(recalls_all) * 100
    avg_ndcg_all = np.mean(ndcgs_all) * 100
    avg_hit_all = np.mean(hits_all) * 100
    avg_hit_at_1_all = np.mean(hits_at_1_all) * 100
    avg_hit_at_3_all = np.mean(hits_at_3_all) * 100
    avg_hit_at_5_all = np.mean(hits_at_5_all) * 100

    print('NDCG={:.3f} |  Recall={:.3f} | HR={:.3f} | Precision={:.3f} | HR@1={:.3f} | HR@3={:.3f} | HR@5={:.3f} \n'.format(
            avg_ndcg_all, avg_recall_all, avg_hit_all, avg_precision_all, avg_hit_at_1_all, avg_hit_at_3_all, avg_hit_at_5_all))

    return avg_precision_all, avg_recall_all, avg_ndcg_all, avg_hit_all

In [71]:
os.path.join("a", "b", "c")

'a/b/c'