In [1]:
from tqdm import tqdm
import pandas as pd
from scipy import sparse
import numpy as np
import cloudpickle

In [2]:
# Load the ratings table from disk.
ratings = pd.read_feather('data/amazon_review_ratings.feather')

# --- user ids ---------------------------------------------------------------
# Position in the list of distinct user ids -> original user id.
userid_unique = pd.Series(ratings["userId"].unique())
index_userid_dict = userid_unique.to_dict()
# Original user id -> position (inverse of the mapping above).
userid_index_dict = {raw_id: pos for pos, raw_id in index_userid_dict.items()}

# --- item ids ---------------------------------------------------------------
# Position in the list of distinct item ids -> original item id.
itemid_unique = pd.Series(ratings["itemId"].unique())
index_itemid_dict = itemid_unique.to_dict()
# Original item id -> position (inverse of the mapping above).
itemid_index_dict = {raw_id: pos for pos, raw_id in index_itemid_dict.items()}

# Attach the re-indexed ids to every rating row.
ratings["userId_reindex"] = ratings["userId"].map(userid_index_dict)
ratings["itemid_reindex"] = ratings["itemId"].map(itemid_index_dict)

# Using the re-indexed item ids, build a dict that maps each item to its
# 'category' value (the genre lookup used later).
category_by_item = ratings.set_index('itemid_reindex')['category']
itemid_genres_dict = category_by_item.to_dict()

In [3]:
# Load the pre-computed train/test matrices and the per-user positive test
# items. Each file is opened in a `with` block so the handle is closed as soon
# as the object has been deserialized (the bare `open(...)` calls leaked them).
# NOTE(review): unpickling can execute arbitrary code - only load files that
# were produced by this project's own pipeline.
with open("output/Amazon-X_train.pkl", "rb") as f:
    X_train = cloudpickle.load(f)
with open("output/Amazon-X_test.pkl", "rb") as f:
    X_test = cloudpickle.load(f)
with open('output/test_movies_and_TVs_pos_items_dict.pkl', 'rb') as f:
    test_movies_and_TVs_pos_items_dict = cloudpickle.load(f)

In [4]:
# Build the popular-items ranking.
# Sum the ratings in X_train over axis 0 to get one total per item
# (assumes the columns of X_train are items - TODO confirm); these totals are
# what the items are sorted by, in descending order, to rank popularity.
item_sum_ratings = X_train.sum(axis=0)

In [5]:
# Item indices ordered from the largest to the smallest summed rating.
# `.tolist()[0]` takes the first (only) row of the 2-D argsort result.
sorted_indices = np.argsort(-item_sum_ratings).tolist()[0]

# Keep only the items whose genre entry contains 'movies_and_TVs',
# preserving the popularity order.
movies_popular_items = [
    item_idx
    for item_idx in sorted_indices
    if 'movies_and_TVs' in itemid_genres_dict[item_idx]
]

In [6]:
from lib.recommend_util import ndcg

# Evaluate the popularity baseline:
# compute ndcg@k per test user for several cutoffs k.
cutoffs = (5, 10, 20, 50, 100)
ndcgs = {f'ndcg{k}': [] for k in cutoffs}

for userid, pos_items in tqdm(test_movies_and_TVs_pos_items_dict.items()):
    pos_item_array = np.array(pos_items)
    # Every user is scored against the same ranking, truncated to the top k.
    for k in cutoffs:
        ndcgs[f'ndcg{k}'].append(ndcg(movies_popular_items[:k], pos_item_array))

100%|██████████| 10189/10189 [00:03<00:00, 3173.57it/s]


In [7]:
# Report the mean ndcg@k over all evaluated users, one line per cutoff.
for k in (5, 10, 20, 50, 100):
    mean_score = np.mean(ndcgs[f'ndcg{k}'])
    print(f"ndcg@{k}: {mean_score}")

ndcg@5: 0.009535563890243183
ndcg@10: 0.013784389046866938
ndcg@20: 0.019309342458305994
ndcg@50: 0.027534191932122278
ndcg@100: 0.036264798704017234
