In [1]:
from tqdm import tqdm
from sklearn.decomposition import NMF
import pandas as pd
from scipy import sparse
import numpy as np

In [4]:
# Load the raw tables: one row per rating, and one row per movie (metadata).
ratings = pd.read_csv("ml-20m/ratings.csv")
movies = pd.read_csv("ml-20m/movies.csv")

In [5]:
# Attach movie metadata (e.g. the genres column used below) to every rating row.
# The key and join type are spelled out rather than left to pandas'
# common-column detection, so a column that happens to share a name in both
# tables can never silently become part of the join key.
ratings_joined = ratings.merge(movies, on="movieId", how="inner")

In [6]:
# Keep only ratings of movies whose genres string mentions Action or Adventure,
# then renumber the rows from 0 so the index is contiguous again.
genres_column = ratings_joined["genres"]
is_action_or_adventure = (
    genres_column.str.contains('Action') | genres_column.str.contains('Adventure')
)
action_adventure_ratings = ratings_joined[is_action_or_adventure].reset_index(drop=True)

In [7]:
# Map raw ids to dense zero-based indices.
# Indices follow the order in which each id first appears in
# action_adventure_ratings.

# Users: index -> userId, and the inverse userId -> index.
userid_unique = pd.Series(action_adventure_ratings["userId"].unique())
index_userid_dict = dict(enumerate(userid_unique.tolist()))
userid_index_dict = {raw_id: idx for idx, raw_id in index_userid_dict.items()}

# Items: index -> movieId, and the inverse movieId -> index.
itemid_unique = pd.Series(action_adventure_ratings["movieId"].unique())
index_itemid_dict = dict(enumerate(itemid_unique.tolist()))
itemid_index_dict = {raw_id: idx for idx, raw_id in index_itemid_dict.items()}

# Store the dense indices next to the raw ids; later cells read these columns.
action_adventure_ratings["user_id"] = action_adventure_ratings["userId"].map(userid_index_dict)
action_adventure_ratings["item_id"] = action_adventure_ratings["movieId"].map(itemid_index_dict)

In [8]:
# Assemble the rating matrix in CSR form: rows are user_id, columns are item_id,
# stored values are the ratings. No shape is passed, so scipy infers it from
# the largest row/column index.
user_id_values = action_adventure_ratings["user_id"].to_numpy()
item_id_values = action_adventure_ratings["item_id"].to_numpy()
rating_values = action_adventure_ratings["rating"].to_numpy()

coordinates = (user_id_values, item_id_values)
X = sparse.csr_matrix((rating_values, coordinates))

In [9]:
# Split the rating matrix into a train part and a test part with the
# project-local helper.
# NOTE(review): judging by its name the helper holds out one rating per user;
# its exact behaviour and the meaning of at_least_n_train_items are defined in
# lib/recommend_util and are not visible here - confirm there.
# With verbose=True the helper printed one "User N has 0 train items" line per
# affected user in the recorded run, which makes for a long output.
from lib.recommend_util import split_train_validation_leave_one_out_user_wise
X_train, X_test = split_train_validation_leave_one_out_user_wise(X, verbose=True, at_least_n_train_items=0)

User 2568 has 0 train items
User 46969 has 0 train items
User 65911 has 0 train items
User 84942 has 0 train items
User 85962 has 0 train items
User 87215 has 0 train items
User 87733 has 0 train items
User 96036 has 0 train items
User 97232 has 0 train items
User 97344 has 0 train items
User 97515 has 0 train items
User 98073 has 0 train items
User 98579 has 0 train items
User 100696 has 0 train items
User 101615 has 0 train items
User 103496 has 0 train items
User 104367 has 0 train items
User 105589 has 0 train items
User 106360 has 0 train items
User 108051 has 0 train items
User 108358 has 0 train items
User 108892 has 0 train items
User 110328 has 0 train items
User 110732 has 0 train items
User 111878 has 0 train items
User 111946 has 0 train items
User 112086 has 0 train items
User 112556 has 0 train items
User 112708 has 0 train items
User 112932 has 0 train items
User 114765 has 0 train items
User 114870 has 0 train items
User 116494 has 0 train items
User 116741 has 0 train 

In [10]:
# Set up non-negative matrix factorization with 100 latent components.
# random_state is fixed so repeated runs give the same factorization.
model = NMF(n_components=100, random_state=42)

In [None]:
# Fit the factorization on the training matrix.
# Despite its name, fitted_model is not an estimator: fit_transform returns the
# transformed data, i.e. the factor matrix with one row per row of X_train.
# fitted_components is the matching factor matrix with one column per column
# of X_train.
fitted_model = model.fit_transform(X_train)
fitted_components = model.components_

In [None]:
# Reconstruct the rating matrix as the product of the two factor matrices.
# The result is a dense array covering every (row, column) pair of X_train,
# so it can be very large in memory.
predicted_ratings = fitted_model @ fitted_components

In [None]:
# Build the evaluation dictionary (target genre: Adventure).
#
# Result: test_adventure_pos_items_dict maps a user row index to the list of
# that user's held-out Adventure items, ordered by descending held-out rating.
# Only users with no Adventure item in the training split are included.

# item_id (matrix column index) -> genres string of that movie.
# This lookup was used below but never defined anywhere in the notebook, so the
# cell raised NameError on a fresh kernel; it is derived here from the ratings
# frame, which carries both the item_id and genres columns.
itemid_genres_dict = (
    action_adventure_ratings
    .drop_duplicates(subset="item_id")
    .set_index("item_id")["genres"]
    .to_dict()
)

test_adventure_pos_items_dict = {}
for i in tqdm(range(X_test.shape[0])):
    # Skip users who already interacted with an Adventure item in train.
    rated_items = X_train[i, :].indices
    if any('Adventure' in itemid_genres_dict[v] for v in rated_items):
        continue

    # Skip users with nothing stored in the test split.
    selected_user_ratings = X_test[i, :]
    if selected_user_ratings.nnz == 0:
        continue

    # Order the held-out items by descending rating using only the stored
    # entries, which avoids densifying the whole row for every user.
    # kind="stable" makes the order of equally rated items deterministic.
    by_rating_desc = np.argsort(-selected_user_ratings.data, kind="stable")
    # Cast to intp so the stored indices have the dtype argsort would produce.
    value_indices = selected_user_ratings.indices[by_rating_desc].astype(np.intp)

    # Keep only the held-out items that belong to the Adventure genre.
    test_items = [v for v in value_indices if 'Adventure' in itemid_genres_dict[v]]
    if test_items:
        test_adventure_pos_items_dict[i] = test_items

In [None]:
from lib.recommend_util import ndcg

# Evaluate the reconstruction with NDCG@k at several cutoffs k.
# ndcgs maps 'ndcg<k>' to a list holding one score per evaluated user.
NDCG_CUTOFFS = (5, 10, 20, 50, 100)
ndcgs = {f'ndcg{k}': [] for k in NDCG_CUTOFFS}

# Whether each item column is an Adventure movie. This does not depend on the
# user, so it is computed once here instead of re-testing every item's genres
# string inside the per-user loop.
is_adventure_item = np.array(
    ['Adventure' in itemid_genres_dict[v] for v in range(predicted_ratings.shape[1])],
    dtype=bool,
)

for userid, pos_itemid in tqdm(test_adventure_pos_items_dict.items()):
    pos_itemid = np.array(pos_itemid)
    # Rank all items by descending predicted rating, then keep only the
    # Adventure items while preserving that ranking.
    ranked_items = np.argsort(-predicted_ratings[userid, :])
    sorted_indices = ranked_items[is_adventure_item[ranked_items]]
    for k in NDCG_CUTOFFS:
        ndcgs[f'ndcg{k}'].append(ndcg(sorted_indices[:k], pos_itemid))

In [None]:
# Report the mean NDCG over all evaluated users for each cutoff.
for k in (5, 10, 20, 50, 100):
    print(f"ndcg@{k}: {np.mean(ndcgs[f'ndcg{k}'])}")