# 第13章 推薦順位に基づく正確性

# テストデータと推薦リスト

# 準備

In [1]:
import math
import numpy as np
# Print NumPy floats with three decimal places.
np.set_printoptions(precision=3)

# Number of top-ranked items to evaluate.
TOP_K = 5
# Base of the logarithm.
ALPHA = 2

# Test data: rating matrix indexed as R[user, item]; np.nan marks an item
# the user has not rated.
R = np.array([
    [5, 4, 3, np.nan, 5, 4, 2, 2, np.nan, np.nan],
    [3, 3, 3, 3, 2, np.nan, 4, np.nan, 5, np.nan],
    [4, np.nan, 3, 5, 4, 3, np.nan, 3, np.nan, np.nan],
])
# User indices and item indices.
U = np.arange(len(R))
I = np.arange(R[0].size)
# For each user, the indices of the items that user has rated.
Iu = [I[is_rated] for is_rated in ~np.isnan(R)]

# Recommendation list produced by recommender system A: RA[user, item] is the
# rank given to the item; np.nan marks an item that is not in the list.
RA = np.array([
    [1, np.nan, 3, np.nan, 4, 2, 5, np.nan, np.nan, np.nan],
    [4, 1, np.nan, 3, np.nan, np.nan, 5, np.nan, 2, np.nan],
    [np.nan, np.nan, 5, 3, 4, 2, np.nan, 1, np.nan, np.nan],
])

def confusion_matrix(u, RS, K):
    """
    Return the confusion-matrix counts for the top K items of the
    recommendation list RS for user u.

    Only the items user u has rated (``Iu[u]``) are counted. An item is
    treated as liked when its rating in ``R`` is 4 or higher, and as
    recommended when its rank in ``RS`` is at most ``K`` (a NaN rank
    compares False, so it counts as not recommended).

    Parameters
    ----------
    u : int
        ID of user u.
    RS : ndarray
        Recommendation list RS, indexed as RS[user, item].
    K : int
        Number of top-ranked items to consider.

    Returns
    -------
    int
        TP: liked and recommended.
    int
        FN: liked but not recommended.
    int
        FP: not liked but recommended.
    int
        TN: neither liked nor recommended.
    """
    rated = Iu[u]
    liked = R[u, rated] >= 4
    in_top_k = RS[u, rated] <= K
    not_liked = ~liked
    left_out = ~in_top_k

    TP = np.count_nonzero(liked & in_top_k)
    FN = np.count_nonzero(liked & left_out)
    FP = np.count_nonzero(not_liked & in_top_k)
    TN = np.count_nonzero(not_liked & left_out)
    return TP, FN, FP, TN

# 平均逆順位
## 01 好きなアイテムか否かの判定
## 02 最初に好きなアイテムが見つかったときの順位
## 03 MRR

In [2]:
# Mean reciprocal rank (MRR) of recommendation list RA.
# The comprehension below has its own loop variable and leaves this u as is.
u = 0
# An item counts as liked when its rating is 4 or higher; nan_to_num turns
# unrated (NaN) entries into 0, so they count as not liked.
like = np.nan_to_num(R) >= 4
print('like = \n{}'.format(like))
# ku[u]: smallest rank in RA among the items user u likes.
# np.fmin ignores NaN ranks, and initial=np.inf makes a user with no liked
# item in the list end up with rank inf (reciprocal rank 0) instead of the
# NaN / ValueError that np.nanmin produces for all-NaN / empty input.
ku = np.array([np.fmin.reduce(RA[u, like[u]], initial=np.inf) for u in U])
print('ku = {}'.format(ku))
# Average of the reciprocal ranks over all users (1 / inf == 0).
MRR = np.sum(1 / ku) / ku.size
print('MRR = {:.3f}'.format(MRR))

like = 
[[ True  True False False  True  True False False False False]
 [False False False False False False  True False  True False]
 [ True False False  True  True False False False False False]]
ku = [1. 2. 3.]
MRR = 0.611


# 平均適合率

## 04 評価値行列の並べ替え
## 05 好きなアイテムか否かの判定
## 06 好きなアイテムか否かの0/1表現(rel)
## 07 各ユーザのAP
## 08 MAP

In [3]:
# Precision at every rank cut-off k, for each user.
precisions = []
for u in U:
    precisions_u = []
    # Go up to at least TOP_K so the AP computation below can always pair
    # TOP_K precision values with rel[u, :TOP_K], even for a user who has
    # rated fewer than TOP_K items.
    for k in range(1, max(Iu[u].size, TOP_K) + 1):
        TP, FN, FP, TN = confusion_matrix(u, RA, k)
        # When none of the top-k items has been rated by the user, TP + FP
        # is 0; use a precision of 0 instead of raising ZeroDivisionError.
        precision_uk = TP / (TP + FP) if TP + FP > 0 else 0.0
        precisions_u.append(precision_uk)
    precisions.append(precisions_u)
print('precisions = \n{}'.format(precisions))

# Ratings reordered by recommendation rank (argsort puts the NaN ranks of
# items that are not in the list last).
ranked_R = np.array([R[u][np.argsort(RA[u])] for u in U])
print('ranked_R = \n{}'.format(ranked_R))
# NaN >= 4 is False, so unrated items count as not liked.
ranked_like = ranked_R >= 4
print('ranked_like = \n{}'.format(ranked_like))
# Liked / not liked as 1 / 0.
rel = ranked_like.astype(int)
print('rel = \n{}'.format(rel))
# Average precision per user: mean of the precision values at the positions of
# liked items within the top TOP_K. A user with no liked item in the top
# TOP_K gets AP 0 instead of the NaN that 0 / 0 would produce.
APu = np.array([
    np.sum(rel[u, :TOP_K] * precisions[u][:TOP_K]) / np.sum(rel[u, :TOP_K])
    if np.any(rel[u, :TOP_K]) else 0.0
    for u in U
])
print('APu = {}'.format(APu))
# Mean of the per-user AP values.
MAP = np.sum(APu) / APu.size
print('MAP = {:.3f}'.format(MAP))

precisions = 
[[1.0, 1.0, 0.6666666666666666, 0.75, 0.6, 0.6, 0.6], [0.0, 0.5, 0.3333333333333333, 0.25, 0.4, 0.4, 0.4], [0.0, 0.0, 0.3333333333333333, 0.5, 0.4, 0.4]]
ranked_R = 
[[ 5.  4.  3.  5.  2.  4. nan  2. nan nan]
 [ 3.  5.  3.  3.  4.  3.  2. nan nan nan]
 [ 3.  3.  5.  4.  3.  4. nan nan nan nan]]
ranked_like = 
[[ True  True False  True False  True False False False False]
 [False  True False False  True False False False False False]
 [False False  True  True False  True False False False False]]
rel = 
[[1 1 0 1 0 1 0 0 0 0]
 [0 1 0 0 1 0 0 0 0 0]
 [0 0 1 1 0 1 0 0 0 0]]
APu = [0.917 0.45  0.417]
MAP = 0.594


# DCG

## 09 各ユーザのDCG
## 10 理想的な推薦順位
## 11 理想的な推薦リスト
## 12 各ユーザのIDCG
## 13 各ユーザのnDCG
## 14 nDCG

In [4]:
# Items that appear in each user's recommendation list (non-NaN rank in RA).
Iu_rec = [I[~np.isnan(RA[u])] for u in U]
# DCG per user: each recommended item's rating divided by
# max(1, log_ALPHA(rank)). np.nansum drops the NaN gain of a recommended item
# the user has not rated, so one unrated item no longer turns the whole DCG
# into NaN.
DCGu = np.array([np.nansum([R[u, i] / np.max([1, math.log(RA[u, i], ALPHA)]) for i in Iu_rec[u]]) for u in U])
print('DCGu = {}'.format(DCGu))

# Ideal ranks: items ordered by descending rating, with unrated items treated
# as rating 0 (nan_to_num); the double argsort converts the order into ranks.
RI = np.argsort(np.argsort(-np.nan_to_num(R))) + 1
print('RI = \n{}'.format(RI))
# Ideal recommendation list: the items whose ideal rank is within TOP_K.
Iu_recI = np.array([I[RI[u] <= TOP_K] for u in U])
print('Iu_recI = \n{}'.format(Iu_recI))
# IDCG per user. When a user has rated fewer than TOP_K items the ideal list
# contains unrated items; np.nansum makes them contribute 0, matching the
# rating-0 treatment used to build RI.
IDCGu = np.array([np.nansum([R[u, i] / np.max([1, math.log(RI[u, i], ALPHA)]) for i in Iu_recI[u]]) for u in U])
print('IDCGu = {}'.format(IDCGu))
# nDCG per user; a user whose IDCG is 0 gets nDCG 0 instead of 0 / 0 = NaN.
nDCGu = np.divide(DCGu, IDCGu, out=np.zeros_like(DCGu), where=IDCGu > 0)
print('nDCGu = {}'.format(nDCGu))
# Mean of the per-user nDCG values.
nDCG = np.sum(nDCGu) / nDCGu.size
print('nDCG = {:.3f}'.format(nDCG))

DCGu = [14.254 13.115 12.447]
RI = 
[[ 1  3  5  8  2  4  6  7  9 10]
 [ 3  4  5  6  7  8  2  9  1 10]
 [ 2  7  4  1  3  5  8  6  9 10]]
Iu_recI = 
[[0 1 2 4 5]
 [0 1 2 6 8]
 [0 2 3 4 5]]
IDCGu = [15.816 13.685 14.316]
nDCGu = [0.901 0.958 0.869]
nDCG = 0.910
