# MMGCN 与 MMRec 框架差别比较大，这里对比两者的 evaluation 部分

## ==结论：一样==

既然 evaluation 部分的结果一样，那么差别可能出在 dataloader 上。

In [6]:
import numpy as np
import torch
import math

In [9]:
# Toy problem sizes: embedding width, ranking cutoff, number of users/items.
dim = 64
topk = 10
num_users = 10
num_items = 50

# Random embeddings used as the input of both evaluation routines.
user_tensor = torch.randn(num_users, dim)
item_tensor = torch.randn(num_items, dim)
print(f'{user_tensor.shape}, item: {item_tensor.shape}')

torch.Size([10, 64]), item: torch.Size([50, 64])


## 生成val_data和user_item_dict

In [10]:
import random
# user -> list of training items (these get masked out before ranking)
user_item_dict = {}
# one row per user, laid out as [user, item, item, ...]
val_data = []

# Upper bounds on how many training / validation items one user can receive.
num_of_train = topk + 5
num_of_val = 5
item_pool = range(num_items)
for user_id in range(num_users):
    # Draw both sizes first and both samples afterwards, so the sequence of
    # calls into `random` stays: randint, randint, sample, sample.
    train_count = random.randint(1, num_of_train)
    val_count = random.randint(1, num_of_val)
    train_items = random.sample(item_pool, train_count)
    val_items = random.sample(item_pool, val_count)

    user_item_dict[user_id] = train_items
    val_data.append([user_id, *val_items])

print(f'u_i_dict: {user_item_dict}')
print(f'val: {val_data}')

u_i_dict: {0: [5, 49, 24], 1: [18, 21, 20, 38, 14, 34, 33, 45, 0, 24, 43, 37], 2: [16, 22, 35, 31, 4, 45, 26, 12, 17, 49], 3: [44, 29, 27, 30, 21, 9], 4: [38], 5: [33, 28, 21, 17, 22, 24, 32, 2, 25, 14, 48, 46, 34], 6: [43, 30, 20, 23, 28], 7: [9, 33, 16, 11, 24, 29, 28, 23, 26, 35, 14, 45], 8: [2, 48, 9, 19, 1, 21, 45, 4, 8, 32, 5, 28, 27, 23, 39], 9: [43, 30, 28, 34, 4, 49, 17, 22, 29, 10, 40, 48]}
val: [[0, 40], [1, 8, 18], [2, 3, 14, 21, 22, 45], [3, 46, 36, 0, 25, 34], [4, 40, 24], [5, 12, 4, 7, 29, 49], [6, 33], [7, 22, 0, 30, 43, 37], [8, 17, 15, 45, 3], [9, 3, 16, 4, 18, 28]]


## MMGCN Evaluation

In [65]:
def mmgcn_recall_ndcg(u_tensor, i_tensor):
    """Compute Precision/Recall/NDCG at ``topk`` following the MMGCN procedure.

    Scores are the dot products between the rows of ``u_tensor`` and the rows
    of ``i_tensor``.  Every (user, item) pair listed in the module-level
    ``user_item_dict`` is set to -1e10 before ranking, and each user's
    top-``topk`` list is then compared with that user's row in the
    module-level ``val_data`` (``[user, item, item, ...]``).

    Args:
        u_tensor: user embeddings; rows ``0 .. num_users - 1`` are scored.
        i_tensor: item embeddings; one row per item.

    Returns:
        Tuple ``(precision, recall, ndcg)`` averaged over the ``val_data``
        rows that contain at least one positive item, or ``(0.0, 0.0, 0.0)``
        when there is no such row.
    """
    step = 200  # number of users scored per batch

    rank_chunks = []
    for start_index in range(0, num_users, step):
        end_index = min(start_index + step, num_users)
        # Score against the tensors passed in, not the notebook globals.
        score_matrix = torch.matmul(u_tensor[start_index:end_index], i_tensor.t())

        # Push the training items of the users in this batch to the bottom.
        # Item ids are used as-is (no ``num_users`` offset is applied).
        for row, col in user_item_dict.items():
            if start_index <= row < end_index:
                score_matrix[row - start_index][torch.LongTensor(list(col))] = -1e10

        _, index_of_rank_list = torch.topk(score_matrix, topk)
        rank_chunks.append(index_of_rank_list.cpu())
        del score_matrix
        torch.cuda.empty_cache()

    if rank_chunks:
        all_index_of_rank_list = torch.cat(rank_chunks, dim=0)
    else:
        all_index_of_rank_list = torch.LongTensor([])

    print(f'topk_index: {all_index_of_rank_list}')
    length = 0
    precision = recall = ndcg = 0.0

    for data in val_data:
        user = data[0]
        pos_items = set(data[1:])
        num_pos = len(pos_items)
        if num_pos == 0:
            # Users without validation items do not contribute to the average.
            continue
        length += 1
        items_list = all_index_of_rank_list[user].tolist()

        num_hit = len(pos_items.intersection(items_list))
        precision += float(num_hit / topk)
        recall += float(num_hit / num_pos)

        # Ideal DCG: the first min(num_pos, topk) ranks are all hits.
        max_ndcg_score = sum(
            1 / math.log2(i + 2) for i in range(min(num_pos, topk))
        )
        # Actual DCG: discount every rank that holds a positive item.
        ndcg_score = sum(
            1 / math.log2(i + 2)
            for i, temp_item in enumerate(items_list)
            if temp_item in pos_items
        )
        ndcg += ndcg_score / max_ndcg_score

    if length == 0:
        # Nothing to average over; avoid dividing by zero.
        return 0.0, 0.0, 0.0
    return precision / length, recall / length, ndcg / length

In [66]:
mmgcn_p, mmgcn_r, mmgcn_n = mmgcn_recall_ndcg(user_tensor, item_tensor)
print(f'MMGCN, Precision: {mmgcn_p}, Recall: {mmgcn_r}, NDCG: {mmgcn_n}')

topk_index: tensor([[25, 18, 36,  0, 47, 28, 20, 33, 29, 32],
        [ 8, 48, 16, 27, 49, 15, 11,  2, 35, 41],
        [21,  8, 14, 28, 46, 44, 40,  3, 13, 11],
        [17, 12, 13, 40, 47, 43, 42, 46, 20, 37],
        [43, 44, 32, 31,  1, 27, 22, 21,  0, 36],
        [45, 39, 43,  3, 37, 36, 20,  5, 44, 10],
        [39, 31, 12, 37, 44,  7, 15, 27, 10, 24],
        [30, 42,  5, 36, 19,  0, 39, 12, 38, 40],
        [11, 10, 47, 22, 44, 38, 16, 25, 37, 40],
        [18, 33, 38, 36, 46,  1, 21, 42, 20, 32]])
MMGCN, Precision: 0.07999999999999999, Recall: 0.19, NDCG: 0.21350054786571365


## MMRec 框架

In [20]:
# Flatten the per-user training lists into two parallel index lists:
# u_ls repeats the user id once per training item, i_ls holds the item ids.
u_ls = [user for user, items in user_item_dict.items() for _ in items]
i_ls = [item for items in user_item_dict.values() for item in items]
# (row indices, column indices) of the score entries to mask before ranking.
masked_items = (torch.as_tensor(u_ls), torch.as_tensor(i_ls))
masked_items

(tensor([0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2,
         2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6,
         6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
         8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9]),
 tensor([ 5, 49, 24, 18, 21, 20, 38, 14, 34, 33, 45,  0, 24, 43, 37, 16, 22, 35,
         31,  4, 45, 26, 12, 17, 49, 44, 29, 27, 30, 21,  9, 38, 33, 28, 21, 17,
         22, 24, 32,  2, 25, 14, 48, 46, 34, 43, 30, 20, 23, 28,  9, 33, 16, 11,
         24, 29, 28, 23, 26, 35, 14, 45,  2, 48,  9, 19,  1, 21, 45,  4,  8, 32,
          5, 28, 27, 23, 39, 43, 30, 28, 34,  4, 49, 17, 22, 29, 10, 40, 48]))

In [23]:
# Each val_data row is [user, item, item, ...]; drop the leading user id.
pos_items = [np.array(row[1:]) for row in val_data]
# Number of validation items per row, in the same order as pos_items.
pos_len_list = np.array([len(row[1:]) for row in val_data])
pos_items, pos_len_list

([array([40]),
  array([ 8, 18]),
  array([ 3, 14, 21, 22, 45]),
  array([46, 36,  0, 25, 34]),
  array([40, 24]),
  array([12,  4,  7, 29, 49]),
  array([33]),
  array([22,  0, 30, 43, 37]),
  array([17, 15, 45,  3]),
  array([ 3, 16,  4, 18, 28])],
 array([1, 2, 5, 5, 2, 5, 1, 5, 4, 5]))

In [26]:

def recall_(pos_index, pos_len):
    """Recall at every cutoff, averaged over users.

    Args:
        pos_index: 2-D array (one row per user, one column per rank) that is
            truthy where the ranked item is a positive.
        pos_len: 1-D array with the number of positives of each user.

    Returns:
        1-D array whose entry ``k - 1`` is the mean Recall@k.
    """
    hits_so_far = np.cumsum(pos_index, axis=1)
    per_user = hits_so_far / pos_len.reshape(-1, 1)
    return np.mean(per_user, axis=0)

def ndcg_(pos_index, pos_len):
    """NDCG at every cutoff, averaged over users.

    Args:
        pos_index: 2-D array (one row per user, one column per rank) that is
            truthy where the ranked item is a positive.
        pos_len: 1-D integer array with the number of positives of each user.

    Returns:
        1-D array whose entry ``k - 1`` is the mean NDCG@k.
    """
    # The ideal ranking cannot contain more hits than there are ranks.
    len_rank = np.full_like(pos_len, pos_index.shape[1])
    idcg_len = np.where(pos_len > len_rank, len_rank, pos_len)

    # ``np.float`` was removed from NumPy; use an explicit float64 dtype.
    # The same 1-based rank grid serves both the ideal and the actual DCG.
    ranks = np.zeros_like(pos_index, dtype=np.float64)
    ranks[:, :] = np.arange(1, pos_index.shape[1] + 1)
    discounts = 1.0 / np.log2(ranks + 1)

    idcg = np.cumsum(discounts, axis=1)
    for row, idx in enumerate(idcg_len):
        # Past the user's number of positives the ideal DCG stops growing.
        idcg[row, idx:] = idcg[row, idx - 1]

    dcg = np.cumsum(np.where(pos_index, discounts, 0), axis=1)

    result = dcg / idcg
    return result.mean(axis=0)

def precision_(pos_index, pos_len):
    """Precision at every cutoff, averaged over users.

    Args:
        pos_index: 2-D array (one row per user, one column per rank) that is
            truthy where the ranked item is a positive.
        pos_len: not used; accepted so all metric functions share a signature.

    Returns:
        1-D array whose entry ``k - 1`` is the mean Precision@k.
    """
    cutoffs = np.arange(1, pos_index.shape[1] + 1)
    per_user = pos_index.cumsum(axis=1) / cutoffs
    return np.mean(per_user, axis=0)

In [27]:
# Metrics to report, in the order they are computed.
metrics = ['precision', 'recall', 'ndcg']
# Lower-case metric name -> function taking (pos_index, pos_len).
metrics_dict = dict(ndcg=ndcg_, recall=recall_, precision=precision_)

In [29]:

def calculate_metrics(pos_len_list, topk_index):
    """Evaluate every metric named in ``metrics`` and stack the results.

    Args:
        pos_len_list: per-user number of positives, forwarded as the second
            argument of each metric function.
        topk_index: per-user hit matrix, forwarded as the first argument of
            each metric function.

    Returns:
        Array with one row per entry of ``metrics``, in the same order.
    """
    per_metric = [
        metrics_dict[name.lower()](topk_index, pos_len_list)
        for name in metrics
    ]
    return np.stack(per_metric, axis=0)


In [67]:
def MMRec_recall_ndcg(u_tensor, i_tensor):
    """Compute the metrics listed in ``metrics`` at ``topk``, MMRec style.

    All users are scored against all items in one matrix product, the
    (user, item) pairs in the module-level ``masked_items`` are set to -1e10,
    and the top-``topk`` lists are turned into a boolean hit matrix against
    the module-level ``pos_items`` before being handed to
    ``calculate_metrics``.

    Args:
        u_tensor: user embeddings, one row per user.
        i_tensor: item embeddings, one row per item.

    Returns:
        Dict mapping ``'<metric>@<topk>'`` to the value at that cutoff.
    """
    scores = torch.matmul(u_tensor, i_tensor.t())
    masked_rows, masked_cols = masked_items
    scores[masked_rows, masked_cols] = -1e10

    # Rank and keep the indices of the top-k items: nusers x topk.
    topk_index = torch.topk(scores, topk, dim=-1)[1].numpy()
    print(f'topk_index: {topk_index}')

    # True wherever a ranked item is one of that user's positives.
    bool_rec_matrix = np.asarray([
        [item in positives for item in ranked]
        for positives, ranked in zip(pos_items, topk_index)
    ])

    result_list = calculate_metrics(pos_len_list, bool_rec_matrix)
    cutoff = topk
    # Column cutoff - 1 of each metric row holds the value at that cutoff.
    return {
        '{}@{}'.format(metric, cutoff): value[cutoff - 1]
        for metric, value in zip(metrics, result_list)
    }

In [68]:
mmrec_dict = MMRec_recall_ndcg(user_tensor, item_tensor)
mmrec_dict

topk_index: [[25 18 36  0 47 28 20 33 29 32]
 [ 8 48 16 27 49 15 11  2 35 41]
 [21  8 14 28 46 44 40  3 13 11]
 [17 12 13 40 47 43 42 46 20 37]
 [43 44 32 31  1 27 22 21  0 36]
 [45 39 43  3 37 36 20  5 44 10]
 [39 31 12 37 44  7 15 27 10 24]
 [30 42  5 36 19  0 39 12 38 40]
 [11 10 47 22 44 38 16 25 37 40]
 [18 33 38 36 46  1 21 42 20 32]]


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  # Remove the CWD from sys.path while we load stuff.
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  app.launch_new_instance()


{'precision@10': 0.07999999999999999,
 'recall@10': 0.19,
 'ndcg@10': 0.21350054786571365}