### Import required modules

In [None]:
import os
from collections import defaultdict
import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import scipy.sparse as sp
from tqdm import tqdm

### 予測結果をロード

In [None]:
# Directory and file name of the pickled per-user prediction results.
saved_path, file_name = './result_1114/', 'each_user_ret_epoch=19.pickle'

In [None]:
# Deserialize the prediction results from the path configured above.
with open(saved_path + file_name, mode='rb') as result_file:
    result = pickle.load(result_file)

In [None]:
# Map every user in `result` to the list of item ids found in that user's
# 'top_N_items' entry (the keys obtained after converting it to a dict).
user_rec_items = defaultdict()

for user_id, user_result in result.items():
    top_n_items = dict(user_result['top_N_items'])
    user_rec_items[user_id] = list(top_n_items)

In [None]:
# USER_NUM is the number of users that have recommendations; `users` is the
# key view over those same users.
USER_NUM = len(user_rec_items)
users = user_rec_items.keys()
print('user_num:{}'.format(USER_NUM))

### 正規化済みの Attention Score のロード

In [None]:
# Location of the pickled attention scores (described as already normalized
# by the section heading above).
saved_path, file_name = './result_1114/', 'attention_score_epoch=19.pickle'

with open(saved_path + file_name, mode='rb') as score_file:
    attention_score = pickle.load(score_file)

In [None]:
# Display the dimensions of the loaded attention score object.
attention_score.shape

In [None]:
# Convert to CSR format; an individual attention score can then be accessed
# as attention_score[e_h, e_t].
attention_score = sp.csr_matrix(attention_score)

### Knowledge Graph のデータをロードする

In [None]:
# Directory and file name of the knowledge graph triple file.
saved_path, file_name = '../Data/last-fm/', 'kg_final.txt'

In [None]:
# Read the space-separated, header-less triple file into the columns
# e_h / r / e_t (presumably head entity, relation, tail entity) and drop
# rows that are exact repeats.
kg_df = pd.read_csv(
    saved_path + file_name,
    names=('e_h','r','e_t'),
    header=None,
    sep=' ',
).drop_duplicates()
kg_df.head()

### 学習データのロード

In [None]:
# Directory and file name of the training interaction file.
saved_path, file_name = '../Data/last-fm/', 'train.txt'

In [None]:
# Parse the training file into all_data: user id (str) -> list of item ids
# (str). Each non-empty line is whitespace-separated, with the user id first
# and that user's items after it. Ids are kept as strings, exactly as read.
all_data = defaultdict(list)

# The context manager guarantees the file is closed even if parsing fails.
with open(saved_path + file_name) as f:
    for line in f:
        data_list = line.split()

        if not data_list:
            # A blank line carries no user id; skip it instead of raising
            # IndexError on data_list[0].
            continue

        user, *items = data_list
        all_data[user] = items

###  [推薦アイテム→entity→インタラクションのあったアイテム] のパスを特定する

In [None]:
# Load the pickled "worst 100 users" object from the analysis directory.
saved_path, file_name = '../Analysis/users/', 'worst_100_users.pickle'

with open(saved_path + file_name, mode='rb') as users_file:
    worst_100_users = pickle.load(users_file)

In [None]:
# Load the pickled "top 100 users" object from the analysis directory.
saved_path, file_name = '../Analysis/users/', 'top_100_users.pickle'

with open(saved_path + file_name, mode='rb') as users_file:
    top_100_users = pickle.load(users_file)

In [None]:
def get_entity_to_entity_attention_score(e_h, e_t):
    """Return the attention score stored for the entity pair (e_h, e_t).

    Both ids are shifted by the module-level USER_NUM before indexing the
    module-level attention_score.
    NOTE(review): presumably the first USER_NUM rows/columns of
    attention_score belong to users -- confirm against how it was built.
    """
    return attention_score[e_h + USER_NUM, e_t + USER_NUM]

def get_user_to_item_attention_score(user_id, item_id):
    """Return the attention score stored for the pair (user_id, item_id).

    The user id indexes attention_score directly, while the item id is
    shifted by the module-level USER_NUM first.
    NOTE(review): presumably item/entity indices start right after the user
    indices in attention_score -- confirm against how it was built.
    """
    return attention_score[user_id, item_id + USER_NUM]

In [None]:
# For every user and every recommended item, find the path
#     interacted item --r--> entity <--r-- recommended item
# (both KG edges must carry the same relation r) whose summed attention score
# is highest, and store it in rec_item_path[user][rec_item_id].
rec_item_path = defaultdict(dict)

# Index the knowledge graph once, so the loops below use dictionary lookups
# instead of re-filtering the whole DataFrame for every user/item/entity.
# Insertion order follows kg_df row order, which keeps tie-breaking stable.
tails_by_head = defaultdict(dict)   # e_h -> {e_t: [r, ...]}
heads_by_tail = defaultdict(list)   # e_t -> [(e_h, r), ...]
for kg_head, kg_relation, kg_tail in zip(kg_df['e_h'], kg_df['r'], kg_df['e_t']):
    tails_by_head[kg_head].setdefault(kg_tail, []).append(kg_relation)
    heads_by_tail[kg_tail].append((kg_head, kg_relation))

for user in tqdm(users):

    # Item ids in all_data are strings (read from the training file) while the
    # KG columns are integers; convert them so the membership test can match.
    interacted_items = {int(item) for item in all_data.get(str(user), [])}

    # Touching rec_item_path[user] registers the user even if no path is found.
    best_paths = rec_item_path[user]

    for rec_item_id in user_rec_items[user]:

        # Entities attached to the recommended item, with the relation(s) used.
        entity_relations = tails_by_head.get(rec_item_id, {})

        for entity_id, rec_item_relations in entity_relations.items():

            # Interacted items that are linked to the same entity.
            candidates = [
                (item_id, item_to_entity_r)
                for item_id, item_to_entity_r in heads_by_tail[entity_id]
                if item_id in interacted_items
            ]
            if not candidates:
                continue

            # attention score : entity to recommended item
            entity_to_rec_item_att = get_entity_to_entity_attention_score(entity_id, rec_item_id)

            for entity_to_rec_item_r in rec_item_relations:
                for item_id, item_to_entity_r in candidates:

                    # Only keep paths whose two edges share the same relation.
                    if item_to_entity_r != entity_to_rec_item_r:
                        continue

                    # attention score : interacted item to entity
                    item_to_entity_att = get_entity_to_entity_attention_score(item_id, entity_id)

                    # attention score : user to interacted item
                    user_to_item_att = get_user_to_item_attention_score(user, item_id)

                    total_att_score = user_to_item_att + item_to_entity_att + entity_to_rec_item_att

                    # Compare against the stored record instead of a
                    # loop-carried variable; ties keep the first path found.
                    best = best_paths.get(rec_item_id)
                    if best is None or total_att_score > best['total_att_score']:
                        best_paths[rec_item_id] = {
                            'total_att_score'    : total_att_score,
                            'relation'           : item_to_entity_r,
                            'item_id'            : item_id,
                            'entity_id'          : entity_id,
                            'rec_item_id'        : rec_item_id,
                            'user_to_item'       : user_to_item_att,
                            'item_to_entity'     : item_to_entity_att,
                            'entity_to_rec_item' : entity_to_rec_item_att,
                        }
                            

In [None]:
# Persist the best attention path found for every user/recommended item.
save_path = './attention_path/'
file_name = 'all_users.pickle'

# exist_ok=True creates the directory only when it is missing, without a
# separate existence check that could race with another process.
os.makedirs(save_path, exist_ok=True)

with open(save_path + file_name, mode='wb') as f:
    pickle.dump(rec_item_path, f)