### import required module

In [26]:
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector
import os
from collections import defaultdict
import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import scipy.sparse as sp
from tqdm import tqdm

### モデルを復元

In [2]:
# model_file = 'model'
# meta_file_name = 'weights-26.meta'
model_file = 'model_last-fm_epoch=300'
meta_file_name = 'weights-299.meta'

In [3]:
sess = tf.Session()
saver = tf.train.import_meta_graph('./{}/{}'.format(model_file, meta_file_name))

# モデルの復元
saver.restore(sess,tf.train.latest_checkpoint('./' + model_file + '/'))
# グラフを復元
graph = tf.get_default_graph()

INFO:tensorflow:Restoring parameters from ./model_last-fm_epoch=300/weights-299


In [5]:
user_embed = graph.get_tensor_by_name("user_embed:0")
entity_embed = graph.get_tensor_by_name("entity_embed:0")
relation_embed = graph.get_tensor_by_name("relation_embed:0")

### 予測結果をロード

In [4]:
saved_path = './result_1114/'
file_name = 'each_user_ret_epoch=19.pickle'

In [5]:
with open (saved_path + file_name, 'rb') as f:
    result = pickle.load(f)

print(result[0])

{'user_id': 0, 'recall': array([0.08510638, 0.12765957, 0.12765957, 0.14893617, 0.17021277]), 'precision': array([0.2   , 0.15  , 0.1   , 0.0875, 0.08  ]), 'ndcg': array([0.30618864, 0.41385325, 0.41385325, 0.45512116, 0.49408425]), 'hit_ratio': array([1., 1., 1., 1., 1.]), 'auc': 0.0, 'top_N_items': [(1085, 12.173683), (10778, 11.681754), (23493, 11.466055), (8523, 11.296857), (11184, 11.179999), (11211, 11.060519), (2237, 11.028409), (19439, 10.986417), (9491, 10.648063), (10936, 10.647737), (14037, 10.647504), (10874, 10.485147), (9454, 10.439081), (9492, 10.429654), (8524, 10.411466), (23496, 10.3966), (8422, 10.33805), (7140, 10.331901), (9493, 10.330873), (7536, 10.172565), (2719, 10.006644), (9871, 9.955256), (10799, 9.801346), (9726, 9.771114), (2718, 9.73137), (7537, 9.72799), (2717, 9.723579), (23564, 9.722545), (13996, 9.712077), (8348, 9.70301), (2801, 9.643488), (23466, 9.505796), (2657, 9.474501), (6105, 9.37311), (8353, 9.3293705), (7931, 9.2671175), (12539, 9.260599), (

In [6]:
user_rec_items = defaultdict()

for user, ret in result.items():
    items = dict(ret['top_N_items'])
    items = items.keys()
    
    user_rec_items[user]  = list(items)

In [7]:
users =  user_rec_items.keys()
len(users)

23566

### 正規化済みの Attention Score のロード

In [8]:
saved_path = './result_1114/'
file_name = 'attention_score_epoch=19.pickle'

with open (saved_path + file_name, 'rb') as f:
    attention_score = pickle.load(f)

In [9]:
# attention_score[e_h, e_t] として Attention Score にアクセスできる
attention_score = sp.csr_matrix(attention_score)

### Knowledge Graph のデータをロードする

In [10]:
saved_path = '../Data/last-fm/'
file_name = 'kg_final.txt'

In [11]:
kg_df = pd.read_csv(saved_path + file_name, sep=' ', header=None, names=('e_h','r','e_t'))
kg_df.head()

Unnamed: 0,e_h,r,e_t
0,12700,0,48123
1,18104,0,48123
2,25838,1,48124
3,41691,2,48125
4,9746,1,48126


### Knowledge Graph でのAttention Score を算出する

In [12]:
e_h_list = list(kg_df['e_h'])
e_t_list = list(kg_df['e_t'])
r_list = list(kg_df['r'])
attention_score_list = []

In [13]:
for e_h, e_t in zip(e_h_list, e_t_list):
    
    e_h_idx = e_h + len(users)
    e_t_idx = e_t + len(users)
    
    att = attention_score[e_h_idx, e_t_idx]
    attention_score_list.append(att)

In [14]:
kg_att_df = pd.DataFrame(
    data = {
        'e_h' : e_h_list,
        'r' : r_list,
        'e_t' : e_h_list,
        'attention_score' : attention_score_list
        
    },
    columns=['e_h', 'r', 'e_t', 'attention_score']
)

In [15]:
kg_att_df.head()

Unnamed: 0,e_h,r,e_t,attention_score
0,12700,0,12700,0.014034
1,18104,0,18104,0.027095
2,25838,1,25838,0.035783
3,41691,2,41691,0.079258
4,9746,1,9746,0.032012


In [18]:
save_path = './result_1114/'
file_name = 'attention_score_entity-entity.csv'

kg_att_df.to_csv(save_path+file_name)

### 学習データのロード

In [19]:
saved_path = '../Data/last-fm/'
file_name  = 'train.txt'

In [20]:
all_data = defaultdict(list)
f = open(saved_path+file_name)
line = f.readline()

while line :

    data = line.strip()
    data_list = data.split()

    user = data_list[0]
    items = data_list[1:]
    all_data[user] = items
    
    line = f.readline()
    
f.close()

In [21]:
user_list = []
item_list = []

for user, items in all_data.items():
    
    for item in items:
        user_list.append(user)
        item_list.append(item)

In [22]:
data_df = pd.DataFrame(
    data = {
        'user_id' : user_list,
        'item_id' : item_list
    },
    columns=['user_id', 'item_id']
)

In [23]:
data_df.head()

Unnamed: 0,user_id,item_id
0,0,0
1,0,1
2,0,2
3,0,2
4,0,3


### 実際にinteractionのあったアイテムとのAttention Score を算出する

In [25]:
user_id_list = list(data_df['user_id'])
item_id_list = list(data_df['item_id'])
attention_score_list = []

In [28]:
for u_id, i_id in zip(tqdm(user_id_list), item_id_list):
    
    e_h_idx = int(u_id)
    e_t_idx = int(i_id) + len(users)
    
    att = attention_score[e_h_idx, e_t_idx]
    attention_score_list.append(att)

100%|█████████████████████████████████████████████████████████████████████| 2418427/2418427 [01:51<00:00, 21605.03it/s]


In [29]:
user_item_attention_df = pd.DataFrame(
    data={
        'user_id' : user_id_list,
        'item_id' : item_id_list,
        'attention_score' : attention_score_list
    },
    columns=['user_id', 'item_id', 'attention_score']
)

In [30]:
user_item_attention_df.head()

Unnamed: 0,user_id,item_id,attention_score
0,0,0,0.007703
1,0,1,0.00733
2,0,2,0.010621
3,0,2,0.010621
4,0,3,0.009094


In [31]:
save_path = './result_1114/'
file_name = 'attention_score_user-item.csv'

user_item_attention_df.to_csv(save_path+file_name)