In [1]:
import argparse
import os
import pickle

import pandas as pd
import torch

from common.data_iterator import data_iterator
from config import CONFIG
from model.hrnn4recom import HRNN

In [2]:
# Widen pandas' text display to 150 characters so wide frames wrap less.
pd.options.display.width = 150

In [3]:
# class Argument:
#     user = 1422
#     dataset = '1M'
#     weight = 'hrnn_v1.0.1_e05-loss0.2407_nDCG0.436.zip'
#     eval_k = 10

class Argument:
    """Hard-coded run settings read by the cells below."""

    # Dataset name; used as a sub-directory of CONFIG.DATA and of 'result'.
    dataset: str = 'BRUNCH'
    # File name of the saved model weights inside result/<dataset>/.
    weight: str = 'hrnn_v1.0.1_e12-loss0.1701_nDCG0.531.zip'
    # Id of the user whose history and recommendations are inspected.
    user: int = 3295  # 3293
    # Number of top-scoring items to keep (passed to the model as k and to topk).
    eval_k: int = 10
    
def get_user_test_data(test_data, user_id):
    """Find one user's row in a tab-separated file of integers.

    Every non-blank line is read as ``user_id<TAB>item<TAB>item...``. The
    file is scanned top to bottom and the first line whose first field
    equals ``user_id`` is returned.

    Args:
        test_data: Path of the tab-separated text file to scan.
        user_id: Integer id compared against the first field of each line.

    Returns:
        A tuple ``(first_item, other_items)``: the second field of the
        matching line as an int, and the remaining fields as a list of ints.
        The caller in this notebook names them positive_item / negative_item.

    Raises:
        ValueError: If no line starts with ``user_id`` (or if a parsed field
            is not an integer).
        IndexError: If the matching line has no field after the user id.
    """
    with open(test_data, 'r') as file:
        for raw_line in file:
            # Blank lines (e.g. a trailing newline at the end of the file)
            # would otherwise crash with int('') before the user is found.
            if not raw_line.strip():
                continue
            fields = raw_line.split('\t')
            if int(fields[0]) != user_id:
                continue
            # Only the matching row needs its item ids converted.
            items = [int(field) for field in fields[1:]]
            return items[0], items[1:]
    raise ValueError(f'User {user_id} is not exist')

In [4]:
# Instantiate the run settings, pick the GPU when one is available (CPU
# otherwise), and resolve the directory holding the chosen dataset's files.
argument = Argument()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data_dir = os.path.join(CONFIG.DATA, argument.dataset)

In [5]:
# loading data
# NOTE(security): pickle.load can execute arbitrary code; only load a
# valid.pkl that comes from a trusted source.
# The file is opened in a `with` block so the handle is closed right after
# loading instead of being left open for the life of the kernel.
with open(os.path.join(data_dir, 'valid.pkl'), 'rb') as context_file:
    context_dataset = pickle.load(context_file)
item_meta = pd.read_csv(os.path.join(data_dir, 'item_meta.tsv'), sep='\t', low_memory=False)
user_meta = pd.read_csv(os.path.join(data_dir, 'user_meta.tsv'), sep='\t', low_memory=False)
# First item / remaining items of the selected user's row in negative_test.dat.
positive_item, negative_item = get_user_test_data(os.path.join(data_dir, 'negative_test.dat'), argument.user)
# One more than the largest item id in the metadata; passed to the model as item_size.
item_size = int(item_meta.item_id.max() + 1)

In [6]:
# Columns shown when displaying item / user metadata. The 'BRUNCH' dataset
# has its own column names; every other dataset uses the default set.
if argument.dataset == 'BRUNCH':
    item_display_cols = ['item_id', 'author_id', 'title', 'display_url', 'keyword_list']
    user_display_cols = ['user_id','following_list']
else:
    item_display_cols = ['item_id', 'Title', 'Genres']
    user_display_cols = ['user_id', 'Gender', 'Age', 'Occupation', 'Zip-code']

In [7]:
# argument.user = 1422
# Interaction records of the selected user, looked up by user id.
user_contexts = context_dataset[argument.user]
# Show that user's metadata row(s).
user_meta[user_meta.user_id == argument.user]

Unnamed: 0,user_id,keyword_list,following_list,id
5795,3295,[],"['@cardnews', '@sustainlife', '@langman', '@br...",#824cd5fcb359865da1a32b2f5d993869


In [8]:
# In item_meta the 'user_id' column is shown as 'author_id' (see
# item_display_cols). Rebind the returned frame instead of using
# inplace=True so the rename reads as an explicit assignment.
item_meta = item_meta.rename(columns={'user_id': 'author_id'})

In [9]:
# One row per interaction record of the selected user. Each output column is
# the named field of every record, cast to int.
history = pd.DataFrame({
    column: [int(context[field]) for context in user_contexts]
    for column, field in (
        ('item_id', 'outputItem'),
        ('rating', 'rating'),
        ('Timestamp', 'timestamp'),
    )
})

In [10]:
# Attach item metadata to every history row. The left join keeps all
# interactions in their existing order; no sort by 'Timestamp' is applied.
# NOTE: this rebinds `history`, so re-running only this cell merges item_meta
# a second time -- rebuild `history` in the previous cell first.
history = history.merge(right=item_meta, how='left', on='item_id')
# Show the last 30 interactions with the display columns plus rating/time.
history[[*item_display_cols, 'rating', 'Timestamp']].tail(30)

Unnamed: 0,item_id,author_id,title,display_url,keyword_list,rating,Timestamp
0,192361,@seonju730,어떤 뉴스를 좋아하세요?,https://brunch.co.kr/@seonju730/27,"['취향', '테스트', '뉴스']",5,1545901200
1,192362,@syn,06. 당신의 여행은 어떠십니까?,https://brunch.co.kr/@syn/27,"['여행', '스타일', '테스트']",5,1545901200
2,192363,@itisyoona,<코코> 보고 영어공부 시작한 썰 : 픽사 가고 싶어요,https://brunch.co.kr/@itisyoona/76,"['영어공부', '영어', '직장인']",5,1545901200
3,156979,@bitcommin,"멜론, 종자부터 브랜딩이다",https://brunch.co.kr/@bitcommin/42,"['농산물', '브랜딩', '브랜드']",5,1545901200
4,147571,@taekangk,글로벌 기업의 회의 방법,https://brunch.co.kr/@taekangk/52,"['회의', '글로벌', '회사']",5,1547474400
5,180298,@roysday,소비자와 함께 만드는 참여형컨텐츠 10가지를 정리해보자,https://brunch.co.kr/@roysday/310,"['콘텐츠', '마케팅', '스타트업']",5,1547474400
6,178342,@dooook,넷플릭스가 영어공부에 가성비 갑인 세 가지 이유,https://brunch.co.kr/@dooook/142,"['영어공부', '미드', '넷플릭스']",5,1548266400
7,216674,@expediakr,특색 있는 전주 게스트하우스 BEST 5,https://brunch.co.kr/@expediakr/205,"['전주여행', '여행']",5,1548709200
8,216674,@expediakr,특색 있는 전주 게스트하우스 BEST 5,https://brunch.co.kr/@expediakr/205,"['전주여행', '여행']",5,1549918800
9,149400,@volo,동계 내일로 <전주 가볼 만한 곳 & 게스트하우스 추천,https://brunch.co.kr/@volo/413,"['전주', '여행', '전주여행']",5,1549918800


In [11]:
# loading model
# Hyper-parameters handed to the HRNN constructor below.
model_params = dict(
    hiddenUnits=100,  # 50
    k=argument.eval_k,
    dropout=0.2,
    item_size=item_size,
)
hrnn = HRNN(
    model_params['hiddenUnits'],
    model_params['item_size'],
    device=device,
    k=model_params['k'],
    dropout=model_params['dropout'],
)
# Saved weights live under result/<dataset>/<weight file name>.
weight = os.path.join('result', argument.dataset, argument.weight)
hrnn.load(weight)

In [12]:
# prediction
# Gradient tracking is switched off while the model is queried.
with torch.no_grad():
    context_batches = data_iterator(user_contexts, device=device)
    recommend_items = hrnn.get_recommendation(context_batches, argument.eval_k)

In [13]:
# Positions of the eval_k largest values along the last dimension of
# recommend_items; the next cells use them as item ids.
# Only the `.indices` field is kept: unpacking into `_` would shadow IPython's
# special `_` (previous cell output) variable for the rest of the session.
indices = torch.topk(recommend_items, argument.eval_k, dim=-1).indices

In [14]:
# Single-row frame for the user's positive test item, joined with its
# metadata (negative_item is not included here).
# validate='1:m' makes the merge fail if item_id is duplicated on the left.
test_dataset = pd.DataFrame({'item_id': [positive_item]}).merge(
    item_meta,
    on='item_id',
    how='left',
    validate='1:m',
)
print(test_dataset.shape)
test_dataset[item_display_cols].head()

(1, 10)


Unnamed: 0,item_id,author_id,title,display_url,keyword_list
0,92664,@shindong,일 잘하는 사람의 특징,https://brunch.co.kr/@shindong/38,"['사람', '스타트업', '직장인']"


In [15]:
# Table of recommended items: the first row of `indices` supplies the item
# ids (assumes a single user per batch -- TODO confirm), each paired with the
# selected user id, then enriched with item and user metadata.
# The validate arguments make the merges fail if item_id is duplicated on the
# left of the first join, or user_id is duplicated in user_meta.
recommend = (
    pd.DataFrame({'item_id': indices.cpu().numpy()[0], 'user_id': argument.user})
    .merge(item_meta[item_display_cols], on='item_id', how='left', validate='1:m')
    .merge(user_meta[user_display_cols], on='user_id', how='left', validate='m:1')
)
print(recommend.shape)
recommend

(10, 7)


Unnamed: 0,item_id,user_id,author_id,title,display_url,keyword_list,following_list
0,220899,3295,,,,,"['@cardnews', '@sustainlife', '@langman', '@br..."
1,226013,3295,@greenshifter,버릴수록 행복한가요?,https://brunch.co.kr/@greenshifter/8,"['심플라이프', '사회', '미니멀리즘']","['@cardnews', '@sustainlife', '@langman', '@br..."
2,227046,3295,@phillee,"리암 니슨이 선보인 복수의 끝, 이번엔 다를까?",https://brunch.co.kr/@phillee/3,"['영화평점', '영화']","['@cardnews', '@sustainlife', '@langman', '@br..."
3,226007,3295,@hamsah0321,"검사하면 정상인데, 나는 왜 아플까?",https://brunch.co.kr/@hamsah0321/1,"['검사', '질병', '아유르베다']","['@cardnews', '@sustainlife', '@langman', '@br..."
4,225494,3295,@icallu,수영장에서,https://brunch.co.kr/@icallu/109,"['그림일기', '공감에세이', '드로잉']","['@cardnews', '@sustainlife', '@langman', '@br..."
5,230078,3295,@kidakhs,"앙코르와트, 인간의 작품인가 신들의 작품인가",https://brunch.co.kr/@kidakhs/15,"['해외여행', '캄보디아', '앙코르와트']","['@cardnews', '@sustainlife', '@langman', '@br..."
6,228066,3295,@soul-music-ws,#11. 버스 맨 앞자리에 앉는 일,https://brunch.co.kr/@soul-music-ws/37,"['버스', '회사', '퇴근']","['@cardnews', '@sustainlife', '@langman', '@br..."
7,228021,3295,@yolo-life,'난임'이란 단어가 나에게..,https://brunch.co.kr/@yolo-life/6,"['난임', '임신', '자궁내막증']","['@cardnews', '@sustainlife', '@langman', '@br..."
8,220971,3295,@haleytravel,몽골 여행의 꿈,https://brunch.co.kr/@haleytravel/4,"['몽골', '여행']","['@cardnews', '@sustainlife', '@langman', '@br..."
9,228002,3295,@wldud92221,고삼 요거트,https://brunch.co.kr/@wldud92221/38,"['드로잉', '그림일기', '공감에세이']","['@cardnews', '@sustainlife', '@langman', '@br..."
