In [1]:
import argparse
import os

import pandas as pd
import torch
from torch.nn import Sigmoid

from config import CONFIG
from model.ncf_model import NeuralMF

In [2]:
# Set the pandas "display.width" option to 200 characters.
pd.options.display.width = 200

In [3]:
# class Argument:
#     user = 1422
#     dataset = '1M'
#     weight = 'nmf_v1.0.5e03_loss0.151_nDCG0.387.zip'
#     eval_k = 10

class Argument:
    """Hard-coded settings for this notebook run."""

    # User id: filtered on in train_data and passed to prediction().
    user: int = 3295
    # Dataset name: joined onto CONFIG.DATA and onto the 'result' directory.
    dataset: str = "BRUNCH"
    # Weight file name, loaded from result/<dataset>/.
    weight: str = "nmf_v1.0.1e02_loss0.175_nDCG0.503.zip"
    # Number of items kept by torch.topk in prediction().
    eval_k: int = 10

def get_user_test_data(test_data, user_id):
    """Look up one user's row in a tab-separated test file.

    Every non-blank line is parsed as tab-separated integers. The first field
    is compared against ``user_id``; on the first match the second field and
    the list of all remaining fields are returned.

    Args:
        test_data: Path of the file to read.
        user_id: Integer id to search for in the first field.

    Returns:
        Tuple ``(fields[1], fields[2:])`` taken from the first matching line.

    Raises:
        ValueError: If no line starts with ``user_id``, or if a field on a
            scanned line is not an integer.
    """
    with open(test_data, 'r') as file:
        for raw in file:
            # Drop the newline and any trailing tab; otherwise an empty last
            # field (or a blank line) would make int('') raise.
            record = raw.rstrip()
            if not record:
                continue
            fields = [int(token) for token in record.split('\t')]
            if fields[0] == user_id:
                return fields[1], fields[2:]
    raise ValueError(f'User {user_id} is not exist')
    
def prediction(model, user, items):
    """Return the highest-scoring items for one user.

    Each candidate is scored with ``sigmoid(model(user, item))``, where both
    ids are passed as int64 scalar tensors, and the best-scoring candidates
    are returned in descending score order.

    Reads the notebook globals ``device`` (where tensors are created) and
    ``argument.eval_k`` (how many items to keep).

    Args:
        model: Callable taking ``(user_tensor, item_tensor)`` and returning a
            single-element score tensor.
        user: Integer user id.
        items: Sequence of integer item ids to rank.

    Returns:
        List of at most ``argument.eval_k`` entries of ``items``, best first.
        Empty when ``items`` is empty.
    """
    n_items = len(items)
    if n_items == 0:
        return []

    sigmoid = Sigmoid()
    # torch.topk raises when k is larger than the number of candidates.
    top_k = min(argument.eval_k, n_items)

    with torch.no_grad():
        # The user id is identical for every candidate, so build it once.
        user_tensor = torch.tensor(user, device=device, dtype=torch.int64)
        scores = torch.zeros(n_items, device=device)
        # NOTE(review): one forward pass per item; a batched call would be far
        # faster if the model accepts 1-D id tensors — confirm before changing.
        for idx, item in enumerate(items):
            item_tensor = torch.tensor(item, device=device, dtype=torch.int64)
            scores[idx] = sigmoid(model(user_tensor, item_tensor))
        _, indices = torch.topk(scores, k=top_k)

    return [items[i] for i in indices.tolist()]

In [4]:
# Settings instance read by the cells below and by prediction().
argument = Argument()

# Everything runs on the CPU. To use a GPU when one is available, swap in:
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device = torch.device("cpu")

# Dataset directory: CONFIG.DATA joined with the dataset name.
save_dir = os.path.join(CONFIG.DATA, argument.dataset)
print(save_dir)

./datasets/BRUNCH


In [5]:
# Read the tab-separated interaction log and the item / user metadata tables.
train_data = pd.read_csv(
    os.path.join(save_dir, 'train.tsv'),
    sep='\t',
)
item_meta = pd.read_csv(
    os.path.join(save_dir, 'item_meta.tsv'),
    sep='\t',
    low_memory=False,
)
user_meta = pd.read_csv(
    os.path.join(save_dir, 'user_meta.tsv'),
    sep='\t',
)

# Id-space sizes are derived as (largest id + 1).
n_user = int(user_meta['user_id'].max() + 1)  # 30606
n_item = int(item_meta['item_id'].max() + 1)  # 235788

In [6]:
# item_meta's 'user_id' column is exposed as 'author_id' from here on, so it
# cannot collide with a 'user_id' column when frames are merged.
# Rebinding instead of inplace=True leaves the frame loaded earlier unmodified.
item_meta = item_meta.rename(columns={'user_id': 'author_id'})

In [7]:
# Columns shown when displaying items and users. BRUNCH uses its own column
# names; every other dataset gets the Title/Genres and Gender/Age/... set.
if argument.dataset == 'BRUNCH':
    item_display_cols = ['item_id', 'author_id', 'title', 'display_url', 'keyword_list']
    user_display_cols = ['user_id', 'following_list']
else:
    item_display_cols = ['item_id', 'Title', 'Genres']
    user_display_cols = ['user_id', 'Gender', 'Age', 'Occupation', 'Zip-code']

In [8]:
# user_meta[user_meta['id'] == '#baf111ce572a6c22bf9746afd3f848ac']
# user_meta[user_meta.UserID == 1423]
# argument.user = 1422

In [15]:
# Training interactions of the selected user, joined with item metadata.
# The left merge keeps every interaction row even without an item_meta match.
# No sort is applied (a Timestamp sort was tried before and is disabled).
interactions = (
    train_data
    .loc[train_data['user_id'] == argument.user]
    .merge(item_meta, on='item_id', how='left')
)
interactions[[*item_display_cols, 'Rating', 'Timestamp']].tail(30)

Unnamed: 0,item_id,author_id,title,display_url,keyword_list,Rating,Timestamp
84,145582,@jacong,공간의 가치를 높이는 기획자#1,https://brunch.co.kr/@jacong/63,"['공간기획', '기획', '기획자']",5,1545318000
85,180298,@roysday,소비자와 함께 만드는 참여형컨텐츠 10가지를 정리해보자,https://brunch.co.kr/@roysday/310,"['콘텐츠', '마케팅', '스타트업']",5,1548266400
86,101724,@fashionlab,번아웃 증후군 테스트하기,https://brunch.co.kr/@fashionlab/39,"['직무스트레스', '번아웃', '스트레스']",5,1545901200
87,192361,@seonju730,어떤 뉴스를 좋아하세요?,https://brunch.co.kr/@seonju730/27,"['취향', '테스트', '뉴스']",5,1545901200
88,192362,@syn,06. 당신의 여행은 어떠십니까?,https://brunch.co.kr/@syn/27,"['여행', '스타일', '테스트']",5,1545901200
89,192363,@itisyoona,<코코> 보고 영어공부 시작한 썰 : 픽사 가고 싶어요,https://brunch.co.kr/@itisyoona/76,"['영어공부', '영어', '직장인']",5,1545901200
90,178342,@dooook,넷플릭스가 영어공부에 가성비 갑인 세 가지 이유,https://brunch.co.kr/@dooook/142,"['영어공부', '미드', '넷플릭스']",5,1548709200
91,216674,@expediakr,특색 있는 전주 게스트하우스 BEST 5,https://brunch.co.kr/@expediakr/205,"['전주여행', '여행']",5,1549918800
92,216674,@expediakr,특색 있는 전주 게스트하우스 BEST 5,https://brunch.co.kr/@expediakr/205,"['전주여행', '여행']",5,1549918800
93,149400,@volo,동계 내일로 <전주 가볼 만한 곳 & 게스트하우스 추천,https://brunch.co.kr/@volo/413,"['전주', '여행', '전주여행']",5,1549918800


In [20]:
# Rows at positions 100-149 of the user's interactions (display columns only).
interactions[[*item_display_cols, 'Rating', 'Timestamp']].iloc[100:150]

Unnamed: 0,item_id,author_id,title,display_url,keyword_list,Rating,Timestamp
100,225639,@mangoboard,정돈된 것이 아름답다.,https://brunch.co.kr/@mangoboard/4,"['디자인', '콘텐츠', '광고']",5,1550574000
101,200852,@mangoboard,"마케터들이여, 디자인 칼럼니스트가 되어라.",https://brunch.co.kr/@mangoboard/2,"['디자인', '마케팅', '마케터']",5,1550574000
102,212976,@roysday,책에선 도무지 알려주지 않는 리얼 민낯대화에 대한 썰,https://brunch.co.kr/@roysday/314,"['대화', '커뮤니케이션', '대화법']",5,1550574000
103,208622,@roysday,디자이너 눈에 비친 조금 다른 세상 20가지,https://brunch.co.kr/@roysday/313,"['디자인', '디자이너', '카페']",5,1550574000
104,3068,@intlovesong,아끼면 똥 되는 것 4가지,https://brunch.co.kr/@intlovesong/28,"['칭찬', '인간관계', '소셜미디어']",5,1550574000
105,201938,@sparrowmill,재미있는 광고회사 면접.SSUL,https://brunch.co.kr/@sparrowmill/24,"['면접', '면접관', '재미']",5,1550656800
106,201890,@sparrowmill,"Factbook 만들기 ""어디서 찾지?""",https://brunch.co.kr/@sparrowmill/23,"['자료검색', '통계자료', '마케팅']",5,1550656800
107,201938,@sparrowmill,재미있는 광고회사 면접.SSUL,https://brunch.co.kr/@sparrowmill/24,"['면접', '면접관', '재미']",5,1550656800
108,174980,@sparrowmill,제안서 기초작업 Factbook만들기,https://brunch.co.kr/@sparrowmill/22,"['광고기획서', '마케팅', '제안']",5,1550656800
109,119221,@sparrowmill,광고회사AE 자기소개서 작성법,https://brunch.co.kr/@sparrowmill/13,"['직무경험', '자기소개서', '자소서']",5,1550656800


In [10]:
# Build the NeuralMF network and load the weights from
# result/<dataset>/<weight file>.
# NOTE(review): these hyper-parameters presumably have to match the ones the
# weight file was trained with — confirm.
nmf = NeuralMF(
    n_user,
    n_item,
    n_factor=32,
    layers=[256, 128, 64, 32],
    component=['mlp', 'gmf'],
    device=device,
)
nmf.load(os.path.join('result', argument.dataset, argument.weight))
nmf.eval()
print('model')

model


In [11]:
# Row of this user in the test file: second field -> positive_item,
# remaining fields -> negative_item.
positive_item, negative_item = get_user_test_data(
    os.path.join(save_dir, 'negative_test.dat'),
    argument.user,
)

# Score every item id in [0, n_item) and keep the top argument.eval_k.
# Alternative (disabled): rank only the test candidates instead:
# recommend_items = prediction(nmf, argument.user, [positive_item] + negative_item)
recommend_items = prediction(nmf, argument.user, [*range(n_item)])
print(positive_item)

92664


In [12]:
# One row per recommended item, in ranking order, with item and user metadata.
# validate='m:1' makes the user merge fail if user_meta repeats a user_id.
recommend = (
    pd.DataFrame({'item_id': recommend_items, 'user_id': argument.user})
    .merge(item_meta[item_display_cols], on='item_id', how='left')
    .merge(user_meta[user_display_cols], on='user_id', how='left', validate='m:1')
)
recommend

Unnamed: 0,item_id,user_id,author_id,title,display_url,keyword_list,following_list
0,72794,3295,@thinkaboutlove,2018 그래픽 디자인 트렌드 정리,https://brunch.co.kr/@thinkaboutlove/211,"['디자인', '디자이너', 'UX']","['@cardnews', '@sustainlife', '@langman', '@br..."
1,19378,3295,@thinkaboutlove,국내 디자인 스튜디오 리스트,https://brunch.co.kr/@thinkaboutlove/206,"['디자인', '디자이너', 'UI']","['@cardnews', '@sustainlife', '@langman', '@br..."
2,19377,3295,@outlines,"지금, 2010년대의 리브랜딩 트렌드",https://brunch.co.kr/@outlines/27,"['브랜딩', '브랜드', 'IT']","['@cardnews', '@sustainlife', '@langman', '@br..."
3,71149,3295,@chulhochoiucj0,모바일 UI 디자인 기본 요소 - 버튼,https://brunch.co.kr/@chulhochoiucj0/23,"['UI', '모바일', 'UX']","['@cardnews', '@sustainlife', '@langman', '@br..."
4,121564,3295,@designforhuman,"2019년, 어떤 디자이너가 될 것인가",https://brunch.co.kr/@designforhuman/32,"['디자이너', '스프린트', '트렌드']","['@cardnews', '@sustainlife', '@langman', '@br..."
5,20611,3295,@sabumbyun,실리콘벨리 UI/UX 디자이너들과 이런저런 이야기.,https://brunch.co.kr/@sabumbyun/25,"['디자이너', 'UX', '실리콘밸리']","['@cardnews', '@sustainlife', '@langman', '@br..."
6,374,3295,@roysday,디자이너가 혹시라도 멋져보인다거나 그러면.,https://brunch.co.kr/@roysday/259,"['디자인', '디자이너', '업무']","['@cardnews', '@sustainlife', '@langman', '@br..."
7,24496,3295,@kakao-it,다음웹툰의 UX 개편 이야기,https://brunch.co.kr/@kakao-it/279,"['UX', '다음웹툰', '카카오']","['@cardnews', '@sustainlife', '@langman', '@br..."
8,112259,3295,@thinkaboutlove,"인스타그램,에어비앤비,트위터 앱이 비슷하게 생긴 이유",https://brunch.co.kr/@thinkaboutlove/216,"['디자인', 'UX', 'UI']","['@cardnews', '@sustainlife', '@langman', '@br..."
9,22595,3295,@toriteller,주식으로 돈 버는 두 가지 방법,https://brunch.co.kr/@toriteller/345,"['주식투자', '재테크', '사회초년생']","['@cardnews', '@sustainlife', '@langman', '@br..."
