In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity="all"

import os
import json
import argparse
import pandas as pd
import numpy as np
import time, datetime
from tqdm import tqdm
from logging import getLogger
import torch

from recbole.model.general_recommender.ease import EASE
from recbole.model.context_aware_recommender.ffm import FFM

from recbole.config import Config
from recbole.data import create_dataset, data_preparation, Interaction
from recbole.utils import init_logger, get_trainer, get_model, init_seed, set_color


SEED=13

In [24]:
model_path='/opt/ml/input/code/Recbole/baseline/saved/NeuMF-Jan-02-2023_15-01-29.pth'
# model_path='/opt/ml/input/code/Recbole/saved/EASE-Dec-21-2022_15-02-10.pth'

In [25]:
# config, model, dataset 불러오기
checkpoint = torch.load(model_path)
config = checkpoint['config']
config['dataset'] = 'train_data'

# device 설정
device = config.final_config_dict['device']

config['eval_args'] = {'split': {'RS': [8, 1, 1]},
                         'group_by': 'user',
                         'order': 'RO',
                         'mode': 'full'}

In [10]:
dataset = create_dataset(config)
train_data, valid_data, test_data = data_preparation(config, dataset)

In [11]:
# user, item id -> token 변환 array
user_id = config['USER_ID_FIELD']
item_id = config['ITEM_ID_FIELD']
user_id2token = dataset.field2id_token[user_id]
item_id2token = dataset.field2id_token[item_id]

In [26]:
model = get_model(config['model'])(config, test_data.dataset).to(config['device'])
model.load_state_dict(checkpoint['state_dict'])
model.load_other_parameter(checkpoint.get('other_parameter'))

# user id list
all_user_list = torch.arange(1, len(user_id2token)).view(-1,128) # 245, 128

# user, item 길이
user_len = len(user_id2token) # 31361 (PAD 포함)
item_len = len(item_id2token) # 6808 (PAD 포함)

<class 'list'>
[1024, 512, 256]


In [13]:
valid_data.dataset

[1;35mtrain_data[0m
[1;34mThe number of users[0m: 31361
[1;34mAverage actions of users[0m: 15.981855867346939
[1;34mThe number of items[0m: 6808
[1;34mAverage actions of items[0m: 73.66122868900646
[1;34mThe number of inters[0m: 501191
[1;34mThe sparsity of the dataset[0m: 99.76525637106212%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'timestamp', 'label']

In [14]:
matrix = valid_data.dataset.inter_matrix(form='csr')
rows, cols = matrix.nonzero()
valid = pd.DataFrame({'user':rows,'item':cols})

valid.user = valid.user.map(lambda x: int(user_id2token[x]))
valid.item = valid.item.map(lambda x: int(item_id2token[x]))

In [15]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity="last"

In [33]:
# rank K 설정
K = 30

# model 평가모드 전환
model.eval()

# progress bar 설정
tbar = tqdm(all_user_list, desc=set_color(f"Inference", 'pink')) # 245, 128

pred_list2 = None
user_list2 = []
from recbole.utils.case_study import full_sort_topk
for data in tbar:
    batch_pred_list2 = full_sort_topk(data, model, valid_data, K, device=device)[1]
    batch_pred_list2 = batch_pred_list2.clone().detach().cpu().numpy()
    if pred_list2 is None:
        pred_list2 = batch_pred_list2
        user_list2 = data.numpy()
    else:
        pred_list2 = np.append(pred_list2, batch_pred_list2, axis=0)
        user_list2 = np.append(
            user_list2, data.numpy(), axis=0
        )
tbar.close()

# user별 item 추천 결과 하나로 합쳐주기
result2 = []
for user, pred in zip(user_list2, pred_list2):
    for item in pred:
        result2.append((int(user_id2token[user]), int(item_id2token[item])))

  uid_series = torch.tensor(uid_series)
[1;35mInference[0m: 100%|█████████████████████████████████████████████████| 245/245 [00:45<00:00,  5.40it/s][0m


## NeuMF 모델

In [32]:
## valid는 train:valid=9:1 -> 가져온 모델은 train:valid=1:0
## NeuMF - valid 0.1307 -> 여기선 0.09957
sub = pd.DataFrame(result2, columns=["user", "item"])

intersect = valid.merge(sub,on=['user','item'])

print(f"Recall@{K} = {intersect.shape[0] / valid.shape[0]}")

Recall@300 = 0.6900423191956759


In [30]:
## valid는 train:valid=9:1 -> 가져온 모델은 train:valid=1:0
## NeuMF - valid 0.1307 -> 여기선 0.09957
sub = pd.DataFrame(result2, columns=["user", "item"])

intersect = valid.merge(sub,on=['user','item'])

print(f"Recall@{K} = {intersect.shape[0] / valid.shape[0]}")

Recall@100 = 0.43237208968237656


In [34]:
## valid는 train:valid=9:1 -> 가져온 모델은 train:valid=1:0
## NeuMF - valid 0.1307 -> 여기선 0.09957
sub = pd.DataFrame(result2, columns=["user", "item"])

intersect = valid.merge(sub,on=['user','item'])

print(f"Recall@{K} = {intersect.shape[0] / valid.shape[0]}")

Recall@30 = 0.21601345594793203


In [28]:
## valid는 train:valid=9:1 -> 가져온 모델은 train:valid=1:0
## NeuMF - valid 0.1307 -> 여기선 0.09957
sub = pd.DataFrame(result2, columns=["user", "item"])

intersect = valid.merge(sub,on=['user','item'])

print(f"Recall@{K} = {intersect.shape[0] / valid.shape[0]}")

Recall@10 = 0.09957082230127835


## EASE 모델

In [23]:
## K=500
## valid는 train:valid=9:1 -> 가져온 모델은 train:valid=1:0
## EASE - valid 0.1966 -> 여기선 0.1674 (dataset을 정의할때마다 달라짐)
sub = pd.DataFrame(result2, columns=["user", "item"])

intersect = valid.merge(sub,on=['user','item'])

print(f"Recall@{K} = {intersect.shape[0] / valid.shape[0]}")

Recall@500 = 0.978974083732549


In [21]:
## K=300
## valid는 train:valid=9:1 -> 가져온 모델은 train:valid=1:0
## EASE - valid 0.1966 -> 여기선 0.1674 (dataset을 정의할때마다 달라짐)
sub = pd.DataFrame(result2, columns=["user", "item"])

intersect = valid.merge(sub,on=['user','item'])

print(f"Recall@{K} = {intersect.shape[0] / valid.shape[0]}")

Recall@300 = 0.9369282369396098


In [19]:
## K=100
## valid는 train:valid=9:1 -> 가져온 모델은 train:valid=1:0
## EASE - valid 0.1966 -> 여기선 0.1674 (dataset을 정의할때마다 달라짐)
sub = pd.DataFrame(result2, columns=["user", "item"])

intersect = valid.merge(sub,on=['user','item'])

print(f"Recall@{K} = {intersect.shape[0] / valid.shape[0]}")

Recall@100 = 0.6760436639923701


In [17]:
## valid는 train:valid=9:1 -> 가져온 모델은 train:valid=1:0
## EASE - valid 0.1966 -> 여기선 0.2296
sub = pd.DataFrame(result2, columns=["user", "item"])

intersect = valid.merge(sub,on=['user','item'])

print(f"Recall@{K} = {intersect.shape[0] / valid.shape[0]}")

Recall@10 = 0.16745911239427683


In [None]:
## valid는 train:valid=9:1 -> 가져온 모델은 train:valid=1:0
## SLIMElastic - valid 0.1768 -> 여기선 0.
sub = pd.DataFrame(result2, columns=["user", "item"])

intersect = valid.merge(sub,on=['user','item'])

print(f"Recall@{K} = {intersect.shape[0] / valid.shape[0]}")

In [78]:
## valid는 train:valid=9:1 -> 가져온 모델은 train:valid=1:0
## EASE - valid 0.1966 -> 여기선 0.2296
sub = pd.DataFrame(result2, columns=["user", "item"])

intersect = valid.merge(sub,on=['user','item'])

print(f"Recall@{K} = {intersect.shape[0] / valid.shape[0]}")

Recall@10 = 0.22968289534329228
