In [118]:
from recbole.config import Config
import torch

In [119]:
# Build the RecBole configuration for the BPR model from the project's YAML file.
config_file = ["configs/general/bpr_config.yaml"]
config = Config(
    model='BPR',
    dataset='dataset',
    config_file_list=config_file,
)

In [120]:
from recbole.data import create_dataset, data_preparation
from recbole.model.general_recommender import BPR
from recbole.trainer import Trainer
import warnings
warnings.filterwarnings('ignore')

# Build the dataset object from `config` via recbole.data.create_dataset.
dataset = create_dataset(config)

In [None]:
# Split the dataset into the three loaders used for training, validation and testing.
(
    train_data,
    valid_data,
    test_data,
) = data_preparation(config, dataset)

In [None]:
from recbole.utils import get_model, get_trainer
# Look up the model class named in the config, build it on the training dataset,
# and move it to the configured device.
model_class = get_model(config['model'])
model = model_class(config, train_data.dataset).to(config['device'])

In [None]:
# Look up the trainer class for this model type / model name and bind it to the model.
trainer_class = get_trainer(config['MODEL_TYPE'], config['model'])
trainer = trainer_class(config, model)

In [None]:
# Train the model, validating on `valid_data`; saved=True asks the trainer to
# persist a checkpoint.
best_valid_score, best_valid_result = trainer.fit(
    train_data,
    valid_data,
    saved=True,
    show_progress=config["show_progress"],
)

[1;35mTrain     0[0m: 100%|█████████████████████| 2470/2470 [00:43<00:00, 56.91it/s, [1;33mGPU RAM: 0.09 G/31.74 G[0m][0m
[1;35mEvaluate   [0m: 100%|██████████████████| 31360/31360 [01:12<00:00, 430.99it/s, [1;33mGPU RAM: 0.09 G/31.74 G[0m][0m


In [None]:
# Evaluate on the test split after reloading the best saved model.
test_result = trainer.evaluate(
    test_data,
    load_best_model=True,
    show_progress=config['show_progress'],
)

[1;35mEvaluate   [0m: 100%|██████████████████| 31360/31360 [01:13<00:00, 424.06it/s, [1;33mGPU RAM: 0.09 G/31.74 G[0m][0m


In [None]:
# Gather the validation and test outcomes of this run in a single dictionary.
result = dict(
    best_valid_score=best_valid_score,
    valid_score_bigger=config["valid_metric_bigger"],
    best_valid_result=best_valid_result,
    test_result=test_result,
)

In [None]:
# Report the best validation metrics recorded during training.
# Fixes: the header was misspelled ("Valud"), and the first label was taken from
# config['valid_metric'] although the value printed is always the 'recall@10'
# entry, which mislabels the number whenever the configured metric differs.
best_valid = result['best_valid_result']
print("Valid Result:\n")
print(f"Recall@10: {best_valid['recall@10']}, NDCG@10: {best_valid['ndcg@10']}")

Recall@10: 0.1116, NDCG@10: 0.0648
Recall@10: 0.1127, NDCG@10: 0.0665


In [None]:
import pandas as pd
import os
import glob
# Read the sample submission file located under the configured evaluation path.
sample_submission_path = os.path.join(config['eval_path'], 'sample_submission.csv')
sample_submission = pd.read_csv(sample_submission_path)

In [None]:
# Work on a copy so the sample_submission frame read from disk is left untouched.
# NOTE(review): this rebinds `test_data`, which earlier held the test split returned
# by data_preparation(); re-running the earlier trainer.evaluate cell after this one
# would pass a DataFrame instead — consider a distinct name.
test_data = sample_submission.copy()

In [None]:
# Locate the checkpoint files saved for the configured model.
checkpoint_dir = config['checkpoint_dir']
model_name = config['model']
checkpoint_pattern = os.path.join(checkpoint_dir, f"{model_name}-*.pth")
checkpoint_files = glob.glob(checkpoint_pattern)

# Stop here with a descriptive error. Previously the message was only printed and
# execution continued into max() on an empty list, which raises an unrelated
# "max() arg is an empty sequence" ValueError.
if not checkpoint_files:
    raise FileNotFoundError(
        f"Checkpoint files not found in {checkpoint_dir} with pattern {checkpoint_pattern}"
    )

# Select the most recently modified checkpoint file.
checkpoint_path = max(checkpoint_files, key=os.path.getmtime)
print(f"Loading model from {checkpoint_path}")

Loading model from ./model/saved/BPR-Nov-19-2024_15-24-49.pth


In [None]:
from recbole.quick_start import load_data_and_model
# Restore the run stored in the selected checkpoint. This rebinds `model` and
# `dataset` to the objects returned by load_data_and_model.
(
    general_hyper_params,
    model,
    dataset,
    train_data_loader,
    valid_data_loader,
    test_data_loader,
) = load_data_and_model(model_file=checkpoint_path)
# Left as the final expression so the notebook still displays the call's return value.
model.to(config['device'])

In [None]:
# Rename the two submission columns to the names used below.
test_data.columns = ['user_id', 'item_id']

# Distinct user ids from the submission frame, converted to strings.
test_users = list(map(str, test_data['user_id'].unique().tolist()))
# Map the user tokens through the dataset's token2id lookup for the user-id field.
uid_series = dataset.token2id(dataset.uid_field, test_users)

# Item feature data moved to the configured device.
item_meta = dataset.get_item_feature().to(config['device'])

# Number of users scored per full_sort_topk call in the inference loop.
batch_size = 256

In [None]:
from tqdm import tqdm
from recbole.utils.case_study import full_sort_topk

# Produce the top-10 recommended items for every test user, one batch at a time.
# Per-batch frames are collected in a list and concatenated once after the loop:
# calling pd.concat inside the loop re-copies all previously accumulated rows on
# every iteration (quadratic work) and starts by concatenating onto an empty frame.
batch_frames = []
for i in tqdm(range(0, len(uid_series), batch_size)):
    batch_indices = uid_series[i:i+batch_size]
    batch_users = test_users[i:i+batch_size]

    topk_iid_list_batch = full_sort_topk(batch_indices, model, valid_data_loader, k=10, device=config['device'])
    last_topk_iid_list = topk_iid_list_batch.indices
    # Convert the returned item ids back to their original tokens.
    # presumably one list of k tokens per user in the batch — TODO confirm
    recommended_item_list = dataset.id2token(dataset.iid_field, last_topk_iid_list.cpu()).tolist()
    batch_frames.append(pd.DataFrame({'user': batch_users, 'item': recommended_item_list}))

if batch_frames:
    recommended_df = pd.concat(batch_frames, ignore_index=True)
else:
    # No users to score: keep the same empty, two-column frame the loop started from.
    recommended_df = pd.DataFrame(columns=['user', 'item'])

100%|██████████| 123/123 [00:00<00:00, 136.61it/s]


In [None]:
# Expand each user's list of items into one row per (user, item) pair and
# renumber the rows from zero.
recommended_df = (
    recommended_df
    .explode('item')
    .reset_index(drop=True)
)

Unnamed: 0,user,item
0,11,4963
1,11,5418
2,11,8961
3,11,8360
4,11,4886
...,...,...
313595,138493,4963
313596,138493,47
313597,138493,589
313598,138493,8961


In [None]:
# Save the recommendations as output_<checkpoint file name without extension>.csv.
output_dir = './data/output/'
# basename/splitext strip the directory and the extension without assuming "/"
# separators or a four-character suffix.
checkpoint_name = os.path.splitext(os.path.basename(checkpoint_path))[0]
# to_csv does not create missing directories, so make sure the target exists.
os.makedirs(output_dir, exist_ok=True)
# index=False keeps the file to the 'user' and 'item' columns only; writing the
# row index would add an unnamed leading column to the output file.
recommended_df.to_csv(os.path.join(output_dir, f"output_{checkpoint_name}.csv"), index=False)

# Sequential Check

# MLflow Check

In [1]:
import mlflow
import mlflow.pytorch
import pandas as pd
import os
import glob
from tqdm import tqdm
from recbole.utils.case_study import full_sort_topk
from recbole.quick_start import load_data_and_model
from logging import getLogger
from recbole.config import Config
from recbole.data import create_dataset, data_preparation
from recbole.model.general_recommender import BPR
from recbole.trainer import Trainer
from recbole.utils import init_seed, init_logger
from recbole.utils import get_model, get_trainer
import warnings
warnings.filterwarnings('ignore')

  from .autonotebook import tqdm as notebook_tqdm
2024-11-20 13:25:10,087	INFO util.py:159 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
2024-11-20 13:25:10,174	INFO util.py:159 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


In [2]:
# Choose the model family and model name; the YAML config path is derived from
# their lower-cased forms.
model_type, model = 'general', "ADMMSLIM"
config_file = [f"configs/{model_type.lower()}/{model.lower()}_config.yaml"]
config = Config(
    model=model,
    dataset='dataset',
    config_file_list=config_file,
)

In [3]:
# Display the final configuration dictionary held by `config` for inspection.
config.final_config_dict

{'gpu_id': '0',
 'worker': 0,
 'use_gpu': True,
 'seed': 42,
 'state': 'INFO',
 'reproducibility': True,
 'data_path': './data/processed/dataset',
 'checkpoint_dir': './model/saved/',
 'show_progress': True,
 'save_dataset': False,
 'dataset_save_path': None,
 'save_dataloaders': False,
 'dataloaders_save_path': None,
 'log_wandb': False,
 'wandb_project': None,
 'shuffle': True,
 'epochs': 2,
 'train_batch_size': 2048,
 'learner': 'adam',
 'learning_rate': 0.001,
 'train_neg_sample_args': {'distribution': 'uniform',
  'sample_num': 1,
  'alpha': 1.0,
  'dynamic': False,
  'candidate_num': 0},
 'eval_step': 1,
 'stopping_step': 10,
 'clip_grad_norm': None,
 'weight_decay': 0.0,
 'loss_decimal_place': 4,
 'require_pow': False,
 'enable_amp': False,
 'enable_scaler': False,
 'transform': None,
 'eval_args': {'split': {'RS': [0.99, 0.01, 0]},
  'order': 'RO',
  'group_by': 'user',
  'mode': {'valid': 'full', 'test': 'full'}},
 'repeatable': False,
 'metrics': ['Recall', 'NDCG'],
 'topk': 