In [1]:
import logging
from logging import getLogger
from recbole.config import Config
from recbole.data import create_dataset, data_preparation
from recbole.model.sequential_recommender import BERT4Rec
from recbole.trainer import Trainer
from recbole.utils import init_seed, init_logger

  from .autonotebook import tqdm as notebook_tqdm


In [37]:
params = {
    "data_path": "./dataset",
    "USER_ID_FIELD": "user_id",
    "ITEM_ID_FIELD": "item_id",
    "TIME_FIELD": "timestamp",
    "train_neg_sample_args": None,
    "load_col": {"inter": ["user_id", "item_id", "timestamp"]},
    "neg_sampling": None,
    "epochs": 50,
    "learning_rate": 0.001,
    "eval_args": {
        "split": {"RS": [9, 1, 0]},
        "group_by": "user",
        "order": "TO",
        "mode": "full"},
    "valid_metrics": "MRR@20"
    }

config = Config(model="BERT4Rec", dataset="data", config_dict=params)

init_seed(config["seed"], config["reproducibility"])
init_logger(config)

logger = getLogger()
c_handler = logging.StreamHandler()
c_handler.setLevel(logging.INFO)

logger.addHandler(c_handler)
logger.info(config)

13 Jan 20:10    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = ./dataset\data
checkpoint_dir = saved
show_progress = True
save_dataset = False
dataset_save_path = None
save_dataloaders = False
dataloaders_save_path = None
log_wandb = False

Training Hyper Parameters:
epochs = 100
train_batch_size = 2048
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'none', 'sample_num': 'none', 'alpha': 'none', 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'RS': [9, 1, 0]}, 'order': 'TO', 'group_by': 'user', 'mode': {'valid': 'full', 'test': 'full'}}
repeatable = True
metrics = ['Recall', 'MRR', 'NDCG', 'Hit', 'Precision']
topk = [10]
valid_metric = MRR@10
valid_metric_bigger = True
eval_batch_size = 4096
metric_decimal_place = 4

Dataset Hyper Parameters:
fiel

In [38]:

dataset = create_dataset(config)
logger.info(dataset)
train_data, valid_data, test_data = data_preparation(config, dataset)

model = BERT4Rec(config, train_data.dataset).to(config["device"])
logger.info(model)

trainer = Trainer(config, model)

best_valid_score, best_valid_result = trainer.fit(train_data, valid_data)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=feat[field].mean(), inplace=True)
13 Jan 20:10    INFO  data
The number of users: 516
Average actions of users: 102.24466019417476
The number of items: 3097
Average 

In [4]:
from recbole.utils.case_study import full_sort_topk
external_user_ids = dataset.id2token(dataset.uid_field, list(range(dataset.user_num)))[1:]

In [5]:
topk_items = []
for internal_user_id in list(range(dataset.user_num))[1:]:
    _, topk_iid_list = full_sort_topk([internal_user_id], model, test_data, k=12, device=config['device'])
    last_topk_iid_list = topk_iid_list[-1]
    external_item_list = dataset.id2token(dataset.iid_field, last_topk_iid_list.cpu()).tolist()
    topk_items.append(external_item_list)
print(len(topk_items))



515


In [57]:

import numpy as np
index = np.isin(dataset[dataset.uid_field].numpy(), [3])
input_interaction = dataset[index]
input_interaction



The batch_size of interaction: 88
    user_id, torch.Size([88]), cpu, torch.int64
    item_id, torch.Size([88]), cpu, torch.int64
    timestamp, torch.Size([88]), cpu, torch.float32
    item_length, torch.Size([88]), cpu, torch.int64
    item_id_list, torch.Size([88, 50]), cpu, torch.int64
    timestamp_list, torch.Size([88, 50]), cpu, torch.float32


In [154]:
iid_list = dataset.token2id(dataset.iid_field, ["22457"])
iid_list

array([256])

In [160]:
import torch
from recbole.data.interaction import Interaction
import numpy as np

index = np.isin(dataset[dataset.uid_field].numpy(), [104])
input_interaction = dataset[index]
input_interaction

def _pad_item_sequence_to_max_length(item_sequence):
    # Если последовательность меньше максимальной длины предметов модели
    new_tensor = torch.zeros((1, model.max_seq_length),dtype=torch.int)
    new_tensor[:, :6] = torch.tensor([item_sequence])
    return new_tensor

test = {
            'item_id_list': _pad_item_sequence_to_max_length(iid_list),
            'item_length': torch.tensor([len(iid_list)])
        }

new_inter = Interaction(test)
new_inter = new_inter.to(config['device'])

new_scores = model.full_sort_predict(new_inter)
new_scores = new_scores.view(-1, test_data.dataset.item_num)
new_scores[:, 0] = -np.inf  # set scores of [pad] to -inf

torch.topk(new_scores, 10)

torch.return_types.topk(
values=tensor([[5.5296, 4.9989, 4.8446, 4.7937, 4.7579, 4.7471, 4.6589, 4.4890, 4.4311,
         4.3549]], device='cuda:0', grad_fn=<TopkBackward0>),
indices=tensor([[ 330,  966,  705,  234, 2930,  578,  865, 1996,  511, 1034]],
       device='cuda:0'))

In [167]:
dataset.item_num

3097

In [198]:
def _pad_item_sequence_to_max_length(item_sequence: list):
    # Если последовательность меньше максимальной длины предметов модели
    new_tensor = torch.zeros((1, model.max_seq_length),dtype=torch.int)
    new_tensor[:, :len(item_sequence)] = torch.tensor([item_sequence])
    return new_tensor

def _predict(item_sequence: list, model: BERT4Rec):
    model.eval()
    with torch.no_grad():

        inter = Interaction({
            "item_id_list": _pad_item_sequence_to_max_length(item_sequence),
            "item_length": torch.tensor([len(item_sequence)])
        }).to(model.device)

        scores = model.full_sort_predict(inter)
        scores = scores.view(-1, dataset.item_num)
        scores[:, 0] = -np.inf
    
    item_score, item_inner_iid = torch.topk(scores, 10)

    return item_score, item_inner_iid

iid_list = dataset.token2id(dataset.iid_field, ["72133", "22457", "85049E", "17090D", "84763"])
_, indices = _predict(iid_list, model)

indices = indices.view(10).tolist()
raw_iid = [dataset.id2token(dataset.iid_field, [i for i in indices])]
raw_iid


[array(['84800S', '21669', '22040', '22710', '84763', '22148', '22149',
        '22697', '21500', '22087'], dtype='<U7')]

In [224]:
from recbole.model.abstract_recommender import SequentialRecommender
from recbole.data.interaction import Interaction

from typing import List, Tuple

from dataclasses import dataclass

@dataclass
class Prediction:
    iid: str
    score: float


class RecommendationHandler:
    def __init__(self, dataset, model: SequentialRecommender) -> None:
        self.dataset = dataset
        self.model = model

    def build_recos(self, item_sequence: list, k: int) -> List[Prediction]:
        """
            Generates recommendations based on a sequence of external item indices.

            Args:
                item_sequence: A list of external item indices.

            Returns:
                A list of type Prediction.
        """
        
        inner_ids = self.dataset.token2id(dataset.iid_field, item_sequence)

        scores, inner_iid = self._predict(inner_ids, k)

        scores = scores.view(k).tolist()
        indices = inner_iid.view(k).tolist()
        raw_iid = [self.dataset.id2token(self.dataset.iid_field, indices)]

        return [Prediction(iid, score) for iid, score in list(zip(raw_iid[0], scores))]

    def _predict(self, item_sequence: list, k: int):
        """
            Calculates the top k items for a given sequence of internal indices.

            Args:
                item_sequence: A list of internal indices of items.
                k: amount of items.

            Returns:
                A tuple containing two lists:
                    - The first list contains the internal indices of the top k items.
                    - The second list contains the scores of these items.
        """

        self.model.eval()
        with torch.no_grad():

            inter = self._prepare_interaction_data(item_sequence)

            scores = self.model.full_sort_predict(inter)
            scores = scores.view(-1, self.dataset.item_num)
            scores[:, 0] = -np.inf

        item_score, item_inner_iid = torch.topk(scores, k)
        return item_score, item_inner_iid
    
    def _prepare_interaction_data(self, item_sequence: List[int]) -> Interaction:        
        return Interaction({
            "item_id_list": self._pad_item_sequence_to_max_length(item_sequence),
            "item_length": torch.tensor([len(item_sequence)])
        }).to(self.model.device)        

    def _pad_item_sequence_to_max_length(self, item_sequence: list):
        # Если последовательность меньше максимальной длины предметов модели
        new_tensor = torch.zeros((1, self.model.max_seq_length), dtype=torch.int64)
        new_tensor[:, :len(item_sequence)] = torch.tensor([item_sequence])
        return new_tensor


r_handler = RecommendationHandler(dataset, model)
r = r_handler.build_recos(["72133", "22457", "85049E", "17090D", "84763"], 30)
print(r)

[Prediction(iid='84800S', score=5.516401767730713), Prediction(iid='21669', score=5.496496200561523), Prediction(iid='22040', score=5.183253288269043), Prediction(iid='22710', score=5.145431995391846), Prediction(iid='84763', score=5.137936592102051), Prediction(iid='22148', score=5.118233680725098), Prediction(iid='22149', score=4.9793853759765625), Prediction(iid='22697', score=4.9715166091918945), Prediction(iid='21500', score=4.821472644805908), Prediction(iid='22087', score=4.715353488922119), Prediction(iid='21715', score=4.706779479980469), Prediction(iid='22698', score=4.674909591674805), Prediction(iid='22423', score=4.617501258850098), Prediction(iid='22859', score=4.597131729125977), Prediction(iid='21671', score=4.5643744468688965), Prediction(iid='84800L', score=4.558695316314697), Prediction(iid='84625C', score=4.548460483551025), Prediction(iid='22805', score=4.533363342285156), Prediction(iid='23034', score=4.525113105773926), Prediction(iid='21926', score=4.48974752426