In [1]:
from recbole.data import Interaction, create_dataset, data_preparation
from recbole.evaluator import Evaluator
from recbole.model.general_recommender import SLIMElastic, ItemKNN
from recbole.config import Config
import numpy as np
from recbole.trainer import Trainer

In [2]:
conf_dict = {
        'metrics': ['Recall', 'Precision', 'GAUC', 'MRR', 'NDCG', 'Hit', 'MAP', 'AveragePopularity',
                    'GiniIndex', 'ShannonEntropy'],
        'epochs':1
    }




config_1 = Config(model='SLIMElastic', dataset='ml-100k', config_dict=conf_dict)
# model_1, train_data, valid_data, test_data = run_recbole(model='LightGCN', dataset='ml-100k', config_dict=conf_dict)

dataset = create_dataset(config_1)
train_data, valid_data, test_data = data_preparation(config_1, dataset)
model_1 = SLIMElastic(config_1, train_data.dataset).to(config_1['device'])
trainer = Trainer(config_1, model_1)
trainer.fit(train_data, valid_data)

# 2. Конфигурация и обучение второй модели
config_2 = Config(model='ItemKNN', dataset='ml-100k', config_dict=conf_dict)
model_2 = ItemKNN(config_2, train_data.dataset).to(config_2['device'])
trainer = Trainer(config_2, model_2)
trainer.fit(train_data, valid_data)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=feat[field].mean(), inplace=True)


(0.3984,
 OrderedDict([('recall@10', 0.2124),
              ('precision@10', 0.1595),
              ('gauc', 0.9056),
              ('mrr@10', 0.3984),
              ('ndcg@10', 0.2334),
              ('hit@10', 0.7423),
              ('map@10', 0.128),
              ('averagepopularity@10', 222.7719),
              ('giniindex@10', 0.9205),
              ('shannonentropy@10', 0.0126)]))

In [18]:
import torch

def get_predictions(model, data_loader, device):
    model.eval()  # Переключение модели в режим оценки
    all_preds = []

    with torch.no_grad():
        for batch_data in data_loader:
            # Извлечение данных из кортежа
            interaction_data = batch_data[0].to(device)  # Замените индекс, если данные находятся в другом месте

            # Получение предсказаний
            batch_preds = model.full_sort_predict(interaction_data).detach().cpu().numpy()
            all_preds.append(batch_preds)

    # Объединение всех предсказаний в один массив
    all_preds = np.concatenate(all_preds, axis=0)
    return all_preds

# Получение предсказаний для первой модели
test_preds_1 = get_predictions(model_1, test_data, config_1['device'])

# Получение предсказаний для второй модели
test_preds_2 = get_predictions(model_2, test_data, config_2['device'])

In [37]:
final_preds = (test_preds_1 + test_preds_2) / 2
train_data.dataset

[1;35mml-100k[0m
[1;34mThe number of users[0m: 944
[1;34mAverage actions of users[0m: 85.69247083775186
[1;34mThe number of items[0m: 1683
[1;34mAverage actions of items[0m: 48.79710144927536
[1;34mThe number of inters[0m: 80808
[1;34mThe sparsity of the dataset[0m: 94.91374361763195%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp', 'label']


1587069

In [40]:
import pandas as pd
evaluator = Evaluator(config_1)

num_users = len(set(train_data.dataset["user_id"].numpy()))
num_items = len(set(train_data.dataset["item_id"].numpy()))
num_items = 1683

print(num_users)
print(num_items)
print(train_data.dataset)


user_ids = np.arange(num_users)
item_ids = np.arange(num_items)

# Преобразование предсказаний в DataFrame (или другой подходящий формат)
# Предположим, что final_preds имеет форму (num_users, num_items)

final_preds = final_preds.reshape((num_users, num_items))

pred_df = pd.DataFrame(final_preds, index=user_ids, columns=item_ids)

# Преобразование DataFrame в формат Interaction
interaction_data = []
for user_id in user_ids:
    for item_id in item_ids:
        interaction_data.append({
            'user': user_id,
            'item': item_id,
            'score': pred_df.at[user_id, item_id]
        })

# Создание Interaction объекта
interaction_df = pd.DataFrame(interaction_data)
test_interaction = Interaction(interaction_df)

# Оценка предсказаний
metrics = evaluator.evaluate(test_interaction, test_data)

# Печать результатов
for metric_name, metric_value in metrics.items():
    print(f'{metric_name}: {metric_value}')

943
1683
[1;35mml-100k[0m
[1;34mThe number of users[0m: 944
[1;34mAverage actions of users[0m: 85.69247083775186
[1;34mThe number of items[0m: 1683
[1;34mAverage actions of items[0m: 48.79710144927536
[1;34mThe number of inters[0m: 80808
[1;34mThe sparsity of the dataset[0m: 94.91374361763195%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp', 'label']


ValueError: [<class 'numpy.float32'>] is not supported!

## V2

In [28]:
from recbole.utils import init_seed, init_logger, get_model
from recbole.config import Config
from recbole.data import create_dataset, data_preparation
from recbole.model.general_recommender import BPR, ItemKNN, LINE
from recbole.trainer import Trainer

config = Config(model='BPR', dataset='ml-100k')
init_seed(config['seed'], config['reproducibility'])
init_logger(config)
dataset = create_dataset(config)
train_data, valid_data, test_data = data_preparation(config, dataset)

# Модель BPR
config['epochs'] = 50
bpr_model = BPR(config, train_data.dataset).to(config['device'])
trainer = Trainer(config, bpr_model)
trainer.fit(train_data)
print(bpr_model)

# Модель NeuMF
config['epochs'] = 1
config['model'] = 'ItemKNN'
config['k'] = 50
neumann_model = ItemKNN(config, train_data.dataset).to(config['device'])
trainer = Trainer(config, neumann_model)
trainer.fit(train_data)


# Модель LightGCN
config['model'] = 'LINE'
config['embedding_size'] = 512
config['order'] = 1
config['second_order_loss_weight'] = 0.5
config['epochs'] = 50
lightgcn_model = LINE(config, train_data.dataset).to(config['device'])
trainer = Trainer(config, lightgcn_model)
trainer.fit(train_data)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=feat[field].mean(), inplace=True)
10 Aug 19:20    INFO  [Training]: train_batch_size = [2048] train_neg_sample_args: [{'distribution': 'uniform', 'sample_num': 1, 'a

BPR(
  (user_embedding): Embedding(944, 64)
  (item_embedding): Embedding(1683, 64)
  (loss): BPRLoss()
)
Trainable parameters: 168128


10 Aug 19:21    INFO  epoch 0 training [time: 0.06s, train loss: 0.0000]
10 Aug 19:21    INFO  Saving current: saved/ItemKNN-Aug-10-2024_19-21-06.pth
10 Aug 19:21    INFO  epoch 0 training [time: 0.39s, train loss: 55.4362]
10 Aug 19:21    INFO  Saving current: saved/LINE-Aug-10-2024_19-21-07.pth
10 Aug 19:21    INFO  epoch 1 training [time: 0.39s, train loss: 54.8793]
10 Aug 19:21    INFO  Saving current: saved/LINE-Aug-10-2024_19-21-07.pth
10 Aug 19:21    INFO  epoch 2 training [time: 0.39s, train loss: 53.5795]
10 Aug 19:21    INFO  Saving current: saved/LINE-Aug-10-2024_19-21-07.pth
10 Aug 19:21    INFO  epoch 3 training [time: 0.39s, train loss: 50.2295]
10 Aug 19:21    INFO  Saving current: saved/LINE-Aug-10-2024_19-21-07.pth
10 Aug 19:21    INFO  epoch 4 training [time: 0.40s, train loss: 46.2754]
10 Aug 19:21    INFO  Saving current: saved/LINE-Aug-10-2024_19-21-07.pth
10 Aug 19:21    INFO  epoch 5 training [time: 0.40s, train loss: 43.6595]
10 Aug 19:21    INFO  Saving current

(-inf, None)

In [33]:
import torch
def ensemble_predict(models, test_data):
    predictions = None
    for model in models:
        # pred = model.full_sort_predict(test_data).cpu().numpy()
        model.eval()
        with torch.no_grad():  # Отключение градиентов
            # Получение предсказаний
            data_loader = model.full_sort_predict(test_data)

            # Предсказания могут быть собраны из data_loader
            all_predictions = []
            for batch in data_loader:
                predictions = batch.cpu().numpy()  # Преобразование в numpy
                all_predictions.append(predictions)

        # Объединение всех предсказаний
        all_predictions = np.concatenate(all_predictions, axis=0)
    #     # Преобразование тензоров в numpy-массив
    #     predictions = pred.cpu().numpy()
    #     if predictions is None:
    #         predictions = pred
    #     else:
    #         predictions += pred
    # predictions /= len(models)
    #return predictions

# Список обученных моделей
models = [bpr_model, neumann_model, lightgcn_model]
ensemble_preds = ensemble_predict(models, test_data)

TypeError: 'FullSortEvalDataLoader' object is not subscriptable

OSError: dlopen(/opt/homebrew/anaconda3/envs/dis/lib/python3.10/site-packages/ray/_raylet.so, 0x000A): tried: '/opt/homebrew/anaconda3/envs/dis/lib/python3.10/site-packages/ray/_raylet.so' (mach-o file, but is an incompatible architecture (have 'x86_64', need 'arm64e' or 'arm64')), '/System/Volumes/Preboot/Cryptexes/OS/opt/homebrew/anaconda3/envs/dis/lib/python3.10/site-packages/ray/_raylet.so' (no such file), '/opt/homebrew/anaconda3/envs/dis/lib/python3.10/site-packages/ray/_raylet.so' (mach-o file, but is an incompatible architecture (have 'x86_64', need 'arm64e' or 'arm64'))