In [1]:
import torch
import random
import datetime
import pandas as pd
import numpy as np
import os

from torch.utils.data import Dataset
from src.datasets import RL4RS, ContentWise, DummyData
from src.utils import evaluate_model, get_dummy_data, get_train_val_test_tmatrix_tnumitems
from src.embeddings import RecsysEmbedding

# ---- Experiment configuration ----
seed = 7331
experiment_name = 'MatrixFactorization'
pkl_path = '../data/'  # directory the dataset .pkl files are read from
device = 'cuda:0'      # NOTE(review): the cells shown in this notebook pass 'cpu' explicitly instead of this value

# ---- Reproducibility ----
# Seed Python's `random`, NumPy's global RNG and PyTorch with the same value.
np.random.seed(seed)
random.seed(seed)
# Kept as the final expression so the returned torch.Generator stays the cell's displayed value.
torch.manual_seed(seed)

<torch._C.Generator at 0x7f06740bbfb0>

# Модель

In [2]:
class MF(torch.nn.Module):
    """Matrix-factorisation scorer: dot product of item and user embeddings.

    The class defines no parameters of its own; it only wraps ``embedding``.

    Args:
        embedding: Callable (e.g. a ``torch.nn.Module``) that maps a batch to
            an ``(item_embs, user_embs)`` pair of tensors.
    """

    def __init__(self, embedding):
        super().__init__()
        self.embedding = embedding

    def forward(self, batch):
        """Score every candidate item against the matching user embedding.

        Args:
            batch: Passed to ``self.embedding`` unchanged.

        Returns:
            Tensor of dot products taken over the last axis of the embeddings
            (the broadcast shape of both inputs with that axis removed).
        """
        # user_embs is indexed as a 3-D tensor below; item_embs is presumably
        # (batch, sequence, n_items, dim) -- TODO confirm against RecsysEmbedding.
        item_embs, user_embs = self.embedding(batch)

        # Add a singleton item axis and let broadcasting pair each user vector
        # with all of its items. This yields the same values as an explicit
        # ``.repeat`` along that axis, but without materialising the copies.
        scores = item_embs * user_embs[:, :, None, :]
        return scores.sum(-1)
    

# Smoke test: run MF once on the DummyData set.
d = DummyData()
dummy_loader, dummy_matrix = get_dummy_data(d)

# Take the first batch explicitly. Unlike a `for ...: break` loop, this raises
# StopIteration on an empty loader instead of leaving `batch` unbound (or
# silently reusing a stale `batch` from earlier kernel state).
batch = next(iter(dummy_loader))

model = MF(
    RecsysEmbedding(d.n_items, dummy_matrix, embeddings='svd', embedding_dim=2),
).to('cpu')

# Final expression of the cell: displays the resulting score tensor.
model(batch)


3it [00:00, 2653.50it/s]

biulding affinity matrix...





tensor([[[0.0000, 0.0000, 0.0000],
         [0.5769, 0.1154, 0.0000]],

        [[0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000]]])

# ContentWise

In [3]:
# Load the ContentWise dataset and derive its train/val/test loaders, the
# training user-item matrix and the training item count.
# NOTE(review): a '.pkl' path suggests pickle-based loading -- only load trusted files (confirm in ContentWise.load).
content_wise_results = []
c = ContentWise.load(os.path.join(pkl_path, 'cw.pkl'))
(
    c_train_loader,
    c_val_loader,
    c_test_loader,
    c_train_user_item_matrix,
    train_num_items,
) = get_train_val_test_tmatrix_tnumitems(c, batch_size=150)
len(c_train_loader), len(c)

(163, 30461)

In [4]:
# Build MF on SVD embeddings of the ContentWise training matrix and evaluate
# it on the test loader, on CPU.
cw_embedding = RecsysEmbedding(c.n_items, c_train_user_item_matrix, embeddings='svd')
model = MF(cw_embedding).to('cpu')

test_scores = evaluate_model(
    model,
    c_test_loader,
    device='cpu',
    silent=True,
    debug=False,
)
# Tag the metrics with the embedding type used for this run.
test_scores['embeddings'] = 'svd'
test_scores

{'f1': 0.19329693913459778,
 'roc-auc': 0.6267832517623901,
 'accuracy': 0.3119562268257141,
 'embeddings': 'svd'}

In [5]:
# Persist the ContentWise test metrics as a one-row CSV.
# Create the output directory first so a fresh checkout does not fail on a missing 'results/'.
os.makedirs('results', exist_ok=True)
pd.DataFrame([test_scores]).to_csv('results/cw_MatrixFactorization.csv')

# RL4RS

In [6]:
# Load the RL4RS dataset and derive its train/val/test loaders, the training
# user-item matrix and the training item count.
# NOTE(review): a '.pkl' path suggests pickle-based loading -- only load trusted files (confirm in RL4RS.load).
rl4rs_results = []
r = RL4RS.load(os.path.join(pkl_path, 'rl4rs.pkl'))
(
    r_train_loader,
    r_val_loader,
    r_test_loader,
    r_train_user_item_matrix,
    train_num_items,
) = get_train_val_test_tmatrix_tnumitems(r, batch_size=20000)
len(r_train_loader), len(r)

(21, 519435)

In [None]:
# Build MF on SVD embeddings of the RL4RS training matrix and evaluate it on
# the test loader, on CPU.
rl4rs_embedding = RecsysEmbedding(r.n_items, r_train_user_item_matrix, embeddings='svd')
model = MF(rl4rs_embedding).to('cpu')

test_scores = evaluate_model(
    model,
    r_test_loader,
    device='cpu',
    silent=True,
    debug=False,
)
# Tag the metrics with the embedding type used for this run.
test_scores['embeddings'] = 'svd'
test_scores

In [None]:
# Persist the RL4RS test metrics as a one-row CSV.
# Create the output directory first so a fresh checkout does not fail on a missing 'results/'.
os.makedirs('results', exist_ok=True)
pd.DataFrame([test_scores]).to_csv('results/rl4rs_MatrixFactorization.csv')