In [1]:
import os
import torch
import random
import datetime
import pandas as pd
import numpy as np

from torch.utils.data import Dataset
from src.datasets import RL4RS, ContentWise, DummyData, OpenCDP
from src.utils import train, get_dummy_data, get_train_val_test_tmatrix_tnumitems, get_svd_encoder
from src.embeddings import RecsysEmbedding

# Tag embedded in the names of the result files written by later cells
# (spelling left untouched so those file names do not change).
experiment_name = 'SlatewiseAttentionAnorherSeed'
# Use the first GPU when one is present; otherwise fall back to the CPU so the
# notebook still runs on a machine without CUDA instead of failing on `.to(device)`.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
seed = 1234
# Directory from which the dataset files are loaded in the cells below.
pkl_path = '../pkl/'

# Seed every random number generator used here: Python, NumPy and PyTorch.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

<torch._C.Generator at 0x7f6bed8b6a50>

In [2]:
# Show the installed PyTorch version (the saved output of this cell reads '1.12.1').
torch.__version__

'1.12.1'

# Модель

In [3]:
# NOTE(review): the alias `F` is not referenced in any of the visible cells;
# it may be a leftover import — confirm before removing.
import torch.nn.functional as F
# Turns autograd anomaly detection on globally, i.e. for every backward pass
# executed after this cell.
# NOTE(review): PyTorch documents this mode as a debugging aid that slows
# execution down — consider switching it off for the long training runs below.
torch.autograd.set_detect_anomaly(True)

class SlatewiseAttention(torch.nn.Module):
    """
    Slate-wise self-attention scorer without any recurrent dependency.

    Every slate is processed on its own: the items of one slate attend to
    each other and nothing is shared between different slates.

    Args:
        embedding: module that is called with the batch and returns the pair
            ``(item_embs, user_embs)``; it must expose ``embedding_dim``.
        nheads: number of attention heads; ``2 * embedding_dim`` has to be
            divisible by it.
        output_dim: number of scores produced for each item.
    """

    def __init__(self, embedding, nheads=2, output_dim=1):
        super().__init__()
        self.embedding_dim = embedding.embedding_dim
        self.embedding = embedding
        # Item and user embeddings are concatenated, hence the doubled width.
        self.attention = torch.nn.MultiheadAttention(
            2 * embedding.embedding_dim,
            num_heads=nheads,
            batch_first=True
        )

        self.out_layer = torch.nn.Linear(2 * embedding.embedding_dim, output_dim)

    def forward(self, batch):
        """
        Score every item of every slate in the batch.

        Args:
            batch: mapping handed to ``self.embedding``; it must also contain
                ``'slates_mask'``, a 3-D mask that is true at positions the
                attention may look at (real items) and false at padding.
                Presumably laid out as (session, step, slate position) —
                TODO confirm against the dataset code.

        Returns:
            Tensor shaped like ``item_embs`` with the last axis replaced by
            ``output_dim``; that axis is squeezed away when ``output_dim == 1``.
        """
        item_embs, user_embs = self.embedding(batch)
        shp = item_embs.shape

        # Let the model attend to the first padding token when a slate is empty:
        # a slate with every key masked would produce NaN gradients. Such slates
        # do not contribute to the metrics computation anyway.
        # `.bool()` keeps the `~` below a logical NOT even if the mask arrives as
        # 0/1 integers; `.clone()` protects the caller's tensor from the
        # in-place write on the next line.
        attend_mask = batch['slates_mask'].bool().clone()
        attend_mask[:, :, 0] = True

        # Broadcast the user embedding over the slate axis (a view, no copy)
        # and append it to every item embedding.
        features = torch.cat(
            [
                item_embs,
                user_embs[:, :, None, :].expand(-1, -1, shp[-2], -1)
            ],
            dim=-1
        )

        # All recommendations in a session are scored independently:
        # fold the two leading axes into a single batch axis of slates.
        features = features.flatten(0, 1)

        # The attention weights are never used, so they are not requested.
        # key_padding_mask marks the positions to IGNORE, hence the inversion.
        features, _ = self.attention(
            features, features, features,
            key_padding_mask=~attend_mask.flatten(0, 1),
            need_weights=False
        )

        # Transform back to the sequence shape; the feature axis is twice as
        # wide as the item embedding because of the concatenation above.
        out_shape = list(shp)
        out_shape[-1] *= 2
        return self.out_layer(features.reshape(out_shape)).squeeze(-1)

# Игрушечный датасет: проверим, что сходится к идеальным метрикам

In [4]:
# Toy dataset, plus the loader and the matrix that get_dummy_data derives from it.
d = DummyData()
dummy_loader, dummy_matrix = get_dummy_data(d)

# Both the embedding and the complete model are explicitly placed on the CPU.
model = SlatewiseAttention(
    RecsysEmbedding(d.n_items, dummy_matrix, embeddings='neural').to('cpu'),
    output_dim=1
).to('cpu')

# The same loader fills all three loader positions (the real-data cells pass
# train_loader, val_loader, test_loader there), so this only checks that the
# model can fit the toy data; the saved output ends with f1 = accuracy = auc = 1.0.
# NOTE(review): the model is created on the CPU while `device` is handed to
# train(); whether train() moves the model is not visible here — confirm.
# The call is the last expression of the cell, so its return value is displayed.
train(
    model, 
    dummy_loader, dummy_loader, dummy_loader,
    device=device, lr=1e-3, num_epochs=5000, dummy=True,
    silent=True,
)


biulding affinity matrix...


3it [00:00, 3839.77it/s]


Test before learning: {'f1': 0.0, 'roc-auc': 0.3333333134651184, 'accuracy': 0.75}


train:   0%|          | 0/5000 [00:00<?, ?it/s]

Val update: epoch: 0 |accuracy: 0.25 | f1: 0.4000000059604645 | auc: 1.0 | treshold: 0.01
Test: accuracy: 0.25 | f1: 0.4000000059604645 | auc: 1.0 | 
Val update: epoch: 1 |accuracy: 0.75 | f1: 0.6666666865348816 | auc: 1.0 | treshold: 0.47000000000000003
Test: accuracy: 0.75 | f1: 0.6666666865348816 | auc: 1.0 | 
Val update: epoch: 6 |accuracy: 1.0 | f1: 1.0 | auc: 1.0 | treshold: 0.55
Test: accuracy: 1.0 | f1: 1.0 | auc: 1.0 | 


(SlatewiseAttention(
   (embedding): RecsysEmbedding(
     (item_embeddings): Embedding(5, 32)
   )
   (attention): MultiheadAttention(
     (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
   )
   (out_layer): Linear(in_features=64, out_features=1, bias=True)
 ),
 {'f1': 1.0, 'roc-auc': 1.0, 'accuracy': 1.0})

# ContentWise

In [5]:
# Collects one metrics dict per embedding type; filled by the training loop
# and written to CSV afterwards.
content_wise_results = []
dataset = ContentWise.load(os.path.join(pkl_path, 'cw.pkl'))
# Split into train / validation / test loaders; the helper also returns a
# user-item matrix and an item count that are later passed to RecsysEmbedding.
(
    train_loader, 
    val_loader, 
    test_loader, 
    train_user_item_matrix, 
    train_num_items 
) = get_train_val_test_tmatrix_tnumitems(dataset, batch_size=150)

# NOTE(review): the message pairs the size of the WHOLE dataset with the number
# of TRAINING batches only (saved output: 20216 points, 108 batches of 150).
print(f"{len(dataset)} data points among {len(train_loader)} batches")

20216 data points among 108 batches


In [6]:
# Train and evaluate one fresh model per embedding type on ContentWise.
for embeddings in ['svd', 'neural']:
    print(f"\nEvaluating {experiment_name} with {embeddings} embeddings")
    
    model = SlatewiseAttention(
        RecsysEmbedding(train_num_items, train_user_item_matrix, embeddings=embeddings),
        output_dim=1
    ).to(device)

    # train() returns a pair; only the second element (the metrics dict) is kept.
    _, metrics = train(
        model, 
        train_loader, val_loader, test_loader, 
        device=device, lr=1e-3, num_epochs=5000, early_stopping=7,
       silent=True, 
    )
    
    # Tag the metrics with the embedding type so the rows of the result table
    # can be told apart.
    metrics['embeddings'] = embeddings
    content_wise_results.append(metrics)


Evaluating SlatewiseAttentionAnorherSeed with svd embeddings
Test before learning: {'f1': 0.16563288867473602, 'roc-auc': 0.5132296085357666, 'accuracy': 0.6031929850578308}


train:   0%|          | 0/5000 [00:00<?, ?it/s]

Val update: epoch: 1 |accuracy: 0.09964753687381744 | f1: 0.1812354177236557 | auc: 0.5591129064559937 | treshold: 0.03
Test: accuracy: 0.09974651783704758 | f1: 0.18139910697937012 | auc: 0.5660356283187866 | 
Val update: epoch: 3 |accuracy: 0.8924036622047424 | f1: 0.16022099554538727 | auc: 0.5834084153175354 | treshold: 0.09999999999999999
Test: accuracy: 0.8926787376403809 | f1: 0.1571594923734665 | auc: 0.5913833379745483 | 
Val update: epoch: 4 |accuracy: 0.1331091672182083 | f1: 0.18575207889080048 | auc: 0.5987430214881897 | treshold: 0.08
Test: accuracy: 0.1486043781042099 | f1: 0.18859662115573883 | auc: 0.6025585532188416 | 
Val update: epoch: 5 |accuracy: 0.8822280168533325 | f1: 0.19300727546215057 | auc: 0.6027600765228271 | treshold: 0.15000000000000002
Test: accuracy: 0.8840959668159485 | f1: 0.19416674971580505 | auc: 0.6119797229766846 | 
Val update: epoch: 6 |accuracy: 0.8933622241020203 | f1: 0.16683949530124664 | auc: 0.6074578166007996 | treshold: 0.16
Test: accu

train:   0%|          | 0/5000 [00:00<?, ?it/s]

Val update: epoch: 0 |accuracy: 0.09964753687381744 | f1: 0.1812354177236557 | auc: 0.5952838659286499 | treshold: 0.01
Test: accuracy: 0.09974651783704758 | f1: 0.18139910697937012 | auc: 0.6013993620872498 | 
Val update: epoch: 1 |accuracy: 0.8213068842887878 | f1: 0.20819446444511414 | auc: 0.6128063797950745 | treshold: 0.16
Test: accuracy: 0.8266850709915161 | f1: 0.21340151131153107 | auc: 0.6268128752708435 | 
Val update: epoch: 2 |accuracy: 0.8592517375946045 | f1: 0.21796132624149323 | auc: 0.6665998697280884 | treshold: 0.16
Test: accuracy: 0.8604230880737305 | f1: 0.22220386564731598 | auc: 0.6713399887084961 | 
Val update: epoch: 3 |accuracy: 0.7276320457458496 | f1: 0.25716927647590637 | auc: 0.6741799116134644 | treshold: 0.17
Test: accuracy: 0.7276055812835693 | f1: 0.2606421411037445 | auc: 0.6769337058067322 | 
Val update: epoch: 4 |accuracy: 0.5843177437782288 | f1: 0.2505649924278259 | auc: 0.6914727687835693 | treshold: 0.16
Test: accuracy: 0.6007619500160217 | f1: 

In [7]:
# Make sure the output directory exists: to_csv does not create missing
# folders, and failing here would lose the results of a long training run.
os.makedirs('results', exist_ok=True)
# One row per embedding type evaluated on ContentWise.
pd.DataFrame(content_wise_results).to_csv(f'results/cw_{experiment_name}.csv')
# Drop the references to the ContentWise data before the next dataset is loaded.
del dataset, train_loader, val_loader, test_loader, train_user_item_matrix, train_num_items

# RL4RS

In [8]:
# Collects one metrics dict per embedding type; filled by the training loop
# and written to CSV afterwards.
rl4rs_results = []
dataset = RL4RS.load(os.path.join(pkl_path, 'rl4rs.pkl'))
# Split into train / validation / test loaders; the helper also returns a
# user-item matrix and an item count that are later passed to RecsysEmbedding.
(
    train_loader, 
    val_loader, 
    test_loader, 
    train_user_item_matrix, 
    train_num_items 
) = get_train_val_test_tmatrix_tnumitems(dataset, batch_size=350)

# NOTE(review): the message pairs the size of the WHOLE dataset with the number
# of TRAINING batches only (saved output: 45942 points, 106 batches of 350).
print(f"{len(dataset)} data points among {len(train_loader)} batches")

45942 data points among 106 batches


In [9]:
# Train and evaluate one fresh model per embedding type on RL4RS.
for embeddings in ['neural','explicit', 'svd',  ]:
    print(f"\nEvaluating {experiment_name} with {embeddings} embeddings")

    # Unlike the other dataset cells, embedding_dim is set explicitly here (40).
    model = SlatewiseAttention(
        RecsysEmbedding(
            train_num_items, 
            train_user_item_matrix, 
            embeddings=embeddings,
            embedding_dim=40
        ),
        output_dim=1
    ).to(device)

    # NOTE(review): `best_model` is not used in any of the visible cells.
    best_model, metrics = train(
        model, 
        train_loader, val_loader, test_loader, 
        device=device, lr=1e-3, num_epochs=5000, early_stopping=7,
        silent=True
    )
    
    # Tag the metrics with the embedding type so the rows of the result table
    # can be told apart.
    metrics['embeddings'] = embeddings
    rl4rs_results.append(metrics)


Evaluating SlatewiseAttentionAnorherSeed with neural embeddings
Test before learning: {'f1': 0.4759460389614105, 'roc-auc': 0.5111886262893677, 'accuracy': 0.4618546664714813}


train:   0%|          | 0/5000 [00:00<?, ?it/s]

Val update: epoch: 0 |accuracy: 0.7756010293960571 | f1: 0.8416182994842529 | auc: 0.8531349897384644 | treshold: 0.44
Test: accuracy: 0.7743198871612549 | f1: 0.8406223058700562 | auc: 0.846225380897522 | 
Val update: epoch: 1 |accuracy: 0.8117350935935974 | f1: 0.864120364189148 | auc: 0.8910277485847473 | treshold: 0.49
Test: accuracy: 0.8101317882537842 | f1: 0.8629094362258911 | auc: 0.8880918025970459 | 
Val update: epoch: 2 |accuracy: 0.8037294745445251 | f1: 0.8636294603347778 | auc: 0.9058480262756348 | treshold: 0.46
Test: accuracy: 0.8068673610687256 | f1: 0.8659247159957886 | auc: 0.9047122001647949 | 
Val update: epoch: 3 |accuracy: 0.8079137206077576 | f1: 0.865876317024231 | auc: 0.9062777757644653 | treshold: 0.48000000000000004
Test: accuracy: 0.8112199306488037 | f1: 0.8681004643440247 | auc: 0.9060571193695068 | 
Val update: epoch: 4 |accuracy: 0.8250374794006348 | f1: 0.8746534585952759 | auc: 0.9136700630187988 | treshold: 0.5
Test: accuracy: 0.828533411026001 | f1

train:   0%|          | 0/5000 [00:00<?, ?it/s]

Val update: epoch: 0 |accuracy: 0.7906931638717651 | f1: 0.8534611463546753 | auc: 0.8760402202606201 | treshold: 0.44
Test: accuracy: 0.7919235825538635 | f1: 0.8544953465461731 | auc: 0.8777675628662109 | 
Val update: epoch: 1 |accuracy: 0.814540684223175 | f1: 0.8620516061782837 | auc: 0.8868635892868042 | treshold: 0.44
Test: accuracy: 0.8220771551132202 | f1: 0.8674759268760681 | auc: 0.8914128541946411 | 
Val update: epoch: 2 |accuracy: 0.8154597878456116 | f1: 0.8578084111213684 | auc: 0.8890950083732605 | treshold: 0.44
Test: accuracy: 0.8200701475143433 | f1: 0.8612763285636902 | auc: 0.8942693471908569 | 
Val update: epoch: 3 |accuracy: 0.8202728033065796 | f1: 0.8624932765960693 | auc: 0.8928254246711731 | treshold: 0.46
Test: accuracy: 0.8241567015647888 | f1: 0.8653832077980042 | auc: 0.898253321647644 | 
Val update: epoch: 4 |accuracy: 0.8149518966674805 | f1: 0.8683019280433655 | auc: 0.8995446562767029 | treshold: 0.45
Test: accuracy: 0.8215935230255127 | f1: 0.87280189

train:   0%|          | 0/5000 [00:00<?, ?it/s]

Val update: epoch: 0 |accuracy: 0.7006723880767822 | f1: 0.8014821410179138 | auc: 0.7732131481170654 | treshold: 0.4
Test: accuracy: 0.7059605717658997 | f1: 0.8044576048851013 | auc: 0.774376392364502 | 
Val update: epoch: 1 |accuracy: 0.7266724705696106 | f1: 0.805017352104187 | auc: 0.7918669581413269 | treshold: 0.41000000000000003
Test: accuracy: 0.7310603260993958 | f1: 0.8072978258132935 | auc: 0.7936662435531616 | 
Val update: epoch: 2 |accuracy: 0.737048327922821 | f1: 0.8111975789070129 | auc: 0.8039286732673645 | treshold: 0.44
Test: accuracy: 0.7409986853599548 | f1: 0.813153088092804 | auc: 0.8051149249076843 | 
Val update: epoch: 3 |accuracy: 0.7462390661239624 | f1: 0.8150537610054016 | auc: 0.8127063512802124 | treshold: 0.44
Test: accuracy: 0.7513238787651062 | f1: 0.818457841873169 | auc: 0.815850019454956 | 
Val update: epoch: 4 |accuracy: 0.7568809390068054 | f1: 0.8116191625595093 | auc: 0.8222173452377319 | treshold: 0.46
Test: accuracy: 0.7565469741821289 | f1: 

In [10]:
# Make sure the output directory exists: to_csv does not create missing
# folders, and failing here would lose the results of a long training run.
os.makedirs('results', exist_ok=True)
# One row per embedding type evaluated on RL4RS.
pd.DataFrame(rl4rs_results).to_csv(f'results/rl4rs_{experiment_name}.csv')
# Drop the references to the RL4RS data before the next dataset is loaded.
del dataset, train_loader, val_loader, test_loader, train_user_item_matrix, train_num_items

# OpenCDP

In [11]:
# The per-file result tables are written here; create the folder up front so a
# finished training run is never lost to a missing directory at save time.
os.makedirs('results', exist_ok=True)

# NOTE(review): the saved output of this cell stops at cosmetics_10_8.pkl with
# "AttributeError: 'OpenCDP' object has no attribute 'item_categorical'".
# The traceback is not visible here; possibly the file was produced by an older
# version of the OpenCDP class — verify before re-running.
for group in ['cosmetics', 'multi']:
    # os.listdir returns entries in arbitrary order. Sorting pins the order in
    # which the files are processed, so the globally seeded random generators
    # are consumed identically on every run.
    for filename in sorted(os.listdir(pkl_path)):
        # Only the files belonging to the current group are evaluated.
        if not filename.startswith(group):
            continue
        # Metrics of every embedding type for this one file.
        result = []
        print(f"\n == {filename} ==")
        dataset = OpenCDP.load(os.path.join(pkl_path, filename))
        (
            train_loader,
            val_loader,
            test_loader,
            train_user_item_matrix,
            train_num_items
        ) = get_train_val_test_tmatrix_tnumitems(dataset, batch_size=800)

        print(f"{len(dataset)} data points among {len(train_loader)} batches")
        for embeddings in ['neural', 'svd']:
            print(f"\nEvaluating {experiment_name} with {embeddings} embeddings")

            # A fresh model for every (file, embedding type) combination.
            model = SlatewiseAttention(
                RecsysEmbedding(train_num_items, train_user_item_matrix, embeddings=embeddings),
                output_dim=1
            ).to(device)

            best_model, metrics = train(
                model,
                train_loader, val_loader, test_loader,
                device=device, lr=1e-3, num_epochs=5000, early_stopping=7,
                silent=True
            )

            print(metrics)
            # Tag the metrics with the embedding type so the rows of the
            # result table can be told apart.
            metrics['embeddings'] = embeddings
            result.append(metrics)
        # The file name (including its extension) is part of the CSV name.
        pd.DataFrame(result).to_csv(f'results/{filename}_{experiment_name}.csv')
        # Drop the references to this file's data before the next one is loaded.
        del dataset, train_loader, val_loader, test_loader, train_user_item_matrix, train_num_items


 == cosmetics_10_8.pkl ==


AttributeError: 'OpenCDP' object has no attribute 'item_categorical'