In [None]:
from tqdm import tqdm
import os

import numpy as np
import pandas as pd
import torch

from typing import Dict
from typing import Optional
from typing import Union

# Expose only GPU 0 to this process. The variable is read when CUDA is
# initialised, so it has to be set before any CUDA call is made.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
# Fall back to CPU so the notebook still runs on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

Example of successive (step-by-step) evaluation of SASRec, given the test data, a model, and `topn`.

model_D_sasrec,
model_e,
model_e3,
model_e5

In [198]:
test_sequences # sequences of interactions for each test user

userid
1       [2935, 1160, 1552, 941, 2117, 1633, 3136, 2566...
2       [1090, 1102, 1109, 2479, 1183, 2702, 1117, 108...
3       [573, 2618, 3260, 1762, 1307, 1755, 1156, 1259...
4       [1102, 1008, 3194, 463, 3253, 253, 1088, 1090,...
5       [2479, 832, 843, 1140, 346, 2618, 1033, 1981, ...
                              ...                        
6001    [3464, 3207, 1562, 1544, 2635, 1139, 1242, 889...
6002    [1736, 440, 1740, 2049, 2849, 265, 961, 2898, ...
6016    [3570, 3463, 3507, 1344, 2880, 3601, 27, 2466,...
6028                                               [2755]
6040    [2932, 2348, 1104, 3092, 3148, 1148, 1160, 186...
Name: itemid, Length: 1783, dtype: object

In [None]:
def recommend_sequential(
    model,
    target_seq: Union[list, np.ndarray],
    seen_seq: Union[list, np.ndarray],
    topn: int,
    *,
    user: Optional[int] = None
):
    '''Produce top-n candidates for every step of a target sequence.

    The model is put into eval mode and scored with `predict_sequential`
    on `seen_seq` followed by all but the last target item. Before ranking,
    items that are already known at a given step are excluded: everything
    in `seen_seq` for all steps, plus the targets preceding step `k` for
    step `k`. Item ids are used directly as column indices of the scores.

    Args:
        model: model object; must provide `.eval()` and whatever
            `predict_sequential` requires of it.
        target_seq: ground-truth next items, in order.
        seen_seq: items observed before the first target.
        topn: number of candidates to keep per step.
        user: optional user id, forwarded to `predict_sequential`.

    Returns:
        The result of applying `topidx` to each row of the score matrix,
        one row per prediction step.
    '''
    model.eval()
    # The final target is never fed as input: nothing is predicted after it.
    scores = predict_sequential(model, target_seq[:-1], seen_seq, user=user)

    # Items seen before the first target are excluded at every step.
    scores[:, seen_seq] = -np.inf
    # Each row is a view into `scores`, so the assignment masks in place:
    # at step `step` the targets revealed so far can no longer be recommended.
    for step, step_scores in enumerate(scores[1:], start=1):
        step_scores[target_seq[:step]] = -np.inf

    return np.apply_along_axis(topidx, 1, scores, topn)

def predict_sequential(model, target_seq, seen_seq, user): # example for SASRec
    '''Score all items at every step of a sequence with a SASRec-style model.

    `seen_seq` and `target_seq` are concatenated and left-padded with
    `model.pad_token` so that at least `maxlen` positions precede the
    targets. Overlapping windows of length `maxlen` (shifted by one
    position each) are then passed through `model.log2feats` in one call.
    The first window ends on the last seen item; each following window
    ends on the next item of `target_seq`.

    Args:
        model: object exposing `pad_token`, `log2feats` and `item_emb.weight`.
        target_seq: items appended one by one after `seen_seq`.
        seen_seq: initial item history.
        user: not used by this implementation.

    Returns:
        NumPy array of scores against every row of `model.item_emb.weight`,
        one row per window (presumably `len(target_seq) + 1` rows —
        depends on `log2feats` keeping the batch dimension).

    Reads the notebook-level globals `base_config1` and `device`.
    '''
    context_len = base_config1['maxlen'] # SASRec context length

    seen_count = len(seen_seq)
    target_count = len(target_seq)

    # Left-pad only when the seen history is shorter than the context window.
    left_pad = max(0, context_len - seen_count)
    padded = np.pad(
        np.concatenate([seen_seq, target_seq]),
        (left_pad, 0),
        mode='constant',
        constant_values=model.pad_token,
    )

    with torch.no_grad():
        padded_t = torch.as_tensor(padded, dtype=torch.int64, device=device)
        # Keep just enough of the tail to cut `target_count + 1` windows.
        tail = padded_t[-target_count - context_len:]
        # Stride (1, 1): row i is tail[i : i + context_len], a sliding window view.
        windows = torch.as_strided(tail, (target_count + 1, context_len), (1, 1))
        # Hidden state at the last position of each window.
        last_hidden = model.log2feats(windows)[:, -1, :]
        item_matrix = model.item_emb.weight
        scores = item_matrix.matmul(last_hidden.unsqueeze(-1)).squeeze(-1)

    return scores.detach().cpu().numpy()

def topidx(arr, topn):
    '''Return indices of the `topn` largest values of a 1-D array, best first.

    Args:
        arr: 1-D array-like of scores.
        topn: number of indices to return. Values larger than the array
            size are clamped to it; zero or negative values give an empty
            result.

    Returns:
        1-D integer array of at most `topn` indices into `arr`, ordered by
        descending score.
    '''
    arr = np.asarray(arr)
    # Clamp so argpartition never gets an out-of-bounds kth.
    topn = min(topn, arr.size)
    if topn <= 0:
        # Without this guard `[-0:]` would select the whole array.
        return np.empty(0, dtype=np.intp)
    # Partial selection first, then sort only the selected candidates.
    parted = np.argpartition(arr, -topn)[-topn:]
    return parted[np.argsort(-arr[parted])]

In [241]:
# Models to compare; the dict keys become the column labels of the results table.
models = {
    'SASRec 1': model_D_sasrec,
    'SASRec 2': model_e,
    'SASRec 3': model_e3,
    'SASRec 4': model_e5
}

topn = 10
results_list = []

for model_name, model in models.items():
    cum_hits = 0
    cum_reciprocal_ranks = 0.
    cum_discounts = 0.
    total_count = 0
    user_coverage = []  # per-user share of the catalog that was recommended

    # Loop over each user and test sequence
    for user, test_seq in tqdm(
        test_sequences.items(), total=len(test_sequences), desc=model_name
    ):
        # The first interaction is the known history; the rest are targets.
        seen_seq = test_seq[:1]
        target_seq = test_seq[1:]
        num_predictions = len(target_seq)
        if not num_predictions:  # if no test items left - skip user
            continue

        # Get predicted items
        predicted_items = recommend_sequential(model, target_seq, seen_seq, topn, user=user)

        # Compare each step's recommendations against that step's true item:
        # hit_index is the zero-based rank at which the true item appeared.
        hit_steps, hit_index = np.where(predicted_items == np.atleast_2d(target_seq).T)
        user_coverage.append(
            len(np.unique(predicted_items.ravel())) / data_description_temp['n_items']
        )

        num_hits = hit_index.size
        if num_hits:
            cum_hits += num_hits
            cum_reciprocal_ranks += np.sum(1. / (hit_index + 1))
            # One relevant item per step, so the ideal DCG is 1 and DCG == NDCG.
            cum_discounts += np.sum(1. / np.log2(hit_index + 2))
        total_count += num_predictions

    # evaluation metrics for the current model
    if total_count:
        hr = cum_hits / total_count
        mrr = cum_reciprocal_ranks / total_count
        dcg = cum_discounts / total_count
        # NOTE: COV is the mean per-user coverage, not coverage over all users.
        cov = np.mean(user_coverage)
    else:
        # Every user was skipped: report NaN instead of dividing by zero.
        hr = mrr = dcg = cov = float('nan')

    # Label the column with the model name right away, so the final table
    # does not depend on a separate column-renaming step.
    results_list.append(pd.DataFrame(
        data={model_name: [hr, mrr, dcg, cov]},
        index=[f'{metric}@{topn}' for metric in ['HR', 'MRR', 'NDCG', 'COV']],
    ))

final_results = pd.concat(results_list, axis=1)
final_results

1783it [00:19, 89.48it/s] 
1783it [00:19, 89.35it/s] 
1783it [00:20, 88.88it/s] 
1783it [00:19, 90.45it/s] 


Unnamed: 0,SASRec 1,SASRec 2,SASRec 3,SASRec 4
HR@10,0.239497,0.22102,0.179104,0.028692
MRR@10,0.094344,0.085006,0.067651,0.009201
NDCG@10,0.128065,0.116584,0.093494,0.013647
COV@10,0.067123,0.065069,0.060589,0.005394
