In [1]:
import os
# Expose only GPU index 3 to this process. This is set here, before torch is imported in the next cell.
os.environ['CUDA_VISIBLE_DEVICES'] = '3'

import sys
# Put the parent directory on the import path; presumably this is what lets the `src.*` imports below resolve.
sys.path.append('../')

In [2]:
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import torch
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint, ModelSummary
from torch.utils.data import DataLoader

from src.datasets import (CausalLMDataset, CausalLMPredictionDataset, MaskedLMDataset,
                          MaskedLMPredictionDataset, PaddingCollateFn)
from src.metrics import compute_metrics
from src.models import RNN, BERT4Rec, SASRec
from src.modules import SeqRec, SeqRecWithSampling
from src.postprocess import preds2recs
from src.preprocess import add_time_idx
from src.unbiased_metrics import get_metrics, hr, mrr, ndcg


libgomp: Invalid value for environment variable OMP_NUM_THREADS

libgomp: Invalid value for environment variable OMP_NUM_THREADS


## Load data

In [3]:
# Read the seven ML-1M tables in a fixed order: the train/test/validation splits first,
# then the per-split user-history tables that the evaluation datasets are built from.
(train, test, val_1, val_2,
 test_users_history, val_users_history_1, val_users_history_2) = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/ml-1m/train.csv',
        '../data/ml-1m/test.csv',
        '../data/ml-1m/val_1.csv',
        '../data/ml-1m/val_2.csv',
        '../data/ml-1m/test_users_history.csv',
        '../data/ml-1m/val_users_history_1.csv',
        '../data/ml-1m/val_users_history_2.csv',
    )
)

In [6]:
# Name of the item-identifier column in the interaction tables.
ITEM_COL = 'item_id'

# Interactions whose RELEVANCE_COL value is >= RELEVANCE_THRESHOLD are kept as positives
# by the filtering cell that follows.
RELEVANCE_COL, RELEVANCE_THRESHOLD = 'rating', 3.5

In [7]:
# Keep only interactions rated at or above the relevance threshold, in the training data
# and in every history table that is later fed to the model.
train, val_users_history_1, val_users_history_2, test_users_history = (
    interactions[interactions[RELEVANCE_COL] >= RELEVANCE_THRESHOLD]
    for interactions in (train, val_users_history_1, val_users_history_2, test_users_history)
)

In [16]:
def filter_users_by_history_len(clickstream_df, user_col, min_items_per_user=2):
    """Drop users whose interaction history is too short.

    Parameters
    ----------
    clickstream_df : pandas.DataFrame
        Interaction log with one row per (user, item) event.
    user_col : str
        Name of the column that identifies the user. This column is used both
        for counting interactions and for selecting the surviving rows.
    min_items_per_user : int, default 2
        A user is kept only when they have strictly MORE than this many rows
        (the comparison is ``>``, not ``>=``).

    Returns
    -------
    pandas.DataFrame
        The rows of ``clickstream_df`` that belong to the retained users, in
        their original order.
    """
    # Count rows per user on the caller-supplied column (previously hard-coded to 'user_id',
    # which broke or miscounted for any other column name).
    interactions_per_user = clickstream_df[user_col].value_counts()
    appropriate_users = interactions_per_user[interactions_per_user > min_items_per_user].index
    return clickstream_df[clickstream_df[user_col].isin(appropriate_users)]

In [18]:
# Only the training interactions are filtered by history length.
train = filter_users_by_history_len(clickstream_df=train, user_col='user_id')

# The same filter is left disabled for the validation/test histories
# (the reason is not recorded in this notebook):
#val_users_history_1 = filter_users_by_history_len(val_users_history_1, 'user_id')
#val_users_history_2 = filter_users_by_history_len(val_users_history_2, 'user_id')
#test_users_history = filter_users_by_history_len(test_users_history, 'user_id')

## Dataloaders

In [19]:
# Maximum sequence length passed to the train and evaluation datasets.
MAX_LENGTH = 200

# Optional cap on the number of evaluation users.
# NOTE(review): not passed to get_eval_dataset in the visible cells.
VALIDATION_SIZE = 10000
# VALIDATION_SIZE = None

# Batch sizes for the training loader and for the validation/test loaders.
BATCH_SIZE = TEST_BATCH_SIZE = 64
# Worker processes used by every DataLoader.
NUM_WORKERS = 8

In [20]:
def get_eval_dataset(validation, validation_size=None, max_length=200):
    """Build a ``CausalLMPredictionDataset`` from a user-history table.

    Parameters
    ----------
    validation : pandas.DataFrame
        History table; must expose a ``user_id`` column (used for the optional
        subsampling) and a ``test_user_idx`` column (passed as ``user_col``).
    validation_size : int or None, default None
        When truthy and smaller than the number of distinct ``user_id`` values,
        that many users are drawn without replacement via ``np.random.choice``
        (global NumPy random state); otherwise all users are kept.
    max_length : int, default 200
        Forwarded to the dataset as ``max_length``.

    Returns
    -------
    CausalLMPredictionDataset
        Dataset constructed with ``validation_mode=True``.
    """
    users = validation.user_id.unique()

    needs_subsample = bool(validation_size) and validation_size < len(users)
    if needs_subsample:
        users = np.random.choice(users, size=validation_size, replace=False)

    selected_rows = validation[validation.user_id.isin(users)]
    return CausalLMPredictionDataset(
        selected_rows,
        max_length=max_length,
        user_col='test_user_idx',
        validation_mode=True)

In [21]:
%%time
# Training dataset built from the filtered `train` interactions.
train_dataset = CausalLMDataset(train, user_col='user_id', max_length=MAX_LENGTH, num_negatives=3000)

# One evaluation dataset per history table. `validation_size` keeps its default (None),
# so no user subsampling takes place.
val_1_dataset, val_2_dataset, test_dataset = (
    get_eval_dataset(history, max_length=MAX_LENGTH)
    for history in (val_users_history_1, val_users_history_2, test_users_history)
)

# Settings shared by the three evaluation loaders (fixed order, evaluation batch size).
eval_loader_kwargs = dict(batch_size=TEST_BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

# Only the training loader shuffles; every loader gets its own PaddingCollateFn instance.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
    collate_fn=PaddingCollateFn(),
)
val_1_loader = DataLoader(val_1_dataset, collate_fn=PaddingCollateFn(), **eval_loader_kwargs)
val_2_loader = DataLoader(val_2_dataset, collate_fn=PaddingCollateFn(), **eval_loader_kwargs)
test_loader = DataLoader(test_dataset, collate_fn=PaddingCollateFn(), **eval_loader_kwargs)

CPU times: user 6.24 s, sys: 872 ms, total: 7.11 s
Wall time: 7.1 s


In [22]:
# Smoke test: pull one batch from the training loader and print the shape of its `input_ids`.
# `batch` is reused by the model forward-pass check further down.
batch = next(iter(train_loader))
print(batch['input_ids'].shape)

torch.Size([64, 200])


## Model

In [23]:
# Hyper-parameters unpacked into the SASRec constructor.
SASREC_CONFIG = dict(
    maxlen=200,  # same value as MAX_LENGTH used to build the datasets; keep the two in sync
    hidden_units=128,
    num_blocks=2,
    num_heads=1,
    dropout_rate=0.1,
)

In [24]:
# No extra head on the model.
add_head = False

# The largest item id seen in the (filtered) training data is passed as `item_num`.
item_count = train[ITEM_COL].max()

model = SASRec(
    item_num=item_count,
    add_head=add_head,
    **SASREC_CONFIG,
)

In [25]:
# Smoke test: run the untrained model on the sample batch; the bare `out.shape` displays the output shape.
out = model(batch['input_ids'], batch['attention_mask'])
out.shape

torch.Size([64, 200, 128])

## Train

In [26]:
# Lightning module wrapping the model; it predicts the top 10 items per user.
seqrec_module = SeqRecWithSampling(model, lr=0.001, predict_top_k=10, filter_seen=True)

# Stop once "val_ndcg" has not improved for 10 validation runs.
# The metric name is presumably logged by SeqRecWithSampling; confirm against src.modules.
early_stopping = EarlyStopping(monitor="val_ndcg", mode="max", patience=10, verbose=False)

model_summary = ModelSummary(max_depth=2)

# Keep only the single best checkpoint by "val_ndcg"; weights only, no optimizer state.
checkpoint = ModelCheckpoint(save_top_k=1, monitor="val_ndcg",
                             mode="max", save_weights_only=True)
callbacks = [early_stopping, model_summary, checkpoint]

# `accelerator="gpu", devices=1` replaces the deprecated `gpus=1` argument,
# which newer PyTorch Lightning releases no longer accept.
trainer = pl.Trainer(callbacks=callbacks, enable_checkpointing=True,
                     accelerator="gpu", devices=1, max_epochs=100)

# Validation during training uses the first validation split only.
trainer.fit(model=seqrec_module,
            train_dataloaders=train_loader,
            val_dataloaders=val_1_loader)

Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [3]

  | Name                       | Type       | Params
----------------------------------------------------------
0 | model                      | SASRec     | 731 K 
1 | model.item_emb             | Embedding  | 505 K 
2 | model.pos_emb              | Embedding  | 25.6 K
3 | model.emb_dropout          | Dropout    | 0     
4 | model.attention_layernorms | ModuleList | 512   
5 | model.attention_layers     | ModuleList | 132 K 
6 | model.forward_layernorms   | ModuleList | 512   
7 | model.forward_layers       | ModuleList | 66.0 K
8 | model.last_layernorm       | LayerNorm  | 256   
----------------------------------------------

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

In [27]:
# Restore the weights of the best checkpoint tracked by the ModelCheckpoint callback
# (the one with the highest monitored "val_ndcg").
seqrec_module.load_state_dict(torch.load(checkpoint.best_model_path)['state_dict'])

<All keys matched successfully>

## Evaluation

In [30]:
# Predict the top 10 items for every user in the second validation loader.
seqrec_module.predict_top_k = 10
preds = trainer.predict(model=seqrec_module, dataloaders=val_2_loader)

# Convert the raw predict output into a table; the displayed head shows
# user_id / item_id / prediction columns.
preds_val = preds2recs(preds)
print(preds_val.shape)
preds_val.head()

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [3]


Predicting: 94it [00:00, ?it/s]

(239280, 3)


Unnamed: 0,user_id,item_id,prediction
0,0,333,7.286721
1,0,2683,7.142874
2,0,2012,6.509565
3,0,3398,6.385214
4,0,2723,6.380074


In [40]:
# Predict the top 10 items for every user in the test loader (this rebinds `preds`).
seqrec_module.predict_top_k = 10
preds = trainer.predict(model=seqrec_module, dataloaders=test_loader)

# Convert the raw predict output into a table; the displayed head shows
# user_id / item_id / prediction columns.
preds_test = preds2recs(preds)
print(preds_test.shape)
preds_test.head()

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [3]


Predicting: 94it [00:00, ?it/s]

(478600, 3)


Unnamed: 0,user_id,item_id,prediction
0,0,3928,7.497638
1,0,3827,6.371521
2,0,3555,6.324975
3,0,3481,5.819982
4,0,3873,5.788095


In [44]:
# Collapse the recommendations to one row per user (list of recommended items in `pred_items`),
# then left-join the val_2 table on `test_user_idx`.
# NOTE(review): this rebinds `preds_val` to a differently shaped frame, so the cell should not be
# re-run without first re-running the validation prediction cell.
preds_val = (preds_val
              .rename(columns={'user_id': 'test_user_idx', 'item_id': 'pred_items'})
              .groupby('test_user_idx')['pred_items']
              .apply(list).reset_index()
              .merge(val_2, on='test_user_idx', how='left'))

In [42]:
# Collapse the recommendations to one row per user (list of recommended items in `pred_items`),
# then left-join the test table on `test_user_idx`.
# NOTE(review): this rebinds `preds_test` to a differently shaped frame, so the cell should not be
# re-run without first re-running the test prediction cell.
preds_test = (preds_test
              .rename(columns={'user_id': 'test_user_idx', 'item_id': 'pred_items'})
              .groupby('test_user_idx')['pred_items']
              .apply(list).reset_index()
              .merge(test, on='test_user_idx', how='left'))

In [47]:
# get_metrics takes the merged test and validation prediction frames and returns a metrics
# table plus a scalar `beta`. The meaning of `beta` is defined in src.unbiased_metrics and is
# not visible in this notebook.
metrics_df, beta = get_metrics(preds_test, preds_val)

In [48]:
# Display the metrics table returned by get_metrics.
metrics_df

Unnamed: 0,type,HR,MRR,nDCG
0,Biased,0.141948,0.051096,0.072087
1,Unbiased,0.229956,0.13913,0.135311
2,Unbiased_feedback_sampling,0.709398,0.271922,0.093481


In [49]:
# Display the beta value returned by get_metrics.
beta

0.102567183916159

In [63]:
from src.unbiased_metrics import confusion_matrix_metrics

In [64]:
# Lower-case aliases of the relevance constants defined near the top of the notebook
# (same values: 'rating' and 3.5), used by the confusion-matrix cell.
relevance_col = RELEVANCE_COL
relevance_threshold = RELEVANCE_THRESHOLD

In [60]:
# Split the merged prediction frames by the relevance threshold.
# `preds_test_pos` is not used in this cell; it is rebuilt in a later cell.
preds_test_pos = preds_test[preds_test[relevance_col] >= relevance_threshold]
preds_val_neg = preds_val[preds_val[relevance_col] < relevance_threshold]
preds_val_pos = preds_val[preds_val[relevance_col] >= relevance_threshold]

# NOTE(review): `user_col` and `item_col` are not defined in any visible cell, so this cell
# depends on leftover kernel state and will raise NameError on a fresh Restart & Run All.
# Define them explicitly once the intended columns are confirmed.
# The first call's results are bound to tp/fn (relevant rows), the second call's to fp/tn (non-relevant rows).
tp, fn = confusion_matrix_metrics(preds_val_pos, user_col, item_col)
fp, tn = confusion_matrix_metrics(preds_val_neg, user_col, item_col)

In [61]:
# tp / (tp + fp): precision, assuming tp and fp are the usual confusion-matrix counts.
tp / (tp + fp)

0.7197501951600312

In [62]:
# tp / (tp + fn): recall, assuming tp and fn are the usual confusion-matrix counts.
tp / (tp + fn)

0.14250386398763523

In [50]:
# Test rows whose rating reaches the relevance threshold; reuses the notebook-level
# constants ('rating', 3.5) instead of repeating the literals.
is_relevant = preds_test[RELEVANCE_COL] >= RELEVANCE_THRESHOLD
preds_test_pos = preds_test[is_relevant]

In [55]:
# HR on the relevant test rows with feedback sampling; the displayed result is a 2-tuple,
# presumably (value, confidence-interval term). Verify against src.unbiased_metrics.
hr(preds_test_pos, beta=beta, sample_feedback=True, return_confidence_interval=True)

(0.709398, 0.0012524292192671593)

In [56]:
# MRR on the relevant test rows with feedback sampling; the displayed result is a 2-tuple,
# presumably (value, confidence-interval term). Verify against src.unbiased_metrics.
mrr(preds_test_pos, beta=beta, sample_feedback=True, return_confidence_interval=True)

(0.271922, 0.001053596950251534)

In [57]:
# nDCG on the relevant test rows with feedback sampling; the displayed result is a 2-tuple,
# presumably (value, confidence-interval term). Verify against src.unbiased_metrics.
ndcg(preds_test_pos, beta=beta, sample_feedback=True, return_confidence_interval=True)

(0.093481, 6.998192142231019e-05)