## Load functionality

In [1]:
import datetime as dt
import os
import shutil
import gc
from pathlib import Path
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
# from torch.utils.tensorboard import SummaryWriter
import random
import polars as pl
import numpy as np
import time
from utils._constants import *
from utils._behaviors import (
    create_binary_labels_column,
    sampling_strategy_wu2019,
    add_prediction_scores,
    truncate_history,
    ebnerd_from_path,
)
from evaluation import MetricEvaluator, AucScore, NdcgScore, MrrScore
from utils._python import (
    write_submission_file,
    rank_predictions_by_score,
    write_json_file,
)
from utils._articles import create_article_id_to_value_mapping
from utils._polars import split_df_chunks

from models.model_config import (
    hparams_nrms_docvec,
    hparams_to_dict,
    print_hparams,
)
from models.nrms_docvec import NRMSDocVec  
torch.set_default_dtype(torch.float32)

from models.nrms_docvec import NRMSDocVec
from args_nrms_docvec import get_args

os.environ["TOKENIZERS_PARALLELISM"] = "false"



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
print("PyTorch version:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("CUDA version:", torch.version.cuda)
    print("cuDNN version:", torch.backends.cudnn.version())
    print("GPU Name:", torch.cuda.get_device_name(0))

PyTorch version: 2.5.1+cu118
CUDA available: True
CUDA version: 11.8
cuDNN version: 90100
GPU Name: NVIDIA GeForce RTX 3060 Laptop GPU


## Load ARGS

In [3]:
def set_seed(seed):
    if seed is not None:
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        np.random.seed(seed)
        random.seed(seed)

In [4]:
from models.dataloader import NRMSDataLoader, NRMSDataLoaderPretransform
from pathlib import Path

CURRENT_DIR = Path(os.getcwd())  # Get the current working directory


PATH = Path(".../Dataset").resolve() 
SEED = None  
DATASPLIT = "ebnerd_demo"
DEBUG = False
BS_TRAIN = 32
BS_TEST = 32
BATCH_SIZE_TEST_WO_B = 32
BATCH_SIZE_TEST_W_B = 4
HISTORY_SIZE = 20
NPRATIO = 4
EPOCHS = 5
TRAIN_FRACTION = 1.0 if not DEBUG else 0.0001
FRACTION_TEST = 0.001 if not DEBUG else 0.0001
DOC_VEC_PATH = "Dataset\\contrastive_vector.parquet"


NRMSLoader_training = NRMSDataLoaderPretransform



model_func = "NRMSDocVec"  
hparams = {
    "title_size": 768,
    "history_size": 20,
    "head_num": 16,
    "head_dim": 16,
    "attention_hidden_dim": 200,
    "newsencoder_units_per_layer": [512, 512, 512],
    "optimizer": "adam",
    "loss": "cross_entropy_loss",
    "dropout": 0.2,
    "learning_rate": 1e-4,
    "newsencoder_l2_regularization": 1e-4,
}



df_articles = pl.read_parquet(DOC_VEC_PATH)


def create_article_id_to_value_mapping(df, value_col):
    return {row[0]: row[1] for row in df.select([df.columns[0], value_col]).iter_rows()}

article_mapping = create_article_id_to_value_mapping(
    df=df_articles, value_col=df_articles.columns[-1]
)

In [5]:
DUMP_DIR = PATH.joinpath(PATH, "DUMP")
DUMP_DIR.mkdir(exist_ok=True, parents=True)

DT_NOW = dt.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
MODEL_NAME = model_func
MODEL_OUTPUT_NAME = f"{MODEL_NAME}-{DT_NOW}"

ARTIFACT_DIR = DUMP_DIR.joinpath("test_predictions", MODEL_OUTPUT_NAME)
ARTIFACT_DIR.mkdir(parents=True, exist_ok=True)

MODEL_WEIGHTS = DUMP_DIR.joinpath(f"state_dict/{MODEL_OUTPUT_NAME}/weights.pt")
MODEL_WEIGHTS.parent.mkdir(parents=True, exist_ok=True)
LOG_DIR = DUMP_DIR.joinpath(f"runs/{MODEL_OUTPUT_NAME}")

TEST_CHUNKS_DIR = ARTIFACT_DIR.joinpath("test_chunks")
TEST_CHUNKS_DIR.mkdir(parents=True, exist_ok=True)

N_CHUNKS_TEST = 1
CHUNKS_DONE = 0

In [6]:
# # We just want to load the necessary columns
COLUMNS = [
    DEFAULT_IMPRESSION_TIMESTAMP_COL,
    DEFAULT_HISTORY_ARTICLE_ID_COL,
    DEFAULT_INVIEW_ARTICLES_COL,
    DEFAULT_CLICKED_ARTICLES_COL,
    DEFAULT_IMPRESSION_ID_COL,
    DEFAULT_USER_COL,
]

In [7]:
params_dict = {
    "data_path": str(PATH), 
    "seed": SEED,
    "datasplit": DATASPLIT,
    "debug": DEBUG,
    "bs_train": BS_TRAIN,
    "bs_test": BS_TEST,
    "batch_size_test_wo_b": BATCH_SIZE_TEST_WO_B,
    "batch_size_test_w_b": BATCH_SIZE_TEST_W_B,
    "history_size": HISTORY_SIZE,
    "npratio": NPRATIO,
    "epochs": EPOCHS,
    "train_fraction": TRAIN_FRACTION,
    "fraction_test": FRACTION_TEST,
    "nrms_loader": str(NRMSLoader_training), 
    "document_embeddings": str(DOC_VEC_PATH),  
    "title_size": hparams["title_size"],
    "head_num": hparams["head_num"],
    "head_dim": hparams["head_dim"],
    "attention_hidden_dim": hparams["attention_hidden_dim"],
    "newsencoder_units_per_layer": hparams["newsencoder_units_per_layer"],
    "optimizer": hparams["optimizer"],
    "loss": hparams["loss"],
    "dropout": hparams["dropout"],
    "learning_rate": hparams["learning_rate"],
    "newsencoder_l2_regularization": hparams["newsencoder_l2_regularization"],
}



write_json_file(params_dict, ARTIFACT_DIR.joinpath(f"{MODEL_NAME}_argparser.json"))


In [8]:

df = (
    pl.concat(
        [
            ebnerd_from_path(
                PATH.joinpath(DATASPLIT, "train"),
                history_size=HISTORY_SIZE,
                padding=0,
            ),
            ebnerd_from_path(
                PATH.joinpath(DATASPLIT, "validation"),
                history_size=HISTORY_SIZE,
                padding=0,
            ),
        ]
    )
    .sample(fraction=TRAIN_FRACTION, shuffle=True, seed=SEED)
    .select(COLUMNS)
    .pipe(
        sampling_strategy_wu2019,
        npratio=NPRATIO,
        shuffle=True,
        with_replacement=True,
        seed=SEED,
    )
    .pipe(create_binary_labels_column)
)

In [9]:
train_df = ebnerd_from_path(
    PATH.joinpath(DATASPLIT, "train"),
    history_size=HISTORY_SIZE,
    padding=0,
)

validation_df = ebnerd_from_path(
    PATH.joinpath(DATASPLIT, "validation"),
    history_size=HISTORY_SIZE,
    padding=0,
)

print(train_df.columns)
print(validation_df.columns)


['impression_id', 'article_id', 'impression_time', 'read_time', 'scroll_percentage', 'device_type', 'article_ids_inview', 'article_ids_clicked', 'user_id', 'is_sso_user', 'gender', 'postcode', 'age', 'is_subscriber', 'session_id', 'next_read_time', 'next_scroll_percentage', 'article_id_fixed']
['impression_id', 'article_id', 'impression_time', 'read_time', 'scroll_percentage', 'device_type', 'article_ids_inview', 'article_ids_clicked', 'user_id', 'is_sso_user', 'gender', 'postcode', 'age', 'is_subscriber', 'session_id', 'next_read_time', 'next_scroll_percentage', 'article_id_fixed']


In [10]:
print("COLUMNS:", COLUMNS)
print("Duplicate columns in COLUMNS:", [col for col in COLUMNS if COLUMNS.count(col) > 1])


COLUMNS: ['impression_time', 'article_id_fixed', 'article_ids_inview', 'article_ids_clicked', 'impression_id', 'user_id']
Duplicate columns in COLUMNS: []


In [11]:
last_dt = df[DEFAULT_IMPRESSION_TIMESTAMP_COL].dt.date().max() - dt.timedelta(days=1)
df_train = df.filter(pl.col(DEFAULT_IMPRESSION_TIMESTAMP_COL).dt.date() < last_dt)
df_validation = df.filter(pl.col(DEFAULT_IMPRESSION_TIMESTAMP_COL).dt.date() >= last_dt)


In [12]:
print(f"NRMSLoader_training type: {type(NRMSLoader_training)}")

NRMSLoader_training type: <class 'type'>


In [13]:
train_dataset = NRMSLoader_training(
    behaviors=df_train,
    article_dict=article_mapping,
    unknown_representation="zeros",
    history_column=DEFAULT_HISTORY_ARTICLE_ID_COL,
    eval_mode=False,
    batch_size=BS_TRAIN,
)
val_dataset = NRMSLoader_training(
    behaviors=df_validation,
    article_dict=article_mapping,
    unknown_representation="zeros",
    history_column=DEFAULT_HISTORY_ARTICLE_ID_COL,
    eval_mode=False,
    batch_size=BS_TRAIN,
)

In [14]:
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=None, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=None, shuffle=False)


In [15]:

model = NRMSDocVec(hparams=hparams_nrms_docvec, seed=42)  
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.model.to(device)
model.scorer.to(device)
optimizer = model.optimizer 
criterion = model.criterion  

In [16]:

class EarlyStopping:
    def __init__(self, patience=4, mode='max', restore_best_weights=True):
        self.patience = patience
        self.mode = mode
        self.restore_best_weights = restore_best_weights
        self.best_score = None
        self.counter = 0
        self.best_state_dict = None

    def step(self, score, model):
        if self.best_score is None:
            self.best_score = score
            self.best_state_dict = {k: v.cpu().clone() for k, v in model.state_dict().items()}
            return False
        improve = (score > self.best_score) if self.mode == 'max' else (score < self.best_score)
        if improve:
            self.best_score = score
            self.counter = 0
            self.best_state_dict = {k: v.cpu().clone() for k, v in model.state_dict().items()}
            return False
        else:
            self.counter += 1
            if self.counter >= self.patience:
                if self.restore_best_weights:
                    model.load_state_dict(self.best_state_dict)
                return True
            return False
# early_stopping = EarlyStopping(monitor="val_auc", mode="max", patience=4, restore_best_weights=True)
early_stopping = EarlyStopping(patience=4, mode="max", restore_best_weights=True)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.2, patience=2, min_lr=1e-6)

def analyze_time_impact(model, test_loader, device):
    """time"""
    model.eval()
    time_effects = []
    
    with torch.no_grad():
        for batch in test_loader:
            if len(batch[0]) == 4:
                (his_input, pred_input, his_time, pred_time), _ = batch

                pred_with_time = model(his_input, pred_input, his_time, pred_time)
                pred_without_time = model(his_input, pred_input)
                
                time_effect = torch.abs(pred_with_time - pred_without_time).mean()
                time_effects.append(time_effect.item())
    
    return np.mean(time_effects)

In [17]:
early_stopping = EarlyStopping(patience=4, mode="max", restore_best_weights=True)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.2, patience=2, min_lr=1e-6)


In [18]:
def compute_auc(model, dataloader, device):

    model.scorer.eval()
    all_scores = []
    all_labels = []
    
    with torch.no_grad():
        progress_bar = tqdm(dataloader, desc="Computing validation metrics", 
                          dynamic_ncols=True, leave=True)
        
        for (his_input_title, pred_input_title), batch_y in progress_bar:
            his_input_title = his_input_title.to(dtype=torch.float32, device=device)
            pred_input_title = pred_input_title.to(dtype=torch.float32, device=device)
            batch_y = batch_y.to(dtype=torch.float32, device=device)
            

            pred_input_title_one = pred_input_title[:, 0:1, :]
            scores = model.scorer(his_input_title, pred_input_title_one)
            

            scores = scores.cpu().numpy()
            labels = batch_y[:, 0].cpu().numpy()
            
            all_scores.extend(scores)
            all_labels.extend(labels)
    

    metrics_dict = {}
    auc_metric = AucScore()
    mrr_metric = MrrScore()
    ndcg_5_metric = NdcgScore(k=5)
    ndcg_10_metric = NdcgScore(k=10)
    
    metrics_dict['auc'] = auc_metric.calculate([all_labels], [all_scores])
    metrics_dict['mrr'] = mrr_metric.calculate([all_labels], [all_scores])
    metrics_dict['ndcg@5'] = ndcg_5_metric.calculate([all_labels], [all_scores])
    metrics_dict['ndcg@10'] = ndcg_10_metric.calculate([all_labels], [all_scores])
    
    return metrics_dict

In [19]:
def train_one_epoch(model, dataloader, optimizer, criterion, device):
    model.model.train()
    total_loss = 0
    count = 0
    progress_bar = tqdm(total=len(dataloader), desc="Training", dynamic_ncols=True)

    for batch_idx, ((his_input_title, pred_input_title), batch_y) in enumerate(dataloader):

        # if not isinstance(his_input_title, torch.Tensor):
        #     his_input_title = torch.from_numpy(his_input_title).float()
        # his_input_title = his_input_title.to(device)

        # if not isinstance(pred_input_title, torch.Tensor):
        #     pred_input_title = torch.from_numpy(pred_input_title).float()
        # pred_input_title = pred_input_title.to(device)

        # if not isinstance(batch_y, torch.Tensor):
        #     batch_y = torch.from_numpy(batch_y).float()
        # batch_y = batch_y.to(device)


        his_input_title = his_input_title.to(dtype=torch.float32, device=device)
        pred_input_title = pred_input_title.to(dtype=torch.float32, device=device)
        batch_y = batch_y.to(dtype=torch.float32, device=device)

        optimizer.zero_grad()
        preds = model.model(his_input_title, pred_input_title) 
        
        # categorical_crossentropy
        loss = -torch.sum(batch_y * torch.log(preds + 1e-10)) / batch_y.size(0)
        loss.backward()


        grad_stats = []
        for name, param in model.model.named_parameters():
            if param.grad is not None:
                grad_stats.append(f"{name}: grad_mean={param.grad.mean().item():.6f}")

        optimizer.step()
        total_loss += loss.item() * len(batch_y)
        count += len(batch_y)


        progress_bar.set_postfix(
            loss=f"{loss.item():.6f}",
            grad_stats=" | ".join(grad_stats[:2])
        )
        progress_bar.update(1)

    progress_bar.close()
    return total_loss / count

In [20]:
def evaluate(model, dataloader, criterion, device):
    model.model.eval()
    total_loss = 0
    count = 0
    all_labels = []
    all_scores = []
    
    with torch.no_grad():

        progress_bar = tqdm(total=len(dataloader), desc="Evaluating", dynamic_ncols=True, leave=True)
        for batch_idx, batch_data in enumerate(dataloader):
            if len(batch_data[0]) == 4:  
                (his_input_title, pred_input_title, his_time_delta, pred_time_delta), batch_y = batch_data
            else:  
                (his_input_title, pred_input_title), batch_y = batch_data
                his_time_delta, pred_time_delta = None, None


            his_input_title = his_input_title.to(dtype=torch.float32, device=device)
            pred_input_title = pred_input_title.to(dtype=torch.float32, device=device)
            batch_y = batch_y.to(dtype=torch.float32, device=device)

            if his_time_delta is not None:
                his_time_delta = his_time_delta.to(dtype=torch.float32, device=device)
                pred_time_delta = pred_time_delta.to(dtype=torch.float32, device=device)
                preds = model.model(his_input_title, pred_input_title, his_time_delta, pred_time_delta)
            else:
                preds = model.model(his_input_title, pred_input_title)

            loss = -torch.sum(batch_y * torch.log(preds + 1e-10)) / batch_y.size(0)
            total_loss += loss.item() * len(batch_y)
            count += len(batch_y)
            
            for i in range(batch_y.size(0)):
                all_labels.append(batch_y[i].cpu().numpy())
                all_scores.append(preds[i].cpu().numpy())
            
            progress_bar.set_postfix(loss=f"{loss.item():.6f}")
            progress_bar.update(1)
        progress_bar.close()

    metrics_dict = {}
    auc_metric = AucScore()
    mrr_metric = MrrScore()
    ndcg_5_metric = NdcgScore(k=5)
    ndcg_10_metric = NdcgScore(k=10)
    
    try:
        metrics_dict['auc'] = auc_metric.calculate(all_labels, all_scores)
        metrics_dict['mrr'] = mrr_metric.calculate(all_labels, all_scores)
        metrics_dict['ndcg@5'] = ndcg_5_metric.calculate(all_labels, all_scores)
        metrics_dict['ndcg@10'] = ndcg_10_metric.calculate(all_labels, all_scores)
    except Exception as e:
        print("\nError in metric calculation:", str(e))
        print("Example label:", all_labels[0])
        print("Example score:", all_scores[0])

    return total_loss / count, metrics_dict


best_auc = -1
print(f"Initiating {MODEL_NAME}, start training...")
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    for param_group in optimizer.param_groups:
        print(f"Current learning rate: {param_group['lr']}")

    train_loss = train_one_epoch(model, train_loader, optimizer, criterion, device)
    val_loss, val_metrics = evaluate(model, val_loader, criterion, device)
    
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    print(f"Train Loss: {train_loss:.4f}")
    print(f"Val Loss: {val_loss:.4f}")
    print(f"Val AUC: {val_metrics.get('auc', 0.0):.4f}")
    print(f"Val MRR: {val_metrics.get('mrr', 0.0):.4f}")
    print(f"Val NDCG@5: {val_metrics.get('ndcg@5', 0.0):.4f}")
    print(f"Val NDCG@10: {val_metrics.get('ndcg@10', 0.0):.4f}")
    
    if val_metrics.get('auc', 0.0) > best_auc:
        best_auc = val_metrics.get('auc', 0.0)
        torch.save(model.model.state_dict(), MODEL_WEIGHTS)
        print("\nBest model updated")

    scheduler.step(val_metrics.get('auc', 0.0))
    stop = early_stopping.step(val_metrics.get('auc', 0.0), model.model)
    if stop:
        print("\nEarly stopping triggered.")
        break

Initiating NRMSDocVec, start training...

Epoch 1/5
Current learning rate: 0.0001


Training: 100%|██████████| 1426/1426 [00:19<00:00, 72.86it/s, grad_stats=userencoder.newsencoder.layers.0.0.weight: grad_mean=0.000021 | userencoder.newsencoder.layers.0.0.bias: grad_mean=0.007871, loss=1.303622]  
Evaluating: 100%|██████████| 150/150 [00:01<00:00, 122.39it/s, loss=1.835554]
AUC: 100%|████████████████████████████████| 4779/4779 [00:03<00:00, 1466.56it/s]
AUC: 100%|██████████████████████████████| 4779/4779 [00:00<00:00, 101971.71it/s]
AUC: 100%|███████████████████████████████| 4779/4779 [00:00<00:00, 50258.20it/s]
AUC: 100%|███████████████████████████████| 4779/4779 [00:00<00:00, 51026.61it/s]



Epoch 1/5
Train Loss: 1.5339
Val Loss: 1.5150
Val AUC: 0.6218
Val MRR: 0.5721
Val NDCG@5: 0.6774
Val NDCG@10: 0.6774

Best model updated

Epoch 2/5
Current learning rate: 0.0001


Training: 100%|██████████| 1426/1426 [00:19<00:00, 73.35it/s, grad_stats=userencoder.newsencoder.layers.0.0.weight: grad_mean=0.000016 | userencoder.newsencoder.layers.0.0.bias: grad_mean=0.004080, loss=1.432175]  
Evaluating: 100%|██████████| 150/150 [00:01<00:00, 124.83it/s, loss=1.770083]
AUC: 100%|████████████████████████████████| 4779/4779 [00:03<00:00, 1483.51it/s]
AUC: 100%|██████████████████████████████| 4779/4779 [00:00<00:00, 102875.54it/s]
AUC: 100%|███████████████████████████████| 4779/4779 [00:00<00:00, 51178.91it/s]
AUC: 100%|███████████████████████████████| 4779/4779 [00:00<00:00, 48395.35it/s]



Epoch 2/5
Train Loss: 1.4609
Val Loss: 1.5057
Val AUC: 0.6252
Val MRR: 0.5776
Val NDCG@5: 0.6815
Val NDCG@10: 0.6815

Best model updated

Epoch 3/5
Current learning rate: 0.0001


Training: 100%|██████████| 1426/1426 [00:18<00:00, 75.42it/s, grad_stats=userencoder.newsencoder.layers.0.0.weight: grad_mean=-0.000026 | userencoder.newsencoder.layers.0.0.bias: grad_mean=-0.008782, loss=1.369875]
Evaluating: 100%|██████████| 150/150 [00:01<00:00, 126.94it/s, loss=1.845830]
AUC: 100%|████████████████████████████████| 4779/4779 [00:03<00:00, 1439.28it/s]
AUC: 100%|██████████████████████████████| 4779/4779 [00:00<00:00, 103526.42it/s]
AUC: 100%|███████████████████████████████| 4779/4779 [00:00<00:00, 46722.54it/s]
AUC: 100%|███████████████████████████████| 4779/4779 [00:00<00:00, 50189.99it/s]



Epoch 3/5
Train Loss: 1.4240
Val Loss: 1.5132
Val AUC: 0.6383
Val MRR: 0.5874
Val NDCG@5: 0.6892
Val NDCG@10: 0.6892

Best model updated

Epoch 4/5
Current learning rate: 0.0001


Training: 100%|██████████| 1426/1426 [00:19<00:00, 74.33it/s, grad_stats=userencoder.newsencoder.layers.0.0.weight: grad_mean=-0.000016 | userencoder.newsencoder.layers.0.0.bias: grad_mean=-0.006106, loss=1.296943]
Evaluating: 100%|██████████| 150/150 [00:01<00:00, 121.37it/s, loss=1.768713]
AUC: 100%|████████████████████████████████| 4779/4779 [00:03<00:00, 1471.84it/s]
AUC: 100%|██████████████████████████████| 4779/4779 [00:00<00:00, 103018.82it/s]
AUC: 100%|███████████████████████████████| 4779/4779 [00:00<00:00, 45150.13it/s]
AUC: 100%|███████████████████████████████| 4779/4779 [00:00<00:00, 50123.60it/s]



Epoch 4/5
Train Loss: 1.4010
Val Loss: 1.5273
Val AUC: 0.6354
Val MRR: 0.5848
Val NDCG@5: 0.6871
Val NDCG@10: 0.6871

Epoch 5/5
Current learning rate: 0.0001


Training: 100%|██████████| 1426/1426 [00:19<00:00, 74.14it/s, grad_stats=userencoder.newsencoder.layers.0.0.weight: grad_mean=0.000002 | userencoder.newsencoder.layers.0.0.bias: grad_mean=0.000471, loss=1.451776]  
Evaluating: 100%|██████████| 150/150 [00:02<00:00, 70.78it/s, loss=1.964568] 
AUC: 100%|████████████████████████████████| 4779/4779 [00:03<00:00, 1485.18it/s]
AUC: 100%|██████████████████████████████| 4779/4779 [00:00<00:00, 101850.46it/s]
AUC: 100%|███████████████████████████████| 4779/4779 [00:00<00:00, 50741.40it/s]
AUC: 100%|███████████████████████████████| 4779/4779 [00:00<00:00, 48152.83it/s]


Epoch 5/5
Train Loss: 1.3775
Val Loss: 1.5764
Val AUC: 0.6277
Val MRR: 0.5806
Val NDCG@5: 0.6838
Val NDCG@10: 0.6838





In [21]:
best_auc = -1

In [22]:
print("PyTorch version:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("CUDA version:", torch.version.cuda)
    print("cuDNN version:", torch.backends.cudnn.version())
    print("GPU Name:", torch.cuda.get_device_name(0))


PyTorch version: 2.5.1+cu118
CUDA available: True
CUDA version: 11.8
cuDNN version: 90100
GPU Name: NVIDIA GeForce RTX 3060 Laptop GPU


In [23]:
print("Article mapping example:")
for i, (key, value) in enumerate(article_mapping.items()):
    print(f"Key: {key}, Value: {value[:5]}")  
    if i >= 5:
        break


Article mapping example:
Key: 3000022, Value: [-0.012159083038568497, 0.05709662660956383, 0.018299145624041557, -0.03888377919793129, -0.010862666182219982]
Key: 3000063, Value: [0.034481510519981384, 0.03353268280625343, 0.05459773540496826, -0.023162858560681343, 0.009086905978620052]
Key: 3000613, Value: [-0.014638329856097698, 0.030934402719140053, 0.036162927746772766, 0.039488889276981354, -0.030487006530165672]
Key: 3000700, Value: [-0.06416679173707962, 0.00485263392329216, 0.013270833529531956, -0.00036373414332047105, 0.001436168560758233]
Key: 3000840, Value: [-0.013040119782090187, 0.0245132464915514, 0.031050924211740494, 0.012360169552266598, -0.04919935017824173]
Key: 3001278, Value: [0.0048789093270897865, 0.01565060019493103, 0.046486884355545044, 0.05465223267674446, -0.056941013783216476]


In [24]:
print(f"loading model: {MODEL_WEIGHTS}")
model.model.load_state_dict(torch.load(MODEL_WEIGHTS, map_location=device))
model.model.eval()
model.scorer.eval()

loading model: E:\02456_Assignment\pytorch\test\Dataset\DUMP\state_dict\NRMSDocVec-2024-12-09_00-42-25\weights.pt


  model.model.load_state_dict(torch.load(MODEL_WEIGHTS, map_location=device))


NRMScorer(
  (userencoder): UserEncoder(
    (newsencoder): NewsEncoder(
      (layers): ModuleList(
        (0): Sequential(
          (0): Linear(in_features=768, out_features=512, bias=True)
          (1): ReLU()
          (2): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (3): Dropout(p=0.2, inplace=False)
        )
        (1-2): 2 x Sequential(
          (0): Linear(in_features=512, out_features=512, bias=True)
          (1): ReLU()
          (2): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (3): Dropout(p=0.2, inplace=False)
        )
      )
      (output_layer): Linear(in_features=512, out_features=256, bias=True)
    )
    (attention): MultiheadAttention(
      (out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)
    )
    (att_layer): AttLayer2()
  )
  (newsencoder): NewsEncoder(
    (layers): ModuleList(
      (0): Sequential(
        (0): Linear(i

### Test set
We'll use the validation set, as the test set.

In [25]:
print("Initiating testset...")
df_test = (
    ebnerd_from_path(
        PATH.joinpath(DATASPLIT, "validation"),
        history_size=HISTORY_SIZE,
        padding=0,
    )
    .sample(fraction=FRACTION_TEST)
    .with_columns([
        pl.col("article_ids_clicked").alias(DEFAULT_CLICKED_ARTICLES_COL),
        pl.col("article_ids_inview").alias(DEFAULT_INVIEW_ARTICLES_COL),
        pl.lit(False).alias(DEFAULT_IS_BEYOND_ACCURACY_COL)
    ])
    .select(COLUMNS + [DEFAULT_IS_BEYOND_ACCURACY_COL])
)

df_test = df_test.pipe(create_binary_labels_column)

Initiating testset...


In [26]:

print("Validating list lengths after padding...")
list_lengths = df_test[DEFAULT_INVIEW_ARTICLES_COL].list.len().unique()
print(f"Unique list lengths: {list_lengths}")

if len(list_lengths) == 1:
    print("Padding successful: All lists have the same length.")
else:
    print("Padding failed: Inconsistent list lengths detected.")


Validating list lengths after padding...
Unique list lengths: shape: (11,)
Series: 'article_ids_inview' [u32]
[
	5
	6
	7
	9
	10
	…
	12
	13
	14
	15
	27
]
Padding failed: Inconsistent list lengths detected.


In [27]:
@torch.no_grad()
def predict_scores(model_scorer, dataloader, device):
    model_scorer.eval()
    preds_all = []
    
    for (his_input_title, pred_input_title_one), _ in dataloader:

        if not isinstance(his_input_title, torch.Tensor):
            his_input_title = torch.from_numpy(his_input_title).to(dtype=torch.float32)
        elif his_input_title.dtype != torch.float32:
            his_input_title = his_input_title.to(dtype=torch.float32)
        his_input_title = his_input_title.to(device)

        if not isinstance(pred_input_title_one, torch.Tensor):
            pred_input_title_one = torch.from_numpy(pred_input_title_one).to(dtype=torch.float32)
        elif pred_input_title_one.dtype != torch.float32:
            pred_input_title_one = pred_input_title_one.to(dtype=torch.float32)
        pred_input_title_one = pred_input_title_one.to(device)

        scores = model_scorer(his_input_title, pred_input_title_one)
        preds_all.extend(scores.cpu().tolist())
    
    return np.array(preds_all, dtype=object)

In [None]:
print("Initiating testset...")
df_test = (
    ebnerd_from_path(
        PATH.joinpath(DATASPLIT, "validation"),
        history_size=HISTORY_SIZE,
        padding=0,
    )
    .sample(fraction=FRACTION_TEST)
    .with_columns([
        pl.col("article_ids_clicked").alias(DEFAULT_CLICKED_ARTICLES_COL),
        pl.col("article_ids_inview").alias(DEFAULT_INVIEW_ARTICLES_COL),
        pl.lit(False).alias(DEFAULT_IS_BEYOND_ACCURACY_COL)
    ])
    .select(COLUMNS + [DEFAULT_IS_BEYOND_ACCURACY_COL])
)

df_test = df_test.pipe(create_binary_labels_column)

print("Current PATH:", PATH)
print("Training data path:", PATH.joinpath(DATASPLIT, "train"))
print("Validation data path:", PATH.joinpath(DATASPLIT, "validation"))

test_path = PATH.joinpath(DATASPLIT, "validation")
print(f"Loading test data from: {test_path}")
print(f"Path exists: {test_path.exists()}")


df_test_chunks = split_df_chunks(df_test, n_chunks=N_CHUNKS_TEST)
df_pred_chunks = []

print("Processing test chunks...")
for i, df_test_chunk in enumerate(df_test_chunks[CHUNKS_DONE:], start=1 + CHUNKS_DONE):
    print(f"Processing chunk {i}/{len(df_test_chunks)}")
    test_dataloader = NRMSDataLoader(
        behaviors=df_test_chunk,
        article_dict=article_mapping,
        unknown_representation="zeros",
        history_column=DEFAULT_HISTORY_ARTICLE_ID_COL,
        eval_mode=True,
        batch_size=BATCH_SIZE_TEST_WO_B,
    )
    test_loader = torch.utils.data.DataLoader(test_dataloader, batch_size=None, shuffle=False)
    

    try:
        scores = predict_scores(model.scorer, test_loader, device)
        print(f"Scores shape: {scores.shape}")
    except Exception as e:
        print(f"Error in predict_scores: {str(e)}")
        first_batch = next(iter(test_loader))
        print(f"First batch types: {[type(x) for x in first_batch[0]]}")
        print(f"First batch dtypes: {[x.dtype if isinstance(x, torch.Tensor) else None for x in first_batch[0]]}")
        raise e
    
    df_test_chunk = add_prediction_scores(df_test_chunk, scores.tolist())
    df_test_chunk = df_test_chunk.with_columns([
        pl.col("scores")
        .map_elements(lambda x: list(rank_predictions_by_score(x)), return_dtype=pl.List(pl.Float32))
        .alias("ranked_scores")
    ])
    
    df_test_chunk.select(DEFAULT_IMPRESSION_ID_COL, "ranked_scores").write_parquet(
        TEST_CHUNKS_DIR.joinpath(f"pred_{i}.parquet")
    )
    df_pred_chunks.append(df_test_chunk)
    del df_test_chunk, test_dataloader, scores
    gc.collect()

Initiating testset...
Current PATH: E:\02456_Assignment\pytorch\test\Dataset
Training data path: E:\02456_Assignment\pytorch\test\Dataset\ebnerd_demo\train
Validation data path: E:\02456_Assignment\pytorch\test\Dataset\ebnerd_demo\validation
Loading test data from: E:\02456_Assignment\pytorch\test\Dataset\ebnerd_demo\validation
Path exists: True
Processing test chunks...
Processing chunk 1/1
Scores shape: (309,)


In [29]:

for chunk in df_test_chunks:
    print(chunk[DEFAULT_INVIEW_ARTICLES_COL].list.len().value_counts())


shape: (15, 2)
┌────────────────────┬───────┐
│ article_ids_inview ┆ count │
│ ---                ┆ ---   │
│ u32                ┆ u32   │
╞════════════════════╪═══════╡
│ 33                 ┆ 1     │
│ 14                 ┆ 1     │
│ 15                 ┆ 2     │
│ 11                 ┆ 3     │
│ 16                 ┆ 1     │
│ …                  ┆ …     │
│ 13                 ┆ 2     │
│ 23                 ┆ 1     │
│ 17                 ┆ 1     │
│ 8                  ┆ 2     │
│ 10                 ┆ 2     │
└────────────────────┴───────┘


In [30]:

print("Examining test chunk data structure...")
sample_chunk = df_test_chunks[0]  
print("Sample test chunk head:")
print(sample_chunk.head(5))
print("\nColumn types:")
print(sample_chunk.dtypes)
print("\nSample inview articles:")
print(sample_chunk[DEFAULT_INVIEW_ARTICLES_COL].head())

print("\nProcessing test chunks...")
for i, df_test_chunk in enumerate(df_test_chunks[CHUNKS_DONE:], start=1 + CHUNKS_DONE):
    print(f"\nProcessing chunk {i}/{len(df_test_chunks)}")
    print(f"Chunk shape: {df_test_chunk.shape}")
    
    test_dataloader = NRMSDataLoader(
        behaviors=df_test_chunk,
        article_dict=article_mapping,
        unknown_representation="zeros",
        history_column=DEFAULT_HISTORY_ARTICLE_ID_COL,
        eval_mode=True,
        batch_size=BATCH_SIZE_TEST_WO_B,
    )
    test_loader = torch.utils.data.DataLoader(test_dataloader, batch_size=None, shuffle=False)
    
    try:
        scores = predict_scores(model.scorer, test_loader, device)
        print(f"Scores shape: {scores.shape}")
    except Exception as e:
        print(f"Error in predict_scores: {str(e)}")
        first_batch = next(iter(test_loader))
        print(f"First batch types: {[type(x) for x in first_batch[0]]}")
        print(f"First batch dtypes: {[x.dtype if isinstance(x, torch.Tensor) else None for x in first_batch[0]]}")
        raise e
    
    df_test_chunk = add_prediction_scores(df_test_chunk, scores.tolist())
    df_test_chunk = df_test_chunk.with_columns([
        pl.col("scores")
        .map_elements(lambda x: list(rank_predictions_by_score(x)), return_dtype=pl.List(pl.Float32))
        .alias("ranked_scores")
    ])
    
    df_test_chunk.select(DEFAULT_IMPRESSION_ID_COL, "ranked_scores").write_parquet(
        TEST_CHUNKS_DIR.joinpath(f"pred_{i}.parquet")
    )
    df_pred_chunks.append(df_test_chunk)
    del df_test_chunk, test_dataloader, scores
    gc.collect()

Examining test chunk data structure...
Sample test chunk head:
shape: (5, 8)
┌────────────┬────────────┬────────────┬────────────┬────────────┬─────────┬───────────┬───────────┐
│ impression ┆ article_id ┆ article_id ┆ article_id ┆ impression ┆ user_id ┆ is_beyond ┆ labels    │
│ _time      ┆ _fixed     ┆ s_inview   ┆ s_clicked  ┆ _id        ┆ ---     ┆ _accuracy ┆ ---       │
│ ---        ┆ ---        ┆ ---        ┆ ---        ┆ ---        ┆ u32     ┆ ---       ┆ list[i8]  │
│ datetime[μ ┆ list[i32]  ┆ list[i32]  ┆ list[i32]  ┆ u32        ┆         ┆ bool      ┆           │
│ s]         ┆            ┆            ┆            ┆            ┆         ┆           ┆           │
╞════════════╪════════════╪════════════╪════════════╪════════════╪═════════╪═══════════╪═══════════╡
│ 2023-05-25 ┆ [9778302,  ┆ [9780561,  ┆ [9780428]  ┆ 269627573  ┆ 626652  ┆ false     ┆ [0, 0, …  │
│ 11:12:45   ┆ 9777582, … ┆ 8934043, … ┆            ┆            ┆         ┆           ┆ 0]        │
│            ┆

In [34]:

print(f"\nChecking test data distribution...")
print("Unique values in DEFAULT_CLICKED_ARTICLES_COL:")
print(df_test[DEFAULT_CLICKED_ARTICLES_COL].unique())
print("Unique values in DEFAULT_INVIEW_ARTICLES_COL:")
print(df_test[DEFAULT_INVIEW_ARTICLES_COL].unique())


print("\nProcessing test chunks...")
df_pred_chunks = []

with tqdm(total=len(df_test_chunks), desc="Processing chunks", leave=True) as pbar:
    for i, df_test_chunk in enumerate(df_test_chunks[CHUNKS_DONE:], start=1 + CHUNKS_DONE):
        test_dataloader = NRMSDataLoader(
            behaviors=df_test_chunk,
            article_dict=article_mapping,
            unknown_representation="zeros",
            history_column=DEFAULT_HISTORY_ARTICLE_ID_COL,
            eval_mode=True,
            batch_size=BATCH_SIZE_TEST_WO_B,
        )
        test_loader = torch.utils.data.DataLoader(test_dataloader, batch_size=None, shuffle=False)
        scores = predict_scores(model.scorer, test_loader, device)
        

        df_test_chunk = add_prediction_scores(df_test_chunk, scores.tolist())
        df_test_chunk = df_test_chunk.with_columns([
            pl.col("scores")
            .map_elements(lambda x: list(rank_predictions_by_score(x)), return_dtype=pl.List(pl.Float32))
            .alias("ranked_scores")
        ])
        
        df_pred_chunks.append(df_test_chunk)
        pbar.update(1)
        

        del test_dataloader, scores
        gc.collect()


print("\nMerging prediction results...")
df_test = pl.concat(df_pred_chunks)

print("Saving final predictions...")
df_test.select(DEFAULT_IMPRESSION_ID_COL, "ranked_scores").write_parquet(
    ARTIFACT_DIR.joinpath("test_predictions.parquet")
)


print("\nEvaluating test set predictions...")
with tqdm(total=3, desc="Computing metrics", leave=True) as pbar:

    labels = [np.array(label, dtype=np.float32) for label in df_test["labels"].to_list()]
    scores = [np.array(score, dtype=np.float32) for score in df_test["scores"].to_list()]
    pbar.update(1)
    

    test_metrics = MetricEvaluator(
        labels=labels,
        predictions=scores,
        metric_functions=[AucScore(), MrrScore(), NdcgScore(k=5), NdcgScore(k=10)]
    )
    test_results = test_metrics.evaluate()
    pbar.update(1)
    

    write_json_file(test_results.evaluations, ARTIFACT_DIR.joinpath("test_metrics.json"))
    pbar.update(1)

print("\nTest Set Results:")
print(f"AUC: {test_results.evaluations['auc']:.4f}")
print(f"MRR: {test_results.evaluations['mrr']:.4f}")
print(f"NDCG@5: {test_results.evaluations['ndcg@5']:.4f}")
print(f"NDCG@10: {test_results.evaluations['ndcg@10']:.4f}")


if TEST_CHUNKS_DIR.exists() and TEST_CHUNKS_DIR.is_dir():
    shutil.rmtree(TEST_CHUNKS_DIR)

df_test = df_test.sort(DEFAULT_IMPRESSION_ID_COL)    
write_submission_file(
    impression_ids=df_test[DEFAULT_IMPRESSION_ID_COL],
    prediction_scores=df_test["ranked_scores"],
    path=ARTIFACT_DIR.joinpath("predictions.txt"),
    filename_zip=f"{MODEL_NAME}-{SEED}-{DATASPLIT}.zip",
)


Checking test data distribution...
Unique values in DEFAULT_CLICKED_ARTICLES_COL:
shape: (25,)
Series: 'article_ids_clicked' [list[i32]]
[
	[9783720, 9783720]
	[9782407]
	[9777822]
	[9788116]
	[9778796]
	…
	[9781257]
	[9780925]
	[9789745]
	[9788666]
	[9780702]
]
Unique values in DEFAULT_INVIEW_ARTICLES_COL:
shape: (25,)
Series: 'article_ids_inview' [list[i32]]
[
	[9785107, 9784947, … 9785267]
	[9783137, 9783278, … 9782517]
	[9785339, 9782869, … 9783740]
	[9782879, 9782672, … 9782616]
	[9781932, 9786763, … 9786906]
	…
	[9788116, 9789141, … 9789433]
	[9776322, 9778796, … 9780476]
	[9784607, 9784559, … 9783852]
	[9779659, 9781870, … 9781878]
	[9789279, 9788947, … 9771224]
]

Processing test chunks...


Processing chunks: 100%|██████████| 1/1 [00:16<00:00, 16.50s/it]



Merging prediction results...
Saving final predictions...

Evaluating test set predictions...


AUC: 100%|█████████████████████████████████████| 25/25 [00:00<00:00, 793.82it/s]
AUC: 100%|███████████████████████████████████| 25/25 [00:00<00:00, 25007.77it/s]
AUC: 100%|███████████████████████████████████| 25/25 [00:00<00:00, 24989.90it/s]
AUC: 100%|███████████████████████████████████| 25/25 [00:00<00:00, 25055.58it/s]
Computing metrics: 100%|██████████| 3/3 [00:00<00:00, 54.63it/s]



Test Set Results:
AUC: 0.6618
MRR: 0.3706
NDCG@5: 0.3896
NDCG@10: 0.5042


25it [00:00, 25019.71it/s]

Zipping E:\02456_Assignment\pytorch\test\Dataset\DUMP\test_predictions\NRMSDocVec-2024-12-09_00-42-25\predictions.txt to E:\02456_Assignment\pytorch\test\Dataset\DUMP\test_predictions\NRMSDocVec-2024-12-09_00-42-25\NRMSDocVec-None-ebnerd_demo.zip





In [None]:
write_submission_file(
    impression_ids=df_test[DEFAULT_IMPRESSION_ID_COL],
    prediction_scores=df_test["ranked_scores"],
    path=ARTIFACT_DIR.joinpath("predictions.txt"),
    filename_zip=f"{MODEL_NAME}-{SEED}-{DATASPLIT}.zip",
)

13536it [00:00, 170011.35it/s]


Zipping E:\desktop\test\Dataset\DUMP\test_predictions\NRMSDocVec-2024-12-07_21-49-03\predictions.txt to E:\desktop\test\Dataset\DUMP\test_predictions\NRMSDocVec-2024-12-07_21-49-03\NRMSDocVec-None-ebnerd_demo.zip


# DONE 🚀