# Getting started

In this notebook, we illustrate how to use the Neural News Recommendation with Multi-Head Self-Attention ([NRMS](https://aclanthology.org/D19-1671/)). The implementation is taken from the [recommenders](https://github.com/recommenders-team/recommenders) repository. We have simply stripped the model to keep it cleaner.

We use a small dataset, which is downloaded from [recsys.eb.dk](https://recsys.eb.dk/). All the datasets are stored in the folder path ```~/ebnerd_data/*```.

In [27]:
import sys
sys.path.append('C:/Users/janle/Desktop/Master_local/3/Deep Learning/deep-learning/Temporal_external/src')  # Add the parent directory to sys.path

## Load functionality

In [28]:
from transformers import AutoTokenizer, AutoModel
from pathlib import Path
import tensorflow as tf
import polars as pl
import datetime
from typing import List, Dict, Any, Tuple, Optional, Union
from datetime import datetime, timedelta
import numpy as np

from ebrec.utils._constants import *

from ebrec.utils._behaviors import (
    create_binary_labels_column,
    sampling_strategy_wu2019,
    add_prediction_scores,
    truncate_history,
    ebnerd_from_path,
)
from ebrec.evaluation import MetricEvaluator, AucScore, NdcgScore, MrrScore
from ebrec.utils._articles import convert_text2encoding_with_transformers
from ebrec.utils._polars import concat_str_columns, slice_join_dataframes
from ebrec.utils._articles import create_article_id_to_value_mapping
from ebrec.utils._nlp import get_transformers_word_embeddings
from ebrec.utils._python import write_submission_file, rank_predictions_by_score

from ebrec.models.newsrec.dataloader import NewsrecDataLoader, NRMSTemporalDataLoader
from ebrec.models.newsrec.model_config import hparams_nrms
from ebrec.models.newsrec import NRMSTemporalModel

In [29]:
# List all physical devices
gpus = tf.config.experimental.list_physical_devices("GPU")
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

physical_devices = tf.config.list_physical_devices()
print("Available devices:", physical_devices)

Available devices: [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]


## Load dataset

### Generate labels
We sample a few just to get started. For testset we just make up a dummy column with 0 and 1 - this is not the true labels.

In [30]:
PATH = Path("C:/Users/janle/Desktop/Master_local/Data_storage/Deep_learning/ebnerd_data")
#
DATASPLIT = "ebnerd_small"
DUMP_DIR = Path.joinpath(PATH,"ebnerd_predictions")
DUMP_DIR.mkdir(exist_ok=True, parents=True)

In [31]:
DUMP_DIR

WindowsPath('C:/Users/janle/Desktop/Master_local/Data_storage/Deep_learning/ebnerd_data/ebnerd_predictions')

History size can often be a memory bottleneck; if adjusted, the NRMS hyperparameter ```history_size``` must be updated to ensure compatibility and efficient memory usage

In [32]:
HISTORY_SIZE = 20
hparams_nrms.history_size = HISTORY_SIZE

In [33]:
# We just want to load the necessary columns
COLUMNS = [
    DEFAULT_USER_COL,
    DEFAULT_IMPRESSION_ID_COL,
    DEFAULT_IMPRESSION_TIMESTAMP_COL,
    DEFAULT_HISTORY_ARTICLE_ID_COL,
    DEFAULT_CLICKED_ARTICLES_COL,
    DEFAULT_INVIEW_ARTICLES_COL,
]
# This notebook is just a simple 'get-started'; we down sample the number of samples to just run quickly through it.
FRACTION = 0.1

In this example we sample the dataset, just to keep it smaller. We'll split the training data into training and validation 

In [34]:
df = (
    ebnerd_from_path(
        PATH.joinpath(DATASPLIT, "train"),
        history_size=HISTORY_SIZE,
        padding=0,
    )
    .select(COLUMNS)
    .pipe(
        sampling_strategy_wu2019,
        npratio=4,
        shuffle=True,
        with_replacement=True,
        seed=123,
    )
    .pipe(create_binary_labels_column)
    .sample(fraction=FRACTION)
)

dt_split = pl.col(DEFAULT_IMPRESSION_TIMESTAMP_COL).max() - timedelta(days=1)
df_train = df.filter(pl.col(DEFAULT_IMPRESSION_TIMESTAMP_COL) < dt_split)
df_validation = df.filter(pl.col(DEFAULT_IMPRESSION_TIMESTAMP_COL) >= dt_split)

print(f"Train samples: {df_train.height}\nValidation samples: {df_validation.height}")
df_train.head(2)

Train samples: 20147
Validation samples: 3280


user_id,impression_id,impression_time,article_id_fixed,article_ids_clicked,article_ids_inview,labels
u32,u32,datetime[μs],list[i32],list[i64],list[i64],list[i8]
568703,269413707,2023-05-20 07:53:39,"[9768377, 9768387, … 9769893]",[9772601],"[9773306, 9773306, … 9772629]","[0, 0, … 0]"
1333156,328878737,2023-05-21 06:22:19,"[9770178, 9770328, … 9769457]",[8054212],"[9569756, 9773857, … 9569756]","[0, 0, … 0]"


### Test set
We'll use the validation set, as the test set.

In [35]:
df_test = (
    ebnerd_from_path(
        PATH.joinpath(DATASPLIT, "validation"),
        history_size=HISTORY_SIZE,
        padding=0,
    )
    .select(COLUMNS)
    .pipe(create_binary_labels_column)
    .sample(fraction=FRACTION)
)

## Load articles

In [36]:
df_articles = pl.read_parquet(PATH.joinpath(DATASPLIT+"/articles.parquet"))
df_articles.head(2)

article_id,title,subtitle,last_modified_time,premium,body,published_time,image_ids,article_type,url,ner_clusters,entity_groups,topics,category,subcategory,category_str,total_inviews,total_pageviews,total_read_time,sentiment_score,sentiment_label
i32,str,str,datetime[μs],bool,str,datetime[μs],list[i64],str,str,list[str],list[str],list[str],i16,list[i16],str,i32,i32,f32,f32,str
3001353,"""Natascha var ikke den første""","""Politiet frygter nu, at Natasc…",2023-06-29 06:20:33,False,"""Sagen om den østriske Natascha…",2006-08-31 08:06:45,[3150850],"""article_default""","""https://ekstrabladet.dk/krimi/…",[],[],"[""Kriminalitet"", ""Personfarlig kriminalitet""]",140,[],"""krimi""",,,,0.9955,"""Negative"""
3003065,"""Kun Star Wars tjente mere""","""Biografgængerne strømmer ind f…",2023-06-29 06:20:35,False,"""Vatikanet har opfordret til at…",2006-05-21 16:57:00,[3006712],"""article_default""","""https://ekstrabladet.dk/underh…",[],[],"[""Underholdning"", ""Film og tv"", ""Økonomi""]",414,"[433, 434]","""underholdning""",,,,0.846,"""Positive"""


In [37]:
# Prepare temporal features

def create_article_time_dict(df_articles: pl.DataFrame) -> Dict[int, datetime]:
    """Create lookup dictionary for article publishing times"""
    return dict(zip(
        df_articles["article_id"].to_list(),
        df_articles["published_time"].to_list()
    ))
article_time_dict = create_article_time_dict(df_articles)

def prepare_temporal_features(
    df: pl.DataFrame,
    article_time_dict: Dict[int, datetime],
    inview_col: str
) -> pl.DataFrame:
    """Add temporal features using vectorized operations"""

    inview_time_col = f"published_time_{inview_col}"

    return df.with_columns([
        pl.col(inview_col).map_elements(
            lambda ids: [article_time_dict.get(id) for id in ids],
            return_dtype=pl.List(pl.Datetime)
        ).alias(inview_time_col)
    ])


In [38]:

# Add temporal features
df_train = prepare_temporal_features(
    df_train,
    article_time_dict,
    DEFAULT_INVIEW_ARTICLES_COL
)

df_validation = prepare_temporal_features(
    df_validation,
    article_time_dict,
    DEFAULT_INVIEW_ARTICLES_COL
)

df_test = prepare_temporal_features(
    df_test,
    article_time_dict,
    DEFAULT_INVIEW_ARTICLES_COL
)   


In [39]:

def compute_temporal_differences(
    df: pl.DataFrame,
    inview_time_col: str
) -> pl.DataFrame:
    """Compute time differences and exponential discounts"""

    # Add reference date (latest date from inview articles)
    df = df.with_columns(
        pl.col(inview_time_col)
        .map_elements(
            lambda dates: max((d for d in dates if d), default=None),
            return_dtype=pl.Datetime
        )
        .alias("reference_date")
    )
    
    return df
def calculate_time_difference_seconds(
    timestamps: List[Optional[datetime]], 
    reference_time: datetime
) -> List[Optional[float]]:
    """
    Calculate the time difference in seconds between a list of timestamps and a reference time.
    
    Args:
        timestamps: List of timestamps to compare (can contain None)
        reference_time: The reference timestamp to compare against
        
    Returns:
        List of time differences in seconds or None if timestamp is None
    """
    return [(reference_time - timestamp).total_seconds() if timestamp else None for timestamp in timestamps]

def add_time_difference_column(
    df: pl.DataFrame,
    timestamp_column: str,
    reference_time_column: str,
    output_column: str
) -> pl.DataFrame:
    """
    Add a column with time differences in seconds between lists of timestamps and a reference time.
    
    Args:
        df: Input Polars DataFrame
        timestamp_column: Name of column containing lists of timestamps
        reference_time_column: Name of column containing the reference time
        output_column: Name of output column
        
    Returns:
        DataFrame with added time difference column
    """
    df = df.with_columns([
        pl.struct([timestamp_column, reference_time_column]).map_elements(
            lambda row: calculate_time_difference_seconds(row[timestamp_column], row[reference_time_column]),
            return_dtype=pl.List(pl.Float64)
        ).alias(output_column)
    ])

    return df
def compute_exponential_discount(deltas: List[Optional[float]]) -> List[Optional[float]]:
    """
    Compute exponential discount based on time deltas.
    
    Args:
        deltas: List of time deltas in seconds
        
    Returns:
        List of discounts
    """
    
    max_delta = max((d for d in deltas if d is not None), default=1)
    max_delta = max(1, max_delta)  # Ensure max_delta is at least 1 to avoid division by zero
    
    return [np.exp(-d / (max_delta*4)) if d is not None else None for d in deltas]

def add_discount_column(
    df: pl.DataFrame,
    time_delta_column: str,
    output_column: str
) -> pl.DataFrame:
    """
    Add a column with exponential discounts based on time deltas.
    
    Args:
        df: Input Polars DataFrame
        time_delta_column: Name of column containing lists of time deltas
        output_column: Name of output column
        
    Returns:
        DataFrame with added discount column
    """
    df = df.with_columns([
        pl.col(time_delta_column).map_elements(
            compute_exponential_discount,
            return_dtype=pl.List(pl.Float64)
        ).alias(output_column)
    ])

    return df

In [40]:

df_train = compute_temporal_differences(
    df_train,
    f"published_time_{DEFAULT_INVIEW_ARTICLES_COL}"
)

df_validation = compute_temporal_differences(
    df_validation,
    f"published_time_{DEFAULT_INVIEW_ARTICLES_COL}"
)

df_test = compute_temporal_differences(
    df_test,
    f"published_time_{DEFAULT_INVIEW_ARTICLES_COL}"
)

df_train = add_time_difference_column(
    df_train,
    f"published_time_{DEFAULT_INVIEW_ARTICLES_COL}",
    "reference_date", 
    "time_delta"
)

df_validation = add_time_difference_column(
    df_validation,
    f"published_time_{DEFAULT_INVIEW_ARTICLES_COL}",
    "reference_date", 
    "time_delta"
)

df_test = add_time_difference_column(
    df_test,
    f"published_time_{DEFAULT_INVIEW_ARTICLES_COL}",
    "reference_date", 
    "time_delta"
)

df_train = add_discount_column(
    df_train,
    "time_delta",
    "discount_time_delta"
)

df_validation = add_discount_column(
    df_validation,
    "time_delta",
    "discount_time_delta"
)

df_test = add_discount_column(
    df_test,
    "time_delta",
    "discount_time_delta"
)


df_train.head(2)

user_id,impression_id,impression_time,article_id_fixed,article_ids_clicked,article_ids_inview,labels,published_time_article_ids_inview,reference_date,time_delta,discount_time_delta
u32,u32,datetime[μs],list[i32],list[i64],list[i64],list[i8],list[datetime[μs]],datetime[μs],list[f64],list[f64]
568703,269413707,2023-05-20 07:53:39,"[9768377, 9768387, … 9769893]",[9772601],"[9773306, 9773306, … 9772629]","[0, 0, … 0]","[2023-05-20 07:41:49, 2023-05-20 07:41:49, … 2023-05-20 06:10:34]",2023-05-20 07:41:49,"[0.0, 0.0, … 5475.0]","[1.0, 1.0, … 0.894444]"
1333156,328878737,2023-05-21 06:22:19,"[9770178, 9770328, … 9769457]",[8054212],"[9569756, 9773857, … 9569756]","[0, 0, … 0]","[2023-01-09 06:17:27, 2023-05-21 04:08:55, … 2023-01-09 06:17:27]",2023-05-21 04:08:55,"[1.1397088e7, 0.0, … 1.1397088e7]","[0.970995, 1.0, … 0.970995]"


## Init model using HuggingFace's tokenizer and wordembedding
In the original implementation, they use the GloVe embeddings and tokenizer. To get going fast, we'll use a multilingual LLM from Hugging Face. 
Utilizing the tokenizer to tokenize the articles and the word-embedding to init NRMS.


In [41]:
TRANSFORMER_MODEL_NAME = "FacebookAI/xlm-roberta-base"
TEXT_COLUMNS_TO_USE = [DEFAULT_SUBTITLE_COL, DEFAULT_TITLE_COL]
MAX_TITLE_LENGTH = 30

# LOAD HUGGINGFACE:
transformer_model = AutoModel.from_pretrained(TRANSFORMER_MODEL_NAME)
transformer_tokenizer = AutoTokenizer.from_pretrained(TRANSFORMER_MODEL_NAME)

# We'll init the word embeddings using the
word2vec_embedding = get_transformers_word_embeddings(transformer_model)
#
df_articles, cat_cal = concat_str_columns(df_articles, columns=TEXT_COLUMNS_TO_USE)
df_articles, token_col_title = convert_text2encoding_with_transformers(
    df_articles, transformer_tokenizer, cat_cal, max_length=MAX_TITLE_LENGTH
)
# =>
article_mapping = create_article_id_to_value_mapping(
    df=df_articles, value_col=token_col_title
)

# Initiate the dataloaders
In the implementations we have disconnected the models and data. Hence, you should built a dataloader that fits your needs.

Note, with this ```NRMSDataLoader``` the ```eval_mode=False``` is meant for ```model.model.fit()``` whereas ```eval_mode=True``` is meant for ```model.scorer.predict()```. 

In [42]:
from dataclasses import dataclass, field
import tensorflow as tf
import polars as pl
import numpy as np

from ebrec.utils._articles_behaviors import map_list_article_id_to_value
from ebrec.utils._python import (
    repeat_by_list_values_from_matrix,
    create_lookup_objects,
)

from ebrec.utils._constants import (
    DEFAULT_INVIEW_ARTICLES_COL,
    DEFAULT_LABELS_COL,
    DEFAULT_USER_COL,
)


In [43]:
BATCH_SIZE = 16

train_dataloader = NRMSTemporalDataLoader(
    behaviors=df_train,
    article_dict=article_mapping,
    unknown_representation="zeros",
    history_column=DEFAULT_HISTORY_ARTICLE_ID_COL,
    eval_mode=False,
    batch_size=BATCH_SIZE,
)
val_dataloader = NRMSTemporalDataLoader(
    behaviors=df_validation,
    article_dict=article_mapping,
    unknown_representation="zeros",
    history_column=DEFAULT_HISTORY_ARTICLE_ID_COL,
    eval_mode=False,
    batch_size=BATCH_SIZE,
)

## Train the model


In [44]:
# List all physical devices
physical_devices = tf.config.list_physical_devices()
print("Available devices:", physical_devices)

Available devices: [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]


Initiate the NRMS-model:

In [45]:
model = NRMSTemporalModel(
    hparams=hparams_nrms,
    word2vec_embedding=word2vec_embedding,
    seed=42,
)
model.model.compile(
    optimizer=model.model.optimizer,
    loss=model.model.loss,
    metrics=["AUC"],
)

MODEL_NAME = model.__class__.__name__
MODEL_WEIGHTS = DUMP_DIR.joinpath(f"state_dict/{MODEL_NAME}/mini.weights.h5")
LOG_DIR = DUMP_DIR.joinpath(f"runs/{MODEL_NAME}")
print(MODEL_WEIGHTS)
### Callbacks
#We will add some callbacks to model training.
# Tensorboard:
#tensorboard_callback = tf.keras.callbacks.TensorBoard(
#    log_dir=LOG_DIR,
#    histogram_freq=1,
#)

# Earlystopping:
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_AUC",
    mode="max",
    patience=3,
    restore_best_weights=True,
)

# ModelCheckpoint:
modelcheckpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=MODEL_WEIGHTS,
    monitor="val_AUC",
    mode="max",
    save_best_only=False,
    save_weights_only=True,
    verbose=1,
)

# Learning rate scheduler:
lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_AUC",
    mode="max",
    factor=0.2,
    patience=2,
    min_lr=1e-6,
)

callbacks = [early_stopping, modelcheckpoint, lr_scheduler]#tensorboard_callback
USE_CALLBACKS = True
EPOCHS = 4

hist = model.model.fit(
    train_dataloader,
    validation_data=val_dataloader,
    epochs=EPOCHS,
    callbacks=callbacks if USE_CALLBACKS else [],
)

C:\Users\janle\Desktop\Master_local\Data_storage\Deep_learning\ebnerd_data\ebnerd_predictions\state_dict\NRMSTemporalModel\mini.weights.h5
Epoch 1/4


  self._warn_if_super_not_called()


[1m1260/1260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - AUC: 0.5212 - loss: 2.4627
Epoch 1: saving model to C:\Users\janle\Desktop\Master_local\Data_storage\Deep_learning\ebnerd_data\ebnerd_predictions\state_dict\NRMSTemporalModel\mini.weights.h5
[1m1260/1260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2779s[0m 2s/step - AUC: 0.5213 - loss: 2.4624 - val_AUC: 0.5536 - val_loss: 1.7266 - learning_rate: 1.0000e-04
Epoch 2/4
[1m1260/1260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - AUC: 0.6022 - loss: 1.5868
Epoch 2: saving model to C:\Users\janle\Desktop\Master_local\Data_storage\Deep_learning\ebnerd_data\ebnerd_predictions\state_dict\NRMSTemporalModel\mini.weights.h5
[1m1260/1260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2263s[0m 2s/step - AUC: 0.6022 - loss: 1.5868 - val_AUC: 0.5473 - val_loss: 1.8412 - learning_rate: 1.0000e-04
Epoch 3/4
[1m1260/1260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - AUC: 0.6534 - loss

In [46]:
if USE_CALLBACKS:
    _ = model.model.load_weights(filepath=MODEL_WEIGHTS)

# Example how to compute some metrics:

In [47]:
BATCH_SIZE_TEST = 16

test_dataloader = NRMSTemporalDataLoader(
    behaviors=df_test,
    article_dict=article_mapping,
    unknown_representation="zeros",
    history_column=DEFAULT_HISTORY_ARTICLE_ID_COL,
    eval_mode=True,
    batch_size=BATCH_SIZE_TEST,
)

In [48]:
pred_test = model.scorer.predict(test_dataloader)



[1m1529/1529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2973s[0m 2s/step


## Add the predictions to the dataframe

In [49]:
df_test = add_prediction_scores(df_test, pred_test)
df_test.head(2)

user_id,impression_id,impression_time,article_id_fixed,article_ids_clicked,article_ids_inview,labels,published_time_article_ids_inview,reference_date,time_delta,discount_time_delta,scores
u32,u32,datetime[μs],list[i32],list[i32],list[i32],list[i8],list[datetime[μs]],datetime[μs],list[f64],list[f64],list[f32]
2402163,215159974,2023-05-27 03:07:54,"[9779489, 9779538, … 9779867]",[9783314],"[9779713, 9783314, … 9193263]","[0, 1, … 0]","[2023-05-25 05:51:21, 2023-05-26 19:11:34, … 2022-03-29 12:34:54]",2023-05-26 19:11:34,"[134413.0, 0.0, … 3.6571e7]","[0.999082, 1.0, … 0.778801]","[0.723931, 0.896529, … 0.563379]"
648257,553921105,2023-05-30 12:23:38,"[9768997, 9762135, … 9776259]",[9788352],"[9788352, 9787769, … 9022428]","[1, 0, … 0]","[2023-05-30 11:47:16, 2023-05-30 11:23:40, … 2021-12-04 13:30:35]",2023-05-30 12:07:25,"[1209.0, 2625.0, … 4.682381e7]","[0.999994, 0.999986, … 0.778801]","[0.842986, 0.813382, … 0.519041]"


### Compute metrics

In [50]:
metrics = MetricEvaluator(
    labels=df_test["labels"].to_list(),
    predictions=df_test["scores"].to_list(),
    metric_functions=[AucScore(), MrrScore(), NdcgScore(k=5), NdcgScore(k=10)],
)
metrics.evaluate()

AUC: 100%|██████████████████████████████| 24464/24464 [00:16<00:00, 1496.89it/s]
AUC: 100%|█████████████████████████████| 24464/24464 [00:00<00:00, 74557.82it/s]
AUC: 100%|█████████████████████████████| 24464/24464 [00:00<00:00, 35286.09it/s]
AUC: 100%|█████████████████████████████| 24464/24464 [00:00<00:00, 35064.30it/s]


<MetricEvaluator class>: 
 {
    "auc": 0.5498668756035231,
    "mrr": 0.3421531055464994,
    "ndcg@5": 0.3820940871150994,
    "ndcg@10": 0.4606448696714717
}

## Make submission file

In [51]:
df_test = df_test.with_columns(
    pl.col("scores")
    .map_elements(lambda x: list(rank_predictions_by_score(x)))
    .alias("ranked_scores")
)
df_test.head(2)

  df_test = df_test.with_columns(


user_id,impression_id,impression_time,article_id_fixed,article_ids_clicked,article_ids_inview,labels,published_time_article_ids_inview,reference_date,time_delta,discount_time_delta,scores,ranked_scores
u32,u32,datetime[μs],list[i32],list[i32],list[i32],list[i8],list[datetime[μs]],datetime[μs],list[f64],list[f64],list[f32],list[i64]
2402163,215159974,2023-05-27 03:07:54,"[9779489, 9779538, … 9779867]",[9783314],"[9779713, 9783314, … 9193263]","[0, 1, … 0]","[2023-05-25 05:51:21, 2023-05-26 19:11:34, … 2022-03-29 12:34:54]",2023-05-26 19:11:34,"[134413.0, 0.0, … 3.6571e7]","[0.999082, 1.0, … 0.778801]","[0.723931, 0.896529, … 0.563379]","[2, 1, … 4]"
648257,553921105,2023-05-30 12:23:38,"[9768997, 9762135, … 9776259]",[9788352],"[9788352, 9787769, … 9022428]","[1, 0, … 0]","[2023-05-30 11:47:16, 2023-05-30 11:23:40, … 2021-12-04 13:30:35]",2023-05-30 12:07:25,"[1209.0, 2625.0, … 4.682381e7]","[0.999994, 0.999986, … 0.778801]","[0.842986, 0.813382, … 0.519041]","[1, 3, … 5]"


This is using the validation, simply add the testset to your flow.

In [52]:
write_submission_file(
    impression_ids=df_test[DEFAULT_IMPRESSION_ID_COL],
    prediction_scores=df_test["ranked_scores"],
    path=DUMP_DIR.joinpath("predictions.txt"),
    filename_zip=f"{DATASPLIT}_predictions-{MODEL_NAME}.zip",
)

24464it [00:00, 176054.43it/s]


Zipping C:\Users\janle\Desktop\Master_local\Data_storage\Deep_learning\ebnerd_data\ebnerd_predictions\predictions.txt to C:\Users\janle\Desktop\Master_local\Data_storage\Deep_learning\ebnerd_data\ebnerd_predictions\ebnerd_small_predictions-NRMSTemporalModel.zip


# DONE 🚀