### **Initial Setup**

In [None]:
# Mount Google Drive at /content/drive so the project folder stored there can
# be reached from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Root folder of the project on the mounted Drive.
route = '/content/drive/MyDrive/IMPACT PROJECT'

# Make the `recommenders` folder inside the project root the working directory.
# NOTE(review): presumably this is a checkout of the recommenders library so the
# `recommenders.*` imports further down resolve from it — confirm.
%cd {route}/recommenders

/content/drive/MyDrive/IMPACT PROJECT/recommenders


In [None]:
!pip install scrapbook retrying

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting scrapbook
  Downloading scrapbook-0.5.0-py3-none-any.whl (34 kB)
Collecting retrying
  Downloading retrying-1.3.4-py3-none-any.whl (11 kB)
Collecting papermill (from scrapbook)
  Downloading papermill-2.4.0-py3-none-any.whl (38 kB)
Collecting jedi>=0.16 (from ipython->scrapbook)
  Downloading jedi-0.18.2-py2.py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m52.4 MB/s[0m eta [36m0:00:00[0m
Collecting ansiwrap (from papermill->scrapbook)
  Downloading ansiwrap-0.8.4-py2.py3-none-any.whl (8.5 kB)
Collecting textwrap3>=0.9.2 (from ansiwrap->papermill->scrapbook)
  Downloading textwrap3-0.9.2-py2.py3-none-any.whl (12 kB)
Installing collected packages: textwrap3, retrying, jedi, ansiwrap, papermill, scrapbook
Successfully installed ansiwrap-0.8.4 jedi-0.18.2 papermill-2.4.0 retrying-1.3.4 scrapbook-0.5.0 textwrap3-0.9.2


###  **Importing the needed libraries**

In [None]:
import sys
import os
import numpy as np
import pandas as pd
import zipfile
from tqdm import tqdm
import scrapbook as sb
from tempfile import TemporaryDirectory
import tensorflow as tf
tf.get_logger().setLevel('ERROR') # only show error messages

from recommenders.models.deeprec.deeprec_utils import download_deeprec_resources
from recommenders.models.newsrec.newsrec_utils import prepare_hparams
from recommenders.models.newsrec.models.nrms import NRMSModel
from recommenders.models.newsrec.io.mind_iterator import MINDIterator
from recommenders.models.newsrec.newsrec_utils import get_mind_data_set
from sklearn.metrics import ndcg_score
from recommenders.evaluation.python_evaluation import ndcg_at_k

import warnings

# Keep FutureWarning messages out of the cell outputs.
warnings.filterwarnings("ignore", category=FutureWarning)

# Report the interpreter and TensorFlow versions used for this run.
print(f"System version: {sys.version}")
print(f"Tensorflow version: {tf.__version__}")

System version: 3.10.12 (main, Jun  7 2023, 12:45:35) [GCC 9.4.0]
Tensorflow version: 2.12.0


### **Loading the behaviours and news dataframes**

In [None]:
# Which variant of the MIND dataset to fetch; this value is passed to
# get_mind_data_set below. Options: demo, small, large
MIND_type = 'demo'

In [None]:
# Scratch directory that holds everything downloaded in this session.
tmpdir = TemporaryDirectory()
data_path = tmpdir.name

# One sub-folder per training split, validation split and shared utilities.
train_dir = os.path.join(data_path, 'train')
valid_dir = os.path.join(data_path, 'valid')
utils_dir = os.path.join(data_path, 'utils')

# Files expected inside those folders once the downloads have run.
train_news_file = os.path.join(train_dir, 'news.tsv')
train_behaviors_file = os.path.join(train_dir, 'behaviors.tsv')
valid_news_file = os.path.join(valid_dir, 'news.tsv')
valid_behaviors_file = os.path.join(valid_dir, 'behaviors.tsv')
wordEmb_file = os.path.join(utils_dir, 'embedding.npy')
userDict_file = os.path.join(utils_dir, 'uid2index.pkl')
wordDict_file = os.path.join(utils_dir, 'word_dict.pkl')
yaml_file = os.path.join(utils_dir, 'nrms.yaml')

mind_url, mind_train_dataset, mind_dev_dataset, mind_utils = get_mind_data_set(MIND_type)

# Each entry: (file whose absence triggers the download, then the three
# arguments handed to download_deeprec_resources in their original order).
pending_downloads = (
    (train_news_file, mind_url, train_dir, mind_train_dataset),
    (valid_news_file, mind_url, valid_dir, mind_dev_dataset),
    (yaml_file, 'https://recodatasets.z20.web.core.windows.net/newsrec/', utils_dir, mind_utils),
)
for marker_file, base_url, target_dir, resource_name in pending_downloads:
    if not os.path.exists(marker_file):
        download_deeprec_resources(base_url, target_dir, resource_name)

100%|██████████| 17.0k/17.0k [00:02<00:00, 7.47kKB/s]
100%|██████████| 9.84k/9.84k [00:01<00:00, 4.94kKB/s]
100%|██████████| 95.0k/95.0k [00:05<00:00, 17.6kKB/s]


## **NRMS: Neural News Recommendation with Multi-Head Self-Attention**

Setting up the parameters

In [None]:
epochs = 5  # forwarded to prepare_hparams as `epochs`
seed = 42  # forwarded to NRMSModel as `seed`
batch_size = 32  # forwarded to prepare_hparams as `batch_size`

In [None]:
# Keyword settings handed to prepare_hparams together with the YAML file:
# the resource file locations plus the training values chosen in this notebook.
hparam_overrides = {
    "wordEmb_file": wordEmb_file,
    "wordDict_file": wordDict_file,
    "userDict_file": userDict_file,
    "batch_size": batch_size,
    "epochs": epochs,
    "show_step": 10,
}
hparams = prepare_hparams(yaml_file, **hparam_overrides)

# Show the resulting hyper-parameter set for reference.
print(hparams)

HParams object with values {'support_quick_scoring': True, 'dropout': 0.2, 'attention_hidden_dim': 200, 'head_num': 20, 'head_dim': 20, 'filter_num': 200, 'window_size': 3, 'vert_emb_dim': 100, 'subvert_emb_dim': 100, 'gru_unit': 400, 'type': 'ini', 'user_emb_dim': 50, 'learning_rate': 0.0001, 'optimizer': 'adam', 'epochs': 5, 'batch_size': 32, 'show_step': 10, 'title_size': 30, 'his_size': 50, 'data_format': 'news', 'npratio': 4, 'metrics': ['group_auc', 'mean_mrr', 'ndcg@5;10'], 'word_emb_dim': 300, 'model_type': 'nrms', 'loss': 'cross_entropy_loss', 'wordEmb_file': '/tmp/tmp99sqsu5e/utils/embedding.npy', 'wordDict_file': '/tmp/tmp99sqsu5e/utils/word_dict.pkl', 'userDict_file': '/tmp/tmp99sqsu5e/utils/uid2index.pkl'}


Instantiating the model

In [None]:
# The iterator is bound as the class itself (not an instance); it is passed to
# the NRMSModel constructor in the next cell.
iterator = MINDIterator

In [None]:
# Build the NRMS model from the hyper-parameters, the iterator class and the
# fixed seed defined earlier in the notebook.
model = NRMSModel(hparams, iterator, seed=seed)

  super().__init__(name, **kwargs)


Training the NRMS model

In [None]:
%%time
# Train on the training news/behaviors files. The validation files are passed
# as well; the logged output reports eval metrics after every epoch.
model.fit(train_news_file, train_behaviors_file, valid_news_file, valid_behaviors_file)

step 1080 , total_loss: 1.5144, data_loss: 1.3216: : 1086it [01:27, 12.48it/s]
  updates=self.state_updates,
586it [00:01, 393.52it/s]
236it [00:02, 100.89it/s]
7538it [00:01, 7246.95it/s]


at epoch 1
train info: logloss loss:1.5142645089446292
eval info: group_auc:0.5786, mean_mrr:0.2443, ndcg@10:0.3305, ndcg@5:0.2585
at epoch 1 , train time: 87.0 eval time: 13.5


step 1080 , total_loss: 1.4203, data_loss: 1.3041: : 1086it [01:23, 13.08it/s]
586it [00:00, 899.10it/s]
236it [00:01, 165.26it/s]
7538it [00:01, 7127.26it/s]


at epoch 2
train info: logloss loss:1.4207040892560618
eval info: group_auc:0.5973, mean_mrr:0.2548, ndcg@10:0.3437, ndcg@5:0.2693
at epoch 2 , train time: 83.1 eval time: 11.8


step 1080 , total_loss: 1.3775, data_loss: 1.1619: : 1086it [01:22, 13.13it/s]
586it [00:00, 950.36it/s]
236it [00:01, 168.74it/s]
7538it [00:01, 7379.85it/s]


at epoch 3
train info: logloss loss:1.377581507681044
eval info: group_auc:0.6093, mean_mrr:0.2671, ndcg@10:0.3588, ndcg@5:0.288
at epoch 3 , train time: 82.7 eval time: 11.6


step 1080 , total_loss: 1.3516, data_loss: 1.1476: : 1086it [01:22, 13.10it/s]
586it [00:00, 894.89it/s]
236it [00:01, 166.25it/s]
7538it [00:01, 7456.53it/s]


at epoch 4
train info: logloss loss:1.3521846341823347
eval info: group_auc:0.6103, mean_mrr:0.2677, ndcg@10:0.359, ndcg@5:0.2873
at epoch 4 , train time: 82.9 eval time: 11.7


step 1080 , total_loss: 1.3286, data_loss: 1.3719: : 1086it [01:22, 13.09it/s]
586it [00:00, 922.94it/s]
236it [00:01, 167.66it/s]
7538it [00:01, 7444.25it/s]


at epoch 5
train info: logloss loss:1.3287456378642586
eval info: group_auc:0.6169, mean_mrr:0.2729, ndcg@10:0.3659, ndcg@5:0.2948
at epoch 5 , train time: 82.9 eval time: 11.7
CPU times: user 3min 53s, sys: 1min 13s, total: 5min 7s
Wall time: 7min 58s


<recommenders.models.newsrec.models.nrms.NRMSModel at 0x7f1db80c9780>

Evaluating the NRMS Model

In [None]:
%%time
# Evaluate the trained model on the validation files and print the returned
# metrics.
res_syn = model.run_eval(valid_news_file, valid_behaviors_file)
print(res_syn)

586it [00:00, 936.37it/s]
236it [00:01, 164.57it/s]
7538it [00:01, 7307.71it/s]


{'group_auc': 0.6169, 'mean_mrr': 0.2729, 'ndcg@5': 0.2948, 'ndcg@10': 0.3659}
CPU times: user 13.6 s, sys: 11.1 s, total: 24.7 s
Wall time: 11.9 s


| Model   | group_auc | mean_mrr | ndcg@5 | ndcg@10 |
|----------|-----------|----------|--------|---------|
| NRMS    |   0.6169  |   0.2729  | 0.2948 |  0.3659 |

In [None]:
# Attach the evaluation metrics to this notebook with scrapbook under the
# name "res_syn".
sb.glue("res_syn", res_syn)

Saving the NRMS Model

In [None]:
# Scratch folder for model artefacts inside the temporary data directory
# (kept so that anything relying on `model_path` still finds it).
model_path = os.path.join(data_path, "model")
os.makedirs(model_path, exist_ok=True)

# Persist the trained weights under the project folder on Drive. The location
# is derived from `route` rather than repeating the absolute path, and the
# folder is created first so the save does not depend on it already existing.
# The "nmrs" file prefix is left unchanged so that existing checkpoints and any
# code that loads them keep working.
drive_model_dir = os.path.join(route, "models")
os.makedirs(drive_model_dir, exist_ok=True)
model.model.save_weights(os.path.join(drive_model_dir, "nmrs"))

--------------