# **LSTUR MODEL**

### **Initial Setup**

In [1]:
# Colab-only: mount Google Drive so the project folder used by the next cell
# is reachable under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Project root on the mounted Drive.
route = '/content/drive/MyDrive/IMPACT PROJECT'

# Work from the `recommenders` folder inside the project root.
# NOTE(review): presumably this is a checkout of the Recommenders repo, so that
# the `recommenders.*` imports below resolve from here — confirm.
%cd {route}/recommenders

/content/drive/MyDrive/IMPACT PROJECT/recommenders


In [3]:
!pip install scrapbook retrying

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting scrapbook
  Downloading scrapbook-0.5.0-py3-none-any.whl (34 kB)
Collecting retrying
  Downloading retrying-1.3.4-py3-none-any.whl (11 kB)
Collecting papermill (from scrapbook)
  Downloading papermill-2.4.0-py3-none-any.whl (38 kB)
Collecting jedi>=0.16 (from ipython->scrapbook)
  Downloading jedi-0.18.2-py2.py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m29.1 MB/s[0m eta [36m0:00:00[0m
Collecting ansiwrap (from papermill->scrapbook)
  Downloading ansiwrap-0.8.4-py2.py3-none-any.whl (8.5 kB)
Collecting textwrap3>=0.9.2 (from ansiwrap->papermill->scrapbook)
  Downloading textwrap3-0.9.2-py2.py3-none-any.whl (12 kB)
Installing collected packages: textwrap3, retrying, jedi, ansiwrap, papermill, scrapbook
Successfully installed ansiwrap-0.8.4 jedi-0.18.2 papermill-2.4.0 retrying-1.3.4 scrapbook-0.5.0 textwrap3-0.9.2


###  **Importing the needed libraries**

In [9]:
import sys
import os
import numpy as np
import zipfile
from tqdm import tqdm
import scrapbook as sb
from tempfile import TemporaryDirectory
import tensorflow as tf
tf.get_logger().setLevel('ERROR') # only show error messages

from recommenders.models.deeprec.deeprec_utils import download_deeprec_resources
from recommenders.models.newsrec.newsrec_utils import prepare_hparams
from recommenders.models.newsrec.models.lstur import LSTURModel
from recommenders.models.newsrec.io.mind_iterator import MINDIterator
from recommenders.models.newsrec.newsrec_utils import get_mind_data_set

# Record the interpreter and TensorFlow versions alongside the results.
print(f"System version: {sys.version}")
print(f"Tensorflow version: {tf.__version__}")

System version: 3.10.12 (main, Jun  7 2023, 12:45:35) [GCC 9.4.0]
Tensorflow version: 2.12.0


### **Downloading the MIND behaviors and news files**

In [10]:
# Which MIND dataset variant to download: "demo", "small" or "large".
MIND_type = "demo"

In [11]:
# All data for this run lives in a temporary directory; keep `tmpdir`
# referenced so the directory is not cleaned up while the notebook is in use.
tmpdir = TemporaryDirectory()
data_path = tmpdir.name

# One sub-folder per downloaded archive.
train_dir = os.path.join(data_path, "train")
valid_dir = os.path.join(data_path, "valid")
utils_dir = os.path.join(data_path, "utils")

# News and behaviors files for the training and validation splits.
train_news_file = os.path.join(train_dir, "news.tsv")
train_behaviors_file = os.path.join(train_dir, "behaviors.tsv")
valid_news_file = os.path.join(valid_dir, "news.tsv")
valid_behaviors_file = os.path.join(valid_dir, "behaviors.tsv")

# Auxiliary resources: word embeddings, lookup dictionaries and the LSTUR config.
wordEmb_file = os.path.join(utils_dir, "embedding.npy")
userDict_file = os.path.join(utils_dir, "uid2index.pkl")
wordDict_file = os.path.join(utils_dir, "word_dict.pkl")
yaml_file = os.path.join(utils_dir, "lstur.yaml")

# Resolve the download URL and archive names for the chosen MIND variant.
mind_url, mind_train_dataset, mind_dev_dataset, mind_utils = get_mind_data_set(MIND_type)

# Fetch each archive only if its sentinel file is not already present.
if not os.path.exists(train_news_file):
    download_deeprec_resources(mind_url, train_dir, mind_train_dataset)

if not os.path.exists(valid_news_file):
    download_deeprec_resources(mind_url, valid_dir, mind_dev_dataset)

# The utils archive is fetched from a fixed newsrec URL rather than mind_url;
# the presence of lstur.yaml is used as the "already downloaded" check.
if not os.path.exists(yaml_file):
    download_deeprec_resources(
        "https://recodatasets.z20.web.core.windows.net/newsrec/",
        utils_dir,
        mind_utils,
    )

100%|██████████| 17.0k/17.0k [00:00<00:00, 23.4kKB/s]
100%|██████████| 9.84k/9.84k [00:00<00:00, 17.4kKB/s]
100%|██████████| 95.0k/95.0k [00:01<00:00, 52.9kKB/s]


---------------

## **LSTUR: Neural News Recommendation with Long- and Short-term User Representations**



Setting up the parameters

In [12]:
# Run configuration: `seed` is handed to the model constructor, while
# `epochs` and `batch_size` are passed to prepare_hparams to override the YAML.
seed = 40
epochs = 7
batch_size = 32

In [13]:
# Build the hyper-parameter object from the downloaded YAML config, adding the
# resource file paths and the batch size / epoch count chosen above.
hparams = prepare_hparams(
    yaml_file,
    wordEmb_file=wordEmb_file,
    wordDict_file=wordDict_file,
    userDict_file=userDict_file,
    batch_size=batch_size,
    epochs=epochs,
)
# Show the resolved configuration so the run is self-documenting.
print(hparams)

HParams object with values {'support_quick_scoring': True, 'dropout': 0.2, 'attention_hidden_dim': 200, 'head_num': 4, 'head_dim': 100, 'filter_num': 400, 'window_size': 3, 'vert_emb_dim': 100, 'subvert_emb_dim': 100, 'gru_unit': 400, 'type': 'ini', 'user_emb_dim': 50, 'learning_rate': 0.0001, 'optimizer': 'adam', 'epochs': 7, 'batch_size': 32, 'show_step': 100000, 'title_size': 30, 'his_size': 50, 'data_format': 'news', 'npratio': 4, 'metrics': ['group_auc', 'mean_mrr', 'ndcg@5;10'], 'word_emb_dim': 300, 'cnn_activation': 'relu', 'model_type': 'lstur', 'loss': 'cross_entropy_loss', 'wordEmb_file': '/tmp/tmphybt152w/utils/embedding.npy', 'wordDict_file': '/tmp/tmphybt152w/utils/word_dict.pkl', 'userDict_file': '/tmp/tmphybt152w/utils/uid2index.pkl'}


Instantiating the model

In [14]:
# Note: this binds the MINDIterator class itself, not an instance; it is
# passed as-is to the model constructor in the next cell.
iterator = MINDIterator

In [16]:
# Build the LSTUR model from the hyper-parameters and the iterator class,
# with a fixed seed for repeatability.
model = LSTURModel(hparams, iterator, seed=seed)

Tensor("conv1d_1/Relu:0", shape=(None, 30, 400), dtype=float32)
Tensor("att_layer2/Sum_1:0", shape=(None, 400), dtype=float32)


Training the LSTUR model

In [17]:
%%time
# Train on the training split; the validation files are passed as well and the
# log below reports eval metrics after each epoch.
# fit() returns the model object, which is why its repr is shown as the cell output.
model.fit(train_news_file, train_behaviors_file, valid_news_file, valid_behaviors_file)

1086it [03:10,  5.70it/s]
  updates=self.state_updates,
586it [00:01, 400.81it/s]
236it [00:07, 31.67it/s]
7538it [00:01, 3907.15it/s]


at epoch 1
train info: logloss loss:1.4881353934825454
eval info: group_auc:0.5964, mean_mrr:0.2572, ndcg@10:0.3477, ndcg@5:0.2844
at epoch 1 , train time: 190.4 eval time: 19.1


1086it [02:54,  6.21it/s]
586it [00:00, 747.52it/s]
236it [00:06, 36.65it/s]
7538it [00:01, 3810.72it/s]


at epoch 2
train info: logloss loss:1.404485442361779
eval info: group_auc:0.6198, mean_mrr:0.2786, ndcg@10:0.3697, ndcg@5:0.3058
at epoch 2 , train time: 174.8 eval time: 17.4


1086it [02:55,  6.20it/s]
586it [00:00, 753.84it/s]
236it [00:06, 36.90it/s]
7538it [00:01, 3818.37it/s]


at epoch 3
train info: logloss loss:1.3582260240946467
eval info: group_auc:0.6238, mean_mrr:0.2833, ndcg@10:0.3743, ndcg@5:0.3117
at epoch 3 , train time: 175.3 eval time: 17.4


1086it [02:55,  6.18it/s]
586it [00:00, 757.58it/s]
236it [00:06, 36.81it/s]
7538it [00:01, 3812.07it/s]


at epoch 4
train info: logloss loss:1.3249353159966828
eval info: group_auc:0.627, mean_mrr:0.2801, ndcg@10:0.374, ndcg@5:0.3099
at epoch 4 , train time: 175.8 eval time: 17.4


1086it [02:55,  6.18it/s]
586it [00:00, 757.72it/s]
236it [00:06, 36.68it/s]
7538it [00:01, 3844.16it/s]


at epoch 5
train info: logloss loss:1.2894772678668547
eval info: group_auc:0.6429, mean_mrr:0.299, ndcg@10:0.3928, ndcg@5:0.3332
at epoch 5 , train time: 175.6 eval time: 17.3


1086it [02:55,  6.20it/s]
586it [00:00, 762.64it/s]
236it [00:06, 36.41it/s]
7538it [00:02, 3631.58it/s]


at epoch 6
train info: logloss loss:1.2457926851829333
eval info: group_auc:0.643, mean_mrr:0.2938, ndcg@10:0.3867, ndcg@5:0.3211
at epoch 6 , train time: 175.1 eval time: 17.7


1086it [02:55,  6.18it/s]
586it [00:00, 786.77it/s]
236it [00:06, 36.33it/s]
7538it [00:01, 3778.78it/s]


at epoch 7
train info: logloss loss:1.1992762655184415
eval info: group_auc:0.6443, mean_mrr:0.2961, ndcg@10:0.3897, ndcg@5:0.327
at epoch 7 , train time: 175.7 eval time: 17.2
CPU times: user 24min 39s, sys: 1min 4s, total: 25min 44s
Wall time: 22min 46s


<recommenders.models.newsrec.models.lstur.LSTURModel at 0x7f367dabf610>

Evaluating the LSTUR Model

In [18]:
%%time
# Evaluate the trained model on the validation split.
# The printed result is a dict with group_auc, mean_mrr, ndcg@5 and ndcg@10.
res_syn = model.run_eval(valid_news_file, valid_behaviors_file)
print(res_syn)

586it [00:00, 612.04it/s]
236it [00:06, 35.22it/s]
7538it [00:01, 6998.68it/s]


{'group_auc': 0.6443, 'mean_mrr': 0.2961, 'ndcg@5': 0.327, 'ndcg@10': 0.3897}
CPU times: user 15.8 s, sys: 1.5 s, total: 17.3 s
Wall time: 16.3 s


| Model   | group_auc | mean_mrr | ndcg@5 | ndcg@10 |
|----------|-----------|----------|--------|---------|
| LSTUR    |   0.6443  |   0.2961  | 0.327 |  0.3897 |