In [3]:
from typing import Iterator, List, Dict

import torch
import torch.optim as optim

import numpy as np

from allennlp.data import Instance
from allennlp.data.fields import TextField, SequenceLabelField
from allennlp.data.dataset_readers import DatasetReader
from allennlp.common.file_utils import cached_path
from allennlp.data.token_indexers import TokenIndexer, SingleIdTokenIndexer
from allennlp.data.tokenizers import Token
from allennlp.data.vocabulary import Vocabulary
from allennlp.models import Model
from allennlp.nn import Activation
from allennlp.modules import FeedForward
from allennlp.modules.text_field_embedders import TextFieldEmbedder, BasicTextFieldEmbedder
from allennlp.modules.token_embedders import Embedding
from allennlp.modules.token_embedders.embedding import _read_pretrained_embeddings_file
from allennlp.modules.seq2seq_encoders import Seq2SeqEncoder, PytorchSeq2SeqWrapper
from allennlp.nn.util import get_text_field_mask, sequence_cross_entropy_with_logits
from allennlp.training.metrics import CategoricalAccuracy
from allennlp.data.iterators import BucketIterator
from allennlp.training.trainer import Trainer
from allennlp.predictors import SentenceTaggerPredictor

from my_library.dataset_readers import EtdTitleAbstractReader
from my_library.models.etd_bcn import EtdBCN
from my_library.models.etd_biattention import BiAttentionEncoder

In [4]:
from typing import List, Dict, Iterable
import json
import logging

from overrides import overrides

import tqdm

from allennlp.common import Params
from allennlp.common.checks import ConfigurationError
from allennlp.common.file_utils import cached_path
#from allennlp.data.dataset import Dataset
from allennlp.data.dataset_readers.dataset_reader import DatasetReader
from allennlp.data.fields import LabelField, TextField, MetadataField, ListField
from allennlp.data.fields.multilabel_field import MultiLabelField
from allennlp.data.instance import Instance
from allennlp.data.tokenizers import Token, Tokenizer, WordTokenizer
from allennlp.data.token_indexers import TokenIndexer, SingleIdTokenIndexer

logger = logging.getLogger(__name__)  # pylint: disable=invalid-name

# from multi_toxic_label.fields.multi_label_field import MultiLabelField

class EtdAbstractReader(DatasetReader):
    """
    Reads a CSV-lines file containing abstract only from ETD records
    """
    def __init__(self,
                 tokenizer: Tokenizer = None,
                 token_indexers: Dict[str, TokenIndexer] = None,
                 lazy: bool = False,
                 start_tokens: List[str] = ["<start>"], 
                 end_tokens: List[str] = ["<end>"]) -> None:
        super().__init__(lazy)
        self._tokenizer = tokenizer or WordTokenizer(start_tokens=start_tokens,end_tokens=end_tokens)
        self._token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()}
        
    @overrides
    def _read(self, file_path: str) -> Iterable[Instance]:
        with open(cached_path(file_path), "r") as data_file:
            logger.info("Reading instances from lines in file at: %s", file_path)
            for line in data_file:
                line = line.strip("\n")
                if not line:
                    continue
                etd_json = json.loads(line)
                abstract = etd_json['etdAbstract']
                labels = etd_json['lcsh']

                yield self.text_to_instance(abstract, labels)

    @overrides
    def text_to_instance(self, abstract_text: str, labels: Dict[str, int] = None) -> Instance:  # type: ignore
        # pylint: disable=arguments-differ
        tokenized_abstract_text = self._tokenizer.tokenize(abstract_text)
        abstract_text_field = TextField(tokenized_abstract_text, self._token_indexers)
        fields = {'tokens': abstract_text_field}
        
        if labels is not None:
            fields['label'] = MultiLabelField([label for label,value in labels.items() if value == 1])
                
        return Instance(fields)

#     @classmethod
#     def from_params(cls, params: Params) -> 'EtdDatasetReader':
#         tokenizer = Tokenizer.from_params(params.pop('tokenizer', {}))
#         token_indexers = TokenIndexer.dict_from_params(params.pop('token_indexers', {}))
#         lazy = params.pop('lazy', False)
#         params.assert_empty(cls.__name__)
#         return cls(tokenizer=tokenizer, token_indexers=token_indexers, lazy=lazy)


In [5]:
from typing import Dict, Optional, Union

import numpy
from overrides import overrides
import torch
from torch import nn
import torch.nn.functional as F

from allennlp.common import Params
from allennlp.common.checks import check_dimensions_match, ConfigurationError
from allennlp.data import Vocabulary
from allennlp.modules import Elmo, FeedForward, Maxout, Seq2SeqEncoder, TextFieldEmbedder
from allennlp.models.model import Model
from allennlp.nn import InitializerApplicator, RegularizerApplicator
from allennlp.nn import util
from allennlp.training.metrics import CategoricalAccuracy
from my_library.metrics.hit_at_k import *


class BiattentiveClassificationNetwork(Model):
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 embedding_dropout: float,
                 pre_encode_feedforward: FeedForward,
                 encoder: Seq2SeqEncoder,
                 integrator: Seq2SeqEncoder,
                 integrator_dropout: float,
                 output_layer: Union[FeedForward, Maxout],
                 elmo: Elmo,
                 use_input_elmo: bool = False,
                 use_integrator_output_elmo: bool = False,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(BiattentiveClassificationNetwork, self).__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        if "elmo" in self._text_field_embedder._token_embedders.keys():  # pylint: disable=protected-access
            raise ConfigurationError("To use ELMo in the BiattentiveClassificationNetwork input, "
                                     "remove elmo from the text_field_embedder and pass an "
                                     "Elmo object to the BiattentiveClassificationNetwork and set the "
                                     "'use_input_elmo' and 'use_integrator_output_elmo' flags accordingly.")
        self._embedding_dropout = nn.Dropout(embedding_dropout)
        self._num_classes = self.vocab.get_vocab_size("labels")

        self._pre_encode_feedforward = pre_encode_feedforward
        self._encoder = encoder
        self._integrator = integrator
        self._integrator_dropout = nn.Dropout(integrator_dropout)

        self._elmo = elmo
        self._use_input_elmo = use_input_elmo
        self._use_integrator_output_elmo = use_integrator_output_elmo
        self._num_elmo_layers = int(self._use_input_elmo) + int(self._use_integrator_output_elmo)
        # Check that, if elmo is None, none of the elmo flags are set.
        if self._elmo is None and self._num_elmo_layers != 0:
            raise ConfigurationError("One of 'use_input_elmo' or 'use_integrator_output_elmo' is True, "
                                     "but no Elmo object was provided upon construction. Pass in an Elmo "
                                     "object to use Elmo.")

        if self._elmo is not None:
            # Check that, if elmo is not None, we use it somewhere.
            if self._num_elmo_layers == 0:
                raise ConfigurationError("Elmo object provided upon construction, but both 'use_input_elmo' "
                                         "and 'use_integrator_output_elmo' are 'False'. Set one of them to "
                                         "'True' to use Elmo, or do not provide an Elmo object upon construction.")
            # Check that the number of flags set is equal to the num_output_representations of the Elmo object
            # pylint: disable=protected-access,too-many-format-args
            if len(self._elmo._scalar_mixes) != self._num_elmo_layers:
                raise ConfigurationError("Elmo object has num_output_representations=%s, but this does not "
                                         "match the number of use_*_elmo flags set to true. use_input_elmo "
                                         "is %s, and use_integrator_output_elmo is %s".format(
                                                 str(len(self._elmo._scalar_mixes)),
                                                 str(self._use_input_elmo),
                                                 str(self._use_integrator_output_elmo)))

        # Calculate combined integrator output dim, taking into account elmo
        if self._use_integrator_output_elmo:
            self._combined_integrator_output_dim = (self._integrator.get_output_dim() +
                                                    self._elmo.get_output_dim())
        else:
            self._combined_integrator_output_dim = self._integrator.get_output_dim()

        self._self_attentive_pooling_projection = nn.Linear(
                self._combined_integrator_output_dim, 1)
        self._output_layer = output_layer

        if self._use_input_elmo:
            check_dimensions_match(text_field_embedder.get_output_dim() +
                                   self._elmo.get_output_dim(),
                                   self._pre_encode_feedforward.get_input_dim(),
                                   "text field embedder output dim + ELMo output dim",
                                   "Pre-encoder feedforward input dim")
        else:
            check_dimensions_match(text_field_embedder.get_output_dim(),
                                   self._pre_encode_feedforward.get_input_dim(),
                                   "text field embedder output dim",
                                   "Pre-encoder feedforward input dim")

        check_dimensions_match(self._pre_encode_feedforward.get_output_dim(),
                               self._encoder.get_input_dim(),
                               "Pre-encoder feedforward output dim",
                               "Encoder input dim")
        check_dimensions_match(self._encoder.get_output_dim() * 3,
                               self._integrator.get_input_dim(),
                               "Encoder output dim * 3",
                               "Integrator input dim")
        if self._use_integrator_output_elmo:
            check_dimensions_match(self._combined_integrator_output_dim * 4,
                                   self._output_layer.get_input_dim(),
                                   "(Integrator output dim + ELMo output dim) * 4",
                                   "Output layer input dim")
        else:
            check_dimensions_match(self._integrator.get_output_dim() * 4,
                                   self._output_layer.get_input_dim(),
                                   "Integrator output dim * 4",
                                   "Output layer input dim")

        check_dimensions_match(self._output_layer.get_output_dim(),
                               self._num_classes,
                               "Output layer output dim",
                               "Number of classes.")

        self.metrics = {            
            "hit_5": HitAtK(5),
            "hit_10": HitAtK(10),
            "hit_100": HitAtK(100)
        }
        self.loss = torch.nn.BCEWithLogitsLoss(pos_weight = torch.ones(241)*10)
        initializer(self)

    @overrides
    def forward(self,  # type: ignore
                tokens: Dict[str, torch.LongTensor],
                label: torch.LongTensor = None) -> Dict[str, torch.Tensor]:
        # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        tokens : Dict[str, torch.LongTensor], required
            The output of ``TextField.as_array()``.
        label : torch.LongTensor, optional (default = None)
            A variable representing the label for each instance in the batch.
        Returns
        -------
        An output dictionary consisting of:
        class_probabilities : torch.FloatTensor
            A tensor of shape ``(batch_size, num_classes)`` representing a
            distribution over the label classes for each instance.
        loss : torch.FloatTensor, optional
            A scalar loss to be optimised.
        """
        text_mask = util.get_text_field_mask(tokens).float()
        # Pop elmo tokens, since elmo embedder should not be present.
        elmo_tokens = tokens.pop("elmo", None)
        if tokens:
            embedded_text = self._text_field_embedder(tokens)
        else:
            # only using "elmo" for input
            embedded_text = None

        # Add the "elmo" key back to "tokens" if not None, since the tests and the
        # subsequent training epochs rely not being modified during forward()
        if elmo_tokens is not None:
            tokens["elmo"] = elmo_tokens

        # Create ELMo embeddings if applicable
        if self._elmo:
            if elmo_tokens is not None:
                elmo_representations = self._elmo(elmo_tokens)["elmo_representations"]
                # Pop from the end is more performant with list
                if self._use_integrator_output_elmo:
                    integrator_output_elmo = elmo_representations.pop()
                if self._use_input_elmo:
                    input_elmo = elmo_representations.pop()
                assert not elmo_representations
            else:
                raise ConfigurationError(
                        "Model was built to use Elmo, but input text is not tokenized for Elmo.")

        if self._use_input_elmo:
            if embedded_text is not None:
                embedded_text = torch.cat([embedded_text, input_elmo], dim=-1)
            else:
                embedded_text = input_elmo

        dropped_embedded_text = self._embedding_dropout(embedded_text)
        pre_encoded_text = self._pre_encode_feedforward(dropped_embedded_text)
        encoded_tokens = self._encoder(pre_encoded_text, text_mask)

        # Compute biattention. This is a special case since the inputs are the same.
        attention_logits = encoded_tokens.bmm(encoded_tokens.permute(0, 2, 1).contiguous())
        attention_weights = util.masked_softmax(attention_logits, text_mask)
        encoded_text = util.weighted_sum(encoded_tokens, attention_weights)

        # Build the input to the integrator
        integrator_input = torch.cat([encoded_tokens,
                                      encoded_tokens - encoded_text,
                                      encoded_tokens * encoded_text], 2)
        integrated_encodings = self._integrator(integrator_input, text_mask)

        # Concatenate ELMo representations to integrated_encodings if specified
        if self._use_integrator_output_elmo:
            integrated_encodings = torch.cat([integrated_encodings,
                                              integrator_output_elmo], dim=-1)

        # Simple Pooling layers
        max_masked_integrated_encodings = util.replace_masked_values(
                integrated_encodings, text_mask.unsqueeze(2), -1e7)
        max_pool = torch.max(max_masked_integrated_encodings, 1)[0]
        min_masked_integrated_encodings = util.replace_masked_values(
                integrated_encodings, text_mask.unsqueeze(2), +1e7)
        min_pool = torch.min(min_masked_integrated_encodings, 1)[0]
        mean_pool = torch.sum(integrated_encodings, 1) / torch.sum(text_mask, 1, keepdim=True)

        # Self-attentive pooling layer
        # Run through linear projection. Shape: (batch_size, sequence length, 1)
        # Then remove the last dimension to get the proper attention shape (batch_size, sequence length).
        self_attentive_logits = self._self_attentive_pooling_projection(
                integrated_encodings).squeeze(2)
        self_weights = util.masked_softmax(self_attentive_logits, text_mask)
        self_attentive_pool = util.weighted_sum(integrated_encodings, self_weights)

        pooled_representations = torch.cat([max_pool, min_pool, mean_pool, self_attentive_pool], 1)
        pooled_representations_dropped = self._integrator_dropout(pooled_representations)

        logits = self._output_layer(pooled_representations_dropped)
        class_probabilities = F.softmax(logits, dim=-1)

        output_dict = {'logits': logits, 'class_probabilities': class_probabilities}
        if label is not None:
            loss = self.loss(logits, label)
            for metric in self.metrics.values():
                metric(logits, label)
            output_dict["loss"] = loss

        return output_dict

    @overrides
    def decode(self, output_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """
        Does a simple argmax over the class probabilities, converts indices to string labels, and
        adds a ``"label"`` key to the dictionary with the result.
        """
        predictions = output_dict["class_probabilities"].cpu().data.numpy()
        argmax_indices = numpy.argmax(predictions, axis=-1)
        labels = [self.vocab.get_token_from_index(x, namespace="labels")
                  for x in argmax_indices]
        output_dict['label'] = labels
        return output_dict

    @overrides
    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        return {metric_name: metric.get_metric(reset) for metric_name, metric in self.metrics.items()}

    # The FeedForward vs Maxout logic here requires a custom from_params.
    @classmethod
    def from_params(cls, vocab: Vocabulary, params: Params) -> 'BiattentiveClassificationNetwork':  # type: ignore
        # pylint: disable=arguments-differ
        embedder_params = params.pop("text_field_embedder")
        text_field_embedder = TextFieldEmbedder.from_params(vocab=vocab, params=embedder_params)
        embedding_dropout = params.pop("embedding_dropout")
        pre_encode_feedforward = FeedForward.from_params(params.pop("pre_encode_feedforward"))
        encoder = Seq2SeqEncoder.from_params(params.pop("encoder"))
        integrator = Seq2SeqEncoder.from_params(params.pop("integrator"))
        integrator_dropout = params.pop("integrator_dropout")

        output_layer_params = params.pop("output_layer")
        if "activations" in output_layer_params:
            output_layer = FeedForward.from_params(output_layer_params)
        else:
            output_layer = Maxout.from_params(output_layer_params)

        elmo = params.pop("elmo", None)
        if elmo is not None:
            elmo = Elmo.from_params(elmo)
        use_input_elmo = params.pop_bool("use_input_elmo", False)
        use_integrator_output_elmo = params.pop_bool("use_integrator_output_elmo", False)

        initializer = InitializerApplicator.from_params(params.pop('initializer', []))
        regularizer = RegularizerApplicator.from_params(params.pop('regularizer', []))
        params.assert_empty(cls.__name__)

        return cls(vocab=vocab,
                   text_field_embedder=text_field_embedder,
                   embedding_dropout=embedding_dropout,
                   pre_encode_feedforward=pre_encode_feedforward,
                   encoder=encoder,
                   integrator=integrator,
                   integrator_dropout=integrator_dropout,
                   output_layer=output_layer,
                   elmo=elmo,
                   use_input_elmo=use_input_elmo,
                   use_integrator_output_elmo=use_integrator_output_elmo,
                   initializer=initializer,
                   regularizer=regularizer)

In [6]:
token_indexers = {"glove_tokens": SingleIdTokenIndexer(namespace="glove_tokens",lowercase_tokens=True),
                  "conceptnet_tokens": SingleIdTokenIndexer(namespace="conceptnet_tokens",lowercase_tokens=True),
                  "fasttext_tokens": SingleIdTokenIndexer(namespace="fasttext_tokens",lowercase_tokens=True)}
reader = EtdAbstractReader(token_indexers=token_indexers,
                                lazy=False)

In [7]:
train_dataset = reader.read(cached_path(
    "/misc/projdata3/info_fil/finance/simon_test/ETD_cataloguing/allennlp-test/etd_debug.json"
))
validation_dataset = reader.read(cached_path(
    "/misc/projdata3/info_fil/finance/simon_test/ETD_cataloguing/allennlp-test/etd_debug.json"
))

94it [00:02, 37.30it/s]
94it [00:01, 53.48it/s]


In [8]:
vocab = Vocabulary.from_instances(train_dataset + validation_dataset, min_count={"glove_tokens": 1,
                                                                                 "concentnet_tokens":1,
                                                                                 "fasttext_tokens":1})

100%|██████████| 188/188 [00:01<00:00, 155.56it/s]


In [9]:
file_url = "/uac/spc/proj/fpredict/finance/ETD_cataloguing/allennlp-test/word_vector/glove/glove.6B.100d.txt"
glove_embedding = _read_pretrained_embeddings_file(file_uri=file_url,
                                                   embedding_dim=100,
                                                   vocab=vocab,
                                                   namespace="glove_tokens")
glove_embedding = Embedding(num_embeddings=vocab.get_vocab_size('glove_tokens'),
                            embedding_dim=100,
                            weight=glove_embedding,
                            trainable=False)

file_url = "/uac/spc/proj/fpredict/finance/ETD_cataloguing/allennlp-test/word_vector/conceptnet/numberbatch-en-17.06.txt"
conceptnet_embedding = _read_pretrained_embeddings_file(file_uri=file_url,
                                                   embedding_dim=300,
                                                   vocab=vocab,
                                                   namespace="conceptnet_tokens")
conceptnet_embedding = Embedding(num_embeddings=vocab.get_vocab_size('conceptnet_tokens'),
                                 embedding_dim=300,
                                 weight=conceptnet_embedding,
                                 trainable=False)

file_url = "/uac/spc/proj/fpredict/finance/ETD_cataloguing/allennlp-test/word_vector/fasttext/crawl-300d-2M.vec"
fasttext_embedding = _read_pretrained_embeddings_file(file_uri=file_url,
                                                   embedding_dim=300,
                                                   vocab=vocab,
                                                   namespace="fasttext_tokens")
fasttext_embedding = Embedding(num_embeddings=vocab.get_vocab_size('fasttext_tokens'),
                               embedding_dim=300,
                               weight=fasttext_embedding,
                               trainable=False)

400000it [00:02, 156456.21it/s]
417194it [00:03, 108552.17it/s]
1999995it [00:14, 136992.37it/s]


In [10]:
text_field_embedder = BasicTextFieldEmbedder({"glove_tokens":glove_embedding,
                                              "conceptnet_tokens":conceptnet_embedding,
                                              "fasttext_tokens":fasttext_embedding})

# model = EtdBCN(vocab=vocab,
#                text_field_embedder=text_field_embedder,
#                title_text_encoder=PytorchSeq2SeqWrapper(torch.nn.GRU(input_size=700,
#                                                                      hidden_size=400,
#                                                                      num_layers=1,
#                                                                      dropout=0.2,
#                                                                      bidirectional=True,
#                                                                      batch_first=True)),
#                abstract_text_encoder=PytorchSeq2SeqWrapper(torch.nn.GRU(input_size=700,
#                                                                      hidden_size=400,
#                                                                      num_layers=1,
#                                                                      dropout=0.2,
#                                                                      bidirectional=True,
#                                                                      batch_first=True)),
#                bi_attention_encoder=BiAttentionEncoder(input_dim=800,
#                                                        integrator_x=PytorchSeq2SeqWrapper(torch.nn.GRU(input_size=2400,
#                                                                      hidden_size=1200,
#                                                                      num_layers=1,
#                                                                      dropout=0.2,
#                                                                      bidirectional=True,
#                                                                      batch_first=True)),
#                                                        integrator_y=PytorchSeq2SeqWrapper(torch.nn.GRU(input_size=2400,
#                                                                      hidden_size=1200,
#                                                                      num_layers=1,
#                                                                      dropout=0.2,
#                                                                      bidirectional=True,
#                                                                      batch_first=True)),
#                                                        integrator_dropout=0.2,
#                                                        combination="x,y"),
#                classifier_feedforward=FeedForward(input_dim=4800,
#                                                   num_layers=2,
#                                                   hidden_dims=[4800,241],
#                                                   activations=[Activation.by_name("relu")(),Activation.by_name("linear")()]),
#                use_positional_encoding=True)

model = BiattentiveClassificationNetwork(vocab=vocab,
                                         text_field_embedder=text_field_embedder,
                                         embedding_dropout=0.0,
                                         pre_encode_feedforward=FeedForward(input_dim=700,
                                                                            num_layers=1,
                                                                            hidden_dims=[700],
                                                                            activations=[Activation.by_name("relu")()]),
                                         encoder=PytorchSeq2SeqWrapper(torch.nn.GRU(input_size=700,
                                                                       hidden_size=400,
                                                                       num_layers=1,
                                                                       dropout=0.2,
                                                                       bidirectional=True,
                                                                       batch_first=True)),
                                         integrator=PytorchSeq2SeqWrapper(torch.nn.GRU(input_size=2400,
                                                                          hidden_size=600,
                                                                          num_layers=1,
                                                                          dropout=0.2,
                                                                          bidirectional=True,
                                                                          batch_first=True)),
                                         integrator_dropout=0.2,
                                         output_layer=FeedForward(input_dim=4800,
                                                      num_layers=2,
                                                      hidden_dims=[4800,241],
                                                      activations=[Activation.by_name("relu")(),Activation.by_name("linear")()]),
                                         elmo=None
                                         )

  "num_layers={}".format(dropout, num_layers))


In [11]:
optimizer = optim.Adam(model.parameters(), lr=0.001)
iterator = BucketIterator(batch_size=2, sorting_keys=[["tokens", "num_tokens"]])
iterator.index_with(vocab)

In [12]:
trainer = Trainer(model=model,
                  optimizer=optimizer,
                  iterator=iterator,
                  train_dataset=train_dataset,
                  validation_dataset=validation_dataset,
                  patience=10,
                  grad_clipping=5.0,
                  num_epochs=100,
                  num_batches_before_step=16,
                  cuda_device=3)

In [13]:
trainer.train()

hit_5: 0.021910, hit_10: 0.040881, hit_100: 0.284878, loss: 0.714026 ||: 100%|██████████| 47/47 [00:15<00:00,  4.12it/s]
hit_5: 0.049215, hit_10: 0.080066, hit_100: 0.456054, loss: 0.424645 ||: 100%|██████████| 47/47 [00:05<00:00,  4.61it/s]
hit_5: 0.040350, hit_10: 0.065527, hit_100: 0.408536, loss: 0.450399 ||: 100%|██████████| 47/47 [00:10<00:00,  4.60it/s]
hit_5: 0.066413, hit_10: 0.093718, hit_100: 0.434068, loss: 0.448376 ||: 100%|██████████| 47/47 [00:06<00:00,  4.16it/s]
hit_5: 0.056130, hit_10: 0.086626, hit_100: 0.411196, loss: 0.451944 ||: 100%|██████████| 47/47 [00:10<00:00,  4.41it/s]
hit_5: 0.056130, hit_10: 0.091945, hit_100: 0.452077, loss: 0.410705 ||: 100%|██████████| 47/47 [00:03<00:00,  5.88it/s]
hit_5: 0.051342, hit_10: 0.072442, hit_100: 0.445871, loss: 0.407302 ||: 100%|██████████| 47/47 [00:08<00:00,  5.39it/s]
hit_5: 0.050811, hit_10: 0.092503, hit_100: 0.487639, loss: 0.373566 ||: 100%|██████████| 47/47 [00:03<00:00,  8.70it/s]
hit_5: 0.045137, hit_10: 0.07778

KeyboardInterrupt: 