Skip to content

Commit

Permalink
Use warning instead of error in map_token_to_id (#266)
Browse files Browse the repository at this point in the history
* Use warning instead of error in map_token_to_id

* Update test_sentencepiece.model url
  • Loading branch information
gpengzhi committed Jan 2, 2020
1 parent 413e07f commit 261d550
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 4 deletions.
7 changes: 4 additions & 3 deletions texar/tf/data/tokenizers/tokenizer_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@

from typing import Any, Dict, List, Optional, Tuple, overload

-import os
 import json
+import os
+import warnings

from texar.tf.module_base import ModuleBase

Expand Down Expand Up @@ -379,11 +380,11 @@ def map_token_to_id(self, tokens):
for token in tokens:
ids.append(self._map_token_to_id_with_added_voc(token))
if len(ids) > self.max_len:
-raise ValueError(
+warnings.warn(
 "Token indices sequence length is longer than the specified "
 "maximum sequence length for this model ({} > {}). Running "
 "this sequence through the model will result in indexing "
-"errors".format(len(ids), self.max_len))
+"errors".format(len(ids), self.max_len), UserWarning)
return ids

# pylint: enable=unused-argument,function-redefined
Expand Down
2 changes: 1 addition & 1 deletion texar/tf/data/tokenizers/xlnet_tokenizer_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def setUp(self):
# transformers
self.SAMPLE_VOCAB = maybe_download(
'https://github.com/huggingface/transformers/blob/master/'
-'transformers/tests/fixtures/test_sentencepiece.model?raw=true',
+'tests/fixtures/test_sentencepiece.model?raw=true',
self.tmp_dir.name)

self.tokenizer = XLNetTokenizer.load(
Expand Down

0 comments on commit 261d550

Please sign in to comment.