Skip to content

Commit

Permalink
add possibility to do upper case for now
Browse files Browse the repository at this point in the history
  • Loading branch information
patrickvonplaten committed Feb 17, 2021
1 parent cc58398 commit 19c1457
Showing 1 changed file with 6 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -69,15 +69,18 @@ def __init__(
eos_token="</s>",
pad_token="<pad>",
unk_token="<unk>",
do_upper_case=False,
**kwargs,
):
super().__init__(
bos_token=bos_token,
eos_token=eos_token,
unk_token=unk_token,
pad_token=pad_token,
do_upper_case=do_upper_case,
**kwargs,
)
self.do_upper_case = do_upper_case

self.encoder = load_json(vocab_file)
self.decoder = {v: k for k, v in self.encoder.items()}
Expand All @@ -103,6 +106,9 @@ def _convert_id_to_token(self, index: int) -> str:
def convert_tokens_to_string(self, tokens: List[str]) -> str:
    """Join a sequence of sub-word tokens into a single string.

    The tokens are concatenated without a separator, every occurrence of
    ``SPIECE_UNDERLINE`` is replaced by a space, and leading/trailing
    whitespace is stripped. If ``self.do_upper_case`` is truthy, the
    resulting string is upper-cased before being returned.
    """
    joined = "".join(tokens)
    text = joined.replace(SPIECE_UNDERLINE, " ").strip()
    # Upper-casing is applied last, on the fully assembled string.
    return text.upper() if self.do_upper_case else text

def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None) -> List[int]:
Expand Down

0 comments on commit 19c1457

Please sign in to comment.