From 48391541457807e17c3bf03eea3d51e39d133ed9 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Wed, 15 Jan 2020 11:16:01 +0100 Subject: [PATCH] Remove kwargs mapping on Tokenizer decode/decode_batch as there is only one possible arg. This is suggested by the current issue https://github.com/huggingface/tokenizers/issues/54#issuecomment-574104841. kwargs cannot be passed as positional arguments; they have to be named. Replacing kwargs with the actual skip_special_tokens parameter allows both (named and positional) syntaxes. Signed-off-by: Morgan Funtowicz --- bindings/python/src/tokenizer.rs | 32 ++++++++++---------------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/bindings/python/src/tokenizer.rs b/bindings/python/src/tokenizer.rs index 4acb7f9de..4aafb6275 100644 --- a/bindings/python/src/tokenizer.rs +++ b/bindings/python/src/tokenizer.rs @@ -189,34 +189,22 @@ impl Tokenizer { .into() } - #[args(kwargs = "**")] - fn decode(&self, ids: Vec, kwargs: Option<&PyDict>) -> PyResult { - let mut skip_special_tokens = true; - - if let Some(kwargs) = kwargs { - if let Some(skip) = kwargs.get_item("skip_special_tokens") { - skip_special_tokens = skip.extract()?; - } - } - - ToPyResult(self.tokenizer.decode(ids, skip_special_tokens)).into() + fn decode(&self, ids: Vec, skip_special_tokens: Option) -> PyResult { + ToPyResult(self.tokenizer.decode( + ids, + skip_special_tokens.unwrap_or(true), + )).into() } - #[args(kwargs = "**")] fn decode_batch( &self, sentences: Vec>, - kwargs: Option<&PyDict>, + skip_special_tokens: Option, ) -> PyResult> { - let mut skip_special_tokens = true; - - if let Some(kwargs) = kwargs { - if let Some(skip) = kwargs.get_item("skip_special_tokens") { - skip_special_tokens = skip.extract()?; - } - } - - ToPyResult(self.tokenizer.decode_batch(sentences, skip_special_tokens)).into() + ToPyResult(self.tokenizer.decode_batch( + sentences, + skip_special_tokens.unwrap_or(true), + )).into() } fn token_to_id(&self, token: &str) -> 
Option {