Skip to content

Commit

Permalink
Assistant token counter
Browse files (browse the repository at this point in the history)
  • Loading branch information
artitw committed Sep 23, 2023
1 parent f0ca8cc commit dc6231b
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 2 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="text2text",
- version="1.3.1",
+ version="1.3.2",
author="artitw",
author_email="artitw@gmail.com",
description="Text2Text: Crosslingual NLP/G toolkit",
Expand Down
2 changes: 1 addition & 1 deletion text2text/assistant.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -37,7 +37,7 @@ def num_tokens(self, input_lines, src_lang='en'):
df = self.preprocess(input_lines, src_lang)
tok = self.__class__.tokenizer
input_ids = tok(df["input_line"].tolist(), return_tensors="pt", padding=True).input_ids
- return len(input_ids[0])
+ return [len(x) for x in input_ids]

def transform(self, input_lines, src_lang='en', retriever=None, **kwargs):
df = self.preprocess(input_lines, src_lang, retriever, **kwargs)
Expand Down

0 comments on commit dc6231b

Please sign in to comment.