fixed qa bug caused by paragraph_tokenize

amaiya · Jul 15, 2022 · 1daacbe · 1daacbe
1 parent 5eba3ec
commit 1daacbe
Show file tree

Hide file tree

Showing 2 changed files with 6 additions and 4 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,7 +6,7 @@ Most recent releases are shown at the top. Each release shows:
 - **Changed**: Additional parameters, changes to inputs or outputs, etc
 - **Fixed**: Bug fixes that don't change documented behaviour
 
-## 0.31.3 (TBD)
+## 0.31.3 (2022-07-15)
 
 ### new:
 - N/A
@@ -15,7 +15,7 @@ Most recent releases are shown at the top. Each release shows:
 - added `alnum` check and period check to `KeywordExtractor`
 
 ### fixed:
-- N/A
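+- fixed bug in `text.qa.core` caused by previous refactoring of `paragraph_tokenize` and `tokenize`: `paragraph_tokenize` now always returns a list of paragraphs, and the single-paragraph unwrapping is done in `tokenize` instead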
 
 
 ## 0.31.1 (2022-05-17)

diff --git a/ktrain/text/textutils.py b/ktrain/text/textutils.py
@@ -438,15 +438,17 @@ def paragraph_tokenize(
         elif join_sentences and not join_tokens:
             sents = [item for sublist in sents for item in sublist]
         paragraphs.append(sents)
-    paragraphs = paragraphs[0] if len(paragraphs) == 1 else paragraphs
+    # 20220715: single-paragraph unwrapping (next line) moved to tokenize() due to text/qa/core.py usage
+    # paragraphs = paragraphs[0] if len(paragraphs) == 1 else paragraphs
     return paragraphs
 
 
 def tokenize(s, join_tokens=False, join_sentences=True, join_char=" "):
     s = s.replace("\n", " ")
-    return paragraph_tokenize(
+    paragraphs = paragraph_tokenize(
         s, join_tokens=join_tokens, join_sentences=join_sentences, join_char=join_char
     )
+    return paragraphs[0] if len(paragraphs) == 1 else paragraphs
 
 
 def extract_noun_phrases(text):