-
Notifications
You must be signed in to change notification settings - Fork 4.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Convert clean_whitespace utility into a preprocessor
- Loading branch information
1 parent
81dcf90
commit 5ed7144
Showing
6 changed files
with
82 additions
and
52 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
""" | ||
Statement pre-processors. | ||
""" | ||
|
||
def clean_whitespace(chatbot, statement):
    """
    Normalize the whitespace found in a statement's text.

    Line feeds, carriage returns and tabs are turned into spaces, whitespace
    at either end of the text is stripped, and every run of spaces is
    collapsed into a single space. Other characters inside the text are
    left untouched.

    :param chatbot: The chat bot invoking the preprocessor (not used here).
    :param statement: An object whose ``text`` attribute holds the string to
        clean; the attribute is overwritten with the cleaned string.
    :returns: The same statement object that was passed in.
    """
    import re

    text = statement.text

    # Turn line breaks and tabs into plain spaces
    for char in ('\n', '\r', '\t'):
        text = text.replace(char, ' ')

    # Drop leading and trailing whitespace, then squeeze each run of
    # spaces down to a single space
    statement.text = re.sub(' +', ' ', text.strip())

    return statement
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
# -*- coding: utf-8 -*- | ||
from .base_case import ChatBotTestCase | ||
from chatterbot.conversation import Statement | ||
from chatterbot import preprocessors | ||
|
||
|
||
class PreprocessorIntegrationTestCase(ChatBotTestCase):
    """
    Check that a preprocessor can be plugged into the chat bot.
    """

    def test_clean_whitespace(self):
        # Arrange: make clean_whitespace the chat bot's only preprocessor
        self.chatbot.preprocessors = [preprocessors.clean_whitespace]

        # Act
        reply = self.chatbot.get_response('Hello, how are you?')

        # Assert: the response text matches the (already clean) input text
        self.assertEqual(reply.text, 'Hello, how are you?')
|
||
|
||
class CleanWhitespacePreprocessorTestCase(ChatBotTestCase):
    """
    Make sure that ChatterBot's whitespace removing preprocessor works as expected.
    """

    def test_clean_whitespace(self):
        # Tabs, line feeds and carriage returns should be replaced, while the
        # expected text below keeps \v, \a and \f exactly where they were.
        statement = Statement('\tThe quick \nbrown fox \rjumps over \vthe \alazy \fdog\\.')
        cleaned = preprocessors.clean_whitespace(self.chatbot, statement)
        normal_text = 'The quick brown fox jumps over \vthe \alazy \fdog\\.'

        self.assertEqual(cleaned.text, normal_text)

    def test_leading_or_trailing_whitespace_removed(self):
        statement = Statement(' The quick brown fox jumps over the lazy dog. ')
        cleaned = preprocessors.clean_whitespace(self.chatbot, statement)
        normal_text = 'The quick brown fox jumps over the lazy dog.'

        self.assertEqual(cleaned.text, normal_text)

    def test_consecutive_spaces_removed(self):
        # The input must actually contain runs of several spaces; with only
        # single spaces it would equal the expected text and the assertion
        # would pass without the preprocessor collapsing anything.
        statement = Statement('The       quick brown     fox      jumps over the lazy dog.')
        cleaned = preprocessors.clean_whitespace(self.chatbot, statement)
        normal_text = 'The quick brown fox jumps over the lazy dog.'

        self.assertEqual(cleaned.text, normal_text)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters