Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix export_for_training to encode utf8 #1731

Closed
wants to merge 24 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
23a0e34
Remove the sentiment comparison method
gunthercox Mar 30, 2019
94735c3
Simplify imports
gunthercox Mar 30, 2019
ebcfffb
Replace synset distance with spacy similarity comparison
gunthercox Mar 30, 2019
d57e221
Update Jaccard similarity to use Spacy
gunthercox Mar 30, 2019
220382c
Make sure that test requirements are downloaded
gunthercox Mar 30, 2019
ef9cd39
Fix tests for getting language classes
gunthercox Apr 4, 2019
8d92fda
Switch from NLTK to Spacy for indexing
gunthercox Mar 31, 2019
9e2dc73
Remove NLTK_DATA from passenv
gunthercox Mar 31, 2019
795c439
Remove initialization methods
gunthercox Apr 1, 2019
c979e20
Update tests
gunthercox Apr 6, 2019
8efc371
Update docstrings
gunthercox Apr 6, 2019
f002e39
Remove multiprocessing to prevent CI errors on Travis
gunthercox Apr 6, 2019
21dfaf0
Allow language parameters to be set for comparison methods
gunthercox Apr 6, 2019
4580dfc
Read version from config instead of using import
gunthercox Apr 10, 2019
7d0755f
Fix indentation in docs
jghyllebert Apr 8, 2019
11cc9cd
Update version to 1.1.0 for alpha release on master branch
gunthercox Apr 27, 2019
1a03dcb
Update sqlalchemy to version >= 1.3
gunthercox Apr 27, 2019
a338315
Try locking issues after 300 days instead of 365
gunthercox May 14, 2019
ccd50f2
Remove pymongo from main dependency list
gunthercox May 17, 2019
5c46de1
Add a second search algorithm for searching the text field
gunthercox May 17, 2019
215e5f1
Normalize sqlite database extensions
gunthercox May 17, 2019
79221d4
Add lowercase tagger and rename indexing method to be more accurate
gunthercox May 17, 2019
009382d
Simplify method signatures
gunthercox May 18, 2019
c429fa5
Save language attribute on lowercase tagger
gunthercox May 18, 2019
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/lock.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Configuration for Lock Threads - https://github.com/dessant/lock-threads

# Number of days of inactivity before a closed issue or pull request is locked
daysUntilLock: 365
daysUntilLock: 300

# Skip issues and pull requests created before a given timestamp. Timestamp must
# follow ISO 8601 (`YYYY-MM-DD`). Set to `false` to disable
Expand Down
18 changes: 2 additions & 16 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
bin
build
dist
include
lib
venv
.env
.out
.tox
.coverage
*.pyc
Expand All @@ -15,17 +15,3 @@ venv

# Database files
*.sqlite3
*.db

# IntelliJ
.idea
*.iml

examples/settings.py
examples/ubuntu_dialogs*
sentence_tokenizer.pickle
.env
.out

# ignore Ubuntu corpus files
data
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
include LICENSE
include README.md
include requirements.txt
include setup.cfg

global-exclude __pycache__
global-exclude *.py[co]
Expand Down
4 changes: 0 additions & 4 deletions chatterbot/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,6 @@
"""
from .chatterbot import ChatBot

__version__ = '1.0.5'
__author__ = 'Gunther Cox'
__email__ = 'gunthercx@gmail.com'
__url__ = 'https://github.com/gunthercox/ChatterBot'

__all__ = (
'ChatBot',
Expand Down
24 changes: 7 additions & 17 deletions chatterbot/__main__.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,20 @@
import importlib
import configparser
import sys
import os


def get_chatterbot_version():
chatterbot = importlib.import_module('chatterbot')
return chatterbot.__version__
config = configparser.ConfigParser()

current_directory = os.path.dirname(os.path.abspath(__file__))
parent_directory = os.path.abspath(os.path.join(current_directory, os.pardir))
config_file_path = os.path.join(parent_directory, 'setup.cfg')

def get_nltk_data_directories():
import nltk.data
config.read(config_file_path)

data_directories = []

# Find each data directory in the NLTK path that has content
for path in nltk.data.path:
if os.path.exists(path):
if os.listdir(path):
data_directories.append(path)

return os.linesep.join(data_directories)
return config['chatterbot']['version']


if __name__ == '__main__':
if '--version' in sys.argv:
print(get_chatterbot_version())

if 'list_nltk_data' in sys.argv:
print(get_nltk_data_directories())
45 changes: 11 additions & 34 deletions chatterbot/chatterbot.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
from chatterbot.storage import StorageAdapter
from chatterbot.logic import LogicAdapter
from chatterbot.search import IndexedTextSearch
from chatterbot.search import TextSearch, IndexedTextSearch
from chatterbot import utils


Expand All @@ -13,12 +13,6 @@ class ChatBot(object):
def __init__(self, name, **kwargs):
self.name = name

primary_search_algorithm = IndexedTextSearch(self, **kwargs)

self.search_algorithms = {
primary_search_algorithm.name: primary_search_algorithm
}

storage_adapter = kwargs.get('storage_adapter', 'chatterbot.storage.SQLStorageAdapter')

logic_adapters = kwargs.get('logic_adapters', [
Expand All @@ -33,6 +27,14 @@ def __init__(self, name, **kwargs):

self.storage = utils.initialize_class(storage_adapter, **kwargs)

primary_search_algorithm = IndexedTextSearch(self, **kwargs)
text_search_algorithm = TextSearch(self, **kwargs)

self.search_algorithms = {
primary_search_algorithm.name: primary_search_algorithm,
text_search_algorithm.name: text_search_algorithm
}

for adapter in logic_adapters:
utils.validate_adapter_class(adapter, LogicAdapter)
logic_adapter = utils.initialize_class(adapter, self, **kwargs)
Expand All @@ -54,31 +56,6 @@ def __init__(self, name, **kwargs):
# Allow the bot to save input it receives so that it can learn
self.read_only = kwargs.get('read_only', False)

if kwargs.get('initialize', True):
self.initialize()

def get_initialization_functions(self):
initialization_functions = set()

initialization_functions.update(utils.get_initialization_functions(
self, 'storage.tagger'
))

for search_algorithm in self.search_algorithms.values():
search_algorithm_functions = utils.get_initialization_functions(
search_algorithm, 'compare_statements'
)
initialization_functions.update(search_algorithm_functions)

return initialization_functions

def initialize(self):
"""
Do any work that needs to be done before the chatbot can process responses.
"""
for function in self.get_initialization_functions():
function()

def get_response(self, statement=None, **kwargs):
"""
Return the bot's response based on the input.
Expand Down Expand Up @@ -131,10 +108,10 @@ def get_response(self, statement=None, **kwargs):
# Make sure the input statement has its search text saved

if not input_statement.search_text:
input_statement.search_text = self.storage.tagger.get_bigram_pair_string(input_statement.text)
input_statement.search_text = self.storage.tagger.get_text_index_string(input_statement.text)

if not input_statement.search_in_response_to and input_statement.in_response_to:
input_statement.search_in_response_to = self.storage.tagger.get_bigram_pair_string(input_statement.in_response_to)
input_statement.search_in_response_to = self.storage.tagger.get_text_index_string(input_statement.in_response_to)

response = self.generate_response(input_statement, additional_response_selection_parameters)

Expand Down
Loading