Skip to content

Commit

Permalink
Merge 092e1b4 into b5f38d9
Browse files Browse the repository at this point in the history
  • Loading branch information
barrust committed Oct 3, 2018
2 parents b5f38d9 + 092e1b4 commit a416f4b
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 2 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,11 @@
# pyspellchecker

## Version 0.1.4 (unreleased)
* Remove words based on threshold
* Add ability to iterate over words (keys) in the dictionary
* Add setting to reduce the edit distance check
[see PR #17](https://github.com/barrust/pyspellchecker/pull/17) Thanks [@mrjamesriley](https://github.com/mrjamesriley)

## Version 0.1.3
* Better handle punctuation and numbers as the word to check

Expand Down
4 changes: 2 additions & 2 deletions spellchecker/__init__.py
@@ -1,7 +1,7 @@
''' SpellChecker Module '''
from . spellchecker import SpellChecker, WordFrequency
from . info import (__author__, __maintainer__, __email__, __license__,
__version__, __credits__, __url__, __bugtrack_url__)
from . info import (__author__, __maintainer__, __email__, __license__, # noqa: F401
__version__, __credits__, __url__, __bugtrack_url__) # noqa: F401


__all__ = ['SpellChecker', 'WordFrequency']
33 changes: 33 additions & 0 deletions spellchecker/spellchecker.py
Expand Up @@ -169,6 +169,7 @@ def _check_if_should_check(word):

return True


class WordFrequency(object):
''' Store the `dictionary` as a word frequency list while allowing for
different methods to load the data and update over time '''
Expand Down Expand Up @@ -221,6 +222,26 @@ def letters(self):
Not settable '''
return self._letters

def keys(self):
    ''' Generate every key held in the underlying dictionary, one at a time

        Yields:
            str: The next key in the dictionary
        Note:
            Produces the same sequence as `words()` '''
    # Iterating the mapping itself walks its keys
    for entry in self._dictionary:
        yield entry

def words(self):
    ''' Generate every word held in the underlying dictionary, one at a time

        Yields:
            str: The next word in the dictionary
        Note:
            Produces the same sequence as `keys()` '''
    # The words are the keys of the mapping, so iterate it directly
    for entry in self._dictionary:
        yield entry

def load_dictionary(self, filename):
''' Load in a pre-built word frequency list
Expand Down Expand Up @@ -284,6 +305,18 @@ def remove(self, word):
self._dictionary.pop(word.lower())
self._update_dictionary()

def remove_by_threshold(self, threshold=5):
    ''' Remove all words at, or below, the provided threshold

        Args:
            threshold (int): The frequency at, or below, which a word \
            is removed from the dictionary '''
    # Collect the keys to drop up front so the dictionary is never mutated
    # while it is being iterated. Keys are taken exactly as stored (no
    # case folding), so every pop below targets an entry that exists.
    doomed = [key for key, count in self._dictionary.items()
              if count <= threshold]
    for key in doomed:
        self._dictionary.pop(key)
    # Refresh the word frequency object once, after all removals
    self._update_dictionary()

def _update_dictionary(self):
''' Update the word frequency object '''
self._total_words = sum(self._dictionary.values())
Expand Down
25 changes: 25 additions & 0 deletions tests/spellchecker_test.py
Expand Up @@ -156,9 +156,34 @@ def test_remove_word(self):
spell.word_frequency.remove('teh')
self.assertEqual(spell['teh'], 0)

def test_remove_by_threshold(self):
    ''' test removing everything at, or below, a certain threshold '''
    threshold = 7
    spell = SpellChecker()
    freq = spell.word_frequency

    # Precondition: there must be something to remove, otherwise the
    # final assertion would pass vacuously.
    cnt = sum(1 for key in freq.keys() if freq[key] <= threshold)
    self.assertGreater(cnt, 0)

    freq.remove_by_threshold(threshold)

    # `<=` (not `<`) so that words sitting exactly on the threshold are
    # verified as removed too; words() is a synonym for keys().
    cnt = sum(1 for word in freq.words() if freq[word] <= threshold)
    self.assertEqual(cnt, 0)

def test_add_word(self):
    ''' test that adding an unknown word gives it a frequency of one '''
    checker = SpellChecker()
    # the word must be absent before it is added
    before = checker['meh']
    self.assertEqual(before, 0)
    checker.word_frequency.add('meh')
    after = checker['meh']
    self.assertEqual(after, 1)

def test_checking_odd_word(self):
    ''' test that a purely numeric word is handed back unchanged '''
    checker = SpellChecker()
    candidates = checker.edit_distance_1('12345')
    # the only candidate expected is the input itself
    expected = {'12345'}
    self.assertEqual(candidates, expected)

def test_unique_words(self):
    ''' test that unique_words matches the number of keys '''
    checker = SpellChecker()
    freq = checker.word_frequency
    # materialise the key iterator so its length can be taken
    all_keys = list(freq.keys())
    self.assertEqual(freq.unique_words, len(all_keys))

0 comments on commit a416f4b

Please sign in to comment.