V0.1.0 (#1)

* fix python 2 division * README docs * add automated tests
barrust · Feb 25, 2018 · 1640a90 · 1640a90
1 parent 62b7e62
commit 1640a90
Show file tree

Hide file tree

Showing 8 changed files with 187 additions and 6 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -0,0 +1,16 @@
+language: python
+python:
+    - "2.7"
+    - "3.4"
+    - "3.5"
+    - "3.6"
+
+install:
+  - pip install -r requirements/requirements-dev.txt
+
+script:
+    - coverage run --source=spellchecker setup.py test
+
+# commands to run after the tests successfully complete
+after_success:
+  - coveralls
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -0,0 +1,10 @@
+# pyspellchecker
+
+## Version 0.1.0
+* Move word frequency to its own class
+* Add basic tests
+* Readme documentation
+
+## Version 0.0.1
+* Initial release using code from Peter Norvig
+* Initial release to pypi
diff --git a/README.md b/README.md
@@ -1,2 +1,92 @@
 # pyspellchecker
-Pure Python Spell Checking based on https://norvig.com/spell-correct.html
+
+Pure Python Spell Checking based on
+[Peter Norvig's](https://norvig.com/spell-correct.html) blog post on setting up
+a simple spell checking algorithm.
+
+It uses a [Levenshtein Distance](https://en.wikipedia.org/wiki/Levenshtein_distance)
+algorithm to find permutations within an edit distance of 2 from the
+original word. It then compares all permutations (insertions, deletions,
+replacements, and transpositions) to known words in a word frequency list.
+Those words that are found more often in the frequency list are `more likely`
+the correct results.
+
+
+## Installation
+
+The easiest method to install is using pip:
+
+``` bash
+pip install pyspellchecker
+```
+
+To install from source:
+``` bash
+git clone https://github.com/barrust/pyspellchecker.git
+cd pyspellchecker
+python setup.py install
+```
+
+As always, I highly recommend using the [Pipenv](https://github.com/pypa/pipenv)
+package to help manage dependencies!
+
+## Quickstart
+
+After installation, using pyspellchecker should be fairly straightforward:
+
+``` python
+from spellchecker import SpellChecker
+
+
+spell = SpellChecker()
+
+# find those words that may be misspelled
+misspelled = spell.unknown(['something', 'is', 'hapenning', 'here'])
+
+for word in misspelled:
+    # Get the one `most likely` answer
+    print(spell.correction(word))
+
+    # Get a list of `likely` options
+    print(spell.candidates(word))
+```
+
+If the Word Frequency list is not to your liking, you can add additional text
+to generate a more appropriate list for your use case.
+
+``` python
+from spellchecker import SpellChecker
+
+spell = SpellChecker()  # loads default word frequency list
+spell.word_frequency.load_text_file('./my_free_text_doc.txt')
+
+# if I just want to make sure some words are not flagged as misspelled
+spell.word_frequency.load_words(['microsoft', 'apple', 'google'])
+spell.known(['microsoft', 'google'])  # will return both now!
+```
+
+More work in storing and loading word frequency lists is planned; stay tuned. 
+
+## Additional Methods
+Online documentation is planned; until then, the main SpellChecker methods are
+listed here:
+
+`correction(word)`: Returns the most probable result for the misspelled word
+
+`candidates(word)`: Returns a set of possible candidates for the misspelled
+word
+
+`known([words])`: Returns those words that are in the word frequency list
+
+`unknown([words])`: Returns those words that are not in the frequency list
+
+`word_probability(word)`: The frequency of the given word out of all words in
+the frequency list
+
+#### The following are less likely to be needed by the user but are available:
+
+`edit_distance_1(word)`: Returns a set of all strings at a Levenshtein Distance
+of one
+
+`edit_distance_2(word)`: Returns a set of all strings at a Levenshtein Distance
+of two
diff --git a/requirements/requirements-dev.txt b/requirements/requirements-dev.txt
@@ -0,0 +1,6 @@
+# needed for testing purposes
+pycodestyle
+isort
+astroid
+pylint
+coveralls
diff --git a/spellchecker/__init__.py b/spellchecker/__init__.py
@@ -1,7 +1,7 @@
 ''' SpellChecker Module '''
-from . spellchecker import SpellChecker
+from . spellchecker import SpellChecker, WordFrequency
 from . info import (__author__, __maintainer__, __email__, __license__,
                     __version__, __credits__, __url__, __bugtrack_url__)
 
 
-__all__ = ['SpellChecker']
+__all__ = ['SpellChecker', 'WordFrequency']
diff --git a/spellchecker/info.py b/spellchecker/info.py
@@ -5,7 +5,7 @@
 __maintainer__ = 'Tyler Barrus'
 __email__ = 'barrust@gmail.com'
 __license__ = 'MIT'
-__version__ = '0.0.1'
+__version__ = '0.1.0'
 __credits__ = ['Peter Norvig']
 __url__ = 'https://github.com/barrust/pyspellchecker'
 __bugtrack_url__ = '{0}/issues'.format(__url__)
diff --git a/spellchecker/spellchecker.py b/spellchecker/spellchecker.py
@@ -1,6 +1,6 @@
 ''' SpellChecker Module; simple, intuitive spell checker based on the post by
     Peter Norvig. See: https://norvig.com/spell-correct.html '''
-from __future__ import absolute_import
+from __future__ import absolute_import, division
 
 import os
 import re
@@ -43,7 +43,7 @@ def candidates(self, word):
                 self.known(self.edit_distance_2(word)) or [word])
 
     def known(self, words):
-        "The subset of `words` that appear in the dictionary of WORDS."
+        "The subset of `words` that appear in the dictionary of words."
         return set(w for w in words if w in self.word_frequency.dictionary)
 
     def unknown(self, words):

diff --git a/tests/spellchecker_test.py b/tests/spellchecker_test.py
@@ -0,0 +1,59 @@
+# -*- coding: utf-8 -*-
+'''
+Unittest class
+'''
+import unittest
+
+from spellchecker import SpellChecker
+
+
+class TestSpellChecker(unittest.TestCase):
+    def test_correction(self):
+        ''' test spell checker corrections '''
+        spell = SpellChecker()
+        self.assertEqual(spell.correction('ths'), 'the')
+        self.assertEqual(spell.correction('ergo'), 'ergot')
+        self.assertEqual(spell.correction('this'), 'this')
+
+    def test_candidates(self):
+        ''' test spell checker candidates '''
+        spell = SpellChecker()
+        self.assertEqual(spell.candidates('ths'), {'tis', 'tss', 'th', 'thus', 'the', 'this', 'thy'})
+        self.assertEqual(spell.candidates('the'), {'the'})
+
+    def test_words(self):
+        spell = SpellChecker()
+        self.assertEqual(spell.words('This is a test of this'), ['this', 'is', 'a', 'test', 'of', 'this'])
+
+    def test_word_frequency(self):
+        spell = SpellChecker()
+        # if the default load changes so will this...
+        self.assertEqual(spell.word_frequency.dictionary['the'], 79809)
+
+    def test_word_probability(self):
+        spell = SpellChecker()
+        # if the default load changes so will this...
+        self.assertEqual(spell.word_probability('the'), 0.07154004401278254)
+
+    def test_word_known(self):
+        ''' test if the word is a `known` word or not '''
+        spell = SpellChecker()
+        self.assertEqual(spell.known(['this']), {'this'})
+        self.assertEqual(spell.known(['sherlock']), {'sherlock'})
+        self.assertEqual(spell.known(['holmes']), {'holmes'})
+        self.assertEqual(spell.known(['known']), {'known'})
+
+        self.assertEqual(spell.known(['foobar']), set())
+        self.assertEqual(spell.known(['ths']), set())
+        self.assertEqual(spell.known(['ergo']), set())
+
+    def test_unknown_words(self):
+        spell = SpellChecker()
+        self.assertEqual(spell.unknown(['this']), set())
+        self.assertEqual(spell.unknown(['sherlock']), set())
+        self.assertEqual(spell.unknown(['holmes']), set())
+        self.assertEqual(spell.unknown(['known']), set())
+
+        self.assertEqual(spell.unknown(['foobar']), {'foobar'})
+        self.assertEqual(spell.unknown(['ths']), {'ths'})
+        self.assertEqual(spell.unknown(['ergo']), {'ergo'})