Merge pull request #66 from DRMacIver/add-hypothesis

Add Hypothesis based test of chardet
chardet · Sep 19, 2015 · cc9d6d2 · cc9d6d2
2 parents 9e419e9 + c058f52
commit cc9d6d2
Show file tree

Hide file tree

Showing 3 changed files with 42 additions and 3 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -7,8 +7,16 @@ python:
   - 3.4
   - pypy
 
+cache:  # keep the directory listed below between CI builds
+    directories:
+        - $HOME/.hypothesis
+
+env:  # same path as the cached directory above; presumably where Hypothesis stores its data - confirm against the Hypothesis docs
+  global:
+    - HYPOTHESIS_STORAGE_DIRECTORY=$HOME/.hypothesis
+
 install:
-  - travis_retry pip install python-coveralls nose-cov
+  - travis_retry pip install python-coveralls nose-cov hypothesis
   - pip install .
 
 # Run test

diff --git a/setup.py b/setup.py
@@ -45,6 +45,6 @@ def readme():
                    "Topic :: Text Processing :: Linguistic"],
       packages=find_packages(),
       install_requires=['enum34'] if sys.version_info < (3, 4) else [],
-      test_requires=['nose'],
+      tests_require=['nose', 'hypothesis'],
       entry_points={'console_scripts':
                     ['chardetect = chardet.cli.chardetect:main']})
diff --git a/test.py b/test.py
@@ -7,10 +7,12 @@
 
 from __future__ import with_statement
 
+from hypothesis import given, assume, Settings, Verbosity
+import hypothesis.strategies as st
 from os import listdir
 from os.path import dirname, isdir, join, realpath, relpath, splitext
 
-from nose.tools import eq_
+from nose.tools import eq_, assert_raises
 
 import chardet
 
@@ -56,3 +58,32 @@ def test_encoding_detection():
             if ext not in ['.html', '.txt', '.xml', '.srt']:
                 continue
             yield check_file_encoding, join(path, file_name), encoding
+
+
+class JustALengthIssue(Exception):  # marker: raised by the nested check in the test below when detection succeeds once a suffix is appended
+    pass
+
+
+@given(st.text(min_size=1), st.sampled_from([  # txt: non-empty text; enc: one of the encodings listed here; rnd: from st.randoms()
+           'ascii', 'utf-8', 'utf-16', 'utf-32',
+           'iso-8859-7', 'iso-8859-8', 'windows-1255']),
+       st.randoms(), settings=Settings(max_examples=200))
+def test_never_fails_to_detect_if_there_is_a_valid_encoding(txt, enc, rnd):  # passes if detection is non-None, or becomes non-None for some appended suffix
+    try:
+        data = txt.encode(enc)
+    except UnicodeEncodeError:
+        assume(False)  # txt cannot be encoded in enc: reject this example
+    detected = chardet.detect(data)['encoding']
+    if detected is None:  # nothing detected: check whether a longer input is detected
+        @given(st.text(), settings=Settings(
+            verbosity=Verbosity.quiet, max_shrinks=0,
+            max_examples=50,
+        ), random=rnd)  # the inner run is given rnd as its random source
+        def string_poisons_following_text(suffix):
+            try:
+                extended = (txt + suffix).encode(enc)
+            except UnicodeEncodeError:
+                assume(False)  # suffix cannot be encoded in enc: reject it
+            if chardet.detect(extended)['encoding'] is not None:
+                raise JustALengthIssue()  # detection succeeded once the suffix was appended
+        assert_raises(JustALengthIssue, string_poisons_following_text)  # fails unless some suffix led to a detection