Skip to content

Commit

Permalink
Merge pull request #66 from DRMacIver/add-hypothesis
Browse files Browse the repository at this point in the history
Add Hypothesis based test of chardet
  • Loading branch information
dan-blanchard committed Sep 19, 2015
2 parents 9e419e9 + c058f52 commit cc9d6d2
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 3 deletions.
10 changes: 9 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,16 @@ python:
- 3.4
- pypy

cache:
directories:
- $HOME/.hypothesis

env:
global:
- HYPOTHESIS_STORAGE_DIRECTORY=$HOME/.hypothesis

install:
- travis_retry pip install python-coveralls nose-cov
- travis_retry pip install python-coveralls nose-cov hypothesis
- pip install .

# Run test
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,6 @@ def readme():
"Topic :: Text Processing :: Linguistic"],
packages=find_packages(),
install_requires=['enum34'] if sys.version_info < (3, 4) else [],
test_requires=['nose'],
test_requires=['nose', 'hypothesis'],
entry_points={'console_scripts':
['chardetect = chardet.cli.chardetect:main']})
33 changes: 32 additions & 1 deletion test.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@

from __future__ import with_statement

from hypothesis import given, assume, Settings, Verbosity
import hypothesis.strategies as st
from os import listdir
from os.path import dirname, isdir, join, realpath, relpath, splitext

from nose.tools import eq_
from nose.tools import eq_, assert_raises

import chardet

Expand Down Expand Up @@ -56,3 +58,32 @@ def test_encoding_detection():
if ext not in ['.html', '.txt', '.xml', '.srt']:
continue
yield check_file_encoding, join(path, file_name), encoding


class JustALengthIssue(Exception):
    """Raised when text chardet failed to detect becomes detectable once extended.

    Used purely as a signal inside the Hypothesis-based test below: it is
    raised when appending a suffix to the original text makes
    ``chardet.detect`` report a non-``None`` encoding.
    """


@given(
    st.text(min_size=1),
    st.sampled_from([
        'ascii', 'utf-8', 'utf-16', 'utf-32',
        'iso-8859-7', 'iso-8859-8', 'windows-1255',
    ]),
    st.randoms(),
    settings=Settings(max_examples=200),
)
def test_never_fails_to_detect_if_there_is_a_valid_encoding(txt, enc, rnd):
    """Check that an undetected encoding is only ever a "too short" problem.

    ``txt`` is encoded with ``enc``; examples that cannot be encoded are
    discarded via ``assume(False)``.  If ``chardet.detect`` reports an
    encoding for the resulting bytes there is nothing further to check.

    If it reports ``None``, a nested Hypothesis test searches for a suffix
    such that ``txt + suffix`` (encoded with the same ``enc``) *is* detected.
    The nested test raises ``JustALengthIssue`` when it finds one, and the
    outer test requires that exception via ``assert_raises`` -- so the test
    fails when no tried suffix makes detection succeed.

    :param txt: non-empty text generated by Hypothesis.
    :param enc: encoding name drawn from the sampled list.
    :param rnd: random instance generated by Hypothesis, passed to the
        nested ``@given`` as its ``random`` argument.
    """
    try:
        data = txt.encode(enc)
    except UnicodeEncodeError:
        # This text cannot be represented in this encoding: discard example.
        assume(False)

    if chardet.detect(data)['encoding'] is not None:
        return

    # Quiet, non-shrinking inner run: we only care whether *any* suffix
    # triggers a successful detection, not about a minimal example.
    inner_settings = Settings(
        verbosity=Verbosity.quiet, max_shrinks=0,
        max_examples=50,
    )

    @given(st.text(), settings=inner_settings, random=rnd)
    def string_poisons_following_text(suffix):
        try:
            extended = (txt + suffix).encode(enc)
        except UnicodeEncodeError:
            # Suffix is not encodable with ``enc``: discard it.
            assume(False)
        if chardet.detect(extended)['encoding'] is not None:
            raise JustALengthIssue()

    # The inner test must raise, i.e. some suffix made detection succeed.
    assert_raises(JustALengthIssue, string_poisons_following_text)

0 comments on commit cc9d6d2

Please sign in to comment.