# Import Statements + Notebook Setup 

In [1]:
# Requirements:
# pip install ipytest
# pip install hypothesis
# pip install spacy
# Follow spacy instructions to download en_core_web_sm or just don't run the final example

import pytest
import ipytest

# Configure ipytest for this notebook: assertion rewriting and the cell
# magics (used below as `%%run_pytest`) are both switched on.
ipytest.config(rewrite_asserts=True, magics=True)
# NOTE(review): presumably ipytest needs `__file__` to name the notebook so it
# can associate collected tests with it — confirm against the installed version.
__file__ = "HypothesisDemo.ipynb"

# Simple Classes

In [2]:
"""
Some objects to do deterministic tokenizing of arbitrary texts
"""

from collections import namedtuple
from typing import List

# A token is a (string, span) pair; the tokenizers below fill `span` with a
# (start, end) tuple of offsets into the tokenized string.
Token = namedtuple("Token", "string span")


class Tokenizer:
    """Abstract base for tokenizers.

    Subclasses set ``name`` and implement :meth:`tokenize`; an instance can
    then be called directly on a string.
    """

    name: str = ""

    def __init__(self) -> None:
        """The base class keeps no state."""

    def tokenize(self, string: str) -> List[Token]:
        """Split ``string`` into tokens.

        Raises:
            NotImplementedError: always, in the base class.
        """
        raise NotImplementedError

    def __call__(self, string: str) -> List[Token]:
        """Shorthand for ``self.tokenize(string)``."""
        tokens = self.tokenize(string)
        return tokens
    
class WhiteSpaceTokenizer(Tokenizer):
    """Tokenizer that splits on runs of whitespace and records each token's span."""

    name = "white_space_tokenizer"

    def tokenize(self, string: str) -> List[Token]:
        """Return one ``Token`` per whitespace-delimited piece of ``string``.

        Each span is ``(start, end)`` such that ``string[start:end]`` is the
        token text. An empty or all-whitespace input yields ``[]``.
        """
        tokens = []
        search_from = 0
        for piece in string.split():
            # Searching from just past the previous token finds this exact
            # occurrence, not an identical earlier one.
            start = string.index(piece, search_from)
            end = start + len(piece)
            tokens.append(Token(piece, (start, end)))
            search_from = end + 1
        return tokens

class AsciiCharacterTokenizer(Tokenizer):
    """Character-level tokenizer: one token per character of the input.

    Characters whose code point is 128 or above are emitted as the
    placeholder string "<NON-ASCII>"; every span is one character wide.
    """

    name = "ascii_character_tokenizer"

    def tokenize(self, string: str) -> List[Token]:
        """Return one ``Token`` per character, with span ``(i, i + 1)``."""
        tokens = []
        for position, symbol in enumerate(self._string_to_ascii(string)):
            tokens.append(Token(symbol, (position, position + 1)))
        return tokens

    @staticmethod
    def _string_to_ascii(string: str) -> List[str]:
        """Map each character to itself, or to "<NON-ASCII>" if it is not ASCII."""
        mapped = []
        for char in string:
            mapped.append("<NON-ASCII>" if ord(char) >= 128 else char)
        return mapped

# Simple Tests

In [3]:
%%run_pytest[clean] -qq

def test_white_space_tokenizer_returns_empty_string():
    """An empty input string produces no tokens."""
    tokens = WhiteSpaceTokenizer()("")
    assert tokens == []

def test_white_space_tokenizer_returns_expected_output():
    """A hand-written example: three tokens with their spans."""
    text = "♥O◘♦♥O◘♦ nae nae"
    expected = [
        Token("♥O◘♦♥O◘♦", (0, 8)),
        Token("nae", (9, 12)),
        Token("nae", (13, 16)),
    ]
    assert WhiteSpaceTokenizer()(text) == expected

def test_ascii_character_tokenizer_returns_expected_output():
    """Every character becomes a one-wide token; non-ASCII ones become the placeholder."""
    non_ascii = "<NON-ASCII>"
    expected_strings = [
        non_ascii, "O", non_ascii, non_ascii,
        non_ascii, "O", non_ascii, non_ascii,
        " ", "n", "a", "e",
        " ", "n", "a", "e",
    ]
    expected = [
        Token(string=text, span=(index, index + 1))
        for index, text in enumerate(expected_strings)
    ]
    assert AsciiCharacterTokenizer()("♥O◘♦♥O◘♦ nae nae") == expected
# This final test hasn't been written yet. Exercise: remove the decorator and write a 
# passing test.
@pytest.mark.xfail
def test_ascii_character_tokenizer_string_to_ascii():
    """Unfinished test: the empty expected list is a stand-in, hence the xfail mark."""
    mapped = AsciiCharacterTokenizer()._string_to_ascii("♥O◘♦♥O◘♦")
    assert mapped == []

...x                                                                                                                                                                                                                                                                                                     [100%]


More than likely, you just ran `tokenizer._string_to_ascii("♥O◘♦♥O◘♦")` and then pasted the output into the test. That is a bit circular, isn't it? We're testing our code with "expected output" that is simply the result of our code being executed in the first place... 

We probably shouldn't have much faith in this.

# What if we change the code being tested?

In [4]:
class AsciiCharacterTokenizer(Tokenizer):
    """Character-level tokenizer that lowercases every token string.

    Lowercasing happens after the ASCII mapping, so the placeholder for
    non-ASCII characters is lowercased as well.
    """

    name = "ascii_character_tokenizer"

    def tokenize(self, string: str) -> List[Token]:
        """Return one lowercased ``Token`` per character, with span ``(i, i + 1)``."""
        tokens = []
        for position, symbol in enumerate(self._string_to_ascii(string)):
            tokens.append(Token(symbol.lower(), (position, position + 1)))
        return tokens

    @staticmethod
    def _string_to_ascii(string: str) -> List[str]:
        """Map each character to itself, or to "<NON-ASCII>" if it is not ASCII."""
        mapped = []
        for char in string:
            mapped.append("<NON-ASCII>" if ord(char) >= 128 else char)
        return mapped

In [5]:
%%run_pytest[clean] -qq

def test_ascii_character_tokenizer_returns_expected_output():
    """Same hand-written expectation as before, now run against the changed tokenizer."""
    non_ascii = "<NON-ASCII>"
    expected_strings = [
        non_ascii, "O", non_ascii, non_ascii,
        non_ascii, "O", non_ascii, non_ascii,
        " ", "n", "a", "e",
        " ", "n", "a", "e",
    ]
    expected = [
        Token(string=text, span=(index, index + 1))
        for index, text in enumerate(expected_strings)
    ]
    assert AsciiCharacterTokenizer()("♥O◘♦♥O◘♦ nae nae") == expected

F                                                                                                                                                                                                                                                                                                        [100%]
____________________________________________________________________________________________________________________________ test_ascii_character_tokenizer_returns_expected_output ____________________________________________________________________________________________________________________________

    def test_ascii_character_tokenizer_returns_expected_output():
        tokenizer = AsciiCharacterTokenizer()
        test_string_0 = "♥O◘♦♥O◘♦ nae nae"
>       assert tokenizer(test_string_0) == [Token(string='<NON-ASCII>', span=(0, 1)),
                                            Token(string='O', span=(1, 2)),
                                            Token(string='<NON-ASCII>', s

Now we have a failing test because of a simple change and we need to manually change our test case to capture the change in source code. This is fine, but it feels like tests written this way aren't really helping us much.

(Hint: they feel like they're not helpful because they're not really testing properties of our code, just arbitrary output.)

# What if we add a new object?

In [6]:
class AsciiCharacterTokenizer(Tokenizer):
    """Character-level tokenizer: one token per character of the input.

    Characters whose code point is 128 or above are emitted as the
    placeholder string "<NON-ASCII>"; every span is one character wide.
    """

    name = "ascii_character_tokenizer"

    def tokenize(self, string: str) -> List[Token]:
        """Return one ``Token`` per character, with span ``(i, i + 1)``."""
        return [
            Token(char, (i, i + 1))
            for i, char in enumerate(self._string_to_ascii(string))
        ]

    # Defined on this class because ``tokenize`` calls it and the base
    # ``Tokenizer`` does not provide it; without it every call would raise
    # AttributeError.
    @staticmethod
    def _string_to_ascii(string: str) -> List[str]:
        """Map each character to itself, or to "<NON-ASCII>" if it is not ASCII."""
        return [char if ord(char) < 128 else "<NON-ASCII>" for char in string]

class AsciiLowercaseCharacterTokenizer(Tokenizer):
    """Character-level tokenizer that lowercases every token string.

    Lowercasing happens after the ASCII mapping, so the placeholder for
    non-ASCII characters is lowercased as well. Every span is one character
    wide.
    """

    name = "ascii_lowercase_character_tokenizer"

    def tokenize(self, string: str) -> List[Token]:
        """Return one lowercased ``Token`` per character, with span ``(i, i + 1)``."""
        return [
            Token(char.lower(), (i, i + 1))
            for i, char in enumerate(self._string_to_ascii(string))
        ]

    # Defined on this class because ``tokenize`` calls it and the base
    # ``Tokenizer`` does not provide it; without it every call would raise
    # AttributeError.
    @staticmethod
    def _string_to_ascii(string: str) -> List[str]:
        """Map each character to itself, or to "<NON-ASCII>" if it is not ASCII."""
        return [char if ord(char) < 128 else "<NON-ASCII>" for char in string]

In [7]:
%%run_pytest[clean] -qq

def test_ascii_lowercase_character_tokenizer_returns_expected_output():
    """The earlier hand-written expectation, copied for the new tokenizer class."""
    non_ascii = "<NON-ASCII>"
    expected_strings = [
        non_ascii, "O", non_ascii, non_ascii,
        non_ascii, "O", non_ascii, non_ascii,
        " ", "n", "a", "e",
        " ", "n", "a", "e",
    ]
    expected = [
        Token(string=text, span=(index, index + 1))
        for index, text in enumerate(expected_strings)
    ]
    assert AsciiLowercaseCharacterTokenizer()("♥O◘♦♥O◘♦ nae nae") == expected

F                                                                                                                                                                                                                                                                                                        [100%]
_______________________________________________________________________________________________________________________ test_ascii_lowercase_character_tokenizer_returns_expected_output _______________________________________________________________________________________________________________________

    def test_ascii_lowercase_character_tokenizer_returns_expected_output():
        tokenizer = AsciiLowercaseCharacterTokenizer()
        test_string_0 = "♥O◘♦♥O◘♦ nae nae"
>       assert tokenizer(test_string_0) == [Token(string='<NON-ASCII>', span=(0, 1)),
                                            Token(string='O', span=(1, 2)),
                                            Token(stri

Same problem as above. We tediously write new test cases for new objects.

# So

**We've created some very simple objects and tested them with simple expected outputs.**

##### Should we have faith in these tests? 

  - Well, we're testing with a few toy examples for which we already knew the correct answer.
  

##### What if we change the behavior of these objects?

  

  - Because we're testing with very rigid "expected outputs," every time we change something minor in our source code, we have to update our test cases. 
  - At that point, are our test cases even *meaningful*?

##### What if we add new objects?

  - We have to figure out what the expected output for these new objects is and add tests for them accordingly.
  
 

# Property-based Testing

**Instead of checking that functions and methods produce expected outputs, property-based tests check that they satisfy expected properties.**

**A property-based test is a formal way of asserting "given some input, the output should have these observable properties." Therefore, property-based testing requires some *generators* of what inputs to functions may look like.**

# Simple properties

In [8]:
# A simple generator for strings:
import random
import string

def generate_string(len: int = 1000):
    """Return a random string of ``len`` characters.

    Each character is picked with ``random.choice`` from ASCII letters, the
    space character, digits and punctuation. The parameter name shadows the
    builtin ``len`` inside this function.
    """
    alphabet = string.ascii_letters + " " + string.digits + string.punctuation
    picked = []
    for _ in range(len):
        picked.append(random.choice(alphabet))
    return "".join(picked)

generate_string(100)

'q5z;O"gA7:+sX#;p{%8`w/g\\`g7Lz2%L~ [Y(`n+fc>{|$W! I26o3>%|TK7zmT*c_^-\']n_m7HKOUjNc\\"J*p+:l@nRo:v9N&/Y'

In [9]:
%%run_pytest[clean] -qq

# Here we have a test which uses a generated test case. It fails *some* of the time, but
# usually it doesn't. This isn't good enough.
def test_white_space_tokenizer_basic_properties():
    """Check three properties of the tokenizer on one randomly generated string."""
    tokenizer = WhiteSpaceTokenizer()
    text = generate_string()
    tokens = tokenizer(text)
    rejoined = " ".join([token.string for token in tokens])

    # Joining the tokens with single spaces should give a string of the same length
    assert len(rejoined) == len(text)

    # For that matter, it should be the exact same string
    assert rejoined == text

    # Slicing the input with a token's span should give back that token's text
    for token in tokens:
        start = token.span[0]
        end = token.span[1]
        assert text[start:end] == token.string

.                                                                                                                                                                                                                                                                                                        [100%]


## Problem solved?

**No**

# Hypothesis

**Hypothesis is a library that fundamentally does two things:**

- It helps us write generators for data of different types

- It applies those generators to our test cases and tracks the results


https://hypothesis.readthedocs.io/

# Generating Data With Hypothesis

## Basic Types

In [10]:
from hypothesis import strategies as st

In [11]:
# Hypothesis has built in types
text = st.text()  # no arguments: default text settings
integer = st.integers()  # no bounds given
date = st.datetimes()  # `.example()` on this yields datetime.datetime values (checked below)

In [12]:
# Great! So now we have random examples of text, integers, and dates?
print(type(text))
# Not quite: all three objects share a single type, so none of them is itself
# a str, an int or a datetime.
assert type(text) == type(integer) == type(date)

hypothesis.searchstrategy.lazy.LazyStrategy


In [13]:
# These are customizable *strategies* for generating data:
import datetime

# `.example()` draws one concrete value; check each draw has the expected Python type.
assert type(text.example()) == str
assert type(integer.example()) == int
assert type(date.example()) == datetime.datetime

In [14]:
# NOTE: min_value=0 admits zero, so despite the name these are non-negative
# integers (the sample output below includes 0).
positive_integers = st.integers(min_value=0)
# Print five sample draws.
for _ in range(5):
    print(positive_integers.example())

0
1579964525
104
14220
2023257522


## Types of Type with Type Other Than Basic

**Typically, we want to write tests to check properties of objects that consume more complicated data. In our case, we might want a "document" data type that more closely resembles the type of text we might see.**

In [15]:
@st.composite
def document(draw):
    """Build one "document": between 1 and 100 drawn texts joined by single spaces.

    Per the Hypothesis docs: "The composite decorator works by converting a
    function that returns one example into a function that returns a strategy
    that produces such examples". Leading and trailing whitespace is stripped
    from the result, so it may be the empty string.
    """
    piece_count = draw(st.integers(1, 100))
    pieces = [draw(st.text()) for _ in range(piece_count)]
    return " ".join(pieces).strip()

In [16]:
# Calling the composite-decorated function gives the strategy used by the tests below.
doc_strategy = document()

# Show the first 100 characters of five sample documents.
for _ in range(5):
    print(doc_strategy.example()[:100])

򚿬 񹛯  0𔨰 􅰡&* 񕠀񬫣🷭"򧻞  󣱂򙍅  )# 򈣛󔸄    -򓎕𩸌" 򁪳󹶍 0󔭑𽹭 񼙂򿽎
.󭓵񤤶񰛶󯅥 􁕓
䪖 𗰋򂫵 󧷔𱢙򈔑񻢜򊜎򌰴 򯇐# 𨄶󒪲󸛁𿎡򁊗󁟌  񼤫 (򪒗 𦚭,𬋤  𛨜$򛣭!)󐂀'񚛭 򣢨򉑄񖈴󤇃 𪲉𠦂񼚯 󗜐󿒹  ƍ0󯴂񮋊 򢰓󩯇򅉉򣊉%󹥳򱥻$ $
񴗊𠘴 񚜃  񖦫􃝔򫴔+%#򟍠򝞺凮 𲮂󰣁󈶳򂝐񞿒񱉟􆃸򗯓􌣀񥴃 򕁩	󖮎𳁎񣱗󄓅𨝳􋖹􋙾􃓪#򼡡򀊫񀀕/   񡎁񄡾
   '+ !$ 򒶃񺗰  *𿐵 𯉞򎗳񸃆!
񵓨󈟫󫖰𢗴 󚠟򅂝!    񐿻򠊢𪏣 򄰥𷍎+ 򅽠򄫗 #󮩫񛙁  𠤂&𣍔컂񈕑𔁓 
򆔷



# Writing tests with Hypothesis

In [17]:
from hypothesis import given

In [18]:
%%run_pytest[clean] -qq

@pytest.mark.xfail
@given(doc_strategy)
def test_white_space_tokenizer_basic_properties(string):
    """Check three properties of the tokenizer on Hypothesis-generated documents."""
    tokens = WhiteSpaceTokenizer()(string)
    rejoined = " ".join([token.string for token in tokens])

    # Joining the tokens with single spaces should give a string of the same length
    assert len(rejoined) == len(string)

    # For that matter, it should be the exact same string
    assert rejoined == string

    # Slicing the input with a token's span should give back that token's text
    for token in tokens:
        start = token.span[0]
        end = token.span[1]
        assert string[start:end] == token.string

x                                                                                                                                                                                                                                                                                                        [100%]


In [19]:
# Let's imagine we want this object to raise an exception if passed an empty string:

class WhiteSpaceTokenizer(Tokenizer):
    """Whitespace tokenizer that rejects empty input.

    Splits on runs of whitespace and records, for every token, the
    ``(start, end)`` span such that ``string[start:end]`` is the token text.
    """

    name = "white_space_tokenizer"

    def tokenize(self, string: str) -> List[Token]:
        """Return one ``Token`` per whitespace-delimited piece of ``string``.

        Raises:
            ValueError: if ``string`` is empty.
        """
        if not string:
            raise ValueError("Cannot tokenize an empty string")
        tokens = []
        search_from = 0
        for substring in string.split():
            # Searching from just past the previous token finds this exact
            # occurrence, not an identical earlier one.
            span_left = string.index(substring, search_from)
            span_right = span_left + len(substring)
            tokens.append(Token(substring, (span_left, span_right)))
            search_from = span_right + 1
        return tokens

In [20]:
from hypothesis import assume

In [21]:
%%run_pytest[clean] -qq

# Without the assume() call
@pytest.mark.xfail
@given(doc_strategy)
def test_white_space_tokenizer_basic_properties1(string):
    """Span property with no input filtering; marked xfail."""
    tokens = WhiteSpaceTokenizer()(string)

    # Slicing the input with a token's span should give back that token's text
    for token in tokens:
        start = token.span[0]
        end = token.span[1]
        assert string[start:end] == token.string

# With the assume() call
@given(doc_strategy)
def test_white_space_tokenizer_basic_properties2(string):
    """Span property, with falsy (empty) inputs discarded through ``assume``."""
    assume(string)
    tokens = WhiteSpaceTokenizer()(string)

    # Slicing the input with a token's span should give back that token's text
    for token in tokens:
        start = token.span[0]
        end = token.span[1]
        assert string[start:end] == token.string

x.                                                                                                                                                                                                                                                                                                       [100%]


# What are properties that all tokenizers should have?

In [22]:
@given(doc_strategy)
def test_some_new_tokenizer_basic_properties(string):
    """Template of properties a tokenizer's output is expected to satisfy."""
    tokens = SomeNewTokenizer()(string)
    text_length = len(string)

    # The result is a list whose items are Token tuples carrying str text
    assert isinstance(tokens, List)
    for token in tokens:
        assert isinstance(token, Token)
        assert isinstance(token.string, str)

    # Span ends are ints, the span is non-empty, and it lies within the input
    for token in tokens:
        start = token.span[0]
        assert isinstance(start, int)
        end = token.span[1]
        assert isinstance(end, int)
        assert end > start
        assert start >= 0
        assert start < text_length
        assert end > 0
        assert end <= text_length

    # Slicing the input with a token's span should give back that token's text (maybe?)
    for token in tokens:
        start = token.span[0]
        end = token.span[1]
        assert string[start:end] == token.string

## Now we can apply some of these same properties to a new tokenizer object, even if we don't know exactly what its expected output is!

In [23]:
from typing import List

import spacy

class SpacyTokenizer(Tokenizer):
    """Base for spaCy-backed tokenizers; subclasses set ``spacy_model``."""

    spacy_model = ""

    def __init__(self) -> None:
        """Load the model named by ``spacy_model`` and keep it as ``self.nlp``."""
        model_name = self.spacy_model
        self.nlp = spacy.load(model_name)

class SpacyEnCoreWebSmTokenizer(SpacyTokenizer):
    """Tokenizer backed by the ``en_core_web_sm`` spaCy model."""

    name = "spacy_en_core_web_sm_tokenizer"
    spacy_model = "en_core_web_sm"

    def tokenize(self, string: str) -> List[Token]:
        """Run the model with parser, tagger and ner disabled; return its tokens.

        Each span is ``(token.idx, token.idx + len(token))``.
        """
        doc = self.nlp(string, disable=["parser", "tagger", "ner"])
        tokens = []
        for spacy_token in doc:
            start = spacy_token.idx
            tokens.append(Token(spacy_token.text, (start, start + len(spacy_token))))
        return tokens

In [24]:
from hypothesis import settings

In [25]:
%%run_pytest[clean] -qq

@given(doc_strategy)
@settings(deadline=None, max_examples=5)
def test_spacy_tokenizer_basic_properties(string):
    """Run the generic tokenizer properties against the spaCy tokenizer."""
    tokens = SpacyEnCoreWebSmTokenizer()(string)
    text_length = len(string)

    # The result is a list whose items are Token tuples carrying str text
    assert isinstance(tokens, List)
    for token in tokens:
        assert isinstance(token, Token)
        assert isinstance(token.string, str)

    # Span ends are ints, the span is non-empty, and it lies within the input
    for token in tokens:
        start = token.span[0]
        assert isinstance(start, int)
        end = token.span[1]
        assert isinstance(end, int)
        assert end > start
        assert start >= 0
        assert start < text_length
        assert end > 0
        assert end <= text_length

    # Slicing the input with a token's span should give back that token's text (maybe?)
    for token in tokens:
        start = token.span[0]
        end = token.span[1]
        assert string[start:end] == token.string
        

.                                                                                                                                                                                                                                                                                                        [100%]


# A note on fuzzing

There may be a strict definition of "property-based testing" out there somewhere. I don't care. I think fuzzing is property-based testing where you're just testing for the property "this program doesn't crash." Or property-based testing involves some ideas from fuzzing and applies them to unit testing type contexts. Either way they're similar concepts. Although you can test for properties of functions and objects without generating random inputs, too!