# Introduction

The following uses a kind of literate programming approach to build a library of tools useful for writing unit and integration tests directly into a notebook. The library is to be articulated as a Python package built as the concatenation of a subset of the code cells of this notebook, using an ad hoc script. To help with identifying which code cells are parts of the final package and which are inline testing code, we use *tags*, which make up cell metadata in this notebook.

In [1]:
from abc import ABC, abstractmethod
from contextlib import contextmanager, ExitStack
from copy import copy, deepcopy
from inspect import getframeinfo, Traceback, unwrap
from io import TextIOBase
import itertools
from linecache import getline
import sys
from traceback import walk_tb
from typing import ContextManager, Dict, List, Tuple, Iterator, Union, Iterable, Optional, Any, Callable, Mapping

import colors
from pygments import highlight
from pygments.lexers import Python3Lexer
from pygments.formatters import TerminalFormatter

# Test results

In [2]:
class Result(ABC):
    """
    Result of a test. Indicates whether the test passed (was a success), and if it did not,
    whether it was a failure (as opposed to any other kind of issue).
    """
    
    @abstractmethod
    def is_success(self) -> bool:
        """True when an associated test run has passed."""
        raise NotImplementedError()

    def is_failure(self) -> bool:
        """True when an associated has not passed because a designed failure condition was met."""
        return False
    
    def as_dict(self) -> Dict:
        """Expresses this result as a dictionary suitable to structured data serialization."""
        return {"type": type(self).__name__}

## Manual test failure

Tests can be made to fail deliberately by raising a special exception.

In [3]:
class TestFailed(Exception):
    """
    Exception raised by this framework in order to mark a test run as a Failure.
    """
    
    def __init__(self, reason: str) -> None:
        super().__init__(reason)
        self.reason = reason

In [4]:
try:
    raise TestFailed("asdf")
except TestFailed as err:
    assert str(err) == "asdf"

In [5]:
def fail(reason: str = ""):
    "Marks some ongoing test as failed, with an optional reason for failure."
    raise TestFailed(reason)

In [6]:
try:
    fail("asdf")
    assert False
except TestFailed as err:
    assert err.reason == "asdf"

## Test result: success

In [7]:
class Success(Result):
    """
    Result for a test that passed.
    """
    def is_success(self) -> bool:
        return True

In [8]:
assert Success().is_success()

In [9]:
assert Success().as_dict() == {"type": "Success"}

## Test result following the test code raising an exception

### Traceback frames

In [10]:
class Frame:
    """
    Information regarding a frame of a traceback. Provides more than the very limited
    code context that comes from standard library introspection tools.
    """
    
    def __init__(self, tb: Traceback, num_line: int, tags: Optional[List[str]] = None) -> None:
        self.num_line = num_line
        self.name_file = tb.filename
        self.function = tb.function
        self.tags = tags or []
        
    def context(self, before: int = 3, after: int = 3) -> List[Tuple[int, str]]:
        ctx = [(self.num_line, getline(self.name_file, self.num_line).rstrip())]
        for delta in range(1, before + 1):
            ctx.insert(0, (self.num_line - delta, getline(self.name_file, self.num_line - delta).rstrip()))
        for delta in range(1, after + 1):
            ctx.append((self.num_line + delta, getline(self.name_file, self.num_line + delta).rstrip()))

        # Clean up context: remove line-ending blanks and blank lines top and bottom
        # of the context blob.
        while len(ctx) > 0:
            for i in [0, -1]:
                if len(ctx[i][1]) == 0:
                    del ctx[i]
                    break
            else:
                break
                
        return ctx
    
    def as_dict(self, context_before: int = 3, context_after: int = 3) -> Dict:
        return {
            "file": self.name_file,
            "line": self.num_line,
            "function": self.function,
            "context": [[i, line] for i, line in self.context(context_before, context_after)],
            "tags": self.tags
        }
    
    def __str__(self) -> str:
        return f"File {self.name_file}, Line {self.num_line}, Function {self.function}"
    
    def __repr__(self) -> str:
        return str(self)

In [11]:
from inspect import getfile

def my_function():
    getfile("asdf")  # Will raise a TypeError

frames = []
try:
    my_function()
    assert False
except TypeError:
    _, _, tb = sys.exc_info()
    for frame_raw, lineno in walk_tb(tb):
        frame = Frame(getframeinfo(frame_raw), lineno)
        assert frame.name_file == getfile(frame_raw)
        assert frame.num_line == lineno
        assert frame.function == frame_raw.f_code.co_name
        assert frame.tags == []
        frames.append(frame)

assert len(frames) == 3

In [12]:
frame = frames[1]
assert frame.context(0, 0) == [(4, "    getfile(\"asdf\")  # Will raise a TypeError")]
assert frame.context(1, 1) == [(3, "def my_function():"), (4, "    getfile(\"asdf\")  # Will raise a TypeError")]
assert frame.context(3, 3) == [
    (1, "from inspect import getfile"),
    (2, ""),
    (3, "def my_function():"),
    (4, "    getfile(\"asdf\")  # Will raise a TypeError"),
    (5, ""),
    (6, "frames = []"),
    (7, "try:")
]

In [13]:
assert frame.context(45, 0) == list(zip(range(1, 4 + 1), [
    "from inspect import getfile",
    "",
    "def my_function():",
    "    getfile(\"asdf\")  # Will raise a TypeError"
]))

In [14]:
assert frame.context(0, 9000) == list(zip(range(4, 20 + 1), """\
    getfile("asdf")  # Will raise a TypeError

frames = []
try:
    my_function()
    assert False
except TypeError:
    _, _, tb = sys.exc_info()
    for frame_raw, lineno in walk_tb(tb):
        frame = Frame(getframeinfo(frame_raw), lineno)
        assert frame.name_file == getfile(frame_raw)
        assert frame.num_line == lineno
        assert frame.function == frame_raw.f_code.co_name
        assert frame.tags == []
        frames.append(frame)

assert len(frames) == 3\
""".split("\n")))

In [15]:
assert frame.as_dict() == {
    "file": getfile(my_function),
    "line": 4,
    "function": "my_function",
    "tags": [],
    "context": list(list(e) for e in zip(range(1, 7 + 1), """\
from inspect import getfile

def my_function():
    getfile("asdf")  # Will raise a TypeError

frames = []
try:\
""".split("\n")))
}

### The exception-driven result: errors

In [16]:
class Error(Result):
    """
    Non-passing test result due to an exception being raised.
    
    It is passed a set of common functions: the presence of these functions in the
    traceback of the exception are expected and normal, making their eventual
    reporting redundant and sort of trivial. The frames corresponding to these functions
    in the traceback summary kept by this object will be tagged as such.
    """
    TAG_COMMON = "common"
    
    def __init__(self, fns_common: Iterable[Callable]) -> None:
        super().__init__()
        self._type_exc: type
        self._value_exc: Any
        self._type_exc, self._value_exc, tb = sys.exc_info()
        if tb is None:
            raise RuntimeError("Can only instantiate this class when an exception has been raised.")
            
        codes_common = {unwrap(fn).__code__ for fn in fns_common}
        self._traceback: List[Frame] = []
        for frame_raw, num_line in walk_tb(tb):
            tags = []
            if frame_raw.f_code in codes_common:
                tags.append(Error.TAG_COMMON)
            self._traceback.append(Frame(getframeinfo(frame_raw), num_line, tags))
        
    def is_success(self) -> bool:
        return False
    
    @property
    def type_exc(self) -> type:
        """Returns the type of the exception associated to this result."""
        return self._type_exc
    
    @property
    def value_exc(self) -> Any:
        """Returns the exception raised in association to this test result."""
        return self._value_exc
    
    @property
    def traceback(self) -> List[Frame]:
        """
        Returns a summary of the stack trace associated to the exception that brought this test result.
        """
        return self._traceback
    
    def as_dict(self, context_before: int = 3, context_after: int = 3) -> Dict:
        d = super().as_dict()
        d.update(
            {
                "type_exc": self.type_exc.__name__,
                "value_exc": str(self.value_exc),
                "traceback": [frame.as_dict(context_before, context_after) for frame in self.traceback]
            }
        )
        return d

In [17]:
from inspect import getfile

def fn_raise():
    raise RuntimeError()
    
def caller():
    fn_raise()

try:
    caller()
    assert False
except RuntimeError:
    err: Error = Error([caller])
    assert not err.is_success()
    assert not err.is_failure()
    assert err.type_exc == RuntimeError
    assert isinstance(err.value_exc, RuntimeError)
    assert len(err.traceback) == 3
    assert [frame.function for frame in err.traceback] == ["<module>", "caller", "fn_raise"]
    assert [frame.tags for frame in err.traceback] == [[], [Error.TAG_COMMON], []]

In [18]:
# This trick gets us a cell's "file name", given that the `__file__` constant is not defined
# in Jupyter notebooks.
import inspect
def _asdf():
    pass
filename = inspect.getfile(_asdf)

try:
    raise RuntimeError()
    assert False
except RuntimeError:
    assert {
        "type": "Error",
        "type_exc": "RuntimeError",
        "value_exc": "",
        "traceback": [
            {
                "file": filename,
                "line": 9,
                "function": "<module>",
                "tags": [],
                "context": [[9, "    raise RuntimeError()"]]
            }
        ]
    } == Error([]).as_dict(0, 0)

### Deliberate exception: failures

For convenience's sake, we model `Failure`s as a subclass of `Error` to gain the exception breakdown functionality.

In [19]:
class Failure(Error):
    """
    Test result stemming from a condition check that failed, or a test run marked
    as a failure.
    """
    def __init__(self, reason: str, fns_common: Iterable[Callable]):
        super().__init__(fns_common)
        self._reason = reason
        
    @property
    def reason(self) -> str:
        "Reason given by the programmer as to why the test failed."
        return self._reason
    
    def is_failure(self) -> bool:
        return True
    
    def as_dict(self, context_before: int = 3, context_after: int = 3) -> Dict:
        d = super().as_dict(context_before, context_after)
        d["reason"] = self.reason
        return d

In [20]:
try:
    assert False
except:
    err: Failure = Failure("asdf", [])
    assert not err.is_success()
    assert err.is_failure()
    assert err.type_exc == AssertionError
    assert isinstance(err.value_exc, AssertionError)
    assert isinstance(err.traceback, list)

In [21]:
import inspect
def _asdf():
    pass
filename = inspect.getfile(_asdf)

try:
    assert False
except:
    assert {
        "type": "Failure",
        "type_exc": "AssertionError",
        "value_exc": "",
        "traceback": [
            {
                "file": filename,
                "line": 7,
                "function": "<module>",
                "tags": [],
                "context": [[7, "    assert False"]]
            }
        ],
        "reason": "asdf"
    } == Failure("asdf", []).as_dict(0, 0)

# Environment protection

This is an important feature for test isolation: when running some test code, object definitions and redefinitions should be specific to the scope of the test (as if they ran from a function).

In [22]:
@contextmanager
def protect_environment(*names: str) -> ContextManager:
    """
    Isolates the notebook's environment (variables) from redefinition and the definition
    of new symbols during execution of the context. In addition, any variable named in
    parameter is protected from any state change during execution of the context.
    """
    assert get_ipython().ns_table["user_local"] is get_ipython().ns_table["user_global"]

    namespace_orig = copy(globals())
    for name in names:
        if name in namespace_orig:
            namespace_orig[name] = deepcopy(namespace_orig[name])
    
    try:
        yield
    finally:
        G = globals()
        G.clear()
        G.update(namespace_orig)
        for field in ["user_global", "user_local"]:
            get_ipython().ns_table[field] = namespace_orig

In [23]:
mylist = [1, 2, 3, 4, 5]
assert "otherlist" not in get_ipython().ns_table["user_local"]

with protect_environment():
    otherlist = [10, 11, 12]
    assert len(otherlist) == 3
    mylist.pop()
    mylist = [90]
    
assert "otherlist" not in get_ipython().ns_table["user_local"]
assert mylist == [1, 2, 3, 4]

In [24]:
mylist = [1, 2, 3, 4, 5]
assert "otherlist" not in get_ipython().ns_table["user_local"]

with protect_environment("mylist"):
    otherlist = [10, 11, 12]
    assert len(otherlist) == 3
    mylist.pop()
    
assert "otherlist" not in get_ipython().ns_table["user_local"]
assert mylist == [1, 2, 3, 4, 5]

# Test suites

In [25]:
class Subscriber(ABC):
    """
    Object reacting to test results as they are generated by running tests.
    """
    
    @abstractmethod
    def on_result(self, name_test: str, result: Result) -> None:
        raise NotImplementedError()

In [26]:
class Suite:
    """
    Suite of tests, gathering the result of multiple named test runs. Test code fragments
    are named using the `test()` context manager.
    """    
    
    def __init__(self) -> None:
        self._tests: Dict[str, List[Result]] = {}
        self._fns_common = [fail, self.test]
        self._subscribers: List[Subscriber] = []

    @contextmanager
    def test(self, name: str, protect_env: Union[bool, Iterable[str]] = True) -> ContextManager[None]:
        """
        Starts a named testing code fragment. The fragment is run right away, which produces
        a certain test Result that is retained by the Suite instance.
        
        name        - Name of the test
        protect_env - If set to True, any symbol defined or redefined by the code in context
                      of this manager is undone when popping out of the context. This facilitates
                      test isolation. If, instead of True, an iterable sequence of names is passed
                      as value to this parameter, the objects corresponding to these names in the
                      user's namespace are saved by deep copy, thereby protecting these objects
                      from any state change as well. If False is given as parameter value, the
                      user's environment is not isolated from the test code, making any any definition
                      or state change definitive (which is the usual behaviour when computing with
                      notebooks).
        """
        with ExitStack() as stack:
            if protect_env is not False:
                stack.enter_context(
                    protect_environment(*(protect_env if hasattr(protect_env, "__iter__") else []))
                )
            try:
                yield
                result = Success()
            except TestFailed as err:
                result = Failure(err.reason or "Test marked as failed.", self._fns_common)
            except AssertionError as err:
                result = Failure(str(err) or "Assertion failed.", self._fns_common)
            except BaseException:
                result = Error(self._fns_common)
                
        self._tests.setdefault(name, []).append(result)
        for subscriber in self._subscribers:
            subscriber.on_result(name, result)
            
    @property
    def results(self) -> Iterator[Tuple[str, Iterator[Result]]]:
        """
        Iterates through the gathered test results. For each named test, yields a tuple of
        the name of the test and an iterator over each result gathered as the test has
        been run.
        """
        for name, test_results in self._tests.items():
            yield name, iter(test_results)
            
    def as_dict(self) -> Dict[str, List[Dict]]:
        "Provides a structured data representation suitable for data serialization and exportation."
        return {name: [r.as_dict() for r in rez] for name, rez in self.results}
    
    def __or__(self, subscriber: Subscriber) -> "Suite":
        """
        Generates a clone of this suite instance, but with this subscriber subscribed to it.
        
        The new suite will not share member data structures with `self`, but if `self` carries
        test results already, the new suite will reference the same result objects -- we
        assume that Result objects are immutable.
        """
        suite_with_subscriber = Suite()
        suite_with_subscriber._tests = copy(self._tests)  # Under assumption of results immutability.
        suite_with_subscriber._subscribers = copy(self._subscribers)
        suite_with_subscriber._subscribers.append(subscriber)
        return suite_with_subscriber

In [27]:
assert isinstance(Suite()._tests, dict)

In [28]:
with Suite().test("sanity-check") as x:
    assert x is None

In [29]:
suite = Suite()

with suite.test("succeeding"):
    assert True
    
with suite.test("failing-by-assert-terse"):
    assert False
    
with suite.test("failing-by-assert-reason"):
    assert False, "assert reason"
    
with suite.test("failing-manually-terse"):
    fail()
    
with suite.test("failing-manually-reason"):
    fail("fail reason")
    
with suite.test("error"):
    raise RuntimeError("doh")

assert [
    ("succeeding", [(Success, "")]),
    ("failing-by-assert-terse", [(Failure, "Assertion failed.")]),
    ("failing-by-assert-reason", [(Failure, "assert reason")]),
    ("failing-manually-terse", [(Failure, "Test marked as failed.")]),
    ("failing-manually-reason", [(Failure, "fail reason")]),
    ("error", [(Error, "")])
] == [(name, [(type(r), r.reason if hasattr(r, "reason") else "") for r in rez]) for name, rez in suite.results]

In [30]:
# Check each non-success for tagging of the first frame.
num_non_success = 0
for name, rez in suite.results:
    for r in rez:
        if not r.is_success():
            num_non_success += 1
            assert Error.TAG_COMMON in r.traceback[0].tags

assert num_non_success == 5

In [31]:
# Check failures for tagging of the fail call.
num_failures = 0
for name, rez in suite.results:
    if "failing-manually" in name:
        for r in rez:
            num_failures += 1
            assert Error.TAG_COMMON in r.traceback[-1].tags

assert num_failures == 2

In [32]:
suite = Suite()

with suite.test("trial"):
    fail()
    
with suite.test("trial"):
    raise RuntimeError()
    
with suite.test("trial"):
    pass  # Literally!

assert [("trial", [Failure, Error, Success])] == [(name, [type(r) for r in rez]) for name, rez in suite.results]

In [33]:
import inspect
def _asdf():
    pass
filename = inspect.getfile(_asdf)


suite = Suite()

with suite.test("first"):
    fail()

with suite.test("first"):
    pass

with suite.test("second"):
    raise RuntimeError()

assert {name: [r["type"] for r in rez] for name, rez in suite.as_dict().items()} == {
    "first": ["Failure", "Success"],
    "second": ["Error"]
}

## Testing environment protection during test execution

In [34]:
assert "x" not in globals()

suite = Suite()
with suite.test("trial"):
    x = 5
    assert x == 5
    
assert "x" not in globals()
assert [("trial", [Success])] == [(name, [type(r) for r in rez]) for name, rez in suite.results]

In [35]:
assert "x" not in globals()

suite = Suite()
with suite.test("trial", protect_env=[]):  # Test this as [] has False boolean value.
    x = 5
    assert x == 5
    
assert "x" not in globals()
assert [("trial", [Success])] == [(name, [type(r) for r in rez]) for name, rez in suite.results]

In [36]:
assert "x" not in globals()
mylist = [1, 2, 3]

suite = Suite()
with suite.test("trial", protect_env=["mylist"]):
    x = 5
    assert x == 5
    mylist.append(4)
    
assert "x" not in globals()
assert [("trial", [Success])] == [(name, [type(r) for r in rez]) for name, rez in suite.results]
assert [1, 2, 3] == mylist

## Testing publish/subscribe of results

In [37]:
class TestSubscriber(Subscriber):
    
    def __init__(self, lr: List[Tuple[str, Result]]):
        self._results: List[Tuple[str, Result]] = lr
            
    def on_result(self, name_test: str, result: Result) -> None:
        self._results.append((name_test, result))


results: List[Tuple[str, Result]] = []
suite = Suite() | TestSubscriber(results)

with suite.test("passing"):
    pass
assert len(results) == 1
name_last, result_last = results[-1]
assert name_last == "passing"
assert isinstance(result_last, Success)

with suite.test("error"):
    raise RuntimeError("asdf")
assert len(results) == 2
name_last, result_last = results[-1]
assert name_last == "error"
assert isinstance(result_last, Error)
assert str(result_last.value_exc) == "asdf"

with suite.test("failing"):
    fail("There is no why")
assert len(results) == 3
name_last, result_last = results[-1]
assert name_last == "failing"
assert isinstance(result_last, Failure)
assert result_last.reason == "There is no why"

# Generating test run reports

## Report colorizers

In [38]:
class Emphasis(ABC):

    @abstractmethod
    def __call__(self, s: str) -> str:
        raise NotImplementedError()

In [39]:
class Color(Emphasis):
    
    def __init__(self, fg=None, bg=None, style=None) -> None:
        super().__init__()
        self._fg = fg
        self._bg = bg
        self._style = style
        
    def __call__(self, s: str) -> str:
        return colors.color(s, fg=self._fg, bg=self._bg, style=self._style)

In [40]:
assert Color(fg="red", bg="blue", style="bold")("asdf") == '\x1b[31;44;1masdf\x1b[0m'

In [41]:
class Plain(Emphasis):
    
    def __call__(self, s: str) -> str:
        return s

In [42]:
assert Plain()("asdf") == "asdf"

In [43]:
class Colorizer:
    
    def __init__(self, important: Emphasis, trivial: Emphasis, failure: Emphasis, error: Emphasis) -> None:
        self.important: Emphasis = important
        self.trivial: Emphasis = trivial
        self.failure: Emphasis = failure
        self.error: Emphasis = error

In [44]:
def plain() -> Colorizer:
    return Colorizer(Plain(), Plain(), Plain(), Plain())

In [45]:
c = plain()
assert c.important("asdf") == "asdf"
assert c.trivial("asdf") == "asdf"
assert c.failure("asdf") == "asdf"
assert c.error("asdf") == "asdf"

In [46]:
def color(
    important: Optional[Emphasis] = None,
    trivial: Optional[Emphasis] = None,
    failure: Optional[Emphasis] = None,
    error: Optional[Emphasis] = None
) -> Colorizer:
    return Colorizer(
        important or Color(style="bold"),
        trivial or Color(fg="white"),
        failure or Color(fg="yellow"),
        error or Color(fg="red")
    )

In [47]:
c = color(important=Color(fg="white"), trivial=Color(fg="black"), failure=Color(bg="blue"), error=Color(fg="green"))
assert c.important("asdf") == Color(fg="white")("asdf")
assert c.trivial("asdf") == Color(fg="black")("asdf")
assert c.failure("asdf") == Color(bg="blue")("asdf")
assert c.error("asdf") == Color(fg="green")("asdf")

## Exhaustive report

In [48]:
TestNameFormatter = Callable[[str, int], str]

def name_all(name_test: str, num_result: int) -> str:
    """
    Test name formatter that puts out the name of a test even when it has
    been run multiple times.
    """
    return name_test

def ladder(name_test: str, num_result: int) -> str:
    """
    Test name formatter that puts out the name of a test only once, even
    if it has been run multiple times.
    """
    if num_result == 0:
        return name_test
    return " " * len(name_test)

In [49]:
def quoter(formatter: TestNameFormatter) -> TestNameFormatter:
    """
    Test name formatter that surrounds the name between double quotes. Not
    meant to be used directly by users of this module.
    """
    def quoter_format(name_test: str, num_result: int) -> str:
        return f"\"{formatter(name_test, num_result)}\""
    
    return quoter_format

In [50]:
def report_results(
    suite: Suite,
    file: TextIOBase = sys.stdout,
    colorizer: Colorizer = color(),
    format_name_test: TestNameFormatter = ladder,
    sep_name_result: str = "\t",
    quote_names: bool = False,
    labels_result_custom: Mapping[type, str] = {}
) -> None:
    """
    Reports the name and result for each attempt at running a test, without details
    as to issues encountered (failures and errors).
    
    suite
        Suite of test to write report from.
    file
        File-like object to write report to. Default is standard output.
    colorizer
        Policy for emphasizing the written report.
    format_name_test
        Some tests are run more than once (for instance, for iterative problem solving).
        In a report written for human reading, the repeated naming of a test run more
        than once can feel redundant; it is eliminated by setting this to `ladder`.
        All tests will be named if the formatter used instead is `name_all`.
    sep_name_result
        Separating character used between test name and result label. Default is "\t".
    quote_names
        If True, the test names will be surrounded with double quotes in the output.
    labels_result_custom
        Dictionary of labels to use with different result types, when the default
        labels (*ok* for success, *failed* for failure, *ERROR* for error) should be
        changed. The emphasis for each label is derived from the colorizer.
    """
    len_all_names = [len(name) for name, _ in suite.results]
    if len(len_all_names) == 0:
        return
    len_name_largest = max(len_all_names)

    labels_result = {
        type_result: colorize(labels_result_custom.get(type_result, label_default))
        for type_result, colorize, label_default in [
            (Success, Plain(), "ok"),
            (Failure, colorizer.failure, "failed"),
            (Error, colorizer.error, "ERROR")
        ]
    }
    
    if quote_names:
        format_name_test = quoter(format_name_test)
    
    for name, rez in suite.results:
        p_name = f"{name:{len_name_largest}s}"
        for num, r in enumerate(rez):
            print(format_name_test(p_name, num), labels_result[type(r)], sep=sep_name_result, file=file)

In [51]:
suite = Suite()

with suite.test("first"):
    raise RuntimeError()
with suite.test("first"):
    pass
with suite.test("second"):
    fail()
with suite.test("third"):
    pass
with suite.test("fourth"):
    assert False
with suite.test("fourth"):
    pass

Visual check: tests `first` and `fourth` are run twice; `second` and `third` only once. The name of each test is written only once. The results are either `ok`, `failed` or `ERROR` and are emphasized as normal for success, yellow for failure and red for error. The results are also lined up cleanly into a second column.

In [52]:
report_results(suite)

first 	[31mERROR[0m
      	ok
second	[33mfailed[0m
third 	ok
fourth	[33mfailed[0m
      	ok


Visual check: like previous, except that the test names are written on each line a result is reported.

In [53]:
report_results(suite, format_name_test=name_all)

first 	[31mERROR[0m
first 	ok
second	[33mfailed[0m
third 	ok
fourth	[33mfailed[0m
fourth	ok


Visual check: like previous, but CSV-like, with test names quoted.

In [54]:
report_results(suite, format_name_test=name_all, quote_names=True, sep_name_result=",", colorizer=plain())

"first ",ERROR
"first ",ok
"second",failed
"third ",ok
"fourth",failed
"fourth",ok


In [55]:
suite_all_passed = Suite()

with suite_all_passed.test("first"):
    pass
with suite_all_passed.test("second"):
    pass

Visual check: two results, both `ok`.

In [56]:
report_results(suite_all_passed)

first 	ok
second	ok


In [57]:
suite_empty = Suite()

Visual check: nothing written.

In [58]:
report_results(suite_empty)

## Test result summary

In [59]:
def summarize_results(
    suite: Suite,
    file: Optional[TextIOBase] = sys.stdout,
    colorizer: Colorizer = color(),
    sep: str = ", "
) -> Dict[type, int]:
    """
    Writes a very short summary of a test run, counting the number of each result obtained.

    suite
        Suite of test to write report from.
    file
        File-like object to write report to. Default is standard output.
    colorizer
        Policy for emphasizing the written report.
    sep
        Separation string between the labeled numbers of results. Default is ", "
    """
    summary = {t: 0 for t in [Success, Failure, Error]}
    for _, rez in suite.results:
        for r in rez:
            summary[type(r)] += 1

    if file is not None:
        print(
            f"{summary[Success]} passed",
            (colorizer.failure if summary[Failure] > 0 else colorizer.trivial)(f"{summary[Failure]} failed"),
            (colorizer.error if summary[Error] > 0 else colorizer.trivial)(f"{summary[Error]} raised an error"),
            file=file,
            sep=sep
        )

    return summary

Visual check: should show 3 tests passed, 2 failures (in yellow), 1 error (in red).

In [60]:
summarize_results(suite)

3 passed, [33m2 failed[0m, [31m1 raised an error[0m


{__main__.Success: 3, __main__.Failure: 2, __main__.Error: 1}

Check visually that nothing is written.

In [61]:
assert summarize_results(suite, file=None) == {
    Success: 3,
    Failure: 2,
    Error: 1
}

In [62]:
suite_all_passed = Suite()

with suite_all_passed.test("first"):
    pass
with suite_all_passed.test("second"):
    pass

Check visually that report is fine. Should show 2 tests passed, and failed and errors labeled in a subdued color.

In [63]:
summarize_results(suite_all_passed)

2 passed, [37m0 failed[0m, [37m0 raised an error[0m


{__main__.Success: 2, __main__.Failure: 0, __main__.Error: 0}

Check visually that report is fine. Should show 0 for each type of test.

In [64]:
summarize_results(suite_empty)

0 passed, [37m0 failed[0m, [37m0 raised an error[0m


{__main__.Success: 0, __main__.Failure: 0, __main__.Error: 0}

## Detailed report of issues encountered

### Printing one frame for a result's associated traceback

In [65]:
def print_frame(
    frame: Frame,
    file: TextIOBase = sys.stdout,
    colorizer: Colorizer = color(),
    lines_context: int = 3
) -> None:
    """
    Writes up a single stack frame report.

    frame
        Stack frame to report on.
    file
        File-like object to write report to. Default is standard output.
    colorizer
        Policy for emphasizing the written report.
    lines_context
        Number of lines of code to fetch and write up before and after the
        line associated to the stack frame.
    """
    header = (
        colorizer.trivial
        if Error.TAG_COMMON in frame.tags
        else Plain()
    )(" | ".join([f"File {frame.name_file}", f"Line {frame.num_line}", f"Function {frame.function}"]))
    print(header, file=file)
    if Error.TAG_COMMON not in frame.tags:
        context: List[Tuple[int, str]] = frame.context(before=lines_context, after=lines_context)
        if len(context) > 0:
            max_len_num_line = len(str(context[-1][0]))
            for i, line in zip(
                [i for i, _ in context],
                highlight(
                    "\n".join(ln for _, ln in context),
                    lexer=Python3Lexer(),
                    formatter=TerminalFormatter()
                ).split("\n")
            ):
                print(
                    colorizer.trivial(f"{i:{max_len_num_line}d}"),
                    colorizer.trivial("|"),
                    line,
                    sep=" ",
                    file=file
                )
    print(file=file)

Visual check: show show 3 frame reports. First and third must have appropriate code context (check against the line number), three lines of context around target line max. Second frame report should have subdued color and no code context.

In [66]:
def raiser():
    raise RuntimeError()
    
def caller():
    raiser()
    
try:
    caller()
    assert False
except RuntimeError:
    _, _, tb = sys.exc_info()
    for frame, lineno in walk_tb(tb):
        print_frame(
            Frame(
                getframeinfo(frame), 
                lineno,
                [Error.TAG_COMMON] if frame.f_code is caller.__code__ else [])
        )

File <ipython-input-66-338c4a2e17e5> | Line 8 | Function <module>
[37m 5[0m [37m|[0m     raiser()
[37m 6[0m [37m|[0m 
[37m 7[0m [37m|[0m [34mtry[39;49;00m:
[37m 8[0m [37m|[0m     caller()
[37m 9[0m [37m|[0m     [34massert[39;49;00m [34mFalse[39;49;00m
[37m10[0m [37m|[0m [34mexcept[39;49;00m [36mRuntimeError[39;49;00m:
[37m11[0m [37m|[0m     _, _, tb = sys.exc_info()

[37mFile <ipython-input-66-338c4a2e17e5> | Line 5 | Function caller[0m

File <ipython-input-66-338c4a2e17e5> | Line 2 | Function raiser
[37m1[0m [37m|[0m [34mdef[39;49;00m [32mraiser[39;49;00m():
[37m2[0m [37m|[0m     [34mraise[39;49;00m [36mRuntimeError[39;49;00m()
[37m3[0m [37m|[0m 
[37m4[0m [37m|[0m [34mdef[39;49;00m [32mcaller[39;49;00m():
[37m5[0m [37m|[0m     raiser()



### Detailing one result

In [67]:
def detail_result(
    name_test: str,
    result: Error,
    prefix_header: str,
    file: TextIOBase = sys.stdout,
    colorizer: Colorizer = color(),
    lines_context: int = 3
) -> None:
    """
    Writes up a report regarding a single test result.
    
    name_test
        Name of the test the result was gotten for.
    result
        Error-type result to report on.
    prefix_header
        String prepended to the header of the result report.
    file
        File-like object to write report to. Default is standard output.
    colorizer
        Policy for emphasizing the written report.
    lines_context
        Number of lines of code to fetch and write up before and after the
        line associated to the stack frame.
    """
    header = " ** ".join([
        prefix_header,
        f"Test {colorizer.important(name_test)}", 
        {Failure: colorizer.failure, Error: colorizer.error}[type(result)](type(result).__name__)
    ])
    print("-" * len(colors.strip_color(header)), file=file)
    print(header, file=file)
    if result.is_failure():
        print(result.reason, file=file)
    else:
        print(f"{result.type_exc.__name__}:", str(result.value_exc) or "<no detail provided>", file=file)
    print(file=file)
    
    for frame in result.traceback:  # First frame is always Suite.test, which is irrelevant.
        print_frame(frame, file=file, colorizer=colorizer, lines_context=lines_context)

Visual check: header should start with first word **HEY**, label the test as **second** and map it as a *Failure*. Its frame report shows two subdued frames without code context, sandwiching a frame showing the call to the `fail()` function (line 8) that tripped the failure.

In [68]:
name, rez = list(suite.results)[1]
failure = list(rez)[0]
assert isinstance(failure, Failure)
detail_result(name, failure, "HEY")

-----------------------------
HEY ** Test [1msecond[0m ** [33mFailure[0m
Test marked as failed.

[37mFile <ipython-input-26-39c764bb9450> | Line 35 | Function test[0m

File <ipython-input-51-11f8f96b7c56> | Line 8 | Function <module>
[37m 5[0m [37m|[0m [34mwith[39;49;00m suite.test([33m"[39;49;00m[33mfirst[39;49;00m[33m"[39;49;00m):
[37m 6[0m [37m|[0m     [34mpass[39;49;00m
[37m 7[0m [37m|[0m [34mwith[39;49;00m suite.test([33m"[39;49;00m[33msecond[39;49;00m[33m"[39;49;00m):
[37m 8[0m [37m|[0m     fail()
[37m 9[0m [37m|[0m [34mwith[39;49;00m suite.test([33m"[39;49;00m[33mthird[39;49;00m[33m"[39;49;00m):
[37m10[0m [37m|[0m     [34mpass[39;49;00m
[37m11[0m [37m|[0m [34mwith[39;49;00m suite.test([33m"[39;49;00m[33mfourth[39;49;00m[33m"[39;49;00m):

[37mFile <ipython-input-5-f0d8945e4a67> | Line 3 | Function fail[0m



### All results

In [69]:
def detail_errors(
    suite: Suite,
    file: TextIOBase = sys.stdout,
    colorizer: Colorizer = color(),
    lines_context: int = 3,
    max_report: int = sys.maxsize
) -> None:
    """
    Writes up a report detailing the issues encountered while running the test suite.
    
    suite
        The test suite.
    file
        The file-like object to write the report to. Default is standard output.
    colorizer
        Color scheme used for emphasizing the various bits of the report.
    lines_context
        Number of lines of context to provide around each line of code involved
        in a reported problem.
    max_report
        Maximum number of problems to report on.
    """
    summary = summarize_results(suite, file=None)
    num_problems = summary[Failure] + summary[Error]
    if num_problems == 0:
        if summary[Success] == 0:
            print("No test run.", file=file)
        else:
            print(f"All {summary[Success]} tests passed. No failure nor error encountered.", file=file)
    else:
        index = 1
        for name, rez in suite.results:
            for r in rez:
                if not r.is_success():
                    detail_result(name, r, f"# {index}/{num_problems}", lines_context=lines_context, file=file)
                    print()

                    if index >= max_report:
                        num_remaining = num_problems - index
                        print(colorizer.important(f"... plus {num_remaining} other issue{'s' if num_remaining > 1 else ''}."))
                        return
                    index += 1

Visual check: 3 problem reports expected. First one is for an undetailed error (`RuntimeError`). Second and third are for failures. The first frame report for each (...` | Function test`) is in a subdued color, as is the frame for the call to function `fail`.

In [70]:
detail_errors(suite)

----------------------------
# 1/3 ** Test [1mfirst[0m ** [31mError[0m
RuntimeError: <no detail provided>

[37mFile <ipython-input-26-39c764bb9450> | Line 35 | Function test[0m

File <ipython-input-51-11f8f96b7c56> | Line 4 | Function <module>
[37m1[0m [37m|[0m suite = Suite()
[37m2[0m [37m|[0m 
[37m3[0m [37m|[0m [34mwith[39;49;00m suite.test([33m"[39;49;00m[33mfirst[39;49;00m[33m"[39;49;00m):
[37m4[0m [37m|[0m     [34mraise[39;49;00m [36mRuntimeError[39;49;00m()
[37m5[0m [37m|[0m [34mwith[39;49;00m suite.test([33m"[39;49;00m[33mfirst[39;49;00m[33m"[39;49;00m):
[37m6[0m [37m|[0m     [34mpass[39;49;00m
[37m7[0m [37m|[0m [34mwith[39;49;00m suite.test([33m"[39;49;00m[33msecond[39;49;00m[33m"[39;49;00m):


-------------------------------
# 2/3 ** Test [1msecond[0m ** [33mFailure[0m
Test marked as failed.

[37mFile <ipython-input-26-39c764bb9450> | Line 35 | Function test[0m

File <ipython-input-51-11f8f96b7c56> | Line 8 

Visual check: show only the report for the error, and mention that two more problems have yet to be reported. Also, code context is only one line around the target line, making for a 3-line code blurb.

In [71]:
detail_errors(suite, max_report=1, lines_context=1)

----------------------------
# 1/3 ** Test [1mfirst[0m ** [31mError[0m
RuntimeError: <no detail provided>

[37mFile <ipython-input-26-39c764bb9450> | Line 35 | Function test[0m

File <ipython-input-51-11f8f96b7c56> | Line 4 | Function <module>
[37m3[0m [37m|[0m [34mwith[39;49;00m suite.test([33m"[39;49;00m[33mfirst[39;49;00m[33m"[39;49;00m):
[37m4[0m [37m|[0m     [34mraise[39;49;00m [36mRuntimeError[39;49;00m()
[37m5[0m [37m|[0m [34mwith[39;49;00m suite.test([33m"[39;49;00m[33mfirst[39;49;00m[33m"[39;49;00m):


[1m... plus 2 other issues.[0m


Visual check: now show the error and the first failure, with 0 code context (so only the target line is shown). The final message about remaining issues is singular.

In [72]:
detail_errors(suite, max_report=2, lines_context=0)

----------------------------
# 1/3 ** Test [1mfirst[0m ** [31mError[0m
RuntimeError: <no detail provided>

[37mFile <ipython-input-26-39c764bb9450> | Line 35 | Function test[0m

File <ipython-input-51-11f8f96b7c56> | Line 4 | Function <module>
[37m4[0m [37m|[0m     [34mraise[39;49;00m [36mRuntimeError[39;49;00m()


-------------------------------
# 2/3 ** Test [1msecond[0m ** [33mFailure[0m
Test marked as failed.

[37mFile <ipython-input-26-39c764bb9450> | Line 35 | Function test[0m

File <ipython-input-51-11f8f96b7c56> | Line 8 | Function <module>
[37m8[0m [37m|[0m     fail()

[37mFile <ipython-input-5-f0d8945e4a67> | Line 3 | Function fail[0m


[1m... plus 1 other issue.[0m


Visual check: two tests passed, no failure nor error.

In [73]:
detail_errors(suite_all_passed)

All 2 tests passed. No failure nor error encountered.


Visual check: no test has been run.

In [74]:
detail_errors(suite_empty)

No test run.


# On-the-fly result reporting

In [75]:
class Report(Subscriber):
    """
    Test suite subscriber that reports on test results on-the-fly. When a test
    does not succeed, details on the failure can optionally be provided.
    
    file
        File-like object where the test results are reported. Default is
        standard output.
    in_detail
        
    """
    def __init__(
        self,
        verbose: bool = True,
        file: TextIOBase = sys.stdout,
        colorizer: Colorizer = color(),
        lines_context: int = 3
    ) -> None:
        super().__init__()
        self._last: Optional[bool] = None
        self._file = file
        self._verbose = verbose
        self._colorizer = colorizer
        self._lines_context = lines_context
        
    def on_result(self, name_test: str, result: Result) -> None:
        name_test = self._colorizer.important(name_test)
        if result.is_success():
            msg = f"Test {name_test} passed."
            if self._verbose and self._last is False:
                print("-" * len(colors.strip_color(msg)), file=self._file)
            self._last = True
            print(msg, file=self._file)
        else:
            if self._last is True:
                print(file=self._file)
            self._last = False
            if self._verbose:
                detail_result(
                    name_test,
                    result,
                    "Issue encountered",
                    self._file,
                    self._colorizer,
                    self._lines_context
                )
            else:
                index_frame_relevant = -1
                if result.is_failure():
                    label = self._colorizer.failure(f"Test {name_test} failed")
                    print(f"{label}: {result.reason}", file=self._file)
                    if isinstance(result.type_exc, TestFailed):
                        index_frame_relevant = -2
                else:
                    label = self._colorizer.error(f"Error occured during test {name_test}")
                    value_exc = ""
                    if str(result.value_exc):
                        value_exc = f" -- {str(result.value_exc)}"
                    print(f"{label}: {result.type_exc.__name__}{value_exc}", file=self._file)
                frame_relevant = result.traceback[index_frame_relevant]
                print_frame(
                    frame_relevant,
                    file=self._file,
                    colorizer=self._colorizer,
                    lines_context=self._lines_context
                )

In [76]:
def exercise_suite(reporter):
    suite = Suite() | reporter
    with suite.test("first-passing"):
        pass
    with suite.test("second-passing"):
        pass
    with suite.test("failing"):
        assert 1 == 0, "one vs. zero"
    with suite.test("error-raising"):
        raise RuntimeError("Some error")
    with suite.test("next-to-last"):
        pass
    with suite.test("last"):
        pass

Visual check: first and second tests pass, third test fails, fourth test raises an error, last two tests pass. Failure and error are tersely described, only the most relevant stack frame is provided.

In [77]:
exercise_suite(Report(verbose=False))

Test [1mfirst-passing[0m passed.
Test [1msecond-passing[0m passed.

[33mTest [1mfailing[0m failed[0m: one vs. zero
File <ipython-input-76-0b9ff122e3c7> | Line 8 | Function exercise_suite
[37m 5[0m [37m|[0m     [34mwith[39;49;00m suite.test([33m"[39;49;00m[33msecond-passing[39;49;00m[33m"[39;49;00m):
[37m 6[0m [37m|[0m         [34mpass[39;49;00m
[37m 7[0m [37m|[0m     [34mwith[39;49;00m suite.test([33m"[39;49;00m[33mfailing[39;49;00m[33m"[39;49;00m):
[37m 8[0m [37m|[0m         [34massert[39;49;00m [34m1[39;49;00m == [34m0[39;49;00m, [33m"[39;49;00m[33mone vs. zero[39;49;00m[33m"[39;49;00m
[37m 9[0m [37m|[0m     [34mwith[39;49;00m suite.test([33m"[39;49;00m[33merror-raising[39;49;00m[33m"[39;49;00m):
[37m10[0m [37m|[0m         [34mraise[39;49;00m [36mRuntimeError[39;49;00m([33m"[39;49;00m[33mSome error[39;49;00m[33m"[39;49;00m)
[37m11[0m [37m|[0m     [34mwith[39;49;00m suite.test([33m"[39;49;00m[33mn

Visual check: like previous, but with full detail of each non-success.

In [78]:
exercise_suite(Report())

Test [1mfirst-passing[0m passed.
Test [1msecond-passing[0m passed.

--------------------------------------------
Issue encountered ** Test [1m[1mfailing[0m[0m ** [33mFailure[0m
one vs. zero

[37mFile <ipython-input-26-39c764bb9450> | Line 35 | Function test[0m

File <ipython-input-76-0b9ff122e3c7> | Line 8 | Function exercise_suite
[37m 5[0m [37m|[0m     [34mwith[39;49;00m suite.test([33m"[39;49;00m[33msecond-passing[39;49;00m[33m"[39;49;00m):
[37m 6[0m [37m|[0m         [34mpass[39;49;00m
[37m 7[0m [37m|[0m     [34mwith[39;49;00m suite.test([33m"[39;49;00m[33mfailing[39;49;00m[33m"[39;49;00m):
[37m 8[0m [37m|[0m         [34massert[39;49;00m [34m1[39;49;00m == [34m0[39;49;00m, [33m"[39;49;00m[33mone vs. zero[39;49;00m[33m"[39;49;00m
[37m 9[0m [37m|[0m     [34mwith[39;49;00m suite.test([33m"[39;49;00m[33merror-raising[39;49;00m[33m"[39;49;00m):
[37m10[0m [37m|[0m         [34mraise[39;49;00m [36mRuntimeError[39;

# Exportation of testing tools to `jupytest.py` module

In [79]:
import nbformat

In [80]:
with open("jupytest.ipynb", "r", encoding="utf-8") as file_notebook:
    nb = nbformat.read(file_notebook, nbformat.NO_CONVERT)

In [81]:
with open("jupytest.py", "w", encoding="utf-8") as file_module:
    print(
        "\n\n".join(
            cell.source
            for cell in nb.cells
            if cell.cell_type == 'code' and "module" in cell.metadata.get("tags", [])
        ),
        file=file_module,
        end=""
    )

In [82]:
import importlib

# Force the reload of the new jupytest module. If running from a fresh
# kernel, the reload is spurious but innocuous.
import jupytest
importlib.reload(jupytest)

<module 'jupytest' from '/home/hamelin/jupytest/jupytest.py'>

Check that the exported module has all the tools we defined.

In [83]:
for construct in [
    "Result",
    "Success",
    "Frame",
    "Error",
    "Failure",
    "TestFailed",
    "fail",
    "protect_environment",
    "Subscriber",
    "Suite",
    "Emphasis",
    "Color",
    "Plain",
    "Colorizer",
    "color",
    "plain",
    "TestNameFormatter",
    "ladder",
    "name_all",
    "quoter",
    "report_results",
    "summarize_results",
    "print_frame",
    "detail_result",
    "detail_errors",
    "Report"
]:
    getattr(jupytest, construct)