# Introduction

The following uses a kind of literate programming approach to build a library of tools useful for writing unit and integration tests directly into a notebook. The library is to be articulated as a Python package built as the concatenation of a subset of the code cells of this notebook, using an ad hoc script. To help with identifying which code cells are parts of the final package and which are inline testing code, we use *tags*, which make up cell metadata in this notebook.

In [1]:
%load_ext pycodestyle_magic
%flake8_on --max_line_length 120

In [2]:
from abc import ABC, abstractmethod
from copy import copy, deepcopy
from inspect import getframeinfo, Traceback, unwrap
from io import TextIOBase
import itertools
from linecache import getline
import sys
from traceback import walk_tb
from typing import Dict, List, Tuple, Iterator, Union, Iterable, Optional, Any, Callable, Mapping, Sequence

import colors
from IPython import get_ipython
from IPython.core.magic import register_cell_magic
from pygments import highlight
from pygments.lexers import Python3Lexer
from pygments.formatters import TerminalFormatter

# Test results

In [3]:
class Result(ABC):
    """
    Result of a test. Indicates whether the test passed (was a success), and if it did not,
    whether it was a failure (as opposed to any other kind of issue).
    """

    @abstractmethod
    def is_success(self) -> bool:
        """True when an associated test run has passed."""
        raise NotImplementedError()

    def is_failure(self) -> bool:
        """True when an associated has not passed because a designed failure condition was met."""
        return False

    def as_dict(self) -> Dict:
        """Expresses this result as a dictionary suitable to structured data serialization."""
        return {"type": type(self).__name__}

## Manual test failure

Tests can be made to fail deliberately by raising a special exception.

In [4]:
class TestFailed(Exception):
    """
    Exception raised by this framework in order to mark a test run as a Failure.
    """

    def __init__(self, reason: str) -> None:
        super().__init__(reason)
        self.reason = reason

In [5]:
try:
    raise TestFailed("asdf")
except TestFailed as err:
    assert str(err) == "asdf"

In [6]:
def fail(reason: str = ""):
    "Marks some ongoing test as failed, with an optional reason for failure."
    raise TestFailed(reason)

In [7]:
try:
    fail("asdf")
    assert False
except TestFailed as err:
    assert err.reason == "asdf"

## Test result: success

In [8]:
class Success(Result):
    """
    Result for a test that passed.
    """
    def is_success(self) -> bool:
        return True

In [9]:
assert Success().is_success()

In [10]:
assert Success().as_dict() == {"type": "Success"}

## Test result following the test code raising an exception

### Traceback frames

In [11]:
class Frame:
    """
    Information regarding a frame of a traceback. Provides more than the very limited
    code context that comes from standard library introspection tools.
    """

    def __init__(self, tb: Traceback, num_line: int, tags: Optional[List[str]] = None) -> None:
        self.num_line = num_line
        self.name_file = tb.filename
        self.function = tb.function
        self.tags = tags or []

    def context(self, before: int = 3, after: int = 3) -> List[Tuple[int, str]]:
        ctx = [(self.num_line, getline(self.name_file, self.num_line).rstrip())]
        for delta in range(1, before + 1):
            ctx.insert(0, (self.num_line - delta, getline(self.name_file, self.num_line - delta).rstrip()))
        for delta in range(1, after + 1):
            ctx.append((self.num_line + delta, getline(self.name_file, self.num_line + delta).rstrip()))

        # Clean up context: remove line-ending blanks and blank lines top and bottom
        # of the context blob.
        while len(ctx) > 0:
            for i in [0, -1]:
                if len(ctx[i][1]) == 0:
                    del ctx[i]
                    break
            else:
                break

        return ctx

    def as_dict(self, context_before: int = 3, context_after: int = 3) -> Dict:
        return {
            "file": self.name_file,
            "line": self.num_line,
            "function": self.function,
            "context": [[i, line] for i, line in self.context(context_before, context_after)],
            "tags": self.tags
        }

    def __str__(self) -> str:
        return f"File {self.name_file}, Line {self.num_line}, Function {self.function}"

    def __repr__(self) -> str:
        return str(self)

In [12]:
from inspect import getfile

def my_function():  # noqa
    getfile("asdf")  # Will raise a TypeError

frames = []  # noqa
try:
    my_function()
    assert False
except TypeError:
    _, _, tb = sys.exc_info()
    for frame_raw, lineno in walk_tb(tb):
        frame = Frame(getframeinfo(frame_raw), lineno)
        assert frame.name_file == getfile(frame_raw)
        assert frame.num_line == lineno
        assert frame.function == frame_raw.f_code.co_name
        assert frame.tags == []
        frames.append(frame)

assert len(frames) == 3

In [13]:
frame = frames[1]
assert frame.context(0, 0) == [(4, "    getfile(\"asdf\")  # Will raise a TypeError")]
assert frame.context(1, 1) == [(3, "def my_function():  # noqa"), (4, "    getfile(\"asdf\")  # Will raise a TypeError")]
assert frame.context(3, 3) == [
    (1, "from inspect import getfile"),
    (2, ""),
    (3, "def my_function():  # noqa"),
    (4, "    getfile(\"asdf\")  # Will raise a TypeError"),
    (5, ""),
    (6, "frames = []  # noqa"),
    (7, "try:")
]

In [14]:
assert frame.context(45, 0) == list(zip(range(1, 4 + 1), [
    "from inspect import getfile",
    "",
    "def my_function():  # noqa",
    "    getfile(\"asdf\")  # Will raise a TypeError"
]))

In [15]:
assert frame.context(0, 9000) == list(zip(range(4, 20 + 1), """\
    getfile("asdf")  # Will raise a TypeError

frames = []  # noqa
try:
    my_function()
    assert False
except TypeError:
    _, _, tb = sys.exc_info()
    for frame_raw, lineno in walk_tb(tb):
        frame = Frame(getframeinfo(frame_raw), lineno)
        assert frame.name_file == getfile(frame_raw)
        assert frame.num_line == lineno
        assert frame.function == frame_raw.f_code.co_name
        assert frame.tags == []
        frames.append(frame)

assert len(frames) == 3\
""".split("\n")))

In [16]:
assert frame.as_dict() == {
    "file": getfile(my_function),
    "line": 4,
    "function": "my_function",
    "tags": [],
    "context": list(list(e) for e in zip(range(1, 7 + 1), """\
from inspect import getfile

def my_function():  # noqa
    getfile("asdf")  # Will raise a TypeError

frames = []  # noqa
try:\
""".split("\n")))
}

### The exception-driven result: errors

In [17]:
class Error(Result):
    """
    Non-passing test result due to an exception being raised.

    It is passed a set of common functions: the presence of these functions in the
    traceback of the exception are expected and normal, making their eventual
    reporting redundant and sort of trivial. The frames corresponding to these functions
    in the traceback summary kept by this object will be tagged as such.
    """
    TAG_COMMON = "common"

    def __init__(self, fns_common: Iterable[Callable]) -> None:
        super().__init__()
        self._type_exc: type
        self._value_exc: Any
        self._type_exc, self._value_exc, tb = sys.exc_info()
        if tb is None:
            raise RuntimeError("Can only instantiate this class when an exception has been raised.")

        codes_common = {unwrap(fn).__code__ for fn in fns_common}
        self._traceback: List[Frame] = []
        for frame_raw, num_line in walk_tb(tb):
            tags = []
            if frame_raw.f_code in codes_common:
                tags.append(Error.TAG_COMMON)
            self._traceback.append(Frame(getframeinfo(frame_raw), num_line, tags))

    def is_success(self) -> bool:
        return False

    @property
    def type_exc(self) -> type:
        """Returns the type of the exception associated to this result."""
        return self._type_exc

    @property
    def value_exc(self) -> Any:
        """Returns the exception raised in association to this test result."""
        return self._value_exc

    @property
    def traceback(self) -> List[Frame]:
        """
        Returns a summary of the stack trace associated to the exception that brought this test result.
        """
        return self._traceback

    def as_dict(self, context_before: int = 3, context_after: int = 3) -> Dict:
        d = super().as_dict()
        d.update(
            {
                "type_exc": self.type_exc.__name__,
                "value_exc": str(self.value_exc),
                "traceback": [frame.as_dict(context_before, context_after) for frame in self.traceback]
            }
        )
        return d

In [18]:
from inspect import getfile


def fn_raise():
    raise RuntimeError()


def caller():
    fn_raise()


try:
    caller()
    assert False
except RuntimeError:
    err: Error = Error([caller])
    assert not err.is_success()
    assert not err.is_failure()
    assert err.type_exc == RuntimeError
    assert isinstance(err.value_exc, RuntimeError)
    assert len(err.traceback) == 3
    assert [frame.function for frame in err.traceback] == ["<module>", "caller", "fn_raise"]
    assert [frame.tags for frame in err.traceback] == [[], [Error.TAG_COMMON], []]

In [19]:
# This trick gets us a cell's "file name", given that the `__file__` constant is not defined
# in Jupyter notebooks.
import inspect
def _asdf():  # noqa
    pass
filename = inspect.getfile(_asdf)  # noqa

try:
    raise RuntimeError()
    assert False
except RuntimeError:
    assert {
        "type": "Error",
        "type_exc": "RuntimeError",
        "value_exc": "",
        "traceback": [
            {
                "file": filename,
                "line": 9,
                "function": "<module>",
                "tags": [],
                "context": [[9, "    raise RuntimeError()"]]
            }
        ]
    } == Error([]).as_dict(0, 0)

### Deliberate exception: failures

For convenience's sake, we model `Failure`s as a subclass of `Error` to gain the exception breakdown functionality.

In [20]:
class Failure(Error):
    """
    Test result stemming from a condition check that failed, or a test run marked
    as a failure.
    """
    def __init__(self, reason: str, fns_common: Iterable[Callable]):
        super().__init__(fns_common)
        self._reason = reason

    @property
    def reason(self) -> str:
        "Reason given by the programmer as to why the test failed."
        return self._reason

    def is_failure(self) -> bool:
        return True

    def as_dict(self, context_before: int = 3, context_after: int = 3) -> Dict:
        d = super().as_dict(context_before, context_after)
        d["reason"] = self.reason
        return d

In [21]:
try:
    assert False
except AssertionError:
    err: Failure = Failure("asdf", [])
    assert not err.is_success()
    assert err.is_failure()
    assert err.type_exc == AssertionError
    assert isinstance(err.value_exc, AssertionError)
    assert isinstance(err.traceback, list)

In [22]:
import inspect
def _asdf():  # noqa
    pass
filename = inspect.getfile(_asdf)  # noqa

try:
    assert False
except AssertionError:
    assert {
        "type": "Failure",
        "type_exc": "AssertionError",
        "value_exc": "",
        "traceback": [
            {
                "file": filename,
                "line": 7,
                "function": "<module>",
                "tags": [],
                "context": [[7, "    assert False"]]
            }
        ],
        "reason": "asdf"
    } == Failure("asdf", []).as_dict(0, 0)

# Test suites

In [23]:
class Subscriber(ABC):
    """
    Object reacting to test results as they are generated by running tests.
    """

    def on_add_to_suite(self, suite: "Suite") -> None:
        pass

    def on_result(self, name_test: str, result: Result) -> None:
        pass

In [24]:
TestFunction = Callable[..., None]


class Suite:
    """
    Suite of tests, gathering the result of multiple named test runs. Test code fragments
    are named using the `test()` decorator, or leveraging it indirectly by registering
    a shortcut cell magic.

    Test suites gets added functionality through a publish/subscribe system. Subscriber are
    special objects tied to the suite instance through its `|' (bit OR) operator. At the
    moment, the only event broadcast to all subscribers is the generation of a new test
    result (and its appending to the suite's log). For instance, the `Report` plug-in
    supports the suite by giving immediate feedback on a test's results. Thus, to
    instantiate a suite with this added feature, one would use code like

    suite = Suite() | Report()
    """

    def __init__(self, name_magic: str = "test") -> None:
        self._tests: Dict[str, List[Result]] = {}
        self._fns_common = [fail, self.test]
        self._subscribers: List[Subscriber] = []

    def test(
        self,
        fn: Optional[TestFunction] = None,
        name: str = "",
        args: Sequence[Any] = [],
        kwargs: Mapping[str, Any] = {}
    ) -> Union[Callable[[TestFunction], TestFunction], TestFunction]:
        """
        Runs a test encoded into a function. Completing the function's execution counts as a
        test success; tripped assertions and other exceptions generate some other Result;
        and the test result is retained by this Suite instance.

        This decorator can be used two ways. Without application, one can decorate a
        test function without parameter:

            @suite.test
            def this_is_my_test():
                # Test goes here!

        The name of the test corresponds to that of the function. Applying the decorator can
        supply arguments to the test function and override the name of the test.

            @suite.test(name="My test, with spaces", args=(3, 4))
            def fn_test(a, b):
                # Test goes here!

        To run a test with multiple parameter sets, one may even call this function directly,
        not as a decorator:

            def fn_test(a, b):
                # Test test test...

            for a, b in [(2, 8), (3, 4)]:
                suite.test(fn_test, name=f"Test with {a}, {b}", args=(a, b))

        fn
            Function that embodies the test code.
        name
            Name of the test; by default, the name of the function is used.
        args
            Positional arguments to pass to the function to run the test.
        kwargs
            Named arguments to pass to the function to run the test.
        """
        if fn is None:
            return lambda fn: self.test(fn, name=name, args=args, kwargs=kwargs)

        try:
            fn(*args, **kwargs)
            result = Success()
        except TestFailed as err:
            result = Failure(err.reason or "Test marked as failed.", self._fns_common)
        except AssertionError as err:
            result = Failure(str(err) or "Assertion failed.", self._fns_common)
        except BaseException:
            result = Error(self._fns_common)

        name_test = name
        if not name_test:
            name_test = fn.__name__
            if args or kwargs:
                str_args = ", ".join(
                    [repr(str(a)) for a in args] +
                    [f"{k}={repr(str(v))}" for k, v in kwargs.items()]
                )
                name_test += f"({str_args})"
        self._tests.setdefault(name_test, []).append(result)
        for subscriber in self._subscribers:
            subscriber.on_result(name_test, result)

        return fn

    @property
    def results(self) -> Iterator[Tuple[str, Iterator[Result]]]:
        """
        Iterates through the gathered test results. For each named test, yields a tuple of
        the name of the test and an iterator over each result gathered as the test has
        been run.
        """
        for name, test_results in self._tests.items():
            yield name, iter(test_results)

    def as_dict(self) -> Dict[str, List[Dict]]:
        "Provides a structured data representation suitable for data serialization and exportation."
        return {name: [r.as_dict() for r in rez] for name, rez in self.results}

    def __or__(self, subscriber: Subscriber) -> "Suite":
        """
        Generates a clone of this suite instance, but with this subscriber subscribed to it.

        The new suite will not share member data structures with `self`, but if `self` carries
        test results already, the new suite will reference the same result objects -- we
        assume that Result objects are immutable.
        """
        suite_with_subscriber = Suite()
        suite_with_subscriber._tests = copy(self._tests)  # Under assumption of results immutability.
        suite_with_subscriber._subscribers = copy(self._subscribers)
        suite_with_subscriber._subscribers.append(subscriber)
        subscriber.on_add_to_suite(suite_with_subscriber)
        return suite_with_subscriber

In [25]:
assert isinstance(Suite()._tests, dict)

In [26]:
suite = Suite()


@suite.test
def succeeding():
    assert True


@suite.test
def failing_by_assert_terse():
    assert False


@suite.test
def failing_by_assert_reason():
    assert False, "assert reason"


@suite.test
def failing_manually_terse():
    fail()


@suite.test
def failing_manually_reason():
    fail("fail reason")


@suite.test
def error():
    raise RuntimeError("doh")


assert [
    ("succeeding", [(Success, "")]),
    ("failing_by_assert_terse", [(Failure, "Assertion failed.")]),
    ("failing_by_assert_reason", [(Failure, "assert reason")]),
    ("failing_manually_terse", [(Failure, "Test marked as failed.")]),
    ("failing_manually_reason", [(Failure, "fail reason")]),
    ("error", [(Error, "")])
] == [(name, [(type(r), r.reason if hasattr(r, "reason") else "") for r in rez]) for name, rez in suite.results]

In [27]:
# Check each non-success for tagging of the first frame.
num_non_success = 0
for name, rez in suite.results:
    for r in rez:
        if not r.is_success():
            num_non_success += 1
            assert Error.TAG_COMMON in r.traceback[0].tags

assert num_non_success == 5

In [28]:
# Check failures for tagging of the fail call.
num_failures = 0
for name, rez in suite.results:
    if "failing_manually" in name:
        for r in rez:
            num_failures += 1
            assert Error.TAG_COMMON in r.traceback[-1].tags

assert num_failures == 2

In [29]:
suite = Suite()


@suite.test(name="trial")
def t1():
    fail()


@suite.test(name="trial")
def t2():
    raise RuntimeError()


@suite.test(name="trial")
def t3():
    pass  # Literally!


assert [("trial", [Failure, Error, Success])] == [(name, [type(r) for r in rez]) for name, rez in suite.results]

In [30]:
suite = Suite()


@suite.test
def first():
    fail()


@suite.test(name="first")
def first2():
    pass


@suite.test
def second():
    raise RuntimeError()


assert {name: [r["type"] for r in rez] for name, rez in suite.as_dict().items()} == {
    "first": ["Failure", "Success"],
    "second": ["Error"]
}

## Testing environment protection during test execution

In [31]:
assert "x" not in globals()
mylist = [1, 2, 3]


suite = Suite()


@suite.test
def trial():
    x = 5
    assert x == 5
    mylist.append(4)


assert "x" not in globals()
assert [("trial", [Success])] == [(name, [type(r) for r in rez]) for name, rez in suite.results]
# Accidental globals are obviously not protected.
assert [1, 2, 3, 4] == mylist

In [32]:
for ns in [globals(), locals()]:
    assert "C" not in ns


suite = Suite()


@suite.test
def check_something_about_a_new_class():
    class C:
        def f(self):
            return 5

    assert C().f() == 5


for ns in [globals(), locals()]:
    assert "C" not in ns

## Testing publish/subscribe of results

In [33]:
class TestSubscriber(Subscriber):

    def __init__(self, lr: List[Tuple[str, Result]]):
        self.suite = None
        self._results: List[Tuple[str, Result]] = lr

    def on_add_to_suite(self, suite: Suite) -> None:
        self.suite = suite
        
    def on_result(self, name_test: str, result: Result) -> None:
        self._results.append((name_test, result))


results: List[Tuple[str, Result]] = []
sub = TestSubscriber(results)
assert sub.suite is None
suite = Suite() | sub
assert sub.suite is suite


@suite.test
def passing():
    pass


assert len(results) == 1
name_last, result_last = results[-1]
assert name_last == "passing"
assert isinstance(result_last, Success)


@suite.test
def error():
    raise RuntimeError("asdf")


assert len(results) == 2
name_last, result_last = results[-1]
assert name_last == "error"
assert isinstance(result_last, Error)
assert str(result_last.value_exc) == "asdf"


@suite.test
def failing():
    fail("There is no why")


assert len(results) == 3
name_last, result_last = results[-1]
assert name_last == "failing"
assert isinstance(result_last, Failure)
assert result_last.reason == "There is no why"

9:1: W293 blank line contains whitespace


# Generating test run reports

## Report colorizers

In [34]:
class Emphasis(ABC):

    @abstractmethod
    def __call__(self, s: str) -> str:
        raise NotImplementedError()

In [35]:
class Color(Emphasis):

    def __init__(self, fg=None, bg=None, style=None) -> None:
        super().__init__()
        self._fg = fg
        self._bg = bg
        self._style = style

    def __call__(self, s: str) -> str:
        return colors.color(s, fg=self._fg, bg=self._bg, style=self._style)

In [36]:
assert Color(fg="red", bg="blue", style="bold")("asdf") == '\x1b[31;44;1masdf\x1b[0m'

In [37]:
class Plain(Emphasis):

    def __call__(self, s: str) -> str:
        return s

In [38]:
assert Plain()("asdf") == "asdf"

In [39]:
class Colorizer:

    def __init__(self, important: Emphasis, trivial: Emphasis, failure: Emphasis, error: Emphasis) -> None:
        self.important: Emphasis = important
        self.trivial: Emphasis = trivial
        self.failure: Emphasis = failure
        self.error: Emphasis = error

In [40]:
def plain() -> Colorizer:
    return Colorizer(Plain(), Plain(), Plain(), Plain())

In [41]:
c = plain()
assert c.important("asdf") == "asdf"
assert c.trivial("asdf") == "asdf"
assert c.failure("asdf") == "asdf"
assert c.error("asdf") == "asdf"

In [42]:
def color(  # noqa
    important: Optional[Emphasis] = None,
    trivial: Optional[Emphasis] = None,
    failure: Optional[Emphasis] = None,
    error: Optional[Emphasis] = None
) -> Colorizer:
    return Colorizer(
        important or Color(style="bold"),
        trivial or Color(fg="white"),
        failure or Color(fg="yellow"),
        error or Color(fg="red")
    )

In [43]:
c = color(important=Color(fg="white"), trivial=Color(fg="black"), failure=Color(bg="blue"), error=Color(fg="green"))
assert c.important("asdf") == Color(fg="white")("asdf")
assert c.trivial("asdf") == Color(fg="black")("asdf")
assert c.failure("asdf") == Color(bg="blue")("asdf")
assert c.error("asdf") == Color(fg="green")("asdf")

## Raising an exception when reporting failures and errors

This can be useful when running all tests after a code modification, especially if running from within a CI system.

In [44]:
class ProblemsEncountered(Exception):
    """Raised (optionally) when a reporting routine must report failures and errors."""

    def __init__(self, num_failures, num_errors):
        plural_failure = "s" if num_failures > 1 else ""
        plural_errors = "s" if num_errors > 1 else ""
        super().__init__(
            f"Problems encountered during testing: {num_failures} failure{plural_failure}, "
            f"{num_errors} error{plural_errors}"
        )
        self.num_failures = num_failures
        self.num_errors = num_errors

In [45]:
import re

try:
    raise ProblemsEncountered(2, 2)
except ProblemsEncountered as err:
    assert re.search("2 failures, 2 errors", str(err))

try:
    raise ProblemsEncountered(1, 0)
except ProblemsEncountered as err:
    assert re.search("1 failure, 0 error", str(err))

In [46]:
def raise_on_error(suite: Suite) -> None:
    num_failures = 0
    num_errors = 0
    for _, rez in suite.results:
        for r in rez:
            if not r.is_success():
                if r.is_failure():
                    num_failures += 1
                else:
                    num_errors += 1
    if num_failures > 0 or num_errors > 0:
        raise ProblemsEncountered(num_failures, num_errors)

In [47]:
def make_suite_with_two_tests():
    suite = Suite()
    for name in ["one", "two"]:
        @suite.test(name=f"passing-{name}", args=(name,))
        def passing(a):
            pass

    return suite

In [48]:
suite = make_suite_with_two_tests()

try:
    raise_on_error(suite)
except ProblemsEncountered:
    assert False

In [49]:
suite = make_suite_with_two_tests()


@suite.test
def failure():
    fail()


try:
    raise_on_error(suite)
    assert False
except ProblemsEncountered as err:
    assert err.num_failures == 1
    assert err.num_errors == 0

In [50]:
suite = make_suite_with_two_tests()


@suite.test
def error():
    raise RuntimeError()


try:
    raise_on_error(suite)
    assert False
except ProblemsEncountered as err:
    assert err.num_failures == 0
    assert err.num_errors == 1

## Exhaustive report

In [51]:
TestNameFormatter = Callable[[str, int], str]


def name_all(name_test: str, num_result: int) -> str:
    """
    Test name formatter that puts out the name of a test even when it has
    been run multiple times.
    """
    return name_test


def ladder(name_test: str, num_result: int) -> str:
    """
    Test name formatter that puts out the name of a test only once, even
    if it has been run multiple times.
    """
    if num_result == 0:
        return name_test
    return " " * len(name_test)

In [52]:
def quoter(formatter: TestNameFormatter) -> TestNameFormatter:
    """
    Test name formatter that surrounds the name between double quotes. Not
    meant to be used directly by users of this module.
    """
    def quoter_format(name_test: str, num_result: int) -> str:
        return f"\"{formatter(name_test, num_result)}\""

    return quoter_format

In [53]:
class PolicyReportingProblems:
    """What to do when reporting test results that involve problems (failures and errors)."""
    def __init__(self, label: str) -> None:
        self.label = label

    def __str__(self) -> str:
        return self.label

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, PolicyReportingProblems):
            return False
        return self.label == other.label


IGNORE = PolicyReportingProblems("ignore")
RAISE = PolicyReportingProblems("raise")

In [54]:
def report_results(  # noqa
    suite: Suite,
    file: TextIOBase = sys.stdout,
    colorizer: Colorizer = color(),
    format_name_test: TestNameFormatter = ladder,
    sep_name_result: str = "\t",
    quote_names: bool = False,
    labels_result_custom: Mapping[type, str] = {},
    on_error: PolicyReportingProblems = IGNORE
) -> None:
    """
    Reports the name and result for each attempt at running a test, without details
    as to issues encountered (failures and errors).

    suite
        Suite of test to write report from.
    file
        File-like object to write report to. Default is standard output.
    colorizer
        Policy for emphasizing the written report.
    format_name_test
        Some tests are run more than once (for instance, for iterative problem solving).
        In a report written for human reading, the repeated naming of a test run more
        than once can feel redundant; it is eliminated by setting this to `ladder`.
        All tests will be named if the formatter used instead is `name_all`.
    sep_name_result
        Separating character used between test name and result label. Default is "\t".
    quote_names
        If True, the test names will be surrounded with double quotes in the output.
    labels_result_custom
        Dictionary of labels to use with different result types, when the default
        labels (*ok* for success, *failed* for failure, *ERROR* for error) should be
        changed. The emphasis for each label is derived from the colorizer.
    on_error
        What to do when reporting results that include problems such as failures and
        errors. If set to RAISE, it will raise a ProblemsEncountered exception, which
        is useful when running the notebook as part of a CI/CD pipeline; otherwise,
        or if set to IGNORE, nothing more is done than writing the report.
        Default is IGNORE.
    """
    len_all_names = [len(name) for name, _ in suite.results]
    if len(len_all_names) == 0:
        return
    len_name_largest = max(len_all_names)

    labels_result = {
        type_result: colorize(labels_result_custom.get(type_result, label_default))
        for type_result, colorize, label_default in [
            (Success, Plain(), "ok"),
            (Failure, colorizer.failure, "failed"),
            (Error, colorizer.error, "ERROR")
        ]
    }

    if quote_names:
        format_name_test = quoter(format_name_test)

    for name, rez in suite.results:
        p_name = f"{name:{len_name_largest}s}"
        for num, r in enumerate(rez):
            print(format_name_test(p_name, num), labels_result[type(r)], sep=sep_name_result, file=file)

    if on_error is RAISE:
        raise_on_error(suite)

In [55]:
suite = Suite()


@suite.test
def first():
    raise RuntimeError()


@suite.test(name="first")
def first2():
    pass


@suite.test
def second():
    fail()


@suite.test
def third():
    pass


@suite.test
def fourth():
    assert False


@suite.test(name="fourth")
def fourth2():
    pass

Visual check: tests `first` and `fourth` are run twice; `second` and `third` only once. The name of each test is written only once. The results are either `ok`, `failed` or `ERROR` and are emphasized as normal for success, yellow for failure and red for error. The results are also lined up cleanly into a second column.

In [56]:
report_results(suite)

first 	[31mERROR[0m
      	ok
second	[33mfailed[0m
third 	ok
fourth	[33mfailed[0m
      	ok


Same as above, but with a `RAISE` policy.

In [57]:
try:
    report_results(suite, on_error=RAISE)
    assert False
except ProblemsEncountered as err:
    assert err.num_failures == 2
    assert err.num_errors == 1

first 	[31mERROR[0m
      	ok
second	[33mfailed[0m
third 	ok
fourth	[33mfailed[0m
      	ok


Visual check: like previous, except that the test names are written on each line a result is reported.

In [58]:
report_results(suite, format_name_test=name_all)

first 	[31mERROR[0m
first 	ok
second	[33mfailed[0m
third 	ok
fourth	[33mfailed[0m
fourth	ok


Visual check: like previous, but CSV-like, with test names quoted.

In [59]:
report_results(suite, format_name_test=name_all, quote_names=True, sep_name_result=",", colorizer=plain())

"first ",ERROR
"first ",ok
"second",failed
"third ",ok
"fourth",failed
"fourth",ok


In [60]:
suite_all_passed = Suite()


@suite_all_passed.test
def first():
    pass


@suite_all_passed.test
def second():
    pass

Visual check: two results, both `ok`. No exception raised.

In [61]:
report_results(suite_all_passed, on_error=RAISE)

first 	ok
second	ok


In [62]:
suite_empty = Suite()

Visual check: nothing written.

In [63]:
report_results(suite_empty)

## Test result summary

In [64]:
def summarize_results(  # noqa
    suite: Suite,
    file: Optional[TextIOBase] = sys.stdout,
    colorizer: Colorizer = color(),
    sep: str = ", ",
    on_error: PolicyReportingProblems = IGNORE
) -> Dict[type, int]:
    """
    Writes a very short summary of a test run, counting the number of each result obtained.

    suite
        Suite of test to write report from.
    file
        File-like object to write report to. Default is standard output.
    colorizer
        Policy for emphasizing the written report.
    sep
        Separation string between the labeled numbers of results. Default is ", "
    on_error
        What to do when reporting results that include problems such as failures and
        errors. If set to RAISE, it will raise a ProblemsEncountered exception, which
        is useful when running the notebook as part of a CI/CD pipeline; otherwise,
        or if set to IGNORE, nothing more is done than writing the report.
        Default is IGNORE.
    """
    summary = {t: 0 for t in [Success, Failure, Error]}
    for _, rez in suite.results:
        for r in rez:
            summary[type(r)] += 1

    if file is not None:
        print(
            f"{summary[Success]} passed",
            (colorizer.failure if summary[Failure] > 0 else colorizer.trivial)(f"{summary[Failure]} failed"),
            (colorizer.error if summary[Error] > 0 else colorizer.trivial)(f"{summary[Error]} raised an error"),
            file=file,
            sep=sep
        )

    if on_error == RAISE:
        raise_on_error(suite)
    return summary

Visual check: should show 3 tests passed, 2 failures (in yellow), 1 error (in red).

In [65]:
summarize_results(suite)

3 passed, [33m2 failed[0m, [31m1 raised an error[0m


{__main__.Success: 3, __main__.Failure: 2, __main__.Error: 1}

Same, but raising an exception.

In [66]:
try:
    summarize_results(suite, on_error=RAISE)
    assert False
except ProblemsEncountered as err:
    assert err.num_failures == 2
    assert err.num_errors == 1

3 passed, [33m2 failed[0m, [31m1 raised an error[0m


Check visually that nothing is written.

In [67]:
assert summarize_results(suite, file=None) == {
    Success: 3,
    Failure: 2,
    Error: 1
}

In [68]:
suite_all_passed = Suite()


@suite_all_passed.test
def first():
    pass


@suite_all_passed.test
def second():
    pass

Check visually that report is fine. Should show 2 tests passed, and failed and errors labeled in a subdued color.

In [69]:
summarize_results(suite_all_passed)

2 passed, [37m0 failed[0m, [37m0 raised an error[0m


{__main__.Success: 2, __main__.Failure: 0, __main__.Error: 0}

Check visually that report is fine. Should show 0 for each type of test.

In [70]:
summarize_results(suite_empty)

0 passed, [37m0 failed[0m, [37m0 raised an error[0m


{__main__.Success: 0, __main__.Failure: 0, __main__.Error: 0}

## Detailed report of issues encountered

### Printing one frame for a result's associated traceback

In [71]:
def print_frame(  # noqa
    frame: Frame,
    file: TextIOBase = sys.stdout,
    colorizer: Colorizer = color(),
    lines_context: int = 3
) -> None:
    """
    Writes up a single stack frame report.

    frame
        Stack frame to report on.
    file
        File-like object to write report to. Default is standard output.
    colorizer
        Policy for emphasizing the written report.
    lines_context
        Number of lines of code to fetch and write up before and after the
        line associated to the stack frame.
    """
    header = (
        colorizer.trivial
        if Error.TAG_COMMON in frame.tags
        else Plain()
    )(
        " | ".join([
            "Code cell" if "cell" == frame.name_file else f"File {frame.name_file}",
            f"Line {frame.num_line}",
            f"Function {frame.function}"
        ])
    )
    print(header, file=file)
    if Error.TAG_COMMON not in frame.tags:
        context: List[Tuple[int, str]] = frame.context(before=lines_context, after=lines_context)
        if len(context) > 0:
            max_len_num_line = len(str(context[-1][0]))
            for i, line in zip(
                [i for i, _ in context],
                highlight(
                    "\n".join(ln for _, ln in context),
                    lexer=Python3Lexer(),
                    formatter=TerminalFormatter()
                ).split("\n")
            ):
                print(
                    colorizer.trivial(f"{i:{max_len_num_line}d}"),
                    colorizer.trivial("|"),
                    line,
                    sep=" ",
                    file=file
                )
    print(file=file)

Visual check: show show 3 frame reports. First and third must have appropriate code context (check against the line number), three lines of context around target line max. Second frame report should have subdued color and no code context.

In [72]:
def raiser():
    raise RuntimeError()


def caller():
    raiser()


try:
    caller()
    assert False
except RuntimeError:
    _, _, tb = sys.exc_info()
    for frame, lineno in walk_tb(tb):
        print_frame(
            Frame(
                getframeinfo(frame),
                lineno,
                [Error.TAG_COMMON] if frame.f_code is caller.__code__ else [])
        )

File <ipython-input-72-f4f6026f3037> | Line 10 | Function <module>
[37m 9[0m [37m|[0m [34mtry[39;49;00m:
[37m10[0m [37m|[0m     caller()
[37m11[0m [37m|[0m     [34massert[39;49;00m [34mFalse[39;49;00m
[37m12[0m [37m|[0m [34mexcept[39;49;00m [36mRuntimeError[39;49;00m:
[37m13[0m [37m|[0m     _, _, tb = sys.exc_info()

[37mFile <ipython-input-72-f4f6026f3037> | Line 6 | Function caller[0m

File <ipython-input-72-f4f6026f3037> | Line 2 | Function raiser
[37m1[0m [37m|[0m [34mdef[39;49;00m [32mraiser[39;49;00m():
[37m2[0m [37m|[0m     [34mraise[39;49;00m [36mRuntimeError[39;49;00m()
[37m3[0m [37m|[0m 
[37m4[0m [37m|[0m 
[37m5[0m [37m|[0m [34mdef[39;49;00m [32mcaller[39;49;00m():



### Detailing one result

In [73]:
def detail_result(  # noqa
    name_test: str,
    result: Error,
    prefix_header: str,
    file: TextIOBase = sys.stdout,
    colorizer: Colorizer = color(),
    lines_context: int = 3
) -> None:
    """
    Writes up a report regarding a single test result.

    name_test
        Name of the test the result was gotten for.
    result
        Error-type result to report on.
    prefix_header
        String prepended to the header of the result report.
    file
        File-like object to write report to. Default is standard output.
    colorizer
        Policy for emphasizing the written report.
    lines_context
        Number of lines of code to fetch and write up before and after the
        line associated to the stack frame.
    """
    header = " ** ".join([
        prefix_header,
        f"Test {colorizer.important(name_test)}",
        {Failure: colorizer.failure, Error: colorizer.error}[type(result)](type(result).__name__)
    ])
    print("-" * len(colors.strip_color(header)), file=file)
    print(header, file=file)
    if result.is_failure():
        print(result.reason, file=file)
    else:
        print(f"{result.type_exc.__name__}:", str(result.value_exc) or "<no detail provided>", file=file)
    print(file=file)

    for frame in result.traceback:  # First frame is always Suite.test, which is irrelevant.
        print_frame(frame, file=file, colorizer=colorizer, lines_context=lines_context)

Visual check: header should start with first word **HEY**, label the test as **second** and map it as a *Failure*. Its frame report shows two subdued frames without code context, sandwiching a frame showing the call to the `fail()` function (line 16) that tripped the failure.

In [74]:
name, rez = list(suite.results)[1]
failure = list(rez)[0]
assert isinstance(failure, Failure)
detail_result(name, failure, "HEY")

-----------------------------
HEY ** Test [1msecond[0m ** [33mFailure[0m
Test marked as failed.

[37mFile <ipython-input-24-4027f2cde817> | Line 73 | Function test[0m

File <ipython-input-55-56d79be7ffb4> | Line 16 | Function second
[37m14[0m [37m|[0m [90m@suite[39;49;00m.test
[37m15[0m [37m|[0m [34mdef[39;49;00m [32msecond[39;49;00m():
[37m16[0m [37m|[0m     fail()
[37m17[0m [37m|[0m 
[37m18[0m [37m|[0m 
[37m19[0m [37m|[0m [90m@suite[39;49;00m.test

[37mFile <ipython-input-6-f0d8945e4a67> | Line 3 | Function fail[0m



### All results

In [75]:
def detail_issues(  # noqa
    suite: Suite,
    file: TextIOBase = sys.stdout,
    colorizer: Colorizer = color(),
    lines_context: int = 3,
    max_report: int = sys.maxsize,
    on_error: PolicyReportingProblems = IGNORE
) -> None:
    """
    Writes up a report detailing the issues encountered while running the test suite.

    suite
        The test suite.
    file
        The file-like object to write the report to. Default is standard output.
    colorizer
        Color scheme used for emphasizing the various bits of the report.
    lines_context
        Number of lines of context to provide around each line of code involved
        in a reported problem.
    max_report
        Maximum number of problems to report on.
    on_error
        What to do when reporting results that include problems such as failures and
        errors. If set to RAISE, it will raise a ProblemsEncountered exception, which
        is useful when running the notebook as part of a CI/CD pipeline; otherwise,
        or if set to IGNORE, nothing more is done than writing the report.
        Default is IGNORE.
    """
    summary = summarize_results(suite, file=None)
    num_problems = summary[Failure] + summary[Error]
    num_remaining: int = -1
    if num_problems == 0:
        if summary[Success] == 0:
            print("No test run.", file=file)
        else:
            print(f"All {summary[Success]} tests passed. No failure nor error encountered.", file=file)
    else:
        index = 1
        for name, rez in suite.results:
            if num_remaining < 0:
                for r in rez:
                    if not r.is_success():
                        detail_result(name, r, f"# {index}/{num_problems}", lines_context=lines_context, file=file)
                        print()

                        if index >= max_report:
                            num_remaining = num_problems - index
                            break
                        index += 1

    if num_remaining > 0:
        print(
            colorizer.important(
                f"... plus {num_remaining} other issue{'s' if num_remaining > 1 else ''}."
            ),
            file=file
        )
    if on_error == RAISE:
        raise_on_error(suite)

Visual check: 3 problem reports expected. First one is for an undetailed error (`RuntimeError`). Second and third are for failures. The first frame report for each (...` | Function test`) is in a subdued color, as is the frame for the call to function `fail`.

In [76]:
detail_issues(suite)

----------------------------
# 1/3 ** Test [1mfirst[0m ** [31mError[0m
RuntimeError: <no detail provided>

[37mFile <ipython-input-24-4027f2cde817> | Line 73 | Function test[0m

File <ipython-input-55-56d79be7ffb4> | Line 6 | Function first
[37m4[0m [37m|[0m [90m@suite[39;49;00m.test
[37m5[0m [37m|[0m [34mdef[39;49;00m [32mfirst[39;49;00m():
[37m6[0m [37m|[0m     [34mraise[39;49;00m [36mRuntimeError[39;49;00m()
[37m7[0m [37m|[0m 
[37m8[0m [37m|[0m 
[37m9[0m [37m|[0m [90m@suite[39;49;00m.test(name=[33m"[39;49;00m[33mfirst[39;49;00m[33m"[39;49;00m)


-------------------------------
# 2/3 ** Test [1msecond[0m ** [33mFailure[0m
Test marked as failed.

[37mFile <ipython-input-24-4027f2cde817> | Line 73 | Function test[0m

File <ipython-input-55-56d79be7ffb4> | Line 16 | Function second
[37m14[0m [37m|[0m [90m@suite[39;49;00m.test
[37m15[0m [37m|[0m [34mdef[39;49;00m [32msecond[39;49;00m():
[37m16[0m [37m|[0m     fail()


Visual check: show only the report for the error, and mention that two more problems have yet to be reported. Also, code context is only one line around the target line, making for a 3-line code blurb.

In [77]:
detail_issues(suite, max_report=1, lines_context=1)

----------------------------
# 1/3 ** Test [1mfirst[0m ** [31mError[0m
RuntimeError: <no detail provided>

[37mFile <ipython-input-24-4027f2cde817> | Line 73 | Function test[0m

File <ipython-input-55-56d79be7ffb4> | Line 6 | Function first
[37m5[0m [37m|[0m [34mdef[39;49;00m [32mfirst[39;49;00m():
[37m6[0m [37m|[0m     [34mraise[39;49;00m [36mRuntimeError[39;49;00m()


[1m... plus 2 other issues.[0m


Same, but with exception raising.

In [78]:
try:
    detail_issues(suite, max_report=1, lines_context=1, on_error=RAISE)
    assert False
except ProblemsEncountered as err:
    assert err.num_failures == 2
    assert err.num_errors == 1

----------------------------
# 1/3 ** Test [1mfirst[0m ** [31mError[0m
RuntimeError: <no detail provided>

[37mFile <ipython-input-24-4027f2cde817> | Line 73 | Function test[0m

File <ipython-input-55-56d79be7ffb4> | Line 6 | Function first
[37m5[0m [37m|[0m [34mdef[39;49;00m [32mfirst[39;49;00m():
[37m6[0m [37m|[0m     [34mraise[39;49;00m [36mRuntimeError[39;49;00m()


[1m... plus 2 other issues.[0m


Visual check: now show the error and the first failure, with 0 code context (so only the target line is shown). The final message about remaining issues is singular.

In [79]:
detail_issues(suite, max_report=2, lines_context=0)

----------------------------
# 1/3 ** Test [1mfirst[0m ** [31mError[0m
RuntimeError: <no detail provided>

[37mFile <ipython-input-24-4027f2cde817> | Line 73 | Function test[0m

File <ipython-input-55-56d79be7ffb4> | Line 6 | Function first
[37m6[0m [37m|[0m     [34mraise[39;49;00m [36mRuntimeError[39;49;00m()


-------------------------------
# 2/3 ** Test [1msecond[0m ** [33mFailure[0m
Test marked as failed.

[37mFile <ipython-input-24-4027f2cde817> | Line 73 | Function test[0m

File <ipython-input-55-56d79be7ffb4> | Line 16 | Function second
[37m16[0m [37m|[0m     fail()

[37mFile <ipython-input-6-f0d8945e4a67> | Line 3 | Function fail[0m


[1m... plus 1 other issue.[0m


Visual check: two tests passed, no failure nor error.

In [80]:
detail_issues(suite_all_passed)

All 2 tests passed. No failure nor error encountered.


Visual check: no test has been run.

In [81]:
detail_issues(suite_empty)

No test run.


# On-the-fly result reporting

In [82]:
class Report(Subscriber):
    """
    Test suite subscriber that reports on test results on-the-fly. When a test
    does not succeed, details on the failure can optionally be provided. See
    documentation on class `Suite` to get an example on the usage of this
    plug-in.

    file
        File-like object where the test results are reported. Default is
        standard output.
    verbose
        If True, the feedback on test results contains traceback information
        when problems are encountered.
    file
        File-like object where the feedback is put out.
    colorizer
        Policy on how to emphasize the feedback output.
    lines_context
        Number of lines of code to provide as context in traceback frames
        around the line of code at the nexus of an issue.
    """
    def __init__(
        self,
        verbose: bool = True,
        file: TextIOBase = sys.stdout,
        colorizer: Colorizer = color(),
        lines_context: int = 3
    ) -> None:
        super().__init__()
        self._last: Optional[bool] = None
        self._file = file
        self._verbose = verbose
        self._colorizer = colorizer
        self._lines_context = lines_context

    def on_result(self, name_test: str, result: Result) -> None:
        name_test = self._colorizer.important(name_test)
        if result.is_success():
            msg = f"Test {name_test} passed."
            if self._verbose and self._last is False:
                print("-" * len(colors.strip_color(msg)), file=self._file)
            self._last = True
            print(msg, file=self._file)
        else:
            if self._last is True:
                print(file=self._file)
            self._last = False
            if self._verbose:
                detail_result(
                    name_test,
                    result,
                    "Issue encountered",
                    self._file,
                    self._colorizer,
                    self._lines_context
                )
            else:
                index_frame_relevant = -1
                if result.is_failure():
                    label = self._colorizer.failure(f"Test {name_test} failed")
                    print(f"{label}: {result.reason}", file=self._file)
                    if isinstance(result.type_exc, TestFailed):
                        index_frame_relevant = -2
                else:
                    label = self._colorizer.error(f"Error occured during test {name_test}")
                    value_exc = ""
                    if str(result.value_exc):
                        value_exc = f" -- {str(result.value_exc)}"
                    print(f"{label}: {result.type_exc.__name__}{value_exc}", file=self._file)
                frame_relevant = result.traceback[index_frame_relevant]
                print_frame(
                    frame_relevant,
                    file=self._file,
                    colorizer=self._colorizer,
                    lines_context=self._lines_context
                )

In [83]:
def exercise_suite(reporter):
    suite = Suite() | reporter

    @suite.test
    def first_passing():
        pass

    @suite.test
    def second_passing():
        pass

    @suite.test
    def failing():
        assert 1 == 0, "one vs. zero"

    @suite.test
    def error_raising():
        raise RuntimeError("Some error()")

    @suite.test
    def next_to_last():
        pass

    @suite.test
    def last():
        pass

Visual check: first and second tests pass, third test fails, fourth test raises an error, last two tests pass. Failure and error are tersely described, only the most relevant stack frame is provided.

In [84]:
exercise_suite(Report(verbose=False))

Test [1mfirst_passing[0m passed.
Test [1msecond_passing[0m passed.

[33mTest [1mfailing[0m failed[0m: one vs. zero
File <ipython-input-83-04424abffded> | Line 14 | Function failing
[37m12[0m [37m|[0m     [90m@suite[39;49;00m.test
[37m13[0m [37m|[0m     [34mdef[39;49;00m [32mfailing[39;49;00m():
[37m14[0m [37m|[0m         [34massert[39;49;00m [34m1[39;49;00m == [34m0[39;49;00m, [33m"[39;49;00m[33mone vs. zero[39;49;00m[33m"[39;49;00m
[37m15[0m [37m|[0m 
[37m16[0m [37m|[0m     [90m@suite[39;49;00m.test
[37m17[0m [37m|[0m     [34mdef[39;49;00m [32merror_raising[39;49;00m():

[31mError occured during test [1merror_raising[0m[0m: RuntimeError -- Some error()
File <ipython-input-83-04424abffded> | Line 18 | Function error_raising
[37m16[0m [37m|[0m     [90m@suite[39;49;00m.test
[37m17[0m [37m|[0m     [34mdef[39;49;00m [32merror_raising[39;49;00m():
[37m18[0m [37m|[0m         [34mraise[39;49;00m [36mRuntimeError

Visual check: like previous, but with full detail of each non-success.

In [85]:
exercise_suite(Report())

Test [1mfirst_passing[0m passed.
Test [1msecond_passing[0m passed.

--------------------------------------------
Issue encountered ** Test [1m[1mfailing[0m[0m ** [33mFailure[0m
one vs. zero

[37mFile <ipython-input-24-4027f2cde817> | Line 73 | Function test[0m

File <ipython-input-83-04424abffded> | Line 14 | Function failing
[37m12[0m [37m|[0m     [90m@suite[39;49;00m.test
[37m13[0m [37m|[0m     [34mdef[39;49;00m [32mfailing[39;49;00m():
[37m14[0m [37m|[0m         [34massert[39;49;00m [34m1[39;49;00m == [34m0[39;49;00m, [33m"[39;49;00m[33mone vs. zero[39;49;00m[33m"[39;49;00m
[37m15[0m [37m|[0m 
[37m16[0m [37m|[0m     [90m@suite[39;49;00m.test
[37m17[0m [37m|[0m     [34mdef[39;49;00m [32merror_raising[39;49;00m():

------------------------------------------------
Issue encountered ** Test [1m[1merror_raising[0m[0m ** [31mError[0m
RuntimeError: Some error()

[37mFile <ipython-input-24-4027f2cde817> | Line 73 | Function 

# Using a cell magic to write tests

The current decorator approach involves little boilerplate, but it can be reduced further nonetheless. Let's use a cell magic, in which all code written is wrapped into an ad hoc test function. Let's use a subscriber to register this cell magic.

In [86]:
class Magic(Subscriber):

    def __init__(self, name_magic: str = "test") -> None:
        self._name_magic = name_magic

    def on_add_to_suite(self, suite):
        suite._fns_common.append(run_test_from_cell)
        ipython = get_ipython()
        if ipython and self._name_magic:
            register_cell_magic(self._name_magic)(lambda line, cell: test_cell(suite, line, cell))


def test_cell(suite: Suite, line: str, cell: Optional[str]) -> None:
    """
    Runs a test written using a cell magic.
    """
    line = line.strip()
    if not line:
        raise ValueError("Please provide a title for the test (right after the cell magic invocation).")
    cell = (cell or "").strip()
    if not cell:
        raise ValueError("There is no test to execute! Please write some code in there.")

    suite.test(run_test_from_cell, name=line, args=(cell,))


def run_test_from_cell(cell: str) -> None:
    """
    Executes the body of a cell, in context of the execution of a test.
    """
    code_source = "\n" + cell
    ipython = get_ipython()
    name_cell = ipython.compile.cache(code_source)
    code = compile(code_source, name_cell, "exec")
    exec(code, ipython.user_global_ns, locals())

## Tests

### Simple usage of the magic

In [87]:
suite = Suite() | Magic()

In [88]:
%%test First test, passing
assert True

In [89]:
%%test Second test, failing
fail()

In [90]:
%%test Third test, raising an error
raise RuntimeError()

In [91]:
def my_function(x, y):
    return x * y + x + y

In [92]:
%%test function in my notebook
assert my_function(4, 5) == 29

In [93]:
assert {name: [r["type"] for r in rez] for name, rez in suite.as_dict().items()} == {
    "First test, passing": ["Success"],
    "Second test, failing": ["Failure"],
    "Third test, raising an error": ["Error"],
    "function in my notebook": ["Success"]
}

Ensure even with the cell magic environment is not polluted.

In [94]:
for name in ["x", "f", "C"]:
    for ns in [globals(), locals(), get_ipython().user_ns]:
        assert name not in ns

In [95]:
%%test Add x, f and C
x = 5

def f():
    return 5


class C():
    
    def f(self):
        return 5
    
    
assert f() == x
assert C().f() == f()

In [96]:
for name in ["x", "f", "C"]:
    for ns in [globals(), locals(), get_ipython().user_ns]:
        try:
            assert name not in ns
        except AssertionError:
            print(f"Name {name} still defined.")
            raise

### Detailing one result, obtained by testing with cell magic

In [97]:
suite_magic = Suite() | Magic("test_magic")

In [98]:
%%test_magic the label
x = 5
assert x == 5
fail()

Visual check: header should start with first word **Head**, label the test as **the label** and map it as a *Failure*. Its frame report shows three subdued frames without code context, sandwiching a frame showing the call to the `fail()` function (line 16) that tripped the failure.

In [99]:
name, rez = list(suite_magic.results)[0]
failure = list(rez)[0]
assert isinstance(failure, Failure)
detail_result(name, failure, "Head")

---------------------------------
Head ** Test [1mthe label[0m ** [33mFailure[0m
Test marked as failed.

[37mFile <ipython-input-24-4027f2cde817> | Line 73 | Function test[0m

[37mFile <ipython-input-86-b19f42f5c076> | Line 35 | Function run_test_from_cell[0m

File <ipython-input-0-576533794e6c> | Line 4 | Function <module>
[37m2[0m [37m|[0m x = [34m5[39;49;00m
[37m3[0m [37m|[0m [34massert[39;49;00m x == [34m5[39;49;00m
[37m4[0m [37m|[0m fail()

[37mFile <ipython-input-6-f0d8945e4a67> | Line 3 | Function fail[0m



# Documentation index

Let's compose a docstring for the `jupytest` module (which we will export out of bits of this notebook). This docstring will act as a reference documentation index.

# Exportation of testing tools to `jupytest.py` module

In [100]:
import os
import os.path
import shutil

if os.path.isdir("jupytest"):
    print("Clean up previous")
    shutil.rmtree("jupytest")
os.makedirs("jupytest")

Clean up previous


In [101]:
import nbformat
with open("jupytest.ipynb", "r", encoding="utf-8") as file_notebook:
    nb = nbformat.read(file_notebook, nbformat.NO_CONVERT)

In [102]:
with open("jupytest/__init__.py", "w", encoding="utf-8") as file_module:
    print(
        "\n\n".join(
            [
                "\n\n".join(
                    f'"""\n{cell.source}\n"""'
                    for cell in nb.cells
                    if cell.cell_type == 'raw' and "header" in cell.metadata.get("tags", [])
                )
            ] +
            [
                cell.source
                for cell in nb.cells
                if cell.cell_type == 'code' and "module" in cell.metadata.get("tags", [])
            ]
        ),
        file=file_module,
        end=""
    )

In [103]:
import importlib

# Force the reload of the new jupytest module. If running from a fresh
# kernel, the reload is spurious but innocuous.
import jupytest
importlib.reload(jupytest)
None

Check that the exported module has all the tools we defined.

In [104]:
for construct in [
    "Result",
    "Success",
    "Frame",
    "Error",
    "Failure",
    "TestFailed",
    "fail",
    "Subscriber",
    "Suite",
    "Emphasis",
    "Color",
    "Plain",
    "Colorizer",
    "color",
    "plain",
    "TestNameFormatter",
    "ladder",
    "name_all",
    "quoter",
    "ProblemsEncountered",
    "raise_on_error",
    "PolicyReportingProblems",
    "IGNORE",
    "RAISE",
    "report_results",
    "summarize_results",
    "print_frame",
    "detail_result",
    "detail_issues",
    "Report"
]:
    assert hasattr(jupytest, construct), f"Have not got construct {construct}"

Visual check: ensure the documentation for the `jupytest` module itself corresponds to the `header` cell.

In [105]:
print(help(jupytest))

Help on package jupytest:

NAME
    jupytest - Unit and integration testing in a notebook

DESCRIPTION
    *** Building and running suites of tests ***
    
    class Suite
        Method test (context manager)
    Function fail
    
    *** Reporting test results ***
    
    Function report_results
    Function summarize_results
    Function detail_issues
    Class Report (used as a subscriber plug-in to class Suite)
    
    *** Delving deeper into test results (going beyond the tools described above) ***
    
    Class Suite
        Property results
    Class Result
        Sub-class Success
        Sub-class Error
            Sub-class Failure
        Method is_success
        Method is_failure
    Class Frame
    
    *** Customizing result reporting ***
    
    Class Colorizer
        Function plain
        Function color
    Class Emphasis
        Sub-class Plain
        Sub-class Color
    Type TestNameFormatter
        Function ladder
        Function name_all
        Functi