# Include the code that we'll test

In [None]:
%pip install unittest-xml-reporting

[43mNote: you may need to restart the kernel using dbutils.library.restartPython() to use updated packages.[0m
Collecting unittest-xml-reporting
  Downloading unittest_xml_reporting-3.2.0-py2.py3-none-any.whl (20 kB)
Collecting lxml
  Downloading lxml-4.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (7.1 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 7.1/7.1 MB 43.9 MB/s eta 0:00:00
Installing collected packages: lxml, unittest-xml-reporting
Successfully installed lxml-4.9.2 unittest-xml-reporting-3.2.0
[43mNote: you may need to restart the kernel using dbutils.library.restartPython() to use updated packages.[0m


In [None]:
%run "./Library Functions"

# Import package for unit testing

We'll use built-in package: [unittest](https://docs.python.org/3/library/unittest.html).

In [None]:
import unittest

# Tests could be implemented as classes

One of the main advantages of implementing tests as classes is that we can run code before and after our test functions by implementing `setUp()`, `tearDown()`, etc.

Another advantage is that we inherit many useful assertion methods, such as `assertEqual`, `assertIsNotNone`, ..., and we can annotate functions and classes with additional information, for example, that a failure is expected.

In [None]:
class SimpleTest(unittest.TestCase):
    def test_data_generation(self):
      n = 100
      name = "tmp42"
      generate_data(n=n, name=name)
      df = spark.sql(f"select * from {name}")
      self.assertEqual(df.count(), n)

    def test_data_prediction(self):
      predicted = get_data_prediction()
      self.assertEqual(predicted, 42)

# Tests could be implemented as functions

This is mostly done when we already have some code that performs testing, and we want to integrate it into the testing pipeline.

In [None]:
def test_data_generation():
  n = 100
  name = "tmp42"
  generate_data(n=n, name=name)
  df = spark.sql(f"select * from {name}")
  assert df.count() == n

In [None]:
def test_data_prediction():
  predicted = get_data_prediction()
  assert predicted == 42

# Generate the test suite

Some Python packages for unit testing rely on automatic test discovery based on file analysis, etc.  When working with Databricks notebooks this may not work, so we may need to generate the test suite that will be executed ourselves.

For test classes we can either:
* build the test suite explicitly for every test class and each test case inside it, as in the `generate_test_class_suite` function
* automatically discover all test cases in the given class(-es) as in the `discover_test_cases` function
* discover all available test classes and their test cases completely automatically as in the `discover_test_classes` (in combination with `discover_test_cases`) by looking for classes with name ending with `Test`

In [None]:
def generate_test_class_suite():
  """Build a suite that lists the `SimpleTest` cases explicitly by name.

  Returns:
    A `unittest.TestSuite` holding `test_data_generation` followed by
    `test_data_prediction`.
  """
  case_names = ('test_data_generation', 'test_data_prediction')
  return unittest.TestSuite(SimpleTest(case_name) for case_name in case_names)

In [None]:
def discover_test_cases(*test_classes):
  """Collect every test method of the given test classes into one suite.

  Args:
    *test_classes: `unittest.TestCase` subclasses to inspect.

  Returns:
    A flat `unittest.TestSuite` with one test-case instance per method name
    reported by the default loader, grouped by class in argument order.
  """
  loader = unittest.defaultTestLoader
  return unittest.TestSuite(
    test_class(method_name)
    for test_class in test_classes
    for method_name in loader.getTestCaseNames(test_class)
  )

In [None]:
def discover_test_classes():
  """Find the notebook's test classes and build a suite from their test cases.

  A global is selected when its name ends with `Test`, it is a class derived
  (directly or indirectly) from `unittest.TestCase`, and it was defined in
  `__main__`.

  Returns:
    The suite produced by `discover_test_cases` for the selected classes.
  """
  classes = [
    obj for name, obj in list(globals().items())
    if name.endswith('Test')
    # Check "is a class" first: arbitrary globals (ints, strings, ...) may
    # have no `__module__` attribute and cannot be passed to `issubclass`.
    and isinstance(obj, type)
    # `issubclass` also accepts classes that inherit from TestCase through an
    # intermediate base class, not only direct subclasses.
    and issubclass(obj, unittest.TestCase)
    and obj.__module__ == '__main__'
  ]

  return discover_test_cases(*classes)

In [None]:
# Build the suite for the class-based tests. Explicit construction is used
# here; the commented-out lines are the two discovery-based alternatives.
suite = generate_test_class_suite()
# or
# suite = discover_test_cases(SimpleTest)
# or
# suite = discover_test_classes()

For functions, the test suite can either be built explicitly, as in the `generate_function_suite` function, or discovered by analyzing the global declarations, as in the `discover_function_suite` function, which adds to the test suite every function whose name starts with a given prefix (`test_` in our example). Be aware that this can lead to errors if other functions whose names start with the selected prefix are defined in the current context, because they will be treated as tests too:

In [None]:
def generate_function_suite(suite = None):
    """Add the two function-style tests to a suite, listing them explicitly.

    Args:
      suite: Existing suite to extend; a new `unittest.TestSuite` is created
        when omitted.

    Returns:
      The suite that was extended (the one passed in, or the new one).
    """
    target = unittest.TestSuite() if suite is None else suite
    for test_function in (test_data_generation, test_data_prediction):
      # FunctionTestCase adapts a plain function to the TestCase interface.
      target.addTest(unittest.FunctionTestCase(test_function))
    return target

In [None]:
def discover_function_suite(suite = None, prefix = 'test_'):
    """Add every function-style test found in the global namespace to a suite.

    A global is treated as a test when its name starts with `prefix`, it is
    callable, and its `__module__` is `__main__`.

    Args:
      suite: Existing suite to extend; a new `unittest.TestSuite` is created
        when omitted.
      prefix: Name prefix that marks a global as a test function.

    Returns:
      The suite that was extended (the one passed in, or the new one).
    """
    if suite is None:
      suite = unittest.TestSuite()
    # Iterate over a snapshot so the scan is unaffected if the namespace
    # changes while it is being walked.
    for name, obj in list(globals().items()):
      if not (name.startswith(prefix) and callable(obj)):
        continue
      # Not every callable carries `__module__`; treat those as "not ours"
      # instead of failing the whole discovery with AttributeError.
      if getattr(obj, '__module__', None) == '__main__':
        suite.addTest(unittest.FunctionTestCase(obj))

    return suite

In [None]:
# Rebind `suite` to a suite holding the function-style tests, built
# explicitly; the commented-out line is the discovery-based alternative.
suite = generate_function_suite()
# or
# suite = discover_function_suite()

We can also combine the discovery of both test functions and test classes:

In [None]:
def discover_test_classes_and_functions():
  """Discover class-based tests, then append the function-style tests.

  Returns:
    The suite from `discover_test_classes`, after `discover_function_suite`
    has been asked to extend it.
  """
  class_suite = discover_test_classes()
  return discover_function_suite(suite = class_suite)

In [None]:
# Rebind `suite` to the combined result: discovered test classes plus
# discovered test functions.
suite = discover_test_classes_and_functions()

In [None]:
# Show which test cases ended up in the suite.
print(suite)

<unittest.suite.TestSuite tests=[<__main__.SimpleTest testMethod=test_data_generation>, <__main__.SimpleTest testMethod=test_data_prediction>, <unittest.case.FunctionTestCase tec=<function test_data_generation at 0x7f38ac7300d0>>, <unittest.case.FunctionTestCase tec=<function test_data_prediction at 0x7f38ac7308b0>>]>


# Execute the test suite

When the test suite is generated, we can execute it and get testing results

In [None]:
# Create a plain-text runner, then print the runner and the suite so that it
# is visible what is about to be executed.
runner = unittest.TextTestRunner()
print(runner)
print(suite)

<unittest.runner.TextTestRunner object at 0x7f38acafbbe0>
<unittest.suite.TestSuite tests=[<__main__.SimpleTest testMethod=test_data_generation>, <__main__.SimpleTest testMethod=test_data_prediction>, <unittest.case.FunctionTestCase tec=<function test_data_generation at 0x7f38ac7300d0>>, <unittest.case.FunctionTestCase tec=<function test_data_prediction at 0x7f38ac7308b0>>]>


In [None]:
# Execute the suite; the value returned by `run` is kept in `r`.
r = runner.run(suite)

....
----------------------------------------------------------------------
Ran 4 tests in 9.845s

OK


In [None]:
# If we want to generate JUnit-compatible output, set to True
use_xml_runner = False

if use_xml_runner:
  # Imported only on this branch so that the package is required solely when
  # XML output is requested.
  import xmlrunner
  # Write the reports to the directory that the following `%sh ls` cell
  # lists, instead of a hard-coded personal user directory.
  runner = xmlrunner.XMLTestRunner(output='/dbfs/tmp/test-reports')
else:
  runner = unittest.TextTestRunner()
results = runner.run(suite)

In [None]:
%sh ls -ls /dbfs/tmp/test-reports

# Use tests auto-discovery
 
For the `unittest` library we can use test auto-discovery, which finds tests implemented as classes.  The **main requirement for use with Databricks is to set `exit = False` in the list of arguments of the `unittest.main` function.** It also makes sense to explicitly pass `argv` as a single-element list, to avoid the use of `sys.argv`, which on Databricks contains the parameters that were used to start the Python subprocess. (see [documentation on `unittest.main`](https://docs.python.org/3.7/library/unittest.html#unittest.main))

In [None]:
# Auto-discover and run the test classes defined in this notebook.
# `exit=False` keeps `unittest.main` from calling `sys.exit`, and the explicit
# `argv` prevents it from parsing the interpreter's own `sys.argv`.
test_runner = unittest.main(argv=[''], exit=False)
test_runner.result.printErrors()

if not test_runner.result.wasSuccessful():
  # A run is unsuccessful because of failures, errors or unexpected successes;
  # count all of them so the message never reports "0 of N tests failed".
  unsuccessful_count = (
    len(test_runner.result.failures)
    + len(test_runner.result.errors)
    + len(test_runner.result.unexpectedSuccesses)
  )
  raise Exception(f"{unsuccessful_count} of {test_runner.result.testsRun} tests failed.")

..
----------------------------------------------------------------------
Ran 2 tests in 0.380s

OK

