In [None]:
from IPython.display import HTML
HTML('''<link rel="stylesheet" href="eniram-theme/eniram-theme.css" type="text/css"></link>
        <script type="text/javascript" src="eniram-theme/rise-shortcuts.js"></script>''')

# Introduction

## In this presentation
- Python @ Eniram
- Scientific libraries – what? why?
- Equality assertions in NumPy, Pandas and beyond
- Parameterized tests
- Test discovery
- Summary

## Elsewhere about testing and scientific Python
- [Testing in Python for Scientific Computing](https://www.olivierverdier.com/posts/2015/04/10/testing-scientific-computing/)
  (Maths & Programming blog by Olivier Verdier)
- [Testing With NumPy and Pandas](https://penandpants.com/2014/10/07/testing-with-numpy-and-pandas/)
  (Pen and Pants blog by jiffyclub)
- [Testing for Data Scientists](https://www.youtube.com/watch?v=GEqM9uJi64Q)
  (PyData Seattle 2015 talk video by Trey Causey)

## Python development & testing at Eniram
- ~8 Python developers (PyCharm on Mac/Linux/Win)
- ~10 other active Python users (hosted Jupyter Notebook)
- ~30 Python packages with unit tests

### Types of testing we do:
- unit tests
- functional/integration tests
- full-stack UI-driven tests
- manual QA tests

### Continuous integration
- Jenkins cloud cluster with 10 Python slaves
- automated unit tests on
  - Python 2.7 "onboard production" (legacy package versions)
  - Python 2.7 "onshore production" (more modern versions)
  - Python 2.7 "cutting edge" (up-to-date versions)
  - Python 3.5 "cutting edge"

### Test frameworks
- 95% of tests are run with Nose 1
- 5% of tests written for and run experimentally with Pytest

## Software updates to ships are slow and expensive
- underlines the importance of testing
- keeps us tied to legacy library versions

### Data types we deal with
- strings
- integers
- floats
  - including NaN
  - try to stick with float64
- timestamps
  - prefer timezone aware UTC
  - `pandas.Timestamp` is a good choice  
- arrays of ints/floats/strings (usually 1-dimensional)
- arrays of timestamps (time indices)
- arrays of time intervals (time interval indices)
- time-indexed arrays of floats
- time-indexed data frames of floats
- lists, dictionaries and objects
  - containing some of the above
  - rich nested structures

# The [SciPy stack](http://scipy.org/)
- brings huge value for number crunching...

- ...but also complications in unit testing

## Packages
- [NumPy](http://numpy.org/)
  - numeric array types
  - memory efficient
  - high performance manipulation

- [Pandas](http://pandas.pydata.org/)
  - indexed arrays and data frames
  - powerful I/O, query and manipulation tools
    - "Excel, SQL on steroids"
  - basic math/stats algorithms

- [SciPy](http://scipy.org/scipylib/),
  [Matplotlib](http://matplotlib.org/),
  [SymPy](http://sympy.org/),
  [scikit-learn](http://scikit-learn.org/stable/) etc. etc.
  - advanced algorithms & visualization

## SciPy stack benefit: Memory efficiency

In [None]:
# contextmanager lives in the standard library; the contextlib2 backport
# is not needed for it.
from contextlib import contextmanager

import psutil

proc = psutil.Process()
# Objects created inside the ``with`` body are parked here so they stay
# alive until the second memory measurement has been taken.
storage = []


def _memory_in_use():
    """Return the current process's memory usage in bytes.

    Uses the ``data`` field of ``proc.memory_info()`` when psutil provides
    it and falls back to the portable ``rss`` field otherwise (psutil does
    not expose ``data`` on every platform, e.g. macOS and Windows).
    """
    info = proc.memory_info()
    return getattr(info, 'data', info.rss)


@contextmanager
def report_memory_usage():
    """Print how much the process's memory grew while the ``with`` body ran.

    Yields ``storage.append`` so that the body can store the object it
    creates in the module-level ``storage`` list.

    Nothing is printed if the body raises, because the second measurement
    is only taken after a normal exit from the ``with`` block.
    """
    mem_before = _memory_in_use()
    yield storage.append
    mem_after = _memory_in_use()
    print((mem_after - mem_before) / 1000000.0, 'MB')

In [None]:
with report_memory_usage() as store:
    store([x / 1000000.0 for x in range(1000000)])
storage[-1][:3]

In [None]:
import numpy as np
with report_memory_usage() as store:
    store(np.linspace(0, 1.0, 1000000, dtype=np.float64))
storage[-1][:3]

In [None]:
with report_memory_usage() as store:
    store(np.linspace(0, 1.0, 1000000, dtype=np.float32))
storage[-1][:3]

## SciPy stack benefit: Speed

In [None]:
list_values = [x / 1000000.0 for x in range(1000000)]
array_values = np.linspace(0, 1.0, 1000000)

In [None]:
%timeit [2.0 * x for x in list_values]

In [None]:
%timeit 2.0 * array_values

# NumPy testing assertion helpers
- for docs, search ["numpy.testing"](http://docs.scipy.org/doc/numpy/reference/routines.testing.html)
- useful for comparisons of
  - arrays
  - special values (NaNs, infinities)
  - floating point values with tolerance

### Test data: Fuel flow $\mathrm{kg/h}$ every 60 seconds

In [None]:
fuelflow_list = [3386.6,
                 3418.2,
                 3480.3,
                 3446.7,
                 3437.2,
                 3437.8,
                 3721.3,
                 4818.1,  # > 4000
                 3389.8,
                 3442.3,
                 3356.1,
                 3334.3,
                 3452.9,
                 3506.7,
                 3391.1,
                 3457.5,
                 11526.7,  # > 4000
                 3410.1]

## Example: pick values >4000 from a Python list

In [None]:
def high_list_values(values, threshold=4000):
    """Return the items of ``values`` that are strictly greater than ``threshold``.

    :param values: iterable of comparable values
    :param threshold: exclusive lower bound, 4000 by default
    :return: a new list holding the matching items in their original order
    """
    return [v for v in values if v > threshold]

high_list_values(fuelflow_list)

In [None]:
assert high_list_values(fuelflow_list) == [4818.1, 11526.7]

In [None]:
assert high_list_values(fuelflow_list) == [4242.42, 5353.53]

## Example: pick values >4000 from a NumPy vector

In [None]:
import numpy as np

In [None]:
fuelflow_vec = np.array(fuelflow_list)
fuelflow_vec

In [None]:
def high_vec_values(vector, threshold=4000):
    """Return the elements of ``vector`` that are strictly greater than ``threshold``.

    ``vector > threshold`` is used as a boolean mask to index ``vector``,
    so ``vector`` is expected to be a NumPy array (or another object that
    supports this kind of indexing).

    :param vector: array to filter
    :param threshold: exclusive lower bound, 4000 by default
    :return: the selected elements
    """
    return vector[vector > threshold]

result = high_vec_values(fuelflow_vec)
result

In [None]:
expected = np.array([4818.1, 11526.7])
assert result == expected

__...umm, what?__

## NumPy compares each element in the vector separately

In [None]:
result == expected

### Can't turn a vector of booleans into a single boolean

In [None]:
bool(np.array([True, True]))

### Ok, so do it with `all()` and a generator expression...

In [None]:
result

In [None]:
expected

In [None]:
list(zip(result, expected))

In [None]:
[a == b for a, b in zip(result, expected)]

In [None]:
all(a == b for a, b in zip(result, expected))

In [None]:
all(result == expected)

### ...or a NumPy array shortcut...

In [None]:
result == expected

In [None]:
(result == expected).all()

### ...or even allow some inaccuracy:

In [None]:
np.allclose(result, expected)

## Why use `allclose()` instead of `==`

### Floats are funny in computers
Let's do a computation in two equivalent ways:

In [None]:
def mega_mul(v):
    """Scale ``v`` up a million-fold by multiplying it by 1000000."""
    factor = 1000000
    return factor * v

def mega_div(v):
    """Scale ``v`` up a million-fold by dividing it by one millionth."""
    divisor = 0.000001
    return v / divisor

### Used on big numbers, these give exactly the same results:

In [None]:
mega_mul(10.0)

In [None]:
mega_div(10.0)

In [None]:
mega_mul(10.0) == mega_div(10.0)

### But with smaller numbers...

In [None]:
mega_mul(0.001)

In [None]:
mega_div(0.001)

In [None]:
mega_mul(0.001) == mega_div(0.001)

### This is why we might want to use `allclose()`.

In [None]:
np.allclose(mega_mul(0.001), mega_div(0.001))

In [None]:
1.00000001 == 1.00000002

In [None]:
np.allclose(1.00000001, 1.00000002)

### For tests, there's also `assert_allclose(a, b)`:

In [None]:
from numpy.testing import assert_allclose
assert_allclose(np.array([1.0000001, 1.0000002]),
                np.array([1.0000002, 1.0000001]))

### You can adjust the absolute and relative tolerances:

In [None]:
assert_allclose(np.array([1.0000001, 1.0000002]),
                np.array([1.0000002, 1.0000001]),
                atol=1e-8, rtol=1e-8)

## NaNs

### Let's introduce missing data in our test data

In [None]:
fuelflow_with_none = [3386.6, 3418.2, None, 3480.3]
expected = [3386.6, 3418.2, None, 3480.3]
fuelflow_with_none == expected

### Using `None` is ok with lists, not so with NumPy arrays
So let's try a list with NaN values instead:

In [None]:
fuelflow_with_nan_list = [3386.6, 3418.2, float('nan'), 3480.3]
expected = [3386.6, 3418.2, float('nan'), 3480.3]
fuelflow_with_nan_list == expected

__...umm, what?__
- what's a `float('nan')`?
- why does the comparison fail?

In [None]:
type(float('nan'))

In [None]:
repr(float('nan'))

NumPy introduces NaN as `np.nan`, which is preferred over `float('nan')`:

In [None]:
type(np.nan)

In [None]:
repr(np.nan)

In [None]:
np.nan == np.nan

In [None]:
np.allclose(np.nan, np.nan)

In [None]:
np.allclose(np.nan, np.nan, equal_nan=True)

In [None]:
assert_allclose(np.nan, np.nan, equal_nan=True)

## A slight detour to a NumPy bug
While preparing this talk, I noticed that:

In [None]:
np.allclose(np.nan, np.nan, equal_nan=False)

In [None]:
assert_allclose(np.nan, np.nan, equal_nan=False)  # should raise an AssertionError

See https://github.com/numpy/numpy/issues/8145 and don't rely on this – always use:

    assert_allclose(..., equal_nan=True)

## Comparisons of arrays containing NaNs

In [None]:
a = np.array([1.0, 2.0, np.nan])
b = np.array([1.0, 2.0, np.nan])
a == b

In [None]:
np.allclose(a, b, equal_nan=True)

In [None]:
assert_allclose(a, b, equal_nan=True)


## How to test for exact same floats and NaN equality?

In [None]:
assert_allclose(np.array([1.0, np.nan]),
                np.array([1.0, np.nan]),
                equal_nan=True)

What if you want this to fail:

In [None]:
assert_allclose(np.array([1.00000000000, np.nan]),
                np.array([0.99999999999, np.nan]),
                equal_nan=True)

You can disallow any relative tolerance:

In [None]:
assert_allclose(np.array([1.00000000000, np.nan]),
                np.array([0.99999999999, np.nan]),
                rtol=0,
                equal_nan=True)

This still passes:

In [None]:
assert_allclose(np.array([1.0, np.nan]),
                np.array([1.0, np.nan]),
                rtol=0,
                equal_nan=True)

## `assert_array_equal` tests for exact values, and considers NaNs equal:

In [None]:
from numpy.testing import assert_array_equal
assert_array_equal(np.array([1.0, np.nan]),
                   np.array([1.0, np.nan]))

In [None]:
from numpy.testing import assert_array_equal
assert_array_equal(np.array([1.00000000000, np.nan]),
                   np.array([0.99999999999, np.nan]))

## NumPy recap
- `numpy.testing.assert_allclose(a, b, equal_nan=True)`
  - NaNs are equal
  - floats with tolerance
  - adjust tolerance using `atol=` and `rtol=`
- `numpy.testing.assert_array_equal(a, b)`
  - NaNs are equal
  - floats must match exactly
- other helpers in `numpy.testing`
  - not so relevant for asserting unit test results
  - duplicate functionality in test frameworks

# Pandas
- __Index__
  - array with custom functionality
  - NaNs make no sense
- __Series__: array of values + index
- __DataFrame__: multiple arrays + a common index



## Index comparisons
- no NaNs, easier to compare

In [None]:
import pandas as pd

fib1 = pd.Index([2, 3, 5, 8, 13])
fib2 = pd.Index([2, 3, 5, 8, 13])
prime = pd.Index([2, 3, 5, 7, 11])
fib1

In [None]:
fib1 == fib2

In [None]:
(fib1 == fib2).all()

In [None]:
fib1 == prime

In [None]:
(fib1 == prime).all()

## For float indices, no tolerance usually needed
- so straight equality comparison is ok

In [None]:
(pd.Index([1.0]) == pd.Index([1.0])).all()

In [None]:
(pd.Index([1.0]) == pd.Index([0.999999999])).all()

## There is `pandas.testing.assert_index_equal` (`pandas.util.testing` in older pandas)
- checks for index names

In [None]:
# Newer pandas releases expose the assertion helpers from the public
# ``pandas.testing`` module; fall back to the legacy ``pandas.util.testing``
# location for older installations that do not have it.
try:
    from pandas.testing import assert_index_equal
except ImportError:
    from pandas.util.testing import assert_index_equal

# Same values and same name: the assertion passes silently.
assert_index_equal(pd.Index([1.0], name='Wes'), pd.Index([1.0], name='Wes'))

In [None]:
assert_index_equal(pd.Index([1.0], name='Jeff'), pd.Index([1.0], name='Wes'))

## Eniram is all about time indexed data

In [None]:
ti1 = pd.DatetimeIndex(['2016-10-13T00:00:00', '2016-10-13T00:00:30'])
ti2 = pd.DatetimeIndex(['2016-10-13T00:00:00', '2016-10-13T00:00:30'])
ti1

In [None]:
(ti1 == ti2).all()