# Useful notebook to run to test that all figure creation works


This does not test for software bugs, but for problems with the data that may result in
series of NaNs or infs, which may trigger an exception in matplotlib (typically
when a data set contains no numbers to plot, and we set the axis labels to NaN
or inf).



Why is this better than running 'generate_countries.ipynb'?

- we don't produce html here, and the process is slightly faster
- running 'generate_countries.ipynb' is a little bit indirect:
  - we create a notebook template, then execute the template to create html
  - in at least one instance, there was an error in the figure creation
    (reported by matplotlib) that did not result in an exception, i.e.
    that was only discovered by looking at one of the ~600 html files manually

In [None]:
import time
import matplotlib
# Select the non-interactive "Agg" backend before any figures are created,
# so the many figures produced below are rendered off-screen.
matplotlib.use("Agg")


from coronavirus import overview, fetch_data_germany, fetch_deaths, fetch_cases

def get_germany_subregion_list():
    """Return the distinct 'Landkreis' names of the German data set.

    The rows returned by ``fetch_data_germany()`` are sorted by
    'Bundesland' first and by 'Landkreis' second; each Landkreis name is
    then kept only at its first occurrence, and the result is returned
    as a plain list.
    """
    sort_columns = ['Bundesland', 'Landkreis']
    germany = fetch_data_germany()
    by_land_then_kreis = germany[sort_columns].sort_values(sort_columns)
    kreise = by_land_then_kreis['Landkreis'].drop_duplicates()
    return list(kreise)
 
def get_country_list():
    """Return the sorted, de-duplicated index labels of the deaths data.

    The index of ``fetch_deaths()`` is compared element-wise with the
    index of ``fetch_cases()``; an ``AssertionError`` is raised if any
    entry differs.
    """
    deaths = fetch_deaths()
    cases = fetch_cases()

    death_index = deaths.index
    case_index = cases.index
    # Both data sets are expected to list the same entries in the same order.
    assert (case_index == death_index).all()

    # Here we should identify regions in countries, and process those.
    # Instead, as a quick hack to get started, we'll just take one country
    # and the current "get_country" method will sum over all regions of one
    # country if only the country name is given.
    unique_countries = death_index.drop_duplicates()
    return sorted(unique_countries)
    
    

In [None]:
def test_germany_overview():
    """Create the overview figures for every German subregion (Landkreis).

    Calls ``overview(country="Germany", subregion=...)`` for each name
    returned by ``get_germany_subregion_list()`` and prints one progress
    line per subregion. All open figures are closed after each call.
    Any exception raised by ``overview`` propagates to the caller.

    Subregions whose name contains "(alt)" are reported and then skipped.
    """
    # Import the submodule explicitly: a bare ``import matplotlib`` does not
    # guarantee that ``matplotlib.pyplot`` is reachable as an attribute.
    import matplotlib.pyplot as plt

    subregions = get_germany_subregion_list()

    # data cleaning: on 13 April, we had a Landkreis "LK Göttingen (alt)"
    # with only one data point. This causes plots to fail, because there
    # is nothing to plot, and then the legend() command failed.
    # We assume that the RKI labels unusual data with '(alt)', and remove those.
    alt_data_sets = [name for name in subregions if "(alt)" in name.lower()]
    if alt_data_sets:
        # NOTE(review): assumes germany_get_region is provided by the
        # coronavirus package like the other helpers used here -- confirm.
        # Imported only on this branch so the common path does not depend on it.
        from coronavirus import germany_get_region

        print(f"Removing datasets label with '(alt)': {alt_data_sets}")
        for alt in alt_data_sets:
            c, d = germany_get_region(landkreis=alt)
            print(f"  removed: {alt} : len(cases)={len(c)}, len(deaths)={len(d)}")

        # Actually drop the reported data sets, so that the loop below does
        # not try to plot them.
        excluded = set(alt_data_sets)
        subregions = [name for name in subregions if name not in excluded]

    start_time = time.time()
    total = len(subregions)
    for i, subregion in enumerate(subregions):
        print(f"Processing {i+1:3}/{total} [{time.time()-start_time:4.0f}s] {subregion}")
        overview(country="Germany", subregion=subregion)
        # Close the figures of this subregion before moving on to the next.
        plt.close('all')


In [None]:
def test_world_overview(max_countries=190):
    """Create the overview figure for countries from ``get_country_list()``.

    Calls ``overview(country=...)`` for each selected country and prints one
    progress line per country. All open figures are closed after each call.
    Any exception raised by ``overview`` propagates to the caller.

    Parameters
    ----------
    max_countries : int or None, optional
        Only the first ``max_countries`` entries of the sorted country list
        are processed. The default (190) keeps the previous behaviour;
        pass ``None`` to process every country.
    """
    # Import the submodule explicitly: a bare ``import matplotlib`` does not
    # guarantee that ``matplotlib.pyplot`` is reachable as an attribute.
    import matplotlib.pyplot as plt

    countries = get_country_list()
    selected = countries[0:max_countries]
    # Report progress against the number of countries actually processed,
    # not the length of the full list.
    total = len(selected)

    start_time = time.time()
    for i, country in enumerate(selected):
        print(f"Processing {i+1:3}/{total} [{time.time()-start_time:4.0f}s] {country}")
        overview(country=country)
        # Close the figures of this country before moving on to the next.
        plt.close('all')
        


In [None]:
# Run the world check: calls overview() once per country and lets any
# exception propagate.
test_world_overview()

In [None]:
# Run the Germany check: calls overview() once per subregion (Landkreis)
# and lets any exception propagate.
test_germany_overview()