In [54]:
import papermill as pm
import multiprocessing as mp
import os
import logging

# This notebook's own logger reports INFO-level progress messages ...
log = logging.getLogger(__name__)
log.setLevel(logging.INFO)

# ... while the root logger is configured at WARNING.
logging.basicConfig(level=logging.WARNING)

import util

In [55]:
# Template notebook that is executed once per analysis, and the directory
# that receives the executed copies.
input_path = 'D3_gmm_template.ipynb'
output_dir = 'D3_gmm'

# Baseline parameter set; every analysis below starts from a shallow copy of
# it and overrides individual entries.
defaults = dict(
    dataset_file=util.DATASET_SUBSET_FINAL,
    index_fields=['Country', 'Year'],
    data_fields=['ENI', 'POP', 'URB', 'AFL', 'TI', 'CTS', 'KAOPEN'],
    prefix='template',
    start_year=1995,
    end_year=2015,
    exclude_countries=[],
)

# Maps output notebook file name -> parameters used for that run.
analyses = {
    # first iteration
    'default.ipynb': dict(defaults),
    'excluding_DZA_VEN.ipynb': dict(defaults, exclude_countries=['DZA', 'VEN']),
}

# One additional run per start year from 1990 through 2010, all sharing the
# default end year.
for new_start in range(1990, 2011):
    analyses['reduced_{}_{}.ipynb'.format(new_start, defaults['end_year'])] = dict(
        defaults, start_year=new_start)


In [56]:
# Make sure the output directory exists before inspecting it: on a fresh
# checkout os.listdir would otherwise raise FileNotFoundError, and the
# executed notebooks are written into this directory afterwards.
os.makedirs(output_dir, exist_ok=True)

# Notebooks already present in the output directory that are not keys of
# `analyses` are not produced by this run; refuse to continue so that stale
# results are not mistaken for current ones.
existing = [x for x in os.listdir(output_dir) if x.endswith('.ipynb')]
not_regenerated = set(existing) - set(analyses.keys())
if not_regenerated:
    # Sorted so the message is stable between runs (set order is arbitrary).
    raise ValueError('The following files are not generated, consider cleaning them: \n'
                     + '\n'.join(sorted(not_regenerated)))

In [57]:
# When False, notebooks that already exist in the output directory are kept
# and only the missing ones are executed.
refresh = True

# Output files whose execution raised; reported together after the loop.
failed = []

for output_file, params in analyses.items():
    output_path = os.path.join(output_dir, output_file)
    if not refresh and os.path.exists(output_path):
        log.warning('Skipping %s', output_file)
        continue
    log.info('Executing %s', output_file)
    try:
        pm.execute.execute_notebook(
            engine_name='sos',
            input_path=input_path,
            output_path=output_path,
            parameters=params,
            progress_bar=False,
        )
    except pm.PapermillExecutionError as err:
        # Best effort: one failing analysis must not stop the remaining ones,
        # but keep the reason visible instead of discarding it.
        log.error('Error executing %s: %s', output_file, err)
        failed.append(output_file)

if failed:
    log.error('%d of %d analyses failed: %s',
              len(failed), len(analyses), ', '.join(failed))