In [1]:
import copy
import logging
import multiprocessing as mp
import os

import papermill as pm

# Root logger: limit everything that is not configured otherwise to WARNING.
# The explicit setLevel() also covers the case where basicConfig() does nothing
# because handlers are already installed on the root logger.
logging.basicConfig(level=logging.WARNING)
logging.getLogger().setLevel(logging.WARNING)

# Notebook logger: more verbose than the root, so INFO messages are emitted.
log = logging.getLogger(__name__)
log.setLevel(logging.INFO)

import util

In [2]:
# Template notebook to execute and the directory receiving the executed copies;
# both are located under util.ANALYSIS_DIR.
input_path, output_dir = (
    os.path.join(util.ANALYSIS_DIR, leaf)
    for leaf in ('D3_gmm_template.ipynb', 'D3_gmm')
)

# Baseline estimator options, handed to the template notebook under the
# 'pdynmc_params' parameter. Individual analyses override selected keys.
# NOTE(review): the dotted keys look like pdynmc() argument names - confirm
# against the template notebook.
default_pdynmc_params = {
    'use.mc.diff': True, 'use.mc.lev': False, 'use.mc.nonlin': False,
    'include.y': True, 'varname.y': 'ENI', 'lagTerms.y': 2,
    'fur.con': True, 'fur.con.diff': True, 'fur.con.lev': False,
    'varname.reg.fur': ['POP', 'AFL', 'AFL2', 'URB', 'CTS', 'TI', 'KAOPEN'],
    'lagTerms.reg.fur': [0, 0, 0, 0, 0, 0, 0],
    'include.dum': True, 'dum.diff': True, 'dum.lev': False,
    'w.mat': 'iid.err', 'std.err': 'corrected',
    'estimation': 'twostep', 'opt.meth': 'none',
    'inst.stata': False
}
# Independent snapshot used later to verify that the defaults were not modified.
# It has to be a deep copy: a shallow .copy() shares the nested lists with the
# original, so an in-place edit of e.g. 'varname.reg.fur' would change both
# dicts and go unnoticed by an equality check.
backup_pdynmc_params = copy.deepcopy(default_pdynmc_params)

# Baseline parameter set for the template notebook. Every analysis starts from
# this dict (or from extended_params below) and overrides individual keys.
default_params = {
    'dataset_file': util.DATASET_SUBSET_FINAL,
    'index_fields': ['Country', 'Year'],
    'data_fields': ['ENI', 'POP', 'URB', 'AFL', 'TI', 'TS', 'CTS', 'KAOPEN'],
    'prefix': 'template',
    'start_year': 1996,
    'end_year': 2015,
    'exclude_countries': [],
    'pdynmc_params': default_pdynmc_params
}
# Independent snapshot used later to verify that the defaults were not modified.
# Deep copy on purpose: a shallow .copy() shares the nested lists and the
# nested 'pdynmc_params' dict with the original, so in-place edits to them
# would change both dicts and could not be detected by an equality check.
backup_params = copy.deepcopy(default_params)

# Same as the defaults, but reading util.DATASET_ALL_FINAL instead of
# util.DATASET_SUBSET_FINAL. The year range is restated explicitly and equals
# the default one.
extended_params = {
    **default_params,
    'dataset_file': util.DATASET_ALL_FINAL,
    'start_year': 1996,
    'end_year': 2015
}


# Registry of runs: analysis name -> parameters for the template notebook.
# It starts with a single run that uses a shallow copy of the unmodified defaults.
analyses = {'default': dict(default_params)}

    
# diff and sys GMM
def _gmm_analysis(base_params, extra_regressors, use_lev, lag_y=None):
    """Build the parameter set for one GMM analysis.

    Args:
        base_params: notebook parameters to start from (``default_params`` or
            ``extended_params``); the dict is not modified.
        extra_regressors: regressor names appended to the common
            ``['POP', 'AFL', 'AFL2', 'URB']`` core.
        use_lev: value stored under ``'use.mc.lev'``.
        lag_y: optional override for ``'lagTerms.y'``; ``None`` keeps the value
            from ``default_pdynmc_params``.

    Returns:
        A new dict equal to ``base_params`` except that ``'pdynmc_params'`` is a
        fresh copy of ``default_pdynmc_params`` with the overrides applied.
        Every regressor gets a lag term of 0.
    """
    regressors = ['POP', 'AFL', 'AFL2', 'URB'] + list(extra_regressors)
    overrides = {
        'use.mc.lev': use_lev,
        'varname.reg.fur': regressors,
        # one zero lag per regressor, so both lists always have equal length
        'lagTerms.reg.fur': [0] * len(regressors),
    }
    if lag_y is not None:
        overrides['lagTerms.y'] = lag_y
    return {**base_params, 'pdynmc_params': {**default_pdynmc_params, **overrides}}


# One row per analysis:
#   (name, base parameters, regressors added to the core, 'use.mc.lev', 'lagTerms.y' override)
# The row order is the order in which the analyses are registered (and later executed).
_GMM_SPECS = [
    # diffGMM on the default (subset) dataset
    ('diffGMM_Rafiq2016_subset', default_params, ['TS'], False, None),
    ('diffGMM_Rafiq2016-TI_subset', default_params, ['TS', 'TI'], False, None),
    ('diffGMM_Rafiq2016-TI-KAOPEN_subset', default_params, ['TS', 'TI', 'KAOPEN'], False, None),
    ('diffGMM_Rafiq2016-CTS_subset', default_params, ['CTS'], False, None),
    ('diffGMM_Rafiq2016-CTS_subset_lag1', default_params, ['CTS'], False, 1),
    ('diffGMM_Rafiq2016-CTS_subset_lag2', default_params, ['CTS'], False, 2),
    ('diffGMM_Rafiq2016-CTS-TI_subset', default_params, ['CTS', 'TI'], False, None),
    ('diffGMM_Rafiq2016-CTS-TI-KAOPEN_subset', default_params, ['CTS', 'TI', 'KAOPEN'], False, None),
    # diffGMM on the extended (all) dataset
    ('diffGMM_Rafiq2016_all', extended_params, ['TS'], False, None),
    ('diffGMM_Rafiq2016-TI_all', extended_params, ['TS', 'TI'], False, None),
    ('diffGMM_Rafiq2016-TI-KAOPEN_all', extended_params, ['TS', 'TI', 'KAOPEN'], False, None),
    ('diffGMM_Rafiq2016-CTS_all', extended_params, ['CTS'], False, None),
    ('diffGMM_Rafiq2016-CTS-TI_all', extended_params, ['CTS', 'TI'], False, None),
    ('diffGMM_Rafiq2016-CTS-TI-KAOPEN_all', extended_params, ['CTS', 'TI', 'KAOPEN'], False, None),
    # sysGMM ('use.mc.lev' switched on) on the default (subset) dataset
    ('sysGMM_Rafiq2016_subset', default_params, ['TS'], True, None),
    ('sysGMM_Rafiq2016-CTS_subset', default_params, ['CTS'], True, None),
    ('sysGMM_Rafiq2016-CTS-TI_subset', default_params, ['CTS', 'TI'], True, None),
    ('sysGMM_Rafiq2016-CTS-TI-KAOPEN_subset', default_params, ['CTS', 'TI', 'KAOPEN'], True, None),
]

analyses.update({
    name: _gmm_analysis(base, extra, use_lev, lag_y)
    for name, base, extra, use_lev, lag_y in _GMM_SPECS
})
# time stability tests
# One run per window of ten consecutive years (first_year .. first_year + 9),
# with first_year going from 1990 through 2006. Each run starts from
# extended_params and uses the seven-regressor model with 'use.mc.lev' off.
analyses.update(
    (
        'diffGMM_full_model_all_{}_{}'.format(first_year, first_year + 9),
        dict(
            extended_params,
            start_year=first_year,
            end_year=first_year + 9,
            pdynmc_params={
                **default_pdynmc_params,
                'use.mc.lev': False,
                'varname.reg.fur': ['POP', 'AFL', 'AFL2', 'URB', 'CTS', 'TI', 'KAOPEN'],
                'lagTerms.reg.fur': [0, 0, 0, 0, 0, 0, 0],
            },
        ),
    )
    for first_year in range(1990, 2007)
)

# Sanity check: registering the analyses must have left the defaults untouched.
assert backup_params == default_params
assert backup_pdynmc_params == default_pdynmc_params

In [3]:
# Guard against stale results: every notebook already present in the output
# directory must correspond to a configured analysis, otherwise left-overs of
# removed or renamed analyses could be mistaken for current output.

# Create the directory first so the check also works on a fresh checkout,
# where os.listdir() would otherwise raise FileNotFoundError.
os.makedirs(output_dir, exist_ok=True)

existing = [x for x in os.listdir(output_dir) if x.endswith('.ipynb')]
not_regenerated = set(existing) - {name + '.ipynb' for name in analyses}
if not_regenerated:
    # sorted() keeps the listing stable between runs (set order is arbitrary)
    raise ValueError('The following files are not generated, consider cleaning them: \n'
                     + '\n'.join(sorted(not_regenerated)))

In [4]:
# Execute the template notebook once per configured analysis.

# True: re-run every analysis. False: skip analyses whose output notebook
# already exists in output_dir.
refresh = True

# Make sure the target directory exists before any notebook is written to it.
os.makedirs(output_dir, exist_ok=True)

for nb_name, params in analyses.items():
    output_file = nb_name + '.ipynb'
    output_path = os.path.join(output_dir, output_file)

    # Passed to the template as the 'pickle_file' parameter. It is stored on the
    # analysis entry itself, so it is set even for analyses that are skipped.
    # NOTE(review): presumably the path where the template saves its results - confirm.
    pickle_file = nb_name + '.pickle.bz2'
    pickle_path = os.path.join(output_dir, pickle_file)
    params['pickle_file'] = pickle_path

    if not refresh and os.path.exists(output_path):
        log.warning('Skipping %s', output_file)
        continue

    log.info('Executing %s', output_file)
    pm.execute.execute_notebook(
        engine_name='sos',
        input_path=input_path,
        output_path=output_path,
        parameters=params,
        progress_bar=False,
    )