# Find the branch points from piControl

Each model seems to have a different convention for recording where its experiments branch from the piControl run. Sometimes the branch points are given as notional dates in the piControl run.

For safety, should the branch point be determined separately for each experiment?

In [None]:
import json
import pandas as pd
import numpy as np
import glob

In [None]:
# Models to process. The experiment loop iterates over this list, so
# commented-out entries are skipped even though they may still have entries
# in the lookup tables below.
models = [
    'ACCESS-CM2',
    'ACCESS-ESM1-5',
    'CAMS-CSM1-0',
    'CanESM5',
    'CESM2',
    'CESM2-FV2',
    'CESM2-WACCM',
    'CESM2-WACCM-FV2',
    'CIESM',
    'CMCC-CM2-SR5',
#    'CMCC-ESM2',
    'CNRM-CM6-1',
    'CNRM-ESM2-1',
    'EC-Earth3',
    'EC-Earth3-AerChem',
    'FGOALS-g3',
    'GFDL-CM4',
    'GFDL-ESM4',
    'GISS-E2-1-G',
    'HadGEM3-GC31-LL',
    'HadGEM3-GC31-MM',
#    'IPSL-CM5A2-INCA',
#    'KACE-1-0-G',
    'IPSL-CM6A-LR',
    'MIROC6',
    'MIROC-ES2L',
    'MPI-ESM-1-2-HAM',
    'MPI-ESM1-2-HR',
    'MRI-ESM2-0',
    'NorESM2-LM',
    'NorESM2-MM',
    'TaiESM1',
    'UKESM1-0-LL',
]


# Variant label of the piControl run to use for each model. The label is the
# directory name under ../data_output/cmip6/<model>/ from which the piControl
# CSV is read.
piControls = {
    'ACCESS-CM2': 'r1i1p1f1',
    'ACCESS-ESM1-5': 'r1i1p1f1',
    'CAMS-CSM1-0': 'r1i1p1f1',
    'CanESM5': 'r1i1p1f1',
    'CESM2': 'r1i1p1f1',
    'CESM2-FV2': 'r1i1p1f1',
    'CESM2-WACCM': 'r1i1p1f1',
    'CESM2-WACCM-FV2': 'r1i1p1f1',
    'CIESM': 'r1i1p1f1',
    'CMCC-CM2-SR5': 'r1i1p1f1',
    'CMCC-ESM2': 'r1i1p1f1',
    'CNRM-CM6-1': 'r1i1p1f2',
    'CNRM-ESM2-1': 'r1i1p1f2',
    'E3SM-1-0': 'r1i1p1f1',
    'EC-Earth3': 'r1i1p1f1',
    'EC-Earth3-AerChem': 'r1i1p1f1',
    'FGOALS-g3': 'r1i1p1f1',
    'GFDL-CM4': 'r1i1p1f1',
    'GFDL-ESM4': 'r1i1p1f1',
    'GISS-E2-1-G': 'r1i1p1f2',
    'HadGEM3-GC31-LL': 'r1i1p1f1',
    'HadGEM3-GC31-MM': 'r1i1p1f1',
    'IPSL-CM5A2-INCA': 'r1i1p1f1',
    'IPSL-CM6A-LR': 'r1i1p1f1',
    'KACE-1-0-G': 'r1i1p1f1',
    'MIROC6': 'r1i1p1f1',
    'MIROC-ES2L': 'r1i1p1f2',
    'MPI-ESM-1-2-HAM': 'r1i1p1f1',
    'MPI-ESM1-2-HR': 'r1i1p1f1',
    'MRI-ESM2-0': 'r1i1p1f1',
    'NorESM2-LM': 'r1i1p1f1',
    'NorESM2-MM': 'r1i1p1f1',
    'TaiESM1': 'r1i1p1f1',
    'UKESM1-0-LL': 'r1i1p1f2',
}

# Variant label of the forced (non-control) experiments for each model. Only
# the last six characters (the 'iNpNfN' part) are used: the experiment loop
# globs 'r*' + that suffix to pick up every realisation of the variant.
# Note this differs from piControls for the HadGEM3 models (f3 here, f1 there).
ipf = {
    'ACCESS-CM2': 'r1i1p1f1',
    'ACCESS-ESM1-5': 'r1i1p1f1',
    'CAMS-CSM1-0': 'r1i1p1f1',
    'CanESM5': 'r1i1p1f1',
    'CESM2': 'r1i1p1f1',
    'CESM2-FV2': 'r1i1p1f1',
    'CESM2-WACCM': 'r1i1p1f1',
    'CESM2-WACCM-FV2': 'r1i1p1f1',
    'CIESM': 'r1i1p1f1',
    'CMCC-CM2-SR5': 'r1i1p1f1',
    'CMCC-ESM2': 'r1i1p1f1',
    'CNRM-CM6-1': 'r1i1p1f2',
    'CNRM-ESM2-1': 'r1i1p1f2',
    'E3SM-1-0': 'r1i1p1f1',
    'EC-Earth3': 'r1i1p1f1',
    'EC-Earth3-AerChem': 'r1i1p1f1',
    'FGOALS-g3': 'r1i1p1f1',
    'GFDL-CM4': 'r1i1p1f1',
    'GFDL-ESM4': 'r1i1p1f1',
    'GISS-E2-1-G': 'r1i1p1f2',
    'HadGEM3-GC31-LL': 'r1i1p1f3',
    'HadGEM3-GC31-MM': 'r1i1p1f3',
    'IPSL-CM6A-LR': 'r1i1p1f1',
    'KACE-1-0-G': 'r1i1p1f1',
    'MIROC6': 'r1i1p1f1',
    'MIROC-ES2L': 'r1i1p1f2',
    'MPI-ESM-1-2-HAM': 'r1i1p1f1',
    'MPI-ESM1-2-HR': 'r1i1p1f1',
    'MRI-ESM2-0': 'r1i1p1f1',
    'NorESM2-LM': 'r1i1p1f1',
    'NorESM2-MM': 'r1i1p1f1',
    'TaiESM1': 'r1i1p1f1',
    'UKESM1-0-LL': 'r1i1p1f2',
}

# Length of a model year in days, keyed by model id. The value is used as the
# divisor that converts a branch time expressed in days into a number of
# years: 365.2425 is the mean Gregorian year, 365 a no-leap calendar and 360
# a 360-day calendar.
# Keys must match the model ids used in `models`, `piControls` and `ipf`,
# because the lookup is done as calendars[model].
calendars = {
    'ACCESS-CM2': 365.2425,
    'ACCESS-ESM1-5': 365.2425,
    'CAMS-CSM1-0': 365,
    'CanESM5': 365,
    'CESM2': 365,
    'CESM2-FV2': 365,
    'CESM2-WACCM': 365,
    'CESM2-WACCM-FV2': 365,
    'CIESM': 365,
    'CMCC-CM2-SR5': 365,
    'CMCC-ESM2': 365,
    'CNRM-CM6-1': 365.2425,
    'CNRM-ESM2-1': 365.2425,
    'E3SM-1-0': 365,  # was keyed 'E3SM', which did not match the id used in the other tables
    'EC-Earth3': 365.2425,
    'EC-Earth3-AerChem': 365.2425,
    'FGOALS-g3': 365,
    'GFDL-CM4': 365,
    'GFDL-ESM4': 365,
    'GISS-E2-1-G': 365,
    'HadGEM3-GC31-LL': 360,
    'HadGEM3-GC31-MM': 360,
    #'IPSL-CM5A2-INCA': 365, CHECK
    'IPSL-CM6A-LR': 365.2425,
    'KACE-1-0-G': 360,
    'MIROC6': 365.2425,
    'MIROC-ES2L': 365.2425,
    'MPI-ESM-1-2-HAM': 365.2425,
    'MPI-ESM1-2-HR': 365.2425,
    'MRI-ESM2-0': 365.2425,
    'NorESM2-LM': 365,
    'NorESM2-MM': 365,
    'TaiESM1': 365,
    'UKESM1-0-LL': 360,
}

In [None]:
def find_branch_index(meta, calendar, dataframe, n_years=165):
    """Return the row of the piControl dataframe at which a run branched off.

    meta: model metadata json (dict); must provide 'branch_time_in_parent'
        (a number) and 'parent_time_units' of the form
        '<days|years> since YYYY-...'
    calendar: days per year of the model calendar (int/float); only used when
        the parent time units are days
    dataframe: the piControl dataframe; its 'time' column must hold strings,
        which are matched by prefix against the zero-padded four-digit
        branch year
    n_years: number of rows that must be available from the branch point
        onwards (default 165 -- presumably the 1850-2014 historical period
        in annual rows; TODO confirm)

    returns: int of index number to start from, or np.nan when the branch
        year does not match exactly one row or when fewer than n_years rows
        remain after it (the latter check assumes a default 0..N-1 index)

    raises: ValueError if the parent time unit is neither 'days' nor 'years'
    """
    ptu = meta['parent_time_units']
    ptu_split = ptu.split()
    unit = ptu_split[0]
    # reference year of the parent time axis, e.g. 'days since 0151-01-01' -> 151
    offset = int(ptu_split[2][:4])
    if unit == 'days':
        divisor = calendar
    elif unit == 'years':
        divisor = 1
    else:
        # previously this fell through and failed later with an unbound local
        raise ValueError(
            "unsupported parent_time_units %r: expected 'days since ...' "
            "or 'years since ...'" % ptu
        )
    # elapsed time in (rounded) whole years since the reference year
    branch_time = '%04d' % (np.round(meta['branch_time_in_parent'] / divisor) + offset)
    # search the dataframe that was passed in, not a global of the caller
    istart = dataframe.index[dataframe['time'].str.startswith(branch_time)].tolist()
    if len(istart) != 1:
        # branch year absent from the control, or ambiguous
        return np.nan
    if (istart[0] + n_years) > len(dataframe):
        # control run too short after the branch point
        return np.nan
    return istart[0]

In [None]:
# Build results[experiment][model][run] = row index in the model's piControl
# dataframe at which that run branched off. Runs for which no usable branch
# point is found (find_branch_index returns NaN) are left out.
results = {}

for experiment in [
    'historical', 'hist-nat', 'hist-GHG', 'hist-aer',
    'historical-cmip5', 'hist-nat-cmip5', 'hist-GHG-cmip5', 'hist-aer-cmip5',
]:
    results[experiment] = {}
    # the '-cmip5' experiment variants are only processed for CanESM5 and are
    # compared against a dedicated piControl-cmip5 file
    is_cmip5 = experiment.endswith('cmip5')
    for model in models:
        if is_cmip5 and model not in ['CanESM5']:
            continue
        # Look for runs before touching the control file, so that models with
        # no output for this experiment cost no I/O and cannot fail on a
        # missing piControl CSV. Sorted so the output order is reproducible.
        runlist_files = sorted(glob.glob('../data_output/cmip6/%s/r*%s/meta_%s.json' % (model, ipf[model][-6:], experiment)))
        if not runlist_files:
            # no results available for this model; skip
            continue
        if is_cmip5:
            piControl = pd.read_csv('../data_output/cmip6/%s/%s/piControl-cmip5.csv' % (model, piControls[model]))
        else:
            piControl = pd.read_csv('../data_output/cmip6/%s/%s/piControl.csv' % (model, piControls[model]))
        results[experiment][model] = {}
        for file in runlist_files:
            # path is ../data_output/cmip6/<model>/<run>/meta_<experiment>.json,
            # so the run (variant label) is the fifth '/'-separated component
            run = file.split('/')[4]
            with open(file, 'r') as f:
                meta = json.load(f)
            # list of exceptions starts here
            if model in ['GISS-E2-1-G'] and experiment in ['hist-nat'] and run[-2:]=='f2':
                meta['parent_time_units'] = 'days since 7550-1-1'  # they appear to use the f1 control rather than f2
            if model in ['CAMS-CSM1-0']:
                meta['parent_time_units'] = 'years since 0000-01-01'   # this is my best guess based on the metadata
            if model in ['FGOALS-g3']:
                meta['branch_time_in_parent'] = 134685  # I don't know why the DAMIP experiments are different, and furthermore I don't know why they don't start at the same point as historical!
            if model in ['GFDL-CM4']:
                meta['parent_time_units'] = 'days since 0151-01-01'   # assumed from section 5 of https://agupubs.onlinelibrary.wiley.com/doi/full/10.1029/2019MS001829 where the first date of piControl is 0151
            if model in ['CIESM']:
                meta['branch_time_in_parent'] = meta['branch_time_in_parent'] - 182500
                # branch time in parent counted from the start of the 500-year spin up: assumed from https://doi.org/10.1029/2019MS002036
            if model in ['EC-Earth3']:
                # some files store the branch time as a string; keep only the
                # part before the first 'D' and convert it to a number
                if isinstance(meta['branch_time_in_parent'], str):
                    meta['branch_time_in_parent'] = float(meta['branch_time_in_parent'].split('D')[0])
            #print(model, meta['branch_time_in_parent'], meta['parent_time_units'])
            result = find_branch_index(meta, calendars[model], piControl)
            if not np.isnan(result):
                results[experiment][model][run] = result

In [None]:
# Display the branch points found for the 'historical-cmip5' experiment
# (only CanESM5 is processed for the '-cmip5' experiments).
results['historical-cmip5']

In [None]:
# Save the experiment -> model -> run -> branch index mapping as JSON.
with open('../data_output/branch_points.json', 'w') as outfile:
    json.dump(results, fp=outfile, indent=4)

In [None]:
# Display the branch points found for the 'historical' experiment.
results['historical']