# Create a table of warming years (21-year moving window) for selected models, members, and scenarios

### Use the historical + Tier 1 scenarios (ssp126, ssp245, ssp370, ssp585), keep only models that have all of them, and keep only one member per model

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Future scenarios to include in the table.
# The historical scenario is deliberately not listed: historical years are
# considered together with each future scenario, so it never shows up as a
# separate entry in the table.
chosen_scenarios = [
    'ssp126',
    'ssp245',
    'ssp370',
    'ssp585',
]

In [3]:
# Read the complete table of warming years computed with the 21-year moving window approach
warming_years_csv = '/home/abbylute/alute_bucket/warming_levels/data/warming_years_zarr_21yr_window.csv'
allwarmyrs = pd.read_csv(warming_years_csv)
allwarmyrs

Unnamed: 0,model,member,scenario,Yr1.0,Yr1.5,Yr2.0,Yr2.5,Yr3.0,Yr3.5,Yr4.0,Yr4.5
0,GFDL-CM4,r1i1p1f1,ssp245,2015.0,2031.0,2050.0,2071.0,,,,
1,GFDL-CM4,r1i1p1f1,ssp585,2015.0,2030.0,2042.0,2051.0,2060.0,2070.0,2079.0,2087.0
2,GFDL-ESM4,r1i1p1f1,ssp119,2021.0,,,,,,,
3,GFDL-ESM4,r1i1p1f1,ssp126,2020.0,,,,,,,
4,GFDL-ESM4,r1i1p1f1,ssp245,2019.0,2046.0,2073.0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
1293,ACCESS-CM2,r4i1p1f1,ssp585,2012.0,2026.0,2037.0,2047.0,2055.0,2062.0,2070.0,2076.0
1294,ACCESS-CM2,r5i1p1f1,ssp126,2012.0,2026.0,2046.0,,,,,
1295,ACCESS-CM2,r5i1p1f1,ssp245,2013.0,2027.0,2040.0,2052.0,2066.0,2088.0,,
1296,ACCESS-CM2,r5i1p1f1,ssp370,2014.0,2027.0,2037.0,2048.0,2059.0,2068.0,2077.0,2085.0


In [4]:
# Drop future model/member/scenario combinations whose tas record does not span
# the full future period (2015-2099)

fltab = pd.read_csv('/home/abbylute/alute_bucket/warming_levels/data/model_tables/all_zarr_models_first_last_year.csv')

# Problem cases: non-historical runs that start after 2015 or end before 2099
is_future = fltab['scenario'] != 'historical'
starts_late = fltab['tas_first_year'] > 2015
ends_early = fltab['tas_last_year'] < 2099
fltab = fltab.loc[is_future & (starts_late | ends_early)]
# keep only the first three columns (read below as model, member, scenario)
fltab = fltab.iloc[:, [0, 1, 2]]

for mod, mem, scen in zip(fltab['model'], fltab['member'], fltab['scenario']):
    is_match = (
        (allwarmyrs['model'] == mod)
        & (allwarmyrs['member'] == mem)
        & (allwarmyrs['scenario'] == scen)
    )
    if is_match.any():
        # this combination is present in the warming-years table, so remove it
        allwarmyrs = allwarmyrs.drop(index=allwarmyrs.index[is_match])
    else:
        print(mod, mem, scen, ' not found in tab')
allwarmyrs

Unnamed: 0,model,member,scenario,Yr1.0,Yr1.5,Yr2.0,Yr2.5,Yr3.0,Yr3.5,Yr4.0,Yr4.5
0,GFDL-CM4,r1i1p1f1,ssp245,2015.0,2031.0,2050.0,2071.0,,,,
1,GFDL-CM4,r1i1p1f1,ssp585,2015.0,2030.0,2042.0,2051.0,2060.0,2070.0,2079.0,2087.0
2,GFDL-ESM4,r1i1p1f1,ssp119,2021.0,,,,,,,
3,GFDL-ESM4,r1i1p1f1,ssp126,2020.0,,,,,,,
4,GFDL-ESM4,r1i1p1f1,ssp245,2019.0,2046.0,2073.0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
1293,ACCESS-CM2,r4i1p1f1,ssp585,2012.0,2026.0,2037.0,2047.0,2055.0,2062.0,2070.0,2076.0
1294,ACCESS-CM2,r5i1p1f1,ssp126,2012.0,2026.0,2046.0,,,,,
1295,ACCESS-CM2,r5i1p1f1,ssp245,2013.0,2027.0,2040.0,2052.0,2066.0,2088.0,,
1296,ACCESS-CM2,r5i1p1f1,ssp370,2014.0,2027.0,2037.0,2048.0,2059.0,2068.0,2077.0,2085.0


In [5]:
# Keep only the rows whose scenario is one of the chosen scenarios
in_chosen_scenarios = allwarmyrs['scenario'].isin(chosen_scenarios)
allwarmyrs = allwarmyrs.loc[in_chosen_scenarios]
allwarmyrs

Unnamed: 0,model,member,scenario,Yr1.0,Yr1.5,Yr2.0,Yr2.5,Yr3.0,Yr3.5,Yr4.0,Yr4.5
0,GFDL-CM4,r1i1p1f1,ssp245,2015.0,2031.0,2050.0,2071.0,,,,
1,GFDL-CM4,r1i1p1f1,ssp585,2015.0,2030.0,2042.0,2051.0,2060.0,2070.0,2079.0,2087.0
3,GFDL-ESM4,r1i1p1f1,ssp126,2020.0,,,,,,,
4,GFDL-ESM4,r1i1p1f1,ssp245,2019.0,2046.0,2073.0,,,,,
5,GFDL-ESM4,r1i1p1f1,ssp370,2022.0,2042.0,2058.0,2070.0,2083.0,,,
...,...,...,...,...,...,...,...,...,...,...,...
1293,ACCESS-CM2,r4i1p1f1,ssp585,2012.0,2026.0,2037.0,2047.0,2055.0,2062.0,2070.0,2076.0
1294,ACCESS-CM2,r5i1p1f1,ssp126,2012.0,2026.0,2046.0,,,,,
1295,ACCESS-CM2,r5i1p1f1,ssp245,2013.0,2027.0,2040.0,2052.0,2066.0,2088.0,,
1296,ACCESS-CM2,r5i1p1f1,ssp370,2014.0,2027.0,2037.0,2048.0,2059.0,2068.0,2077.0,2085.0


In [6]:
# Drop every model that does not provide all of the chosen scenarios
mods = allwarmyrs['model'].drop_duplicates()
n_required = len(chosen_scenarios)

# Collect the row labels of incomplete models first, then drop them in one go.
incomplete_index = []
for model_name in mods:
    model_rows = allwarmyrs.loc[allwarmyrs['model'] == model_name]
    # dropna=False so the count matches the number of de-duplicated scenario values
    if model_rows['scenario'].nunique(dropna=False) < n_required:
        incomplete_index.extend(model_rows.index)

allwarmyrs = allwarmyrs.drop(index=incomplete_index)

allwarmyrs

Unnamed: 0,model,member,scenario,Yr1.0,Yr1.5,Yr2.0,Yr2.5,Yr3.0,Yr3.5,Yr4.0,Yr4.5
3,GFDL-ESM4,r1i1p1f1,ssp126,2020.0,,,,,,,
4,GFDL-ESM4,r1i1p1f1,ssp245,2019.0,2046.0,2073.0,,,,,
5,GFDL-ESM4,r1i1p1f1,ssp370,2022.0,2042.0,2058.0,2070.0,2083.0,,,
6,GFDL-ESM4,r1i1p1f1,ssp585,2021.0,2040.0,2053.0,2065.0,2076.0,2086.0,,
7,GFDL-ESM4,r3i1p1f1,ssp245,2010.0,2039.0,2064.0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
1293,ACCESS-CM2,r4i1p1f1,ssp585,2012.0,2026.0,2037.0,2047.0,2055.0,2062.0,2070.0,2076.0
1294,ACCESS-CM2,r5i1p1f1,ssp126,2012.0,2026.0,2046.0,,,,,
1295,ACCESS-CM2,r5i1p1f1,ssp245,2013.0,2027.0,2040.0,2052.0,2066.0,2088.0,,
1296,ACCESS-CM2,r5i1p1f1,ssp370,2014.0,2027.0,2037.0,2048.0,2059.0,2068.0,2077.0,2085.0


In [7]:
# Number of distinct models remaining in the table
allwarmyrs['model'].nunique(dropna=False)

37

In [8]:
# For each model only keep one member

def _has_all_scenarios(rows, n_scenarios):
    """Return True if `rows` holds exactly `n_scenarios` distinct scenario values."""
    return rows['scenario'].drop_duplicates().shape[0] == n_scenarios


def _pick_member(model, model_rows, n_scenarios):
    """Choose the member label to keep for a single model.

    Rules are tried in this order:
      1. 'r1i1p1f1' if it has all scenarios.
      2. If every member ends in 'i1p1f1': the lowest realization (r) index
         whose row count equals `n_scenarios`.
      3. If every member ends in 'f2': 'r1i1p1f2'.
      4. If every member has 'p2': 'r1i1p2f1'.
      5. Hand-picked members for GISS-E2-1-G and GISS-E2-1-H.

    Parameters
    ----------
    model : str
        Model name (only used for the GISS special cases and error messages).
    model_rows : pandas.DataFrame
        Rows of the warming-years table for this model; needs 'member' and
        'scenario' columns.
    n_scenarios : int
        Number of scenarios a member must have to count as complete.

    Returns
    -------
    str
        Member label to use. For rules 3-5 the caller still has to confirm
        that the member actually has all scenarios.

    Raises
    ------
    ValueError
        If no rule applies, or rule 2 applies but no realization is complete.
    """
    mems = list(model_rows['member'])
    # pieces of labels such as 'r31i1p1f1'
    rnums = [i.split('r')[-1].split('i')[0] for i in mems]
    pnums = [i.split('p')[-1].split('f')[0] for i in mems]
    fnums = [i.split('f')[-1] for i in mems]
    ipfnums = [i.split('i')[-1] for i in mems]

    if _has_all_scenarios(model_rows.loc[model_rows['member'] == 'r1i1p1f1'], n_scenarios):
        return 'r1i1p1f1'

    if all(x == '1p1f1' for x in ipfnums):
        # count rows per realization index and keep the ones with one row per scenario
        counts = dict()
        for r in rnums:
            counts[r] = counts.get(r, 0) + 1
        # int() instead of eval(): the strings are plain realization numbers
        complete_rvals = [int(r) for r, n in counts.items() if n == n_scenarios]
        if not complete_rvals:
            raise ValueError('no i1p1f1 member has all chosen scenarios for this model: ' + str(model))
        return 'r' + str(min(complete_rvals)) + 'i1p1f1'

    if all(x == '2' for x in fnums):
        return 'r1i1p1f2'

    if all(x == '2' for x in pnums):
        return 'r1i1p2f1'

    if model == 'GISS-E2-1-G':
        # not clear which member would be best out of those that are available for all scenarios
        # more info here: https://data.giss.nasa.gov/modelE/cmip6/
        # Just pick one.
        return 'r1i1p3f1'

    if model == 'GISS-E2-1-H':
        # all members that are available for all scenarios have i1p1f2, so just pick r1i1p1f2
        return 'r1i1p1f2'

    raise ValueError('previous approaches will not work for this model: ' + str(model))


n_scenarios = len(chosen_scenarios)
mods = allwarmyrs['model'].drop_duplicates()

# Collect the selected rows per model and concatenate once at the end, so the
# result never depends on a `keep` variable left over from an earlier run.
kept_parts = []
for m in mods:
    tab1 = allwarmyrs.loc[allwarmyrs['model'] == m]
    memtouse = _pick_member(m, tab1, n_scenarios)
    mem1 = tab1.loc[tab1['member'] == memtouse]
    if not _has_all_scenarios(mem1, n_scenarios):
        # Fail loudly rather than silently dropping the model from the table.
        raise ValueError('member ' + memtouse + ' does not have all chosen scenarios for model: ' + str(m))
    kept_parts.append(mem1)

# An empty input table gives an empty result with the same columns.
keep = pd.concat(kept_parts) if kept_parts else allwarmyrs.iloc[0:0]
keep

Unnamed: 0,model,member,scenario,Yr1.0,Yr1.5,Yr2.0,Yr2.5,Yr3.0,Yr3.5,Yr4.0,Yr4.5
3,GFDL-ESM4,r1i1p1f1,ssp126,2020.0,,,,,,,
4,GFDL-ESM4,r1i1p1f1,ssp245,2019.0,2046.0,2073.0,,,,,
5,GFDL-ESM4,r1i1p1f1,ssp370,2022.0,2042.0,2058.0,2070.0,2083.0,,,
6,GFDL-ESM4,r1i1p1f1,ssp585,2021.0,2040.0,2053.0,2065.0,2076.0,2086.0,,
23,IPSL-CM6A-LR,r1i1p1f1,ssp126,2003.0,2019.0,2039.0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
1162,CMCC-ESM2,r1i1p1f1,ssp585,2004.0,2029.0,2039.0,2048.0,2055.0,2063.0,2070.0,2079.0
1283,ACCESS-ESM1-5,r31i1p1f1,ssp126,2015.0,2028.0,,,,,,
1284,ACCESS-ESM1-5,r31i1p1f1,ssp245,2013.0,2029.0,2046.0,2064.0,,,,
1285,ACCESS-ESM1-5,r31i1p1f1,ssp370,2014.0,2030.0,2045.0,2060.0,2070.0,2082.0,,


In [9]:
# Sanity check: the number of distinct models should match the count before member selection
len(keep['model'].unique())

37

In [10]:
# Write the selected rows to disk (the row index is not written)
best_set_csv = '/home/abbylute/alute_bucket/warming_levels/data/warming_years_zarr_21yr_window_best_set.csv'
keep.to_csv(best_set_csv, index=False)