# Save warming year datasets

#### Create datasets of warming amount (relative to 1850-1900) on a common lat/lon grid for every model, member, and scenario combination available on Google Cloud, at each warming year identified via a warming-level and temperature-tolerance approach. This results in one dataset per model/member/scenario combination. From these datasets, also create aggregated datasets that take the median across years and then the mean across members (resulting in one file for each model and scenario).

In [1]:
import xarray as xr
import pandas as pd
import numpy as np
import os.path
from warming_years import calc_warming_years_temperature_window, get_cmip6_data
from warming_years import get_cmip6_data_at_warming_years
from file_control import data_dir, gmst_table_dir, model_table_dir
from IPython.display import clear_output

In [2]:
# --- analysis settings ---

# CMIP6 variable to process; only 'tas' works right now due to the temporal
# averaging applied further down
cmip6_variable = 'tas'

# first and last month of the base period that warming is measured against
base_start, base_end = '1850-01', '1900-12'

# target warming level and the temperature tolerance used to pick warming years
target_wl = 2
temp_tol = 0.25

# root output directory; its name embeds the target warming level
savedir = os.path.join(data_dir, f'zarr_{target_wl}C_warming_data')

In [3]:
# read the GMST table from disk
gmst_table_path = os.path.join(gmst_table_dir, 'CMIP6_GMST_table_all.csv')
tab = pd.read_csv(gmst_table_path)

# one row per unique (model, member, scenario) combination; this is the table
# the processing loops iterate over
mms = tab[['model','member','scenario']].drop_duplicates()

# drop the 'historical' rows: historical data is handled together with each
# scenario rather than being treated as a scenario of its own
mms = mms[mms['scenario'].ne('historical')]


In [4]:
# common 0.5-degree target grid that every dataset is interpolated onto:
# latitudes run from -90 to 90 inclusive (361 points), longitudes from -180 up
# to, but not including, 180 (720 points)
newlats = np.linspace(-90, 90, 361)
newlons = np.linspace(-180, 180, 720, endpoint=False)

In [5]:
# Build and save one warming dataset per model/member/scenario combination.
# Each saved file holds the warming relative to the base period for every
# warming year of that combination, interpolated to the common grid.

# make sure the output directory exists so the writes below do not fail on a
# fresh data directory
member_outdir = os.path.join(savedir, 'model_member_scenario_years')
os.makedirs(member_outdir, exist_ok=True)

n_combos = mms.shape[0]
combos = mms[['model', 'member', 'scenario']].itertuples(index=False, name=None)
for i, (model, member, scenario) in enumerate(combos):
    print('running file ' + str(i+1) + ' of ' + str(n_combos), end = '\r')

    # warming years for this single combination; rows without a result are
    # dropped.
    # NOTE(review): the meaning of the positional argument 3000 is not visible
    # in this file -- confirm against calc_warming_years_temperature_window.
    wltable = calc_warming_years_temperature_window(mms.iloc[i:(i+1)],
                                                    target_wl,
                                                    temp_tol,
                                                    3000).dropna()

    yrs = wltable['warming_year'].to_numpy()

    # if no warming years are available for this model/member/scenario, there
    # is nothing to save: go to the next one
    if len(yrs) == 0:
        continue

    # get base period data
    base = get_cmip6_data(model, member, 'historical', cmip6_variable,
                          base_start, base_end, None)

    # aggregate to annual averages, weighting each month by its share of the
    # days in its year, then average the annual values over the base period
    # NOTE(review): sum() skips NaNs by default, so a cell that is NaN in every
    # month would come out as 0 here rather than NaN -- confirm the variable
    # has no missing values before relying on the interpolate_na step below.
    month_length = base.time.dt.days_in_month
    weights = (month_length.groupby("time.year") / month_length.groupby(
        "time.year").sum())
    base[cmip6_variable] = (base[cmip6_variable] * weights).groupby(
        "time.year").sum(dim="time").mean('year')
    # the averaged variable no longer uses 'time', so drop it from the dataset
    base = base.drop_vars('time')
    # get rid of nans by interpolating along longitude
    base[cmip6_variable] = base[cmip6_variable].interpolate_na(dim='lon')

    # get warming year data
    zz = get_cmip6_data_at_warming_years(model, member, scenario,
                                         cmip6_variable, yrs, year_window=0,
                                         outfilename=None)
    # get rid of nans by interpolating along longitude
    zz[cmip6_variable] = zz[cmip6_variable].interpolate_na(dim='lon')

    # reset base lon and lat values so they are the same as the future
    # dataset, so that we avoid horizontal bands of nans
    base['lat'] = zz['lat']
    base['lon'] = zz['lon']

    # calculate warming
    # need to remove scenario info from base so that it's compatible with zz
    zz['warming'] = zz[cmip6_variable] - base[cmip6_variable].isel(
        scenario=0)

    # interpolate to common spatial grid
    zz = zz.interp(lon=newlons, lat=newlats, method="linear")

    # only the warming field is kept; drop the raw variable before saving
    zz = zz.drop_vars([cmip6_variable])

    # save the dataset
    zz.to_netcdf(os.path.join(member_outdir,
                              model + '_' + member + '_' + scenario + '_' +
                              str(target_wl) + 'C_all_years.nc'))

clear_output()

In [6]:
## Create aggregated files from those above: take the median across years, then
# the mean across members, so that there is a file for each model and scenario

# create warming years table and remove model/member/scenario combinations for
# which warming years were not available because the target warming level
# was never reached
# NOTE(review): the meaning of the positional argument 3000 is not visible in
# this file -- confirm against calc_warming_years_temperature_window.
wltable = calc_warming_years_temperature_window(mms, target_wl, temp_tol, 3000
                                               ).dropna()

# make table of all combinations of model and scenario that are available
ms = wltable[['model','scenario']].drop_duplicates()

# open table of first and last available years
flyear = pd.read_csv(os.path.join(model_table_dir,
                                  'all_zarr_models_first_last_year.csv'))

# input directory with the per-member files, and output directory for the
# aggregated files (created here so the writes below do not fail)
member_dir = os.path.join(savedir, 'model_member_scenario_years')
aggregate_dir = os.path.join(savedir, 'model_scenario')
os.makedirs(aggregate_dir, exist_ok=True)

for m, s in ms.itertuples(index=False, name=None):
    print(m,s, end = '\r')

    # match exactly the per-member file names '<model>_<member>_<scenario>_
    # <wl>C_all_years.nc'; a looser pattern would also pick up scenarios whose
    # name merely starts with this one (e.g. 'ssp370' vs 'ssp370-lowNTCF')
    pattern = os.path.join(member_dir,
                           m + '_*_' + s + '_' + str(target_wl) +
                           'C_all_years.nc')

    # the context manager closes the underlying files after each iteration
    with xr.open_mfdataset(pattern) as xx:
        # remove any members that don't have data for the full 21st century
        # (2015 through 2099)
        thisflyear = flyear.loc[(flyear['model'] == m ) &
                                (flyear['scenario'] == s) &
                                (flyear['member'].isin(xx['member'].values))]
        toremove = thisflyear.loc[(thisflyear['tas_first_year']>2015) |
                                  (thisflyear['tas_last_year']<2099)]['member']

        # if all members have incomplete timeseries, then don't save the
        # dataset
        if len(xx['member'].values) == len(toremove):
            print(m + ' ' + s +
                  ' was not saved because all members had incomplete timeseries')
            continue

        kept = xx.drop_sel(member = toremove)

        # pick only years <= 2100
        kept = kept.where(kept.year <= 2100, drop=True)

        # aggregate: median across years, then mean across members
        xm = kept.median('year').mean('member')

        # get rid of unnecessary variables
        xm = xm.drop_vars(['areacella'], errors='ignore')

        # save the file (written while the source files are still open)
        xm.to_netcdf(os.path.join(aggregate_dir,
                                  m + '_' + s + '_' + str(target_wl) + 'C.nc'))

clear_output()