# Calculate time series of SIA, SIE and SIV from 6 CLIVAR LE models

### Author: Chris Wyburn-Powell, [github](https://github.com/chrisrwp/synthetic-ensemble/SIA/SIC_to_SIA_models.ipynb)

**Input**: <br>
Sea ice concentration (SIC) and thickness (SIT) outputs for historical and RCP 8.5 for CLIVAR Large Ensemble Archive models: <br>
File locations: `/glade/collections/cdg/data/CLIVAR_LE`  ([doi: 10.1038/s41558-020-0731-2](https://doi.org/10.1038/s41558-020-0731-2))
- CanESM2
- CESM1
- CSIRO MK3.6
- GFDL CM3
- GFDL ESM2M
- MPI ESM1

**Output**: <br>
Single files for all 6 models with time series of sea ice area (SIA), sea ice extent (SIE), average sea ice thickness (SIT) and sea ice volume (SIV). <br>
All models have the same mid-month date converted to `numpy.datetime64`
- `CLIVAR_SIA_1850_2100_RCP85.nc`
- `CLIVAR_SIE_1850_2100_RCP85.nc`
- `CLIVAR_SIT_1850_2100_RCP85.nc`
- `CLIVAR_SIV_1850_2100_RCP85.nc`

**Method**: <br>
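- SIA is calculated by multiplying SIC by the areacello file and summing over all grid cells north of 30N
- SIE is calculated in the same way as SIA, but with SIC set to 0% where SIC<15% and to 100% where SIC>=15%
- SIT is the average thickness, calculated by dividing SIV by SIA
- SIV is calculated by multiplying SIC by the gridded thickness and the areacello file, then summing over all grid cells north of 30N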

**N.B.** <br>
- GFDL CM3 has extremely high thickness values in the Davis Strait, this causes thickness and volume distortions which become most apparent when sea ice area drops near zero.

In [1]:
import numpy as np
import xarray as xr
import datetime

In [2]:
# Root directory for the areacello inputs read below and the NetCDF outputs written at the end
data_path = '/glade/scratch/cwpowell/Synthetic_ensemble/'

# One row per large ensemble: (model name, number of members, first year of the time axis given to its members)
_model_table = [
    ('CanESM2',    50,  1950),
    ('CESM1',      40,  1920),
    ('CSIRO_MK36', 30,  1850),
    ('GFDL_CM3',   20,  1920),
    ('GFDL_ESM2M', 30,  1950),
    ('MPI_ESM1',   100, 1850),
]

# Index-aligned parallel lists derived from the table above
model_names  = [row[0] for row in _model_table]
mem_len      = [row[1] for row in _model_table]
model_starts = [row[2] for row in _model_table]

## Make the Areacello files for CanESM2 and CSIRO MK3.6 by using the areacella and sftlf files

In [3]:
#make areacello file for CanESM2 - N.B. sea ice uses atmospheric grid, create areacello file from areacella and sftlf files
# CanESM2_areacella = xr.open_dataset(data_path+'Raw_data/areacello/areacella_fx_CanESM2_piControl_r0i0p0.nc')
# CanESM2_sftlf = xr.open_dataset(data_path+'Raw_data/areacello/sftlf_fx_CanESM2_historical_r0i0p0.nc')

# CanESM2_areacello = CanESM2_areacella.where(CanESM2_sftlf['sftlf']==0)
# CanESM2_areacello = CanESM2_areacello.rename(name_dict={'areacella':'areacello'})
# CanESM2_areacello.to_netcdf(data_path+'Raw_data/areacello/areacello_CanESM2.nc')

# #make areacello file for CSIRO MK3.6 from areacella and sftlf
# CSIRO_areacella = xr.open_dataset(data_path+'Raw_data/areacello/areacella_fx_CSIRO-Mk3-6-0_piControl_r0i0p0.nc')
# CSIRO_sftlf = xr.open_dataset(data_path+'Raw_data/areacello/sftlf_fx_CSIRO-Mk3-6-0_1pctCO2_r0i0p0.nc')

# CSIRO_areacello = CSIRO_areacella.where(CSIRO_sftlf['sftlf']==0)
# CSIRO_areacello = CSIRO_areacello.rename(name_dict={'areacella':'areacello'})
# CSIRO_areacello.to_netcdf(data_path+'Raw_data/areacello/areacello_CSIRO_MK36.nc')

# #make areacello GFDL CM3 from areacella and SIC for one member
# GFDL_CM3_areacella = xr.open_dataset(data_path+'Raw_data/areacello/areacella_GFDL_CM3.nc')
# GFDL_CM3_SIC = xr.open_dataset(data_path+'Raw_data/areacello/GFDL_CM3/GFDL_CM3_1979_03_mem1.nc')
# GFDL_CM3_areacello = GFDL_CM3_areacella['areacello'].where(GFDL_CM3_SIC['sic']>-1).isel(time=0)
# GFDL_CM3_areacello.to_netcdf(data_path+'Raw_data/areacello/areacello_GFDL_CM3.nc')

## Define a function to load a specific member file from the CLIVAR LE archive for SIC or SIT

In [3]:
def load_member(model, i, sic_sit, chunk_size=False, historical=False):
    '''
    Open a single member file of either sea ice concentration or thickness from the CLIVAR LE archive using xarray.open_dataset
    
    Parameters
    ----------
    model : string,
        Choose from ['CanESM2', 'CESM1', 'GFDL_CM3', 'GFDL_ESM2M', 'CSIRO_MK36', 'MPI_ESM1']
    i : integer,
        Member number e.g. 1
    sic_sit : string,
        Variable concentration or thickness, choose from ['sic', 'sit']
    chunk_size : integer, optional
        Choose an int e.g. 50 to use dask chunks along time to open the data, defaults to not use dask
    historical : boolean
        Only use for MPI_ESM1 to specify the time period required, defaults to RCP85 time period
    
    Returns
    ----------
        xarray.Dataset object from the CLIVAR LE archive sea ice output
    
    Raises
    ----------
    AssertionError
        If sic_sit is not 'sic' or 'sit'
    ValueError
        If model is not one of the supported model names
    
    Notes
    ----------
    Time decoding is switched off (decode_times=False) for all GFDL_ESM2M files and for
    GFDL_CM3 thickness files, because their time axis is not recognized by xarray.
    '''  
    
    base_path = '/glade/collections/cdg/data/CLIVAR_LE/'
    
    assert sic_sit in ['sic', 'sit'], 'invalid variable name'
    
    #time is not recognized by xarray for these files, so leave it undecoded
    decode_bool = not (model == 'GFDL_ESM2M' or (model == 'GFDL_CM3' and sic_sit == 'sit'))
    
    ############### generate the file path ###############
    if model == 'CanESM2':
        path = base_path+'canesm2_lens/OImon/{}/{}_OImon_CanESM2_historical_rcp85_r{}i1p1_195001-210012.nc'.format(sic_sit, sic_sit, i)
        
    elif model == 'CESM1':
        if i == 1: #member 1 is the only CESM1 file that starts in 1850
            path = base_path+'cesm_lens/OImon/{}/{}_OImon_CESM1-CAM5_historical_rcp85_r1i1p1_185001-210012.nc'.format(sic_sit, sic_sit)
        else:
            path = base_path+'cesm_lens/OImon/{}/{}_OImon_CESM1-CAM5_historical_rcp85_r{}i1p1_192001-210012.nc'.format(sic_sit, sic_sit, i)
            
    elif model == 'GFDL_ESM2M':
        path = base_path+'gfdl_esm2m_lens/OImon/{}/{}_OImon_GFDL-ESM2M_historical_rcp85_r{}i1p1_195001-210012.nc'.format(sic_sit, sic_sit, i)
    
    elif model == 'GFDL_CM3':
        path = base_path+'gfdl_cm3_lens/OImon/{}/{}_OImon_GFDL-CM3_historical_rcp85_r{}i1p1_192001-210012.nc'.format(sic_sit, sic_sit, i)
        
    elif model == 'CSIRO_MK36':
        path = base_path+'csiro_mk36_lens/OImon/{}/{}_OImon_CSIRO-Mk3-6-0_historical_rcp85_r{}i1p1_185001-210012.nc'.format(sic_sit, sic_sit, i)
        
    elif model == 'MPI_ESM1':
        #historical and RCP85 are stored in separate files, each with its own experiment name and file name suffix
        if historical: #2005-12 or previous
            experiment, suffix = 'historical', '1850p3_185001-200512'
        else:
            experiment, suffix = 'rcp85', '2005p3_200601-209912'
        
        path = base_path+'mpi_lens/OImon/{}/{}_OImon_MPI-ESM_{}_r{}i{}.nc'.format(sic_sit, sic_sit, experiment, str(i).zfill(3), suffix)
    else:
        #fail here with a clear message rather than later on an undefined path
        raise ValueError('invalid model name: {!r}'.format(model))
       
    ############### use the file path to open the NetCDF file using xarray ###############
    open_kwargs = {'decode_times': decode_bool}
    if chunk_size:
        open_kwargs['chunks'] = {'time': chunk_size}
    
    data = xr.open_dataset(path, **open_kwargs)
            
    return(data)

# Calculate SIA, SIE and SIV
## Firstly, make a new time dimension for consistency and to avoid incompatibility with cftime

In [4]:
#build one shared numpy.datetime64 time axis to stamp onto every dataset; CSIRO_MK36 is the template because it starts in 1850
#a common axis sidesteps model-specific definitions of mid-month (e.g. CESM1)
#and the GFDL CM3 date discontinuity: members 1-8 go 2005-12-16, 2006-02-21 while members 9-20 go 2005-12-16, 2006-01-16...

xr_time = load_member('CSIRO_MK36', 1, 'sic')

new_time = []
for i in xr_time['time']: #i is each date; its string form is sliced into year, month, day and hour fields
    stamp = str(i.values)
    date_part = stamp[:4] + '-' + stamp[5:7].zfill(2) + '-' + stamp[8:10]
    hour_part = stamp[11:13].zfill(2)
    new_time.append(np.datetime64(date_part + 'T' + hour_part + ':00:00.000'))

#overwrite the template's time coordinate and keep only that coordinate for reuse
xr_time['time'] = new_time
xr_new_time = xr_time['time']

## Loop through all the models and members to calculate SIA, SIE, SIT, SIV

In [9]:
SIA_all_models = {} #initialize dictionaries to store all xarray dataarrays for each model
SIE_all_models = {}
SIT_all_models = {}
SIV_all_models = {}

####################################### loop through all models #######################################
for model_i, model_name in enumerate(model_names):
    print(datetime.datetime.now(), model_name)

    SIA_all_mem = [] #initialize lists to store all xarray dataarrays for each member
    SIE_all_mem = []
    SIT_all_mem = []
    SIV_all_mem = []

    #member numbers of this model, used both to loop over the files and as the 'member' coordinate
    members = np.arange(1, mem_len[model_i]+1)

    #define the dimensions along which to sum (lat_lon) and the coordinate used to select latitude (lat_select)
    if model_name in ['CESM1', 'MPI_ESM1']:
        lat_lon = ['j', 'i']
        lat_select = 'lat'
    elif model_name == 'GFDL_CM3':
        lat_lon = ['rlat', 'rlon']
        lat_select = 'rlat'
    else:
        lat_lon = ['lat', 'lon']
        lat_select = 'lat'

    #load the areacello file and drop latitudes below 30N
    areacello = xr.open_dataset(data_path+'Raw_data/areacello/areacello_{}.nc'.format(model_name))

    if model_name == 'MPI_ESM1': #the MPI areacello file names its latitude coordinate 'latitude'
        areacello = areacello['areacello'].where(areacello['latitude']>30, drop=True)
    else:
        areacello = areacello['areacello'].where(areacello[lat_select]>30, drop=True)

    #plain numpy array of cell areas, multiplied positionally with the SIC and SIT grids below
    cell_area = areacello.values

    ####################################### loop through all the members of the model #######################################
    for mem in members:
        if mem % 10 == 0:
            print(datetime.datetime.now(), mem)

        ##################### load the data files #####################
        #load the SIC and SIT data
        if model_name == 'MPI_ESM1': #historical and RCP85 data are stored in separate files
            SIC = xr.concat((load_member(model_name, mem, 'sic', historical=True), load_member(model_name, mem, 'sic', historical=False)), dim='time')
            SIT = xr.concat((load_member(model_name, mem, 'sit', historical=True), load_member(model_name, mem, 'sit', historical=False)), dim='time')
        else:
            SIC = load_member(model_name, mem, 'sic')
            SIT = load_member(model_name, mem, 'sit')


        #select the relevant variable above 30N, and multiply SIC by 100 for GFDL ESM2M to get into % like other models
        #lat_select is the latitude coordinate for every model (it equals lat_lon[0] wherever the grid dimensions are lat/lon or rlat/rlon)
        if model_name == 'GFDL_ESM2M':
            SIC = (SIC['sic']*100).where(SIC[lat_select]>30, drop=True)
        else:
            SIC = SIC['sic'].where(SIC[lat_select]>30, drop=True)
        SIT = SIT['sit'].where(SIT[lat_select]>30, drop=True)


        #replace time with the new_time for consistency across models, and to stop GFDL CM3 discontinuity in dates
        if model_name == 'CESM1' and mem == 1: #CESM1 member 1 starts in 1850, rest start in 1920
            member_time = xr_new_time.sel(time=slice('1850', '2100'))
        elif model_name == 'MPI_ESM1': #MPI ESM1 files end in 2099
            member_time = xr_new_time.sel(time=slice('1850', '2099'))
        else:
            member_time = xr_new_time.sel(time=slice(str(model_starts[model_i]), '2100'))

        SIC['time'] = member_time
        SIT['time'] = member_time


        ##################### compute SIA, SIE, and SIV #####################      
        #only do the calculation where there is sea ice, CSIRO MK36 has small non-zero values in extra-tropics
        if model_name == 'CSIRO_MK36':
            SIC = SIC.where(SIC>3.368e-3)
            SIT = SIT.where(SIT>3.386e-5)
        else:
            SIC = SIC.where(SIC>0)
            SIT = SIT.where(SIT>0)
        
        #compute SIA
        SIA = (SIC * cell_area).sum(lat_lon[0]).sum(lat_lon[1])
        SIA_all_mem.append(SIA)

        #compute SIE: cells with SIC>=15% count as 100% ice covered, all other cells (including masked NaN cells) as 0%
        #a plain comparison gives the same boolean mask (NaN compares as False) without relying on the deprecated xr.ufuncs wrappers
        SIC_15 = (SIC >= 15) * 100
        SIE_all_mem.append((SIC_15 * cell_area).sum(lat_lon[0]).sum(lat_lon[1]))
        
        #compute SIV
        SIV = (SIC * SIT * cell_area).sum(lat_lon[0]).sum(lat_lon[1])
        SIV_all_mem.append(SIV)
        
        #compute average SIT from SIV and SIA
        SIT_all_mem.append(SIV / SIA)
        
        
    ##################### append all member calculations to the dictionary of all models #####################      
    for all_models, all_mem in ((SIA_all_models, SIA_all_mem),
                                (SIE_all_models, SIE_all_mem),
                                (SIT_all_models, SIT_all_mem),
                                (SIV_all_models, SIV_all_mem)):
        all_models[model_name] = xr.concat(all_mem, dim='member')
        all_models[model_name]['member'] = members

2021-10-03 17:32:38.238008 CanESM2
2021-10-03 17:32:43.685046 10
2021-10-03 17:32:50.052096 20
2021-10-03 17:32:56.354257 30
2021-10-03 17:33:02.632149 40
2021-10-03 17:33:08.836918 50
2021-10-03 17:33:09.500178 CESM1
2021-10-03 17:34:46.576227 10
2021-10-03 17:36:25.975628 20
2021-10-03 17:38:03.394638 30
2021-10-03 17:39:49.428476 40
2021-10-03 17:39:59.760064 CSIRO_MK36
2021-10-03 17:40:27.710416 10
2021-10-03 17:41:05.988331 20
2021-10-03 17:41:38.135664 30
2021-10-03 17:41:41.270702 GFDL_CM3
2021-10-03 17:42:26.301719 10
2021-10-03 17:43:15.786575 20
2021-10-03 17:43:20.521584 GFDL_ESM2M
2021-10-03 17:44:21.561984 10
2021-10-03 17:45:29.285958 20
2021-10-03 17:46:35.701100 30
2021-10-03 17:46:42.354862 MPI_ESM1
2021-10-03 17:50:48.081357 10
2021-10-03 17:55:10.120332 20
2021-10-03 17:59:40.556197 30
2021-10-03 18:04:07.189449 40
2021-10-03 18:08:32.596581 50
2021-10-03 18:12:49.240242 60
2021-10-03 18:17:18.835121 70
2021-10-03 18:21:43.471767 80
2021-10-03 18:26:07.605369 90
2021

## Save the calculations to NetCDF

In [36]:
#SIC is in %, so the summed SIC*area is in % m^2: dividing by 1e14 (100 * 1e12) gives million square km
SIA_ds = xr.Dataset(SIA_all_models)/1e14
SIE_ds = xr.Dataset(SIE_all_models)/1e14
SIT_ds = xr.Dataset(SIT_all_models) #ratio of SIV to SIA, already in m
#the summed SIC*SIT*area is in % m^3: dividing by 1e14 (100 * 1e12) gives units of 1e12 m^3, i.e. thousand cubic km
SIV_ds = xr.Dataset(SIV_all_models)/1e14

#shared wording of the Description attribute, only the variable name differs between files
description = 'Arctic {} for the large ensemble models: CanESM2, CESM1, CSIRO MK3.6, GFDL CM3, GFDL ESM2M, MPI ESM1. During the historical and RCP8.5 periods 1850-2100, or all available years.'

dict_attrs = {'Description': description.format('sea ice area (SIA)'),
              'Units'      : 'million square km',
              'Timestamp'  : datetime.datetime.now(datetime.timezone.utc).strftime("%H:%M UTC %a %Y-%m-%d"),
              'Data source': 'CLIVAR Large Ensemble Archive, doi:10.1038/s41558-020-0731-2',
              'Analysis'   : 'https://github.com/chrisrwp/synthetic-ensemble/SIA/SIC_to_SIA_models.ipynb'}

dict_attrs_SIE = dict_attrs.copy()
dict_attrs_SIE['Description'] = description.format('sea ice extent (SIE)')

dict_attrs_SIV = dict_attrs.copy()
dict_attrs_SIV['Description'] = description.format('sea ice volume (SIV)')
dict_attrs_SIV['Units'] = 'thousand cubic km' #not million cubic km, see the unit conversion above

dict_attrs_SIT = dict_attrs.copy()
dict_attrs_SIT['Description'] = description.format('sea ice average thickness (SIT)')
dict_attrs_SIT['Units'] = 'm'

SIA_ds.attrs = dict_attrs
SIE_ds.attrs = dict_attrs_SIE
SIT_ds.attrs = dict_attrs_SIT
SIV_ds.attrs = dict_attrs_SIV

SIA_ds.to_netcdf(data_path+'SIA/SIA_SIE_SIV/CLIVAR_SIA_1850_2100_RCP85.nc')
SIE_ds.to_netcdf(data_path+'SIA/SIA_SIE_SIV/CLIVAR_SIE_1850_2100_RCP85.nc')
SIT_ds.to_netcdf(data_path+'SIA/SIA_SIE_SIV/CLIVAR_SIT_1850_2100_RCP85.nc')
SIV_ds.to_netcdf(data_path+'SIA/SIA_SIE_SIV/CLIVAR_SIV_1850_2100_RCP85.nc')

## Take a look at the areacello, SIC and SIT files to check idiosyncrasies 

In [25]:
#visual check for CanESM2: cell areas, then March 1950 thickness and concentration of member 5 where they are above zero
#NOTE(review): plt is not imported in the visible cells - presumably matplotlib.pyplot, confirm
model_name = 'CanESM2'

areacello = xr.open_dataset(data_path+'Raw_data/areacello/areacello_{}.nc'.format(model_name))
areacello['areacello'].plot()

plt.figure() #new figure so the thickness map does not draw over the areacello map
SIT = load_member(model_name, 5, 'sit')
sit_march = SIT['sit'].sel(time='1950-03')
sit_march.where(sit_march>0).plot()

plt.figure() #new figure for the concentration map
SIC = load_member(model_name, 5, 'sic')
sic_march = SIC['sic'].sel(time='1950-03')
sic_march.where(sic_march>0).plot()

In [26]:
#visual check for CESM1: cell areas, then March 1950 thickness and concentration of member 5 where they are above zero
#NOTE(review): plt is not imported in the visible cells - presumably matplotlib.pyplot, confirm
model_name = 'CESM1'

areacello = xr.open_dataset(data_path+'Raw_data/areacello/areacello_{}.nc'.format(model_name))
areacello['areacello'].plot()

plt.figure() #new figure so the thickness map does not draw over the areacello map
SIT = load_member(model_name, 5, 'sit')
sit_march = SIT['sit'].sel(time='1950-03')
sit_march.where(sit_march>0).plot()

plt.figure() #new figure for the concentration map
SIC = load_member(model_name, 5, 'sic')
sic_march = SIC['sic'].sel(time='1950-03')
sic_march.where(sic_march>0).plot()

In [27]:
#visual check for CSIRO MK3.6: cell areas, then March 1950 thickness and concentration of member 5
#the masks use the same small non-zero thresholds as the main calculation instead of zero
#NOTE(review): plt is not imported in the visible cells - presumably matplotlib.pyplot, confirm
model_name = 'CSIRO_MK36'

areacello = xr.open_dataset(data_path+'Raw_data/areacello/areacello_{}.nc'.format(model_name))
areacello['areacello'].plot()

plt.figure() #new figure so the thickness map does not draw over the areacello map
SIT = load_member(model_name, 5, 'sit')
sit_march = SIT['sit'].sel(time='1950-03')
sit_march.where(sit_march>3.386e-5).plot()

plt.figure() #new figure for the concentration map
SIC = load_member(model_name, 5, 'sic')
sic_march = SIC['sic'].sel(time='1950-03')
sic_march.where(sic_march>3.368e-3).plot()

In [28]:
#visual check for GFDL CM3: cell areas, then thickness and concentration of member 5 where they are above zero
#the first time step is picked by position rather than by date
#NOTE(review): plt is not imported in the visible cells - presumably matplotlib.pyplot, confirm
model_name = 'GFDL_CM3'

areacello = xr.open_dataset(data_path+'Raw_data/areacello/areacello_{}.nc'.format(model_name))
areacello['areacello'].plot()

plt.figure() #new figure so the thickness map does not draw over the areacello map
SIT = load_member(model_name, 5, 'sit')
sit_first = SIT['sit'].isel(time=0)
sit_first.where(sit_first>0).plot()

plt.figure() #new figure for the concentration map
SIC = load_member(model_name, 5, 'sic')
sic_first = SIC['sic'].isel(time=0)
sic_first.where(sic_first>0).plot()

In [29]:
#visual check for GFDL ESM2M: cell areas, then thickness and concentration of member 5 where they are above zero
#the first time step is picked by position rather than by date
#NOTE(review): plt is not imported in the visible cells - presumably matplotlib.pyplot, confirm
model_name = 'GFDL_ESM2M'

areacello = xr.open_dataset(data_path+'Raw_data/areacello/areacello_{}.nc'.format(model_name))
areacello['areacello'].plot()

plt.figure() #new figure so the thickness map does not draw over the areacello map
SIT = load_member(model_name, 5, 'sit')
sit_first = SIT['sit'].isel(time=0)
sit_first.where(sit_first>0).plot()

plt.figure() #new figure for the concentration map
SIC = load_member(model_name, 5, 'sic')
sic_first = SIC['sic'].isel(time=0)
sic_first.where(sic_first>0).plot()

In [30]:
#visual check for MPI ESM1: cell areas, then March 1950 thickness and concentration of member 5 where they are above zero
#the historical file is requested because MPI ESM1 stores historical and RCP85 separately
#NOTE(review): plt is not imported in the visible cells - presumably matplotlib.pyplot, confirm
model_name = 'MPI_ESM1'

areacello = xr.open_dataset(data_path+'Raw_data/areacello/areacello_{}.nc'.format(model_name))
areacello['areacello'].plot()

plt.figure() #new figure so the thickness map does not draw over the areacello map
SIT = load_member(model_name, 5, 'sit', historical=True)
sit_march = SIT['sit'].sel(time='1950-03')
sit_march.where(sit_march>0).plot()

plt.figure() #new figure for the concentration map
SIC = load_member(model_name, 5, 'sic', historical=True)
sic_march = SIC['sic'].sel(time='1950-03')
sic_march.where(sic_march>0).plot()