# Detrending and resampling SIA data, observations and models

### Author: Chris Wyburn-Powell, [github](https://github.com/chrisrwp/synthetic-ensemble/SIA/SIA_detrending_resampling.ipynb)

**Input**: <br>
- Models: Arctic SIA from six models of the CLIVAR Large Ensemble archive (CanESM2, CESM1, CSIRO MK3.6, GFDL CM3, GFDL ESM2M, MPI ESM1), as computed in [another notebook](https://github.com/chrisrwp/synthetic-ensemble/SIA/SIC_to_SIA_models.ipynb)
- Observations: Arctic SIA from six observational datasets: Climate Data Record (CDR), NASA Bootstrap (BT), NASA Team (NT), NSIDC Sea Ice Index (SII), Hadley Centre Sea Ice (HadISST1), Merged Hadley NOAA Optimal Interpolation (Merged), as computed in [another notebook](https://github.com/chrisrwp/synthetic-ensemble/SIA/SIA_calculations_observations.ipynb)

**Output**: <br>
- Detrended SIA for models and observations, detrended using either the trend of each individual dataset or member, or the trend of the average observational dataset or ensemble mean
- SIA resampled 1000 times with a 2 year bootstrap size
- **$\sigma_{LE}$**  : Standard deviations of detrended models without resampling
- **$\sigma_{mem}$** : Standard deviations of detrended resampled models
- **$\sigma_{obs}$** : Standard deviations of detrended resampled observations

In [1]:
import xarray as xr
import numpy as np
import datetime

In [2]:
#root directory that every input and output path in this notebook is built from
data_path = '/glade/scratch/cwpowell/Synthetic_ensemble/'

#one row per large ensemble: (model name, number of members, start year)
#the rows are unzipped into three parallel lists that share the same index
model_names, mem_len, model_starts = map(list, zip(
    ('CanESM2',    50,  1950),
    ('CESM1',      40,  1920),
    ('CSIRO_MK36', 30,  1850),
    ('GFDL_CM3',   20,  1920),
    ('GFDL_ESM2M', 30,  1950),
    ('MPI_ESM1',   100, 1850),
))

In [3]:
#open the observational files; the CDR file also holds the BT and NT variables
CDR      = xr.open_dataset(data_path+'Raw_data/observations/NSIDC_CDR_v4/SIA_SIE_CDR_BT_NT_79-20_filled.nc')
SII      = xr.open_dataset(data_path+'Raw_data/observations/NSIDC_sea_ice_index_v3/NSIDC_sea_ice_index_SIA_SIE_79-20_filled_including_pole_hole.nc')
HadISST1 = xr.open_dataset(data_path+'Raw_data/observations/HadISST/HadISST1_SIA_SIE_79-20_filled.nc')
Merged   = xr.open_dataset(data_path+'Raw_data/observations/merged_Hadley_OI/merged_Hadley_OI_SIA_SIE_79-20.nc')

#collect a copy of each SIA variable in a single dataset, keyed by the short dataset name
#each entry is (key in obs_SIA, file it is read from, variable name inside that file)
obs_SIA = xr.Dataset({
    obs_key: obs_file[obs_variable].copy()
    for obs_key, obs_file, obs_variable in [
        ('CDR', CDR, 'CDR_SIA'), ('BT', CDR, 'BT_SIA'), ('NT', CDR, 'NT_SIA'),
        ('SII', SII, 'SIA'), ('HadISST1', HadISST1, 'SIA'), ('Merged', Merged, 'SIA'),
    ]
})

#names of the observational datasets, in the order they were added above
obs_SIA_keys = list(obs_SIA.keys())

#open the model SIA file
SIA = xr.open_dataset(data_path+'SIA/SIA_SIE_SIV/CLIVAR_SIA_1850_2100_RCP85.nc')
# SIE = xr.open_dataset(data_path+'SIA/SIA_SIE_SIV/CLIVAR_SIE_1850_2100_RCP85.nc')

# Define resampling functions

In [4]:
def resample_boot1(time_period, data):
    '''
    Draw 1000 bootstrap resamplings of a 1D time series with a 1 year bootstrap size

    Every element of the output is drawn independently, with replacement, from data.

    Parameters
    ----------
    time_period : integer,
        Number of years in each resampling, for 1979-2020 use 42
    data : 1 dimensional array-like (e.g. an xarray dataarray),
        The values to draw from, for 1979-2020 this is an array of shape [42]

    Returns
    ----------
        2D numpy array of 1000 resamplings of the input data, shape: (time_period, 1000)
    '''
    n_resamplings = 1000

    return np.random.choice(data, size=(time_period, n_resamplings), replace=True)

In [5]:
def resample_boot2(time_period, data):
    '''
    Resample a 1D time series 1000 times using a 2 year block bootstrap with replacement

    Each resampling is built from randomly chosen pairs of consecutive years
    (data[i], data[i+1]) placed one after another until time_period years are
    filled. Every pair of consecutive years within the first time_period
    elements of data, including the final pair, can be drawn.

    Parameters
    ----------
    time_period : integer,
        Number of years in each resampling, must be at least 2.
        For 1979-2020 use 42 as the total number of years in that time period.
        If time_period is odd, the last block of each resampling is cut to 1 year.
    data : 1 dimensional xarray dataarray or array-like,
        Only the first time_period elements are sampled from.
        For 1979-2020 this is an array of shape [42]

    Returns
    ----------
        2D xarray dataarray object of 1000 resamplings of the input data, shape: (time_period, 1000),
        dimensions ('year_i', 'resampling'), both with coordinates counting from 1

    Raises
    ----------
    ValueError
        If time_period is smaller than 2, as no 2 year block can be formed
    '''
    n_resamplings = 1000
    block_size    = 2

    if time_period < block_size:
        raise ValueError('time_period must be at least 2 for a 2 year block bootstrap')

    values = np.asarray(data)

    #number of 2 year blocks needed to fill time_period years, rounded up for odd lengths
    n_blocks = (time_period + 1) // block_size

    #index of the first year of every block; np.random.randint excludes its upper bound,
    #so time_period-1 allows starts 0 ... time_period-2 and the last year can be the second half of a block
    block_starts = np.random.randint(0, time_period-1, (n_resamplings, n_blocks))

    #follow each start index by its successor: start_0, start_0+1, start_1, start_1+1, ...
    #then trim to time_period columns (only has an effect for odd time_period)
    block_ind = np.stack((block_starts, block_starts+1), axis=-1)
    all_boot_2_ind = block_ind.reshape(n_resamplings, block_size*n_blocks)[:, :time_period]

    #pick the values, shape (1000, time_period), and transpose (not reshape) to (time_period, 1000)
    #so that every column holds the consecutive years of a single resampling
    resampled_boot_2 = xr.DataArray(data   = values[all_boot_2_ind].T,
                                    coords = {'year_i':np.arange(1,time_period+1,1), 'resampling':np.arange(1,n_resamplings+1,1)},
                                    dims   = ['year_i', 'resampling'])

    return(resampled_boot_2)

# Observations
## Detrend all observational datasets using average observational trend and individual dataset trends

In [49]:
#linear trend of the average of the six observational datasets, fitted separately for each calendar month
start_yr = 1979
end_yr   = 2020

#average of the six observational datasets
obs_mean = (obs_SIA['CDR'] + obs_SIA['BT'] + obs_SIA['NT'] + obs_SIA['SII'] + obs_SIA['HadISST1'] + obs_SIA['Merged']) / 6

#year axis used as the independent variable of every fit
obs_years = np.arange(start_yr, end_yr+1)

mean_trends = []
for month_ in np.arange(1,13,1):
    #all years of the current calendar month
    obs_mean_month = obs_mean.sel(time=obs_mean['time.month']==month_)

    #first order fit: mean_coefs[0] is the gradient, mean_coefs[1] the intercept
    mean_coefs = np.polyfit(obs_years, obs_mean_month.values, 1)
    mean_trend = mean_coefs[0]*obs_years + mean_coefs[1]

    #attach the time stamps of this month to the fitted trend line
    mean_trends.append(xr.DataArray(data = mean_trend, coords={'time':obs_mean_month['time']}, dims=['time']))

#join the 12 monthly trend lines along time (ordered month by month, not chronologically)
mean_trends = xr.concat(mean_trends, dim='time')

In [85]:
#detrend observations based on own trend and the mean trend of the 6 datasets
obs_detrend_individual = {}
obs_detrend_average    = {}

#year axis used as the independent variable of every linear fit
fit_years = np.arange(start_yr, end_yr+1)

for key in obs_SIA_keys:
    detrend_individual_list = []

    for month_ in np.arange(1,13):
        #all years of the current calendar month for this dataset
        obs_month = obs_SIA[key].sel(time=obs_SIA['time.month']==month_)

        #first order fit: coefs[0] is the gradient, coefs[1] the intercept
        coefs = np.polyfit(fit_years, obs_month.values, 1)
        detrend_individual_list.append(obs_month - (coefs[0]*fit_years + coefs[1]))

    #join the 12 monthly anomaly series along time (ordered month by month, not chronologically)
    obs_detrend_individual[key] = xr.concat(detrend_individual_list, dim='time')

    #mean_trends is one DataArray shared by all datasets, it has no entry per dataset name,
    #so it is subtracted as a whole; xarray matches the two operands on their time coordinate
    obs_detrend_average[key] = obs_SIA[key] - mean_trends

In [113]:
#save detrended observations to NetCDF
#global attributes written to the file; the timestamp uses a timezone-aware UTC time
#because datetime.datetime.utcnow() is deprecated, the formatted string is unchanged
dict_attrs = {'Description': 'Detrended Arctic sea ice area (SIA) for six observational datasets 1979-2020: Climate Data Record (CDR), NASA Bootstrap (BT), NASA Team (NT), NSIDC Sea Ice Index (SII), Hadley Centre Sea Ice (HadISST1), Merged Hadley NOAA Optimal Interpolation (Merged). The trend for each month of each dataset is used for detrending.', 
              'Units'      : 'million square km',
              'Timestamp'  : datetime.datetime.now(datetime.timezone.utc).strftime("%H:%M UTC %a %Y-%m-%d"),
              'Data source': 'DOIs - CDR, BT, NT:10.7265/efmz-2t65, SII:10.7265/N5K072F8, HadISST1:10.1029/2002JD002670, Merged:10.5065/r33v-sv91',
              'Analysis'   : 'https://github.com/chrisrwp/synthetic-ensemble/SIA/SIA_detrending_resampling.ipynb'}

#save detrended SIA using the individual dataset trends
obs_detrend_individual = xr.Dataset(obs_detrend_individual)
obs_detrend_individual.attrs = dict_attrs
obs_detrend_individual.to_netcdf(data_path+'SIA/SIA_detrended/Obs_SIA_detrended_individual_79-20.nc')

#save detrended SIA using average trends, same attributes apart from the description
ave_attrs = dict_attrs.copy()
ave_attrs['Description'] = 'Detrended Arctic sea ice area (SIA) for six observational datasets 1979-2020: Climate Data Record (CDR), NASA Bootstrap (BT), NASA Team (NT), NSIDC Sea Ice Index (SII), Hadley Centre Sea Ice (HadISST1), Merged Hadley NOAA Optimal Interpolation (Merged). The trend calculated from the average of the six datasets is used to detrend the datasets for each month.'

obs_detrend_average = xr.Dataset(obs_detrend_average)
obs_detrend_average.attrs = ave_attrs
obs_detrend_average.to_netcdf(data_path+'SIA/SIA_detrended/Obs_SIA_detrended_average_79-20.nc')

## Resample all detrended observational data sets

In [99]:
#reload the detrended observations from disk: first the average-trend file, then the individual-trend file
obs_detrend_average, obs_detrend_individual = (
    xr.open_dataset(data_path+detrended_file)
    for detrended_file in ('SIA/SIA_detrended/Obs_SIA_detrended_average_79-20.nc',
                           'SIA/SIA_detrended/Obs_SIA_detrended_individual_79-20.nc')
)

In [130]:
#resample the detrended observations of every dataset, one calendar month at a time
#to change the bootstrap size, change the function names: [resample_boot2, resample_boot1]
obs_resample_individual = {}
obs_resample_average    = {}

for key in obs_SIA_keys:
    resampled_individual_month = []
    resampled_average_month    = []

    for month_ in np.arange(1,13):
        #anomalies of the current month, detrended with the dataset's own trend
        individual_anomalies = obs_detrend_individual[key].sel(time=obs_detrend_individual['time.month']==month_)
        resampled_individual_month.append(resample_boot2(42, individual_anomalies))

        #anomalies of the current month, detrended with the average trend
        average_anomalies = obs_detrend_average[key].sel(time=obs_detrend_average['time.month']==month_)
        resampled_average_month.append(resample_boot2(42, average_anomalies))

    #stack the 12 monthly resamplings along a new month dimension
    obs_resample_individual[key] = xr.concat(resampled_individual_month, dim='month')
    obs_resample_average[key]    = xr.concat(resampled_average_month, dim='month')

#convert the dictionaries to datasets and label the month dimension 1-12
obs_resample_individual = xr.Dataset(obs_resample_individual)
obs_resample_individual['month'] = np.arange(1,13)

obs_resample_average = xr.Dataset(obs_resample_average)
obs_resample_average['month'] = np.arange(1,13)

In [131]:
#save the resampled data to NetCDF
#global attributes written to the file; the timestamp uses a timezone-aware UTC time
#because datetime.datetime.utcnow() is deprecated, the formatted string is unchanged
dict_attrs = {'Description': 'Resampled Arctic sea ice area 1979-2020 for six datasets: Climate Data Record (CDR), NASA Bootstrap (BT), NASA Team (NT), NSIDC Sea Ice Index (SII), Hadley Centre Sea Ice (HadISST1), Merged Hadley NOAA Optimal Interpolation (Merged). The trend for each month for each dataset is used for detrending, resampling is done 1000 times with a 2 year bootstrap size.', 
              'Units'      : 'million square km',
              'Timestamp'  : datetime.datetime.now(datetime.timezone.utc).strftime("%H:%M UTC %a %Y-%m-%d"),
              'Data source': 'DOIs - CDR, BT, NT:10.7265/efmz-2t65, SII:10.7265/N5K072F8, HadISST1:10.1029/2002JD002670, Merged:10.5065/r33v-sv91',
              'Analysis'   : 'https://github.com/chrisrwp/synthetic-ensemble/SIA/SIA_detrending_resampling.ipynb'}

#save resampled SIA that was detrended using the individual dataset trends
#obs_resample_individual is already an xarray dataset at this point, so it is not wrapped again
obs_resample_individual.attrs = dict_attrs
obs_resample_individual.to_netcdf(data_path+'SIA/SIA_resampled/Obs_SIA_resampled_individual_79-20.nc')

#save resampled SIA that was detrended using average trends, same attributes apart from the description
ave_attrs = dict_attrs.copy()
ave_attrs['Description'] = 'Resampled Arctic sea ice area (SIA) for six observational datasets 1979-2020: Climate Data Record (CDR), NASA Bootstrap (BT), NASA Team (NT), NSIDC Sea Ice Index (SII), Hadley Centre Sea Ice (HadISST1), Merged Hadley NOAA Optimal Interpolation (Merged). The trend calculated from the average of the six datasets is used to detrend the datasets for each month, resampling is done 1000 times with a 2 year bootstrap size.'

obs_resample_average.attrs = ave_attrs
obs_resample_average.to_netcdf(data_path+'SIA/SIA_resampled/Obs_SIA_resampled_average_79-20.nc')

# Models

## Detrend model data using ensemble mean trends and individual member trends

In [29]:
#detrend every model for each calendar month over start_yr to end_yr, once with the
#ensemble mean trend and once with the trend of each individual member
start_yr = 1979
end_yr   = 2020

#year axis used as the independent variable of every linear fit, derived from
#start_yr and end_yr so the period only has to be changed in one place
fit_years = np.arange(start_yr, end_yr+1)

all_model_detrend_ensemble   = {}
all_model_detrend_individual = {}

for model_i, model_name in enumerate(model_names):
    
    detrend_ensemble_list   = []
    detrend_individual_list = []
    
    for month_ in np.arange(1,13):
        
        #all years of the current calendar month within the chosen period
        model_month = SIA[model_name].sel(time=SIA['time.month']==month_).sel(time=slice(str(start_yr),str(end_yr)))
        
        if model_name != 'MPI_ESM1': #MPI ESM1 uses all 100 elements of the member dimension, so no member selection is applied to it
            model_month = model_month.sel(member=slice('1',str(mem_len[model_i])))

        #detrend all members by the ensemble mean trend
        #NOTE(review): subtracting a plain numpy array assumes time is the last dimension of model_month - confirm
        ensemble_coefs = np.polyfit(fit_years, model_month.mean('member').values, 1)
        detrend_ensemble_list.append(model_month - (ensemble_coefs[0]*fit_years + ensemble_coefs[1]))

        #detrend the individual members with their own trend
        #the years are given the time coordinate of the data so xarray can broadcast gradient*year over member and time
        yr_list = xr.DataArray(data = fit_years, coords={'time':model_month['time']}, dims=['time'])
        
        #one fit per member: row 0 of mem_coefs holds the gradients, row 1 the intercepts
        mem_coefs  = np.polyfit(fit_years, model_month.transpose().values, 1)
        mem_coefs  = xr.DataArray(data = mem_coefs, coords={'coef':['grad', 'intercept'], 'member':np.arange(1,mem_len[model_i]+1)}, dims=['coef', 'member'])

        detrend_individual_list.append(model_month - (mem_coefs.sel(coef='grad')*yr_list + mem_coefs.sel(coef='intercept')))
        
    #join the 12 monthly anomaly series along time (ordered month by month, not chronologically)
    all_model_detrend_ensemble[model_name]   = xr.concat((detrend_ensemble_list), dim='time')
    all_model_detrend_individual[model_name] = xr.concat((detrend_individual_list), dim='time')

In [30]:
#save detrended SIA using ensemble trends
all_model_detrend_ensemble   = xr.Dataset(all_model_detrend_ensemble)

#global attributes written to the file; the timestamp uses a timezone-aware UTC time
#because datetime.datetime.utcnow() is deprecated, the formatted string is unchanged
dict_attrs = {'Description': 'Detrended Arctic sea ice area (SIA) for the large ensemble models: CanESM2, CESM1, CSIRO MK3.6, GFDL CM3, GFDL ESM2M, MPI ESM1, 1979-2020. Ensemble mean trend is used to detrend each member.', 
              'Units'      : 'million square km',
              'Timestamp'  : datetime.datetime.now(datetime.timezone.utc).strftime("%H:%M UTC %a %Y-%m-%d"),
              'Data source': 'CLIVAR Large Ensemble Archive, doi:10.1038/s41558-020-0731-2',
              'Analysis'   : 'https://github.com/chrisrwp/synthetic-ensemble/SIA/SIA_detrending_resampling.ipynb'}

all_model_detrend_ensemble.attrs = dict_attrs
all_model_detrend_ensemble.to_netcdf(data_path+'SIA/SIA_detrended/CLIVAR_SIA_detrended_ensemble_79-20.nc')

#save detrended SIA using individual trends, same attributes apart from the description
all_model_detrend_individual = xr.Dataset(all_model_detrend_individual)
individual_attrs = dict_attrs.copy()
individual_attrs['Description'] = 'Detrended Arctic sea ice area (SIA) for the large ensemble models: CanESM2, CESM1, CSIRO MK3.6, GFDL CM3, GFDL ESM2M, MPI ESM1, 1979-2020. Each individual member trend is used to detrend.'
all_model_detrend_individual.attrs = individual_attrs
all_model_detrend_individual.to_netcdf(data_path+'SIA/SIA_detrended/CLIVAR_SIA_detrended_individual_79-20.nc')

## Resample models, 2 year bootstrap size, 1000 times

In [6]:
#reload the detrended model data from disk: first the ensemble-trend file, then the individual-trend file
detrend_ensemble, detrend_individual = (
    xr.open_dataset(data_path+detrended_file)
    for detrended_file in ('SIA/SIA_detrended/CLIVAR_SIA_detrended_ensemble_79-20.nc',
                           'SIA/SIA_detrended/CLIVAR_SIA_detrended_individual_79-20.nc')
)

In [7]:
#calculate the resamplings of all models and months
#to change the bootstrap size, change the function names: [resample_boot2, resample_boot1]
resampled_ensemble_model   = {}
resampled_individual_model = {} 

for model_i, model_name in enumerate(model_names):
    #progress message with the current time, one line per model
    print(datetime.datetime.now(), model_name)
    
    #member numbers of this model, used for the selection and as the member coordinate
    members = np.arange(1,mem_len[model_i]+1)
    
    resampled_ensemble_month   = []
    resampled_individual_month = [] 
    
    for month_ in np.arange(1,13):
        
        #select the current calendar month once per month instead of once per member;
        #each dataset is masked with its own time coordinate
        ensemble_month   = detrend_ensemble[model_name].sel(time=detrend_ensemble['time.month']==month_)
        individual_month = detrend_individual[model_name].sel(time=detrend_individual['time.month']==month_)
        
        resampled_ensemble_member   = []
        resampled_individual_member = [] 
        
        for mem_ in members:
            #select a 1D array of detrended anomalies, resample these 1000 times for each member
            resampled_ensemble_member.append(resample_boot2(42, ensemble_month.sel(member=mem_)))
            resampled_individual_member.append(resample_boot2(42, individual_month.sel(member=mem_)))
        
        #concatenate all the member output data and append it to the list containing data for all months
        resampled_ensemble_month.append(xr.concat((resampled_ensemble_member), dim='member'))
        resampled_individual_month.append(xr.concat((resampled_individual_member), dim='member'))
            
    #stack the 12 months along a new month dimension and label the members 1 to mem_len
    resampled_ensemble_model[model_name] = xr.concat((resampled_ensemble_month), dim='month')
    resampled_ensemble_model[model_name]['member'] = np.arange(1,mem_len[model_i]+1)
    resampled_individual_model[model_name] = xr.concat((resampled_individual_month), dim='month')
    resampled_individual_model[model_name]['member'] = np.arange(1,mem_len[model_i]+1)

2021-07-09 16:43:33.608607 CanESM2
2021-07-09 16:43:45.549778 CESM1
2021-07-09 16:43:54.997387 CSIRO_MK36
2021-07-09 16:44:02.092107 GFDL_CM3
2021-07-09 16:44:06.820155 GFDL_ESM2M
2021-07-09 16:44:13.893382 MPI_ESM1


In [8]:
#save the resampled data to NetCDF
#convert to a dataset and label the month dimension 1-12
resampled_ensemble_model   = xr.Dataset(resampled_ensemble_model)
resampled_ensemble_model['month'] = np.arange(1,13)

#global attributes written to the file; the timestamp uses a timezone-aware UTC time
#because datetime.datetime.utcnow() is deprecated, the formatted string is unchanged
dict_attrs = {'Description': 'Resampled Arctic sea ice area (SIA) for the large ensemble models: CanESM2, CESM1, CSIRO MK3.6, GFDL CM3, GFDL ESM2M, MPI ESM1, 1979-2020. Ensemble mean trend is used to detrend each member. Resampling is done 1000 times with a 2 year bootstrap size.', 
              'Units'      : 'million square km',
              'Timestamp'  : datetime.datetime.now(datetime.timezone.utc).strftime("%H:%M UTC %a %Y-%m-%d"),
              'Data source': 'CLIVAR Large Ensemble Archive, doi:10.1038/s41558-020-0731-2',
              'Analysis'   : 'https://github.com/chrisrwp/synthetic-ensemble/SIA/SIA_detrending_resampling.ipynb'}

resampled_ensemble_model.attrs = dict_attrs
resampled_ensemble_model.to_netcdf(data_path+'SIA/SIA_resampled/CLIVAR_SIA_resampled_ensemble_79-20.nc')

#save resampled SIA that was detrended using individual trends, same attributes apart from the description
resampled_individual_model = xr.Dataset(resampled_individual_model)
resampled_individual_model['month'] = np.arange(1,13)
individual_attrs = dict_attrs.copy()
individual_attrs['Description'] = 'Resampled Arctic sea ice area (SIA) for the large ensemble models: CanESM2, CESM1, CSIRO MK3.6, GFDL CM3, GFDL ESM2M, MPI ESM1, 1979-2020. Each individual member trend is used to detrend. Resampling is done 1000 times with a 2 year bootstrap size.'
resampled_individual_model.attrs = individual_attrs
resampled_individual_model.to_netcdf(data_path+'SIA/SIA_resampled/CLIVAR_SIA_resampled_individual_79-20.nc')

# $\sigma_{LE}$ , $\sigma_{mem}$ , $\sigma_{obs}$ calculations
## $\sigma_{LE}$

In [9]:
#load detrended model data
detrend_ensemble   = xr.open_dataset(data_path+'SIA/SIA_detrended/CLIVAR_SIA_detrended_ensemble_79-20.nc')
detrend_individual = xr.open_dataset(data_path+'SIA/SIA_detrended/CLIVAR_SIA_detrended_individual_79-20.nc')

#standard deviation over time for each calendar month, then the standard deviation of those values along the member dimension
sigma_LE_ensemble   = detrend_ensemble.groupby('time.month').std('time').std('member')
sigma_LE_individual = detrend_individual.groupby('time.month').std('time').std('member')

#save to NetCDF
#global attributes written to the file; the timestamp uses a timezone-aware UTC time
#because datetime.datetime.utcnow() is deprecated, the formatted string is unchanged
attrs_dict = {'Description': 'Variability of Arctic sea ice area (SIA) of six large ensembles 1979-2020 (CanESM2, CESM1, CSIRO MK3.6, GFDL CM3, GFDL ESM2M, MPI ESM1). Standard deviation with respect to members. Computed on the detrended standard deviations with respect to time for each month. Detrending was based on the single model ensemble mean trend.', 
              'Units'      : 'million square km',
              'Timestamp'  : datetime.datetime.now(datetime.timezone.utc).strftime("%H:%M UTC %a %Y-%m-%d"),
              'Data source': 'CLIVAR Large Ensemble Archive, doi:10.1038/s41558-020-0731-2',
              'Analysis'   : 'https://github.com/chrisrwp/synthetic-ensemble/SIA/SIA_detrending_resampling.ipynb'}

sigma_LE_ensemble.attrs = attrs_dict
sigma_LE_ensemble.to_netcdf(data_path+'SIA/SIA_resampled/Sigma_LE_ensemble_79-20.nc')

#same attributes apart from the description for the individually detrended data
ind_attrs_dict = attrs_dict.copy()
ind_attrs_dict['Description'] = 'Variability of Arctic sea ice area (SIA) of six large ensembles 1979-2020 (CanESM2, CESM1, CSIRO MK3.6, GFDL CM3, GFDL ESM2M, MPI ESM1). Standard deviation with respect to members. Computed on the detrended standard deviations with respect to time for each month. Detrending was based on the individual member trend.' 
sigma_LE_individual.attrs = ind_attrs_dict
sigma_LE_individual.to_netcdf(data_path+'SIA/SIA_resampled/Sigma_LE_individual_79-20.nc')

## $\sigma_{mem}$

In [10]:
#load resampled model data
resampled_ensemble   = xr.open_dataset(data_path+'SIA/SIA_resampled/CLIVAR_SIA_resampled_ensemble_79-20.nc')
resampled_individual = xr.open_dataset(data_path+'SIA/SIA_resampled/CLIVAR_SIA_resampled_individual_79-20.nc')

#standard deviation over the resampled years, then the standard deviation of those values along the resampling dimension
sigma_mem_ensemble   = resampled_ensemble.std('year_i').std('resampling')
sigma_mem_individual = resampled_individual.std('year_i').std('resampling')

#save to NetCDF
#global attributes written to the file; the timestamp uses a timezone-aware UTC time
#because datetime.datetime.utcnow() is deprecated, the formatted string is unchanged
attrs_dict = {'Description': 'Variability of Arctic sea ice area (SIA) of six large ensembles 1979-2020 (CanESM2, CESM1, CSIRO MK3.6, GFDL CM3, GFDL ESM2M, MPI ESM1). Standard deviation with respect to 1000 resamplings. Computed on the detrended standard deviations with respect to time for each month. Detrending was based on the single model ensemble mean trend.', 
              'Units'      : 'million square km',
              'Timestamp'  : datetime.datetime.now(datetime.timezone.utc).strftime("%H:%M UTC %a %Y-%m-%d"),
              'Data source': 'CLIVAR Large Ensemble Archive, doi:10.1038/s41558-020-0731-2',
              'Analysis'   : 'https://github.com/chrisrwp/synthetic-ensemble/SIA/SIA_detrending_resampling.ipynb'}

sigma_mem_ensemble.attrs = attrs_dict
sigma_mem_ensemble.to_netcdf(data_path+'SIA/SIA_resampled/Sigma_mem_ensemble_79-20.nc')

#same attributes apart from the description for the individually detrended data
ind_attrs_dict = attrs_dict.copy()
ind_attrs_dict['Description'] = 'Variability of Arctic sea ice area (SIA) of six large ensembles 1979-2020 (CanESM2, CESM1, CSIRO MK3.6, GFDL CM3, GFDL ESM2M, MPI ESM1). Standard deviation with respect to 1000 resamplings. Computed on the detrended standard deviations with respect to time for each month. Detrending was based on the individual member trend.' 
sigma_mem_individual.attrs = ind_attrs_dict
sigma_mem_individual.to_netcdf(data_path+'SIA/SIA_resampled/Sigma_mem_individual_79-20.nc')

## $\sigma_{obs}$

In [5]:
#load resampled observational data
obs_resample_average    = xr.open_dataset(data_path+'SIA/SIA_resampled/Obs_SIA_resampled_average_79-20.nc')
obs_resample_individual = xr.open_dataset(data_path+'SIA/SIA_resampled/Obs_SIA_resampled_individual_79-20.nc')

#standard deviation over the resampled years, then the standard deviation of those values along the resampling dimension
sigma_obs_average    = obs_resample_average.std('year_i').std('resampling')
sigma_obs_individual = obs_resample_individual.std('year_i').std('resampling')

#save to NetCDF
#global attributes written to the file; the timestamp uses a timezone-aware UTC time
#because datetime.datetime.utcnow() is deprecated, the formatted string is unchanged
attrs_dict = {'Description': 'Variability of Arctic sea ice area (SIA) observations 1979-2020. Standard deviation with respect to 1000 resamplings. Computed on the detrended standard deviations with respect to time for each month. Detrending was based on the mean dataset trend.', 
              'Units'      : 'million square km',
              'Timestamp'  : datetime.datetime.now(datetime.timezone.utc).strftime("%H:%M UTC %a %Y-%m-%d"),
              'Data source': 'DOIs - CDR, BT, NT:10.7265/efmz-2t65, SII:10.7265/N5K072F8, HadISST1:10.1029/2002JD002670, Merged:10.5065/r33v-sv91',
              'Analysis'   : 'https://github.com/chrisrwp/synthetic-ensemble/SIA/SIA_detrending_resampling.ipynb'}

sigma_obs_average.attrs = attrs_dict
sigma_obs_average.to_netcdf(data_path+'SIA/SIA_resampled/Sigma_obs_average_79-20.nc')

#same attributes apart from the description for the individually detrended data
ind_attrs_dict = attrs_dict.copy()
ind_attrs_dict['Description'] = 'Variability of Arctic sea ice area (SIA) observations 1979-2020. Standard deviation with respect to 1000 resamplings. Computed on the detrended standard deviations with respect to time for each month. Detrending was based on the individual dataset trend.' 
sigma_obs_individual.attrs = ind_attrs_dict
sigma_obs_individual.to_netcdf(data_path+'SIA/SIA_resampled/Sigma_obs_individual_79-20.nc')