# Detrended SIA data, observations and models

### Author: Chris Wyburn-Powell, [github](https://github.com/chrisrwp/synthetic-ensemble/SIA/SIA_calculations_observations.ipynb)

**Input**: <br>
Models:
- SIA from CLIVAR LE archive `CLIVAR_SIA_1850_2100_RCP85.nc` <br>
<br>
Observations:
- NSIDC CDR SIA (as interpolated and 
- Trends from NSIDC CDR SIA and mean trend for all datasets

**Output**: <br>
- **$\sigma_{LE}$**  : Standard deviations of detrended models without resampling
- **$\sigma_{mem}$** : Standard deviations of detrended resampled models
- **$\sigma_{obs}$** : Standard deviations of detrended resampled observations


**TO DO:** <br>
- Select what observational data to detrend and resample. Look at variance between model ensemble members and see if using the average of the 5 obs datasets produces a similar uncertainty to the actual observed linear trend.

In [106]:
import xarray as xr
import numpy as np
import datetime

In [3]:
#root directory that every input and output path in this notebook is built from
data_path = '/glade/scratch/cwpowell/Synthetic_ensemble/'

#one row per large ensemble model: (name, number of members, start year)
#NOTE(review): start year is presumably the first year of each model's simulation - confirm
_model_table = (
    ('CanESM2',    50,  1950),
    ('CESM1',      40,  1920),
    ('CSIRO_MK36', 30,  1850),
    ('GFDL_CM3',   20,  1920),
    ('GFDL_ESM2M', 30,  1950),
    ('MPI_ESM1',   100, 1850),
)

#parallel lists, all indexed by the position of the model in the table above
model_names  = [row[0] for row in _model_table]
mem_len      = [row[1] for row in _model_table]
model_starts = [row[2] for row in _model_table]

In [5]:
#load observational data





#load model data: sea ice area (SIA) from the CLIVAR large ensemble archive file
sia_file = data_path+'SIA/SIA_SIE_SIV/CLIVAR_SIA_1850_2100_RCP85.nc'
SIA = xr.open_dataset(sia_file)
#the matching SIE file is not loaded; kept here for reference only
# SIE = xr.open_dataset(data_path+'SIA/SIA_SIE_SIV/CLIVAR_SIE_1850_2100_RCP85.nc')

# Define resampling functions

In [135]:
def resample_boot1(time_period, data):
    '''
    Resample a 1D time series with replacement using a 1 year bootstrap size,
    i.e. every element of every resampling is drawn independently from the data
    
    Parameters
    ----------
    time_period : integer,
        Number of values drawn for each resampling
        For 1979-2020 use 42 as the total number of years in that time period
    data : 1 dimensional xarray dataarray,
        For 1979-2020 this is an array of shape [42] 
    
    Returns
    ----------
        2D xarray dataarray object of 1000 resamplings of the input data, 
        dimensions: (year_i, resampling), shape: (time_period, 1000)
        The dimensions and coordinates match those returned by resample_boot2 
        so either function can be used in the same place
    ''' 
    n_resamplings = 1000
    
    #draw from the plain values so the result does not depend on the type of the input container
    resampled = np.random.choice(np.asarray(data), (time_period, n_resamplings), replace=True)
    
    #label the result the same way as resample_boot2 so it can be concatenated with xarray
    resampled_boot_1 = xr.DataArray(data   = resampled,
                                    coords = {'year_i':np.arange(1,time_period+1,1), 'resampling':np.arange(1,n_resamplings+1,1)},
                                    dims   = ['year_i', 'resampling'])
    
    return resampled_boot_1

In [134]:
def resample_boot2(time_period, data):
    '''
    Resample a 1D time series using a 2 year block bootstrap size with replacement
    
    Each resampling is built from randomly chosen pairs of consecutive elements of
    the input, placed one after another until time_period values have been selected.
    If time_period is odd, only the first element of the final pair is used.
    
    Parameters
    ----------
    time_period : integer,
        For 1979-2020 use 42 as the total number of years in that time period
    data : 1 dimensional xarray dataarray,
        For 1979-2020 this is an array of shape [42] 
    
    Returns
    ----------
        2D xarray dataarray object of 1000 resamplings of the input data, 
        dimensions: (year_i, resampling), shape: (time_period, 1000)
    
    Raises
    ----------
        ValueError if data is not 1 dimensional with exactly time_period elements,
        or if time_period is shorter than one block (2)
    '''  
    n_resamplings = 1000
    block_len     = 2
    
    values = np.asarray(data)
    if values.ndim != 1 or values.size != time_period:
        raise ValueError('data must be 1 dimensional with time_period elements')
    if time_period < block_len:
        raise ValueError('time_period must be at least the block size of 2')
    
    #number of blocks needed to cover the time period, rounded up for odd time periods
    n_blocks = -(-time_period // block_len)
    
    #random index of the first element of each block; the upper bound of randint is exclusive
    #so starts span 0 ... time_period-2 and the following element can reach the final year
    block_starts = np.random.randint(0, time_period-block_len+1, (n_resamplings, n_blocks))
    
    #expand each start index into the consecutive indexes of its block (start, start+1), 
    #lay the blocks out in order along the year axis and trim any surplus from an odd time period
    all_ind = (block_starts[:, :, np.newaxis] + np.arange(block_len)).reshape(n_resamplings, n_blocks*block_len)
    all_ind = all_ind[:, :time_period]
    
    #select the chosen elements directly from the data, shape: (resampling, year_i)
    resampled = values[all_ind]
    
    #transpose so that each column is one resampling and the blocks stay contiguous along year_i
    resampled_boot_2 = xr.DataArray(data   = resampled.T,
                                    coords = {'year_i':np.arange(1,time_period+1,1), 'resampling':np.arange(1,n_resamplings+1,1)},
                                    dims   = ['year_i', 'resampling'])

    return resampled_boot_2

# Observations
## Detrend all observational data sets

In [None]:
#detrend based on own trend



#detrend based on average trend from all observational data sets

In [None]:
#save the detrended data to NetCDF

## Resample all detrended observational data sets

In [None]:
#save the resampled data to NetCDF

# Models

## Detrend model data using ensemble mean trends and individual member trends

In [122]:
start_yr = 1979
end_yr   = 2020

#calendar years of the analysis period, used as the x values for every linear fit
years = np.arange(start_yr, end_yr+1)

#detrended data for each model, keyed by model name
all_model_detrend_ensemble   = {}
all_model_detrend_individual = {}

for model_i, model_name in enumerate(model_names):
    print(datetime.datetime.now(), model_name)
    
    #one detrended dataarray per calendar month for this model
    detrend_ensemble_list   = []
    detrend_individual_list = []
    
    for month_ in np.arange(1,13):
        
        #select this calendar month within the analysis period
        model_month = SIA[model_name].sel(time=SIA['time.month']==month_).sel(time=slice(str(start_yr),str(end_yr)))
        
        #MPI ESM1 uses all 100 elements of the member dimension, other models are cut to their own ensemble size
        if model_name != 'MPI_ESM1':
            model_month = model_month.sel(member=slice('1',str(mem_len[model_i])))

        #years labelled with the time coordinate so trends are subtracted along time whatever the dimension order
        yr_list = xr.DataArray(data = years, coords={'time':model_month['time']}, dims=['time'])

        #detrend all members by the ensemble mean trend
        ensemble_coefs = np.polyfit(years, model_month.mean('member').values, 1)
        ensemble_trend = ensemble_coefs[0]*yr_list + ensemble_coefs[1]
        detrend_ensemble_list.append(model_month - ensemble_trend)

        #detrend the individual members with their own trend
        #polyfit needs the data as (time, member) and returns one (gradient, intercept) column per member
        mem_coefs = np.polyfit(years, model_month.transpose('time', 'member').values, 1)
        mem_coefs = xr.DataArray(data = mem_coefs, coords={'coef':['grad', 'intercept'], 'member':model_month['member'].values}, dims=['coef', 'member'])

        #build the full linear trend first so that the gradient term AND the intercept are both subtracted
        member_trend = mem_coefs.sel(coef='grad')*yr_list + mem_coefs.sel(coef='intercept')
        detrend_individual_list.append(model_month - member_trend)
        
    #join the 12 monthly arrays back together along time (ordered by month, then year)
    all_model_detrend_ensemble[model_name]   = xr.concat((detrend_ensemble_list), dim='time')
    all_model_detrend_individual[model_name] = xr.concat((detrend_individual_list), dim='time')
    

2021-06-23 12:12:26.290036 CanESM2
2021-06-23 12:12:26.418453 CESM1
2021-06-23 12:12:26.551278 CSIRO_MK36
2021-06-23 12:12:26.683126 GFDL_CM3
2021-06-23 12:12:26.815022 GFDL_ESM2M
2021-06-23 12:12:26.946902 MPI_ESM1


In [126]:
#save detrended SIA using ensemble trends
all_model_detrend_ensemble   = xr.Dataset(all_model_detrend_ensemble)

#global attributes shared by both output files, only the description differs between them
dict_attrs = {'Description': 'Detrended Arctic sea ice area (SIA) for the large ensemble models: CanESM2, CESM1, CSIRO MK3.6, GFDL CM3, GFDL ESM2M, MPI ESM1, 1979-2020. Ensemble mean trend is used to detrend each member', 
              'Units'      : 'million square km',
              'Timestamp'  : datetime.datetime.now(datetime.timezone.utc).strftime("%H:%M UTC %a %Y-%m-%d"),
              'Data source': 'CLIVAR Large Ensemble Archive, doi:10.1038/s41558-020-0731-2',
              'Analysis'   : 'https://github.com/chrisrwp/synthetic-ensemble/SIA/SIC_detrending.ipynb'}

all_model_detrend_ensemble.attrs = dict_attrs
all_model_detrend_ensemble.to_netcdf(data_path+'SIA/SIA_detrended/CLIVAR_SIA_detrended_ensemble_79-20.nc')

#save detrended SIA using individual trends
all_model_detrend_individual = xr.Dataset(all_model_detrend_individual)
individual_attrs = dict_attrs.copy()
individual_attrs['Description'] = 'Detrended Arctic sea ice area (SIA) for the large ensemble models: CanESM2, CESM1, CSIRO MK3.6, GFDL CM3, GFDL ESM2M, MPI ESM1, 1979-2020. Each individual member trend is used to detrend'
all_model_detrend_individual.attrs = individual_attrs
#write the individually detrended dataset (not the ensemble one) to the individual file
all_model_detrend_individual.to_netcdf(data_path+'SIA/SIA_detrended/CLIVAR_SIA_detrended_individual_79-20.nc')

## Resample models, 2 year bootstrap size, 1000 times

In [None]:
#import detrended data: one file detrended by the ensemble mean trend, one by each member's own trend
ensemble_file   = data_path+'SIA/SIA_detrended/CLIVAR_SIA_detrended_ensemble_79-20.nc'
individual_file = data_path+'SIA/SIA_detrended/CLIVAR_SIA_detrended_individual_79-20.nc'

detrend_ensemble   = xr.open_dataset(ensemble_file)
detrend_individual = xr.open_dataset(individual_file)

In [136]:
#calculate the resamplings of all models and months
#to change the bootstrap size, change the function names: [resample_boot2, resample_boot1]
resampled_ensemble_model   = {}
resampled_individual_model = {} 

for model_i, model_name in enumerate(model_names):
    print(datetime.datetime.now(), model_name)
    
    #one entry per calendar month, each holding all members of this model
    resampled_ensemble_month   = []
    resampled_individual_month = [] 
    
    for month_ in np.arange(1,13):
        
        #select the calendar month once per dataset, each masked with its own time coordinate
        ensemble_month   = detrend_ensemble[model_name].sel(time=detrend_ensemble['time.month']==month_)
        individual_month = detrend_individual[model_name].sel(time=detrend_individual['time.month']==month_)
        
        #length of each series to resample, taken from the data (42 for 1979-2020)
        n_years_ensemble   = ensemble_month.sizes['time']
        n_years_individual = individual_month.sizes['time']
        
        resampled_ensemble_member   = []
        resampled_individual_member = [] 
        
        for mem_ in np.arange(1,mem_len[model_i]+1):
            #select a 1D array of detrended anomalies, resample these 1000 times for each member
            resampled_ensemble_member.append(resample_boot2(n_years_ensemble, ensemble_month.sel(member=mem_)))
            resampled_individual_member.append(resample_boot2(n_years_individual, individual_month.sel(member=mem_)))
        
        #concatenate all the member output data and append it to the list containing data for all months
        resampled_ensemble_month.append(xr.concat((resampled_ensemble_member), dim='member'))
        resampled_individual_month.append(xr.concat((resampled_individual_member), dim='member'))
            
    #concatenate the 12 months into a single dataarray per model
    resampled_ensemble_model[model_name] = xr.concat((resampled_ensemble_month), dim='month')
    resampled_individual_model[model_name] = xr.concat((resampled_individual_month), dim='month')

In [None]:
#save the resampled data to NetCDF