# Resample detrended model SIC

### Author: Chris Wyburn-Powell, [github](https://github.com/chrisrwp/synthetic-ensemble/SIC/Resample_models_obs.ipynb)

**Input**: <br>
- Detrended CLIVAR LE Archive model output from CanESM2, CESM1, CSIRO MK3.6, GDL CM3, GFDL ESM2M, MPI ESM1. Detrended in the with respect to the following metrics:
  * Ensemble mean, i.e. the linear trend of the mean of all members
  * Ensemble mean with adjustments so the ensemble mean trend does not reach below 0% or above 100% SIC
  * Individual mean, i.e. the linear trend of the member which is being detrended 
  * Individual mean with adjustments to within 0-100% SIC

**Output**: <br>
- Standard deviation (with respect to time) of 1000 resamplings of SIC with a 2 year block bootstrap size
  
**Method**: <br>
- Resampling uses a 2 year block bootstrap size
- Use a linear trend as calculated by `xarray.DataArray.polyfit` to detrend the data

In [1]:
import numpy as np
import xarray as xr
import datetime
import dask

print(datetime.datetime.utcnow().strftime("%H:%M UTC %a %Y-%m-%d"))

17:43 UTC Sat 2021-07-24


In [3]:
data_path = '/glade/scratch/cwpowell/Synthetic_ensemble/'

model_names  = ['CanESM2', 'CESM1', 'CSIRO_MK36', 'GFDL_CM3', 'GFDL_ESM2M', 'MPI_ESM1' ]
mem_len      = [50,        40,      30,           20,         30,           100        ]
model_starts = [1950,      1920,    1850,         1920,       1950,         1850       ]

In [5]:
model_name = 'CanESM2'
adj = 'adj_'
ind_ens = 'ensemble'
month_ = 9

detrended = xr.open_dataset(data_path+'SIC/Detrended/{}_detrended_{}{}_{}_1979_2020.nc'.format(model_name, adj, str(month_).zfill(2), ind_ens))  

In [42]:
detrended

In [39]:
np.random.randint(0,time_period-2, (1000, int(time_period/2), len(data[lat_lab]), len(data[lon_lab])))

NameError: name 'lat_lab' is not defined

In [9]:
time_period = 42
np.random.randint(0,time_period-2, (1000,int(time_period/2)))

array([[23, 28, 29, ..., 39, 38, 21],
       [17,  2,  0, ..., 27, 19, 18],
       [26, 22,  2, ..., 35, 26, 39],
       ...,
       [23, 27, 16, ...,  5, 10,  7],
       [14,  8, 38, ...,  8, 37,  7],
       [35, 24, 39, ..., 31, 21, 36]])

In [40]:
def resample_boot2_mem(data, lat_lab, lon_lab):
    '''
    Resample a 2D time series using a 2 year block boostrap size with replacement
    2D so can only resample one member at a time
    
    Parameters
    ----------
    time_period : integer,
        For 1979-2020 use 42 as the total number of years in that time period
    data : 1 dimensional xarray dataarray,
        For 1979-2020 this is an array of shape [lat, lon, time:42] 
    
    Returns
    ----------
        3D xarray dataarray object of 1000 resamplings of the input data, shape: (lat, lon, resampling:1000)
    '''  
#     #initialize an empty numpy array
#     resampled_member = np.empty(np.append(1000, list(np.shape(detrended['SIC'].sel(member=1)))))
    
    #create an xarray dataarray of indexes for half the length of the time period, year_i coordinates 1,3,5...
    boot_2_first_ind = xr.DataArray(data   = np.random.randint(0,time_period-2, (1000, int(time_period/2), len(data[lat_lab]), len(data[lon_lab]))), 
                                    coords = {'resampling':np.arange(1,1001,1), 'year_i':np.arange(1,time_period+1,2), lat_lab:data[lat_lab], lon_lab:data[lat_lab]},
                                    dims   = ['resampling', 'year_i', lat_lab, lon_lab])

    return(boot_2_first_ind)
    
    

In [41]:
resample_boot2_mem(detrended, 'lat', 'lon')

ValueError: coordinate 'lon' is a DataArray dimension, but it has shape (21,) rather than expected shape (128,) matching the dimension size

In [None]:
def resample_boot2_mem(time_period, data):
    '''
    Resample a 1D time series using a 2 year block boostrap size with replacement
    
    Parameters
    ----------
    time_period : integer,
        For 1979-2020 use 42 as the total number of years in that time period
    data : 1 dimensional xarray dataarray,
        For 1979-2020 this is an array of shape [lat, lon, time:42] 
    
    Returns
    ----------
        3D xarray dataarray object of 1000 resamplings of the input data, shape: (lat, lon, resampling:1000)
    '''  
    #create an xarray dataarray of indexes for half the length of the time period, year_i coordinates 1,3,5...
    boot_2_first_ind = xr.DataArray(data   = np.random.randint(0,time_period-2, (1000,int(time_period/2))), 
                                    coords = {'resampling':np.arange(1,1001,1), 'year_i':np.arange(1,time_period+1,2)},
                                    dims   = ['resampling', 'year_i'])

    #create an identical dataarray but with each element incremented by 1, year_i coordinates 2,4,6....
    boot_2_second_ind = (boot_2_first_ind+1).copy()
    boot_2_second_ind['year_i'] = np.arange(2,time_period+2,2)

    #concatenate the two arrays with the coordinates in order, this allows a 2 year block boostrap size
    all_boot_2_ind = xr.concat((boot_2_first_ind, boot_2_second_ind), dim='year_i').sortby('year_i')
    
    #create an array with the starting element of the flattened array for each resampling 0, 42, 84...
    ind_base = np.repeat(np.arange(0,time_period*1000,time_period),time_period)
    
    #add together the base indexes (0,42,84...) with the randomly chosen indexes within the original data
    ind_1_d = np.ravel(all_boot_2_ind) + ind_base
    
    #copy the original data 1000 times as a 1D array so it will have the same indexes as we just made for ind_1_d
    data_1000 = np.ravel(np.tile(data,(time_period,1000)))
    
    #select the randomly generated indexes from the flattened copied original data, reshape and save to xarray dataarray
    resampled_boot_2 = xr.DataArray(data = np.reshape(data_1000[ind_1_d], (time_period,1000)),
                                    coords = {'year_i':np.arange(1,time_period+1,1), 'resampling':np.arange(1,1001,1)},
                                    dims   = ['year_i', 'resampling'])

    return(resampled_boot_2)