# Detrend Observational and Model SIC with a 5th order Butterworth lowpass filter with a 2 year cutoff

### Author: Chris Wyburn-Powell, [github](https://github.com/chrisrwp/synthetic-ensemble/SIC/Detrend_filter_SIC_obs.ipynb)

**Input observational datasets, as created in this [notebook](https://github.com/chrisrwp/synthetic-ensemble/SIA/SIA_calculations_observations.ipynb):** <br>

- **NOAA/NSIDC CDR version 4 (CDR, BT, NT)**: Pole hole is filled using the average SIC of the surrounding grid cells (built in). Missing months (1984-07, 1987-12, 1988-01) are filled by looking at the closest valid months for SIA (CDR), identifying whether the previous or following year's SIA for those valid months is closest to that of the year with missing data, then selecting the previous or following year's SIC data to fill the missing data. E.g. for 1984-07: SIA for 1983-06 and 1985-06 are compared with 1984-06, and 1983-08 and 1985-08 are compared with 1984-08. 1985 is found to be closer to 1984 than 1983 is, so to fill 1984-07, 1985-07 is copied. Similarly, SIC values for 1988-12 and 1989-01 are used to fill 1987-12 and 1988-01.
- **HadISST1**: Discontinuities were found for months 2009-03 and 2009-04, with extreme negative anomalies which do not appear in other datasets. SIC from 2007-03 is used for 2009-03 and SIC from 2008-04 is used for 2009-04. <br><br>

**Reduced model data was produced in this [notebook](https://github.com/chrisrwp/synthetic-ensemble/blob/main/SIC/Detrend_SIC_models.ipynb), using data from the [CLIVAR Large Ensemble](https://doi.org/10.1038/s41558-020-0731-2)**

In [1]:
import numpy as np
import xarray as xr
import datetime
import scipy.signal as signal

import matplotlib.pyplot as plt

# Print the current UTC time as a record of when the notebook was run.
# A timezone-aware "now" is used because datetime.utcnow() is deprecated
# since Python 3.12; the formatted string is the same.
print(datetime.datetime.now(datetime.timezone.utc).strftime("%H:%M UTC %a %Y-%m-%d"))

16:15 UTC Fri 2022-03-18


In [2]:
# Root directory that the observational input files are read from
data_path = '/glade/scratch/cwpowell/Synthetic_ensemble/'

# Calendar month names; month number m is found at month_names[m - 1]
month_names = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
]

# One row per model: (name, mem_len, model_starts).
# NOTE(review): mem_len is presumably the number of ensemble members and
# model_starts the first simulated year - confirm against the model notebook.
_MODEL_TABLE = (
    ('CanESM2',    50,  1950),
    ('CESM1',      40,  1920),
    ('CSIRO_MK36', 30,  1850),
    ('GFDL_CM3',   20,  1920),
    ('GFDL_ESM2M', 30,  1950),
    ('MPI_ESM1',   100, 1850),
)

# Unzip the table into the three parallel lists used by the rest of the notebook
model_names, mem_len, model_starts = (list(column) for column in zip(*_MODEL_TABLE))

## Test detrending observations with a 2 year lowpass filter

In [4]:
# Open the gap-filled NOAA/NSIDC CDR v4 sea ice concentration file
nsidc_cdr_file = data_path + 'Raw_data/observations/NSIDC_CDR_v4/SIC_CDR_BT_NT_79-20_filled.nc'
NSIDC_CDR = xr.open_dataset(nsidc_cdr_file)

In [23]:
# Select every September from the CDR variable; NSIDC_CDR_09 is reused by later cells
is_september = NSIDC_CDR['time.month'] == 9
NSIDC_CDR_09 = NSIDC_CDR['CDR'].sel(time=is_september)

# Work on a copy so that replacing the grid coordinates with plain integer
# indices (304 along xgrid, 448 along ygrid) does not alter NSIDC_CDR_09
NSIDC_CDR_09_plot = NSIDC_CDR_09.copy()
NSIDC_CDR_09_plot['xgrid'] = np.arange(304)
NSIDC_CDR_09_plot['ygrid'] = np.arange(448)

# Map the first September with grid lines, to help pick grid-cell indices
first_september = NSIDC_CDR_09_plot.isel(time=0)
first_september.plot()
plt.grid()

In [8]:
def filt_lowpass(data, sample_freq, cutoff, order):
    '''
    Filter a 1D time series using a lowpass Butterworth filter.
    Uses scipy.signal.butter to design the filter and scipy.signal.filtfilt
    to apply it forward and backward.

    Parameters
    ----------
    data : 1 dimensional array-like (e.g. an xarray dataarray),
        For 1979-2020 this is an array of shape [42]
    sample_freq: float,
        The sampling frequency of the input data, typically sample_freq=1 [year].
        Accepted for backward compatibility only: it does not affect the result,
        because cutoff is already expressed relative to the Nyquist frequency
    cutoff: float,
        The cutoff as a fraction of the Nyquist frequency, passed unchanged to
        scipy.signal.butter. This notebook uses cutoff=0.25.
        NOTE(review): with sample_freq=1 the Nyquist frequency is 0.5 cycles per
        year, so cutoff=0.25 is 0.125 cycles per year, i.e. a period of 8 years
        rather than the "2 year" filter named elsewhere - confirm the intent
    order: int
        The order of the Butterworth filter, typically 4-6

    Returns
    ----------
        1D numpy array of the same shape as the input data
    '''

    # Design the digital lowpass filter; cutoff is normalised by the Nyquist frequency
    b, a = signal.butter(order, cutoff, btype='lowpass')

    # Apply the filter forward and backward
    return signal.filtfilt(b, a, data)

In [22]:
# Compare two ways of producing anomalies at one grid cell: removing a
# least-squares linear trend versus removing a lowpass filtered series
x_i, y_i = 240, 192   # indices into the 2nd and 3rd dimensions of NSIDC_CDR_09
order_ = 12           # Butterworth filter order used for this comparison

years = np.arange(1979, 2021)
sic_pct = NSIDC_CDR_09[:, x_i, y_i] * 100   # scaled by 100, plotted as SIC [%]

# Degree 1 polynomial fit: coefs[0] is the slope, coefs[1] the intercept
coefs = np.polyfit(years, sic_pct, 1)
linear_trend = years * coefs[0] + coefs[1]
lowpass_series = filt_lowpass(data=sic_pct, sample_freq=1, cutoff=0.25, order=order_)

fig, ax = plt.subplots(2, 1, figsize=[8, 8])
top_panel, bottom_panel = ax

# Top panel: the raw series with both trend estimates
top_panel.plot(years, sic_pct, label='SIC time series', c='r')
top_panel.plot(years, linear_trend, label='Linear trend', c='k')
top_panel.plot(years, lowpass_series, label='2 year lowpass filter', c='b')
top_panel.legend()
top_panel.set_xlim(1979, 2020)
top_panel.set_ylabel('SIC [%]', fontsize=13)
top_panel.set_title('NSIDC CDR, September, x={}, y={}'.format(x_i, y_i), fontsize=15)

# Bottom panel: anomalies left after removing each trend estimate
bottom_panel.plot(years, sic_pct - linear_trend, label='Linear Detrend', c='k')
bottom_panel.plot(years, sic_pct - lowpass_series, label='2 year filter', c='b')
bottom_panel.legend()
bottom_panel.set_xlim(1979, 2020)
bottom_panel.set_ylabel('SIC Anomaly [%]', fontsize=13)
bottom_panel.axhline(0, c='0.5', linestyle='--');

In [3]:
def SIC_filter_detrend_obs(data, month_):
    '''
    Detrend 3D SIC data for one calendar month by subtracting a lowpass
    Butterworth filtered copy of the data (5th order, cutoff 0.25 of the
    Nyquist frequency). No adjustment to the physical limits of 0 and 100% SIC
    is applied.

    Parameters
    ----------
    data : xarray dataarray
         Should have 3 dimensions: [time, lat, lon]. A dimension named 'time'
         with datetime values is required; the filter is applied along it
    month_ : int
        Number of month to select from the data

    Returns
    ----------
        xarray dataarray of anomalies (selected data minus filtered data) with
        the same shape as the selected month's data for 1979-2020
    '''

    #keep only the requested calendar month, then restrict to 1979-2020
    monthly_data = data.sel(time=data['time.month']==month_).sel(time=slice('1979', '2020'))

    #set up the Butterworth filter: lowpass, order 5, cutoff 0.25 of the Nyquist frequency
    b, a = signal.butter(5, 0.25, btype='lowpass')

    #locate the time axis by name rather than assuming it is the first axis
    time_axis = monthly_data.get_axis_num('time')

    #apply the filter forward and backward along time
    smoothed = signal.filtfilt(b, a, monthly_data, axis=time_axis)

    #anomalies relative to the filtered data
    return monthly_data - smoothed

In [13]:
# Open the gap-filled HadISST1 file. To process NSIDC CDR instead,
# load that dataset here with the commented line below.
# NSIDC_CDR = xr.open_dataset(data_path+'Raw_data/observations/NSIDC_CDR_v4/SIC_CDR_BT_NT_79-20_filled.nc')
hadisst_file = data_path + 'Raw_data/observations/HadISST/HadISST1_NH_79-20_filled.nc'
HadISST1 = xr.open_dataset(hadisst_file)

In [16]:
# Detrend each calendar month separately, then join the 12 results along time.
# The dimension order is fixed once, outside the loop, because it does not change.
hadisst_sic = HadISST1['sic'].transpose('time','latitude','longitude')

all_month_filt = []
for month_ in np.arange(1,13):
    # all_month_filt.append(SIC_filter_detrend_obs(NSIDC_CDR['BT'], month_))
    all_month_filt.append(SIC_filter_detrend_obs(hadisst_sic, month_))

# NOTE: the pieces are concatenated month by month, so the resulting time axis
# is grouped by calendar month rather than being in chronological order
all_month_filt_xr = xr.concat(all_month_filt, dim='time')

# Metadata for the HadISST1 output.
# NOTE(review): 'Units' is written as '%' but no unit conversion is applied in
# this cell - confirm the input file already stores SIC in percent.
# The timestamp uses a timezone-aware "now" because datetime.utcnow() is
# deprecated since Python 3.12; the formatted string is the same.
all_month_filt_xr.attrs = {'Description': 'Detrended Arctic sea ice concentrations (SIC) for the observational dataset HadISST1. Years 1979-2020, all months. Detrended with a 2 year 5th order lowpass Butterworth filter for each month.', 
                           'Units'      : '%',
                           'Timestamp'  : datetime.datetime.now(datetime.timezone.utc).strftime("%H:%M UTC %a %Y-%m-%d"),
                           'Data source': 'HadISST1 - DOI: 10.1029/2002JD002670',
                           'Analysis'   : 'https://github.com/chrisrwp/synthetic-ensemble/SIC/Detrend_filter_SIC_obs_models.ipynb'}


In [17]:
# Write the detrended HadISST1 anomalies to NetCDF
hadisst_out_file = '/glade/scratch/cwpowell/Synthetic_ensemble_revisions/SIC/Detrend_filter/HadISST1_SIC_2yr_filter_1979_2020.nc'
all_month_filt_xr.to_netcdf(hadisst_out_file)

# Models
**Start from reduced datasets made in https://github.com/chrisrwp/synthetic-ensemble/blob/main/SIC/Detrend_SIC_models.ipynb** <br>
No need to run dask workers, takes ~6 minutes to run, requires ~10 GB RAM

In [24]:
reduced_data_path = '/glade/campaign/univ/ucub0084/Synthetic_ensemble/SIC/Reduced_datasets/'

##############################################################################################
#set up the Butterworth filter once, as it is identical for every model and month
#lowpass, order 5, cutoff 0.25 of the Nyquist frequency (half the sampling frequency)
#NOTE(review): for yearly data the Nyquist frequency is 0.5 cycles per year, so a cutoff of
#0.25 is 0.125 cycles per year, i.e. a period of 8 years rather than the '2 year' cutoff
#named in the descriptions - confirm the intended cutoff
b, a = signal.butter(5, 0.25, btype='lowpass')

for model_i, model_name in enumerate(model_names):
    print(datetime.datetime.now(), model_name)
    
    for month_ in np.arange(1,13):
        print(datetime.datetime.now(), month_)

        ##############################################################################################
        #load the reduced datasets, each month for each model for 30N+ and 1979-2020
        month_data = xr.open_dataarray(reduced_data_path+'{}_reduced_1979-2020_{}.nc'.format(model_name, str(month_).zfill(2)))

        #print the shape so the position of the 42 element time dimension can be checked
        #NOTE(review): the filter below is applied along axis=1 - confirm that axis is time
        print(np.shape(month_data))

        #GFDL ESM2M uses fraction not percentage for SIC, change to % for this model
        if model_name == 'GFDL_ESM2M':
            month_data = month_data * 100

        ##############################################################################################
        #compute anomalies relative to the filtered data, filtering forward and backward along axis=1
        detrended_filt = month_data - signal.filtfilt(b, a, month_data, axis=1)

        ##############################################################################################
        #save to NetCDF 
        #the timestamp uses a timezone-aware "now" as datetime.utcnow() is deprecated since Python 3.12
        detrended_filt.attrs = {'Description': 'Detrended Arctic sea ice concentrations (SIC) the model {}. Years 1979-2020, month of {}. Detrended using a 5th order Butterworth lowpass filter with a 2 year cutoff.'.format(model_name, month_names[month_-1]), 
                                'Units'      : '%',
                                'Timestamp'  : datetime.datetime.now(datetime.timezone.utc).strftime("%H:%M UTC %a %Y-%m-%d"),
                                'Data source': 'CLIVAR Large Ensemble Archive, doi:10.1038/s41558-020-0731-2',
                                'Analysis'   : 'https://github.com/chrisrwp/synthetic-ensemble/blob/main/SIC/Detrend_filter_SIC_obs_models.ipynb'}

        detrended_filt.to_netcdf('/glade/scratch/cwpowell/Synthetic_ensemble_revisions/SIC/Detrend_filter/{}_detrended_2yr_filter_1979_2020_{}.nc'.format(model_name, str(month_).zfill(2)))  