# Establish minimum $r^2$ values for predictability from persistence

In [2]:
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import datetime

In [3]:
# Load the CMIP6 metadata table. Later cells read its 'model' coordinate to
# decide which models to loop over and its 'doi' variable for file attributes.
cmip6_info_path = (
    '/glade/work/cwpowell/low-frequency-variability/raw_data/CMIP6_info/'
    'CMIP6_modeling_center_members_doi.nc'
)
CMIP6_info = xr.open_dataset(cmip6_info_path)

In [7]:
var_ = 'regional_SIC'

# Region identifiers, calendar months and lag lengths (years). Each sequence
# drives a loop below AND labels the matching concatenated dimension, so
# defining it once keeps the loop and its labels in agreement.
region_ids = [1,2,3,4,5,6,11]
month_numbers = np.arange(1,13)
lag_years = np.arange(1,21)

# model name -> DataArray of lagged autocorrelations with dimensions
# (member, region, month, lag)
corr_SIC = {}

for model_name in CMIP6_info['model'].drop_sel(model=['CAS-ESM2-0']).values:
    print(datetime.datetime.now(), model_name)

    try:
        model_data = xr.open_dataset(
            f'/glade/work/cwpowell/low-frequency-variability/raw_data/'
            f'regional_sea_ice_CMIP6/Regional_SIC_SIT_all_mem_{model_name}_'
            f'1850_2014.nc'
        )
    except FileNotFoundError:
        #skip models that have no regional file on disk
        print(model_name,'FILE NOT FOUND')
        continue

    #the context manager closes the file once this model has been processed
    with model_data:
        members = np.sort(model_data['member'].values)
        all_mems = []
        for mem_ in members:
            print(datetime.datetime.now(), mem_)
            mem_data = model_data[var_].sel(member=mem_)
            all_regions = []
            for region_ in region_ids:
                mem_region = mem_data.sel(region=region_)
                all_months = []
                for month_ in month_numbers:
                    #this selection does not depend on the lag, so it is
                    #done once per month instead of once per lag
                    month_region = mem_region.sel(
                        time=model_data['time.month']==month_)

                    all_lags = []
                    for lag_ in lag_years:
                        #select the base time period
                        sea_ice_init = month_region.sel(
                            time=slice('1920',str(2014-lag_)))
                        #select the same series shifted forward by the lag
                        sea_ice_lagged = month_region.sel(
                            time=slice(str(1920+lag_),'2014'))

                        #xr.corr aligns on coordinates, so give both series
                        #the same positional time axis; assign_coords
                        #returns new objects and leaves month_region intact
                        sea_ice_init = sea_ice_init.assign_coords(
                            time=np.arange(len(sea_ice_init['time'])))
                        sea_ice_lagged = sea_ice_lagged.assign_coords(
                            time=np.arange(len(sea_ice_lagged['time'])))

                        all_lags.append(
                            xr.corr(sea_ice_init, sea_ice_lagged))

                    all_lags_xr = xr.concat(all_lags, dim='lag')
                    all_lags_xr['lag'] = lag_years
                    all_months.append(all_lags_xr)

                all_months_xr = xr.concat(all_months, dim='month')
                all_months_xr['month'] = month_numbers
                all_regions.append(all_months_xr)

            all_regions_xr = xr.concat(all_regions, dim='region')
            all_regions_xr['region'] = region_ids
            all_mems.append(all_regions_xr)

        all_mems_xr = xr.concat(all_mems, dim='member')
        all_mems_xr['member'] = members
        corr_SIC[model_name] = all_mems_xr
    
    
            

In [29]:
# Write one netCDF file per model holding the lagged correlations in corr_SIC.
for model_name in sorted(corr_SIC):
    #pull the DOI out as a plain string; formatting the DataArray itself
    #would write its full repr into the attribute instead of the DOI
    #(assumes 'doi' holds one value per model - TODO confirm)
    doi_model = str(CMIP6_info['doi'].sel(model=model_name).values)
    model_corr = xr.Dataset({'r_value':corr_SIC[model_name]})
    #NOTE(review): the description and file name below say thickness / SIT,
    #but the cell that fills corr_SIC sets var_ = 'regional_SIC' - confirm
    #which variable these files are meant to hold before re-running
    model_corr.attrs = {
        'Description': 'Pearson correlation coefficient between sea ice '\
            f'thickness in {model_name} historical forcing 1920-2014 and '\
            'the same data lagged by 1-20 years, by region, lag and member',
        #timezone-aware replacement for the deprecated datetime.utcnow();
        #the formatted string is the same
        'Timestamp'  : datetime.datetime.now(datetime.timezone.utc).strftime(
            "%H:%M UTC %a %Y-%m-%d"),
        'Data source': '{}, doi:{} . '.format(model_name, doi_model), 
        'Analysis'   : 'https://github.com/chrisrwp/low-frequency-variability/'\
            'blob/main/null_model/Null_model_persistence.ipynb'
    }
    
    model_corr.to_netcdf(
        '/glade/work/cwpowell/low-frequency-variability/null_model/'
        f'Pearson_correlation_SIT_lagged_1_20_years_{model_name}.nc'
    )

## Now compute Pearson correlation coefficient for lowpass filtered data

In [24]:
# Region identifiers, calendar months and lag lengths (years). Each sequence
# drives a loop below AND labels the matching concatenated dimension, so
# defining it once keeps the loop and its labels in agreement.
region_ids = [1,2,3,4,5,6,11]
month_numbers = np.arange(1,13)
lag_years = np.arange(1,21)

# model name -> DataArray of lagged autocorrelations with dimensions
# (member, region, month, lag); this rebinds the corr_SIC name
corr_SIC = {}

for model_name in CMIP6_info['model'].drop_sel(model=['CAS-ESM2-0']).values:
    print(datetime.datetime.now(), model_name)

    try:
        model_data = xr.open_dataset(
            f'/glade/work/cwpowell/low-frequency-variability/input_data/'
            f'Regional_SIC_detrended_lowpass_{model_name}_1920_2014.nc'
        )
    except FileNotFoundError:
        #skip models that have no lowpass-filtered file on disk
        print(model_name,'FILE NOT FOUND')
        continue

    #the context manager closes the file once this model has been processed
    with model_data:
        members = np.sort(model_data['member'].values)
        all_mems = []
        for mem_ in members:
            mem_data = model_data['SIC'].sel(member=mem_)
            all_regions = []
            for region_ in region_ids:
                mem_region = mem_data.sel(region=region_)
                all_months = []
                for month_ in month_numbers:
                    #this selection does not depend on the lag, so it is
                    #done once per month instead of once per lag
                    month_region = mem_region.sel(month=month_)

                    all_lags = []
                    for lag_ in lag_years:
                        #select the base time period
                        sea_ice_init = month_region.sel(
                            year=slice('1920',str(2014-lag_)))
                        #select the same series shifted forward by the lag
                        sea_ice_lagged = month_region.sel(
                            year=slice(str(1920+lag_),'2014'))

                        #xr.corr aligns on coordinates, so give both series
                        #the same positional year axis; assign_coords
                        #returns new objects and leaves month_region intact
                        sea_ice_init = sea_ice_init.assign_coords(
                            year=np.arange(len(sea_ice_init['year'])))
                        sea_ice_lagged = sea_ice_lagged.assign_coords(
                            year=np.arange(len(sea_ice_lagged['year'])))

                        all_lags.append(
                            xr.corr(sea_ice_init, sea_ice_lagged))

                    all_lags_xr = xr.concat(all_lags, dim='lag')
                    all_lags_xr['lag'] = lag_years
                    all_months.append(all_lags_xr)

                all_months_xr = xr.concat(all_months, dim='month')
                all_months_xr['month'] = month_numbers
                all_regions.append(all_months_xr)

            all_regions_xr = xr.concat(all_regions, dim='region')
            all_regions_xr['region'] = region_ids
            all_mems.append(all_regions_xr)

        all_mems_xr = xr.concat(all_mems, dim='member')
        all_mems_xr['member'] = members
        corr_SIC[model_name] = all_mems_xr
        
        

2022-12-20 13:33:56.587116 ACCESS-CM2
2022-12-20 13:34:33.472528 ACCESS-ESM1-5
2022-12-20 13:36:35.123465 AWI-CM-1-1-MR
AWI-CM-1-1-MR FILE NOT FOUND
2022-12-20 13:36:35.123911 AWI-ESM-1-1-LR
AWI-ESM-1-1-LR FILE NOT FOUND
2022-12-20 13:36:35.124055 BCC-CSM2-MR
2022-12-20 13:37:12.069236 BCC-ESM1
2022-12-20 13:37:47.739910 CAMS-CSM1-0
2022-12-20 13:38:24.163759 CESM2
2022-12-20 13:40:36.982015 CESM2-FV2
2022-12-20 13:41:13.602406 CESM2-WACCM
2022-12-20 13:41:49.587605 CESM2-WACCM-FV2
2022-12-20 13:42:26.704075 CIESM
2022-12-20 13:43:02.565020 CMCC-CM2-SR5
2022-12-20 13:43:14.639615 CNRM-CM6-1
2022-12-20 13:47:27.525701 CNRM-CM6-1-HR
CNRM-CM6-1-HR FILE NOT FOUND
2022-12-20 13:47:27.526159 CNRM-ESM2-1
2022-12-20 13:49:34.730823 CanESM5
2022-12-20 14:02:33.212600 CanESM5-CanOE
2022-12-20 14:03:09.417091 E3SM-1-0
E3SM-1-0 FILE NOT FOUND
2022-12-20 14:03:09.417907 E3SM-1-1
2022-12-20 14:03:21.998853 E3SM-1-1-ECA
2022-12-20 14:03:34.124941 EC-Earth3
2022-12-20 14:07:35.307541 EC-Earth3-Veg
202

In [25]:
# Write one netCDF file per model holding the lagged correlations in corr_SIC.
for model_name in sorted(corr_SIC):
    #pull the DOI out as a plain string; formatting the DataArray itself
    #would write its full repr into the attribute instead of the DOI
    #(assumes 'doi' holds one value per model - TODO confirm)
    doi_model = str(CMIP6_info['doi'].sel(model=model_name).values)
    model_corr = xr.Dataset({'r_value':corr_SIC[model_name]})
    model_corr.attrs = {
        'Description': 'Pearson correlation coefficient between sea ice '\
            f'concentration in {model_name} historical forcing 1920-2014 '\
            'with a 2 year lowpass filter. The same data is lagged by 1-20 '\
            'years, by region, lag and member',
        #timezone-aware replacement for the deprecated datetime.utcnow();
        #the formatted string is the same
        'Timestamp'  : datetime.datetime.now(datetime.timezone.utc).strftime(
            "%H:%M UTC %a %Y-%m-%d"),
        'Data source': '{}, doi:{} . '.format(model_name, doi_model), 
        'Analysis'   : 'https://github.com/chrisrwp/low-frequency-variability/'\
            'blob/main/null_model/Null_model_persistence.ipynb'
    }
    
    model_corr.to_netcdf(
        '/glade/work/cwpowell/low-frequency-variability/null_model/'
        f'Pearson_correlation_SIC_lagged_1_20_years_lowpass_{model_name}.nc'
    )