# Significance Testing

**Not following Santer et al. (2008)**

In [1]:
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
import matplotlib.patheffects as pe
import matplotlib as mpl
import xarray as xr
import numpy as np
import math as m
import scipy.stats as stats

In [2]:
# Root directory that every input file below is read from
data_path = '/glade/scratch/cwpowell/Synthetic_ensemble/'

# Calendar labels: full month names, and three-letter abbreviations derived from them
month_names = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
]
month_names_short = [full_name[:3] for full_name in month_names]

# Model identifiers: underscore form is used as the variable key when loading,
# the space-separated form is printed on graphs
model_names = ['CanESM2', 'CESM1', 'CSIRO_MK36', 'GFDL_CM3', 'GFDL_ESM2M', 'MPI_ESM1']
model_print_names = [identifier.replace('_', ' ') for identifier in model_names]

# Upper member label used when slicing each model's 'member' dimension (model_names order)
member_numbers = [50, 40, 30, 20, 30, 100]

# One colour per model, in model_names order:
# CanESM2 -> 'm', CESM1 -> 'b', CSIRO MK36 -> 'g', GFDL CM3 -> 'orange',
# GFDL ESM2M -> 'k', MPI ESM1 -> 'tab:olive'
colors = ['m', 'b', 'g', 'orange', 'k', 'tab:olive']
# Six hex colours; presumably for the observational datasets (going by the name) - confirm
obs_colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b']

In [213]:
#load raw SIA data (all files are opened relative to data_path)
#models: one file for all models (not referenced again in the cells visible in this notebook)
SIA = xr.open_dataset(data_path+'SIA/SIA_SIE_SIV/CLIVAR_SIA_1850_2100_RCP85.nc')

#observations (not referenced again in the cells visible in this notebook)
CDR  = xr.open_dataset(data_path+'Raw_data/observations/NSIDC_CDR_v4/SIA_SIE_CDR_BT_NT_79-20_filled.nc')
HadISST1 = xr.open_dataset(data_path+'Raw_data/observations/HadISST/HadISST1_SIA_SIE_79-20_filled.nc')

#load resampling statistics
#sigma_mem_individual is indexed below by model name and has 'month' and 'member' dimensions;
#sigma_obs_individual is indexed below by observational dataset name ('CDR','NT','BT','HadISST1',
#plus 'SII' and 'Merged', which the later cells drop) and has a 'month' dimension
sigma_mem_individual = xr.open_dataset(data_path+'SIA/SIA_resampled/Sigma_mem_individual_10000.nc')
sigma_obs_individual = xr.open_dataset(data_path+'SIA/SIA_resampled/Sigma_obs_individual_10000.nc')

#mu counterparts of the sigma files (not referenced again in the cells visible in this notebook)
mu_mem_individual = xr.open_dataset(data_path+'SIA/SIA_resampled/Mu_mem_individual_10000.nc')
mu_obs_individual = xr.open_dataset(data_path+'SIA/SIA_resampled/Mu_obs_individual_10000.nc')

## Simple t-tests

In [5]:
# One-sample t-test of each model's member sigmas against every observational sigma,
# for each calendar month. Result is nested as [obs dataset][model][month].
p_vals = []
for obs_data in ['CDR','NT','BT','HadISST1']:

    p_val_obs = []
    for model_i, model_name in enumerate(model_names):

        # MPI has the full 100 members so do not sel on member dimension;
        # every other model is cut to its first member_numbers[model_i] member labels
        sigmas_all_months = sigma_mem_individual[model_name]
        if model_name != 'MPI_ESM1':
            sigmas_all_months = sigmas_all_months.sel(member=slice('1', str(member_numbers[model_i])))

        p_val_obs_model = []
        for month_ in np.arange(1,13):
            model_sigmas = sigmas_all_months.sel(month=month_)
            obs_sigma = sigma_obs_individual[obs_data].sel(month=month_)
            p_val_obs_model.append(stats.ttest_1samp(model_sigmas, obs_sigma).pvalue)

        p_val_obs.append(p_val_obs_model)
    p_vals.append(p_val_obs)

p_vals_xr = xr.DataArray(
    data=p_vals,
    dims=['obs_dataset','model','month'],
    coords={'obs_dataset':['CDR','NT','BT','HadISST1'], 'model':model_names, 'month':np.arange(1,13)},
)

# Binary flag: 1 where p > 0.05 (not significantly different), 0 everywhere else
not_significant = p_vals_xr > 0.05
p_vals_sig = p_vals_xr.where(not_significant, 0).where(~not_significant, 1)

In [17]:
# For every month, record which observational dataset (as an index into obs_datasets)
# has the smallest and the largest sigma_obs
obs_datasets = ['CDR','NT','BT','HadISST1']
min_list, max_list = [], []

for month_ in np.arange(1,13):
    # one value per dataset, in obs_datasets order
    month_array = [sigma_obs_individual[obs_name].sel(month=month_).values for obs_name in obs_datasets]

    min_list.append(np.argmin(month_array))
    max_list.append(np.argmax(month_array))

In [22]:
#p-values of each model against the observational dataset with the minimum / maximum sigma_obs
p_vals_min = []
p_vals_max = []
for model_i, model_name in enumerate(model_names):

    # MPI has the full 100 members so do not sel on member dimension
    sigmas_all_months = sigma_mem_individual[model_name]
    if model_name != 'MPI_ESM1':
        sigmas_all_months = sigmas_all_months.sel(member=slice('1', str(member_numbers[model_i])))

    p_val_model_min = []
    p_val_model_max = []
    for month_index, month_ in enumerate(np.arange(1,13)):
        model_sigmas = sigmas_all_months.sel(month=month_)

        #names of the observational datasets with the smallest and largest sigma_obs this month
        min_month = obs_datasets[min_list[month_index]]
        max_month = obs_datasets[max_list[month_index]]

        #run the t-tests and keep only the p-values
        test_vs_min = stats.ttest_1samp(model_sigmas, sigma_obs_individual[min_month].sel(month=month_))
        test_vs_max = stats.ttest_1samp(model_sigmas, sigma_obs_individual[max_month].sel(month=month_))

        p_val_model_min.append(test_vs_min.pvalue)
        p_val_model_max.append(test_vs_max.pvalue)

    p_vals_min.append(p_val_model_min)
    p_vals_max.append(p_val_model_max)

model_month_coords = {'model':model_names, 'month':np.arange(1,13)}
p_vals_min_xr = xr.DataArray(data=p_vals_min, coords=model_month_coords, dims=['model','month'])
p_vals_max_xr = xr.DataArray(data=p_vals_max, coords=model_month_coords, dims=['model','month'])

In [68]:
#sign of (member-mean sigma_mem - sigma_obs) per model, obs dataset and month:
#+1 where the difference is > 0, -1 everywhere else
obs_sigmas = sigma_obs_individual.drop('SII').drop('Merged').to_array()
mem_minus_obs = (sigma_mem_individual.mean('member') - obs_sigmas).rename({'variable':'obs_dataset'})

higher_lower = mem_minus_obs.where(mem_minus_obs>0,-1)   #non-positive differences become -1
higher_lower = higher_lower.where(higher_lower==-1,1)    #everything that is not -1 becomes +1

In [72]:
# For each month pick the higher_lower sign that belongs to the observational dataset with the
# minimum (resp. maximum) sigma_obs, then stack the picks into (month, model) arrays
min_obs_diff = []
max_obs_diff = []
for month_index, month_ in enumerate(np.arange(1,13)):
    lowest_obs = obs_datasets[min_list[month_index]]
    highest_obs = obs_datasets[max_list[month_index]]

    min_obs_diff_model = [
        higher_lower[model_name].sel(obs_dataset=lowest_obs).sel(month=month_)
        for model_name in model_names
    ]
    max_obs_diff_model = [
        higher_lower[model_name].sel(obs_dataset=highest_obs).sel(month=month_)
        for model_name in model_names
    ]

    min_obs_diff.append(xr.concat(min_obs_diff_model, dim='model'))
    max_obs_diff.append(xr.concat(max_obs_diff_model, dim='model'))

# concatenate along a new 'month' dimension and label both new dimensions
min_obs_diff_xr = xr.concat(min_obs_diff, dim='month')
max_obs_diff_xr = xr.concat(max_obs_diff, dim='month')
for stacked in (min_obs_diff_xr, max_obs_diff_xr):
    stacked['month'] = np.arange(1,13)
    stacked['model'] = model_names

In [5]:
# Sign of (mean sigma_mem - minimum sigma_obs), kept only where the t-test gives p < 0.05
# Colormap runs blue (-1) through white (0) to red (+1)
cmap = mpl.colors.LinearSegmentedColormap.from_list("", [(0.1, 0.1, 0.8), (0.6, 0.6, 0.9),"1",(0.9, 0.6, 0.6), (0.8, 0.1, 0.1)])
fig, ax = plt.subplots(figsize=[4.7,5.5])

# cells with p >= 0.05 are set to 0; months are reversed so the last row drawn is January
significant_sign = min_obs_diff_xr.where(p_vals_min_xr<0.05,0)
significant_sign = significant_sign.reindex(month=min_obs_diff_xr.month.values[::-1])

ax.pcolor(significant_sign, cmap=cmap, edgecolor='0.5', lw=2, vmin=-1, vmax=1)
ax.set_xticks(np.arange(0.5, 5.6, 1))
ax.set_xticklabels(model_print_names, fontsize=14, rotation=90)
ax.set_yticks(np.arange(0.5, 11.6, 1))
ax.set_yticklabels(month_names_short[::-1], fontsize=14)
ax.set_title(r'Minimum $\sigma_{obs}$', fontsize=16);

In [7]:
# Sign of (mean sigma_mem - maximum sigma_obs), kept only where the t-test gives p < 0.05
# Colormap runs blue (-1) through white (0) to red (+1)
cmap = mpl.colors.LinearSegmentedColormap.from_list("", [(0.1, 0.1, 0.8), (0.6, 0.6, 0.9),"1",(0.9, 0.6, 0.6), (0.8, 0.1, 0.1)])

fig, ax = plt.subplots(figsize=[4.7,5.5])

# cells with p >= 0.05 are set to 0; months are reversed so the last row drawn is January
significant_sign = max_obs_diff_xr.where(p_vals_max_xr<0.05,0)
significant_sign = significant_sign.reindex(month=max_obs_diff_xr.month.values[::-1])

ax.pcolor(significant_sign, cmap=cmap, edgecolor='0.5', lw=2, vmin=-1, vmax=1)
ax.set_xticks(np.arange(0.5, 5.6, 1))
ax.set_xticklabels(model_print_names, fontsize=14, rotation=90)
ax.set_yticks(np.arange(0.5, 11.6, 1))
ax.set_yticklabels(month_names_short[::-1], fontsize=14)
ax.set_title(r'Maximum $\sigma_{obs}$', fontsize=16);

In [6]:
# Largest p-value across the observational datasets for every model and month (log colour scale)
fig, ax = plt.subplots(figsize=[4.7,5.5])

# transpose so months run along the rows; months are not reversed here, so row 0 is January
highest_p = p_vals_xr.max('obs_dataset').transpose()

mesh = ax.pcolor(highest_p, cmap='plasma', edgecolor='0.5', lw=2, norm=mpl.colors.LogNorm(1e-10, 1))
ax.set_xticks(np.arange(0.5, 5.6, 1))
ax.set_xticklabels(model_print_names, fontsize=14, rotation=90)
ax.set_yticks(np.arange(0.5, 11.6, 1))
ax.set_yticklabels(month_names_short, fontsize=14)
fig.colorbar(mesh, ax=ax)
ax.set_title('Highest p-value', fontsize=16);

In [8]:
#combine max and min into one integer code per cell: (max sign) + 3*(min sign), each sign being
#-1, 0 or +1 after masking non-significant cells to 0, so the code lies in -4..4
#the nine colormap anchors are evenly spaced over vmin=-4..vmax=4:
#  -4, -3, -2, -1 -> blue | 0, 1 -> white | 2 -> black | 3, 4 -> red
#NOTE(review): -1 is drawn blue but +1 is drawn white, and -2 blue but +2 black - confirm this asymmetry is intended
blue = (0.1, 0.1, 0.8)
red = (0.8, 0.1, 0.1)
cmap = mpl.colors.LinearSegmentedColormap.from_list("", [blue, blue, blue, blue, "1", "1", "0", red, red])

fig, ax = plt.subplots(figsize=[4.5,5.5])

max_code = max_obs_diff_xr.where(p_vals_max_xr<0.05,0)
min_code = min_obs_diff_xr.where(p_vals_min_xr<0.05,0)
# months are reversed so the last row drawn is January
combined_code = (max_code + 3*min_code).reindex(month=max_obs_diff_xr.month.values[::-1])

ax.pcolor(combined_code, cmap=cmap, edgecolor='0.5', lw=2, vmin=-4, vmax=4)
ax.set_xticks(np.arange(0.5, 5.6, 1))
ax.set_xticklabels(model_print_names, fontsize=14, rotation=90)
ax.set_yticks(np.arange(0.5, 11.6, 1))
ax.set_yticklabels(month_names_short[::-1], fontsize=14)
ax.set_title(r'Combined t-test $\sigma_{obs}$', fontsize=16);

In [10]:
#plot p-values for a given obs dataset, showing only the cells with p < 0.05
fig, ax = plt.subplots(figsize=[4.7,5.5])

hadisst1_p = p_vals_xr.sel(obs_dataset='HadISST1')
# cells with p >= 0.05 become NaN and are left blank; transpose puts months on the rows
hadisst1_significant = hadisst1_p.where(hadisst1_p<0.05).transpose()

mesh = ax.pcolor(hadisst1_significant, cmap='plasma', edgecolor='0.5', lw=2, norm=mpl.colors.LogNorm(1e-10, 0.05))
ax.set_xticks(np.arange(0.5, 5.6, 1))
ax.set_xticklabels(model_print_names, fontsize=14, rotation=90)
ax.set_yticks(np.arange(0.5, 11.6, 1))
ax.set_yticklabels(month_names_short, fontsize=14)
fig.colorbar(mesh, ax=ax)
ax.set_title('HadISST1 p-value', fontsize=16);

In [11]:
#sum up the number of observational datasets (0-4) with p > 0.05 for every model and month
fig, ax = plt.subplots(figsize=[5.5,5.5])

# p_vals_sig is 1 where p > 0.05, so the sum counts the datasets that are not significantly different
n_not_significant = p_vals_sig.sum('obs_dataset').transpose()

mesh = ax.pcolor(n_not_significant, cmap='Greens_r', edgecolor='0.5', lw=2, vmin=0, vmax=4)
ax.set_xticks(np.arange(0.5, 5.6, 1))
ax.set_xticklabels(model_print_names, fontsize=14, rotation=90)
ax.set_yticks(np.arange(0.5, 11.6, 1))
ax.set_yticklabels(month_names_short, fontsize=14)
cbar = fig.colorbar(mesh, ax=ax, ticks=[0,1,2,3,4])
cbar.ax.set_ylabel('Number of Observational Datasets', fontsize=16)
ax.set_title('Not Significantly Different', fontsize=16);

# Comparing observational datasets with each other

In [180]:
#for each month, t-test every observational dataset against the other 3 datasets:
#the 3 remaining sigma_obs values form the sample, the left-out value is the reference mean

month_p_vals = []
for month_ in np.arange(1,13):
    all_obs_datasets = [
        sigma_obs_individual[obs_name].sel(month=month_)
        for obs_name in ['CDR', 'NT', 'BT', 'HadISST1']
    ]

    # np.delete drops the obs_i-th value, leaving the other three as the sample
    dataset_p_vals = [
        stats.ttest_1samp(np.delete(all_obs_datasets,obs_i), all_obs_datasets[obs_i]).pvalue
        for obs_i in range(4)
    ]

    month_p_vals.append(dataset_p_vals)

In [183]:
# Wrap the nested [month][dataset] p-value lists in a labelled (month, dataset) array
obs_p_vals_eachother = xr.DataArray(
    data=month_p_vals,
    dims=['month','dataset'],
    coords={'month':np.arange(1,13), 'dataset':['CDR','NT','BT','HadISST1']},
)

In [14]:
# p-value of each observational dataset against the other three, showing only cells with p > 0.05
fig, ax = plt.subplots(figsize=[4.7,5.5])

# cells with p <= 0.05 become NaN and are left blank
obs_not_significant = obs_p_vals_eachother.where(obs_p_vals_eachother>0.05)

mesh = ax.pcolor(obs_not_significant, cmap='plasma', edgecolor='0.5', lw=2, vmin=0.05, vmax=1)
ax.set_xticks(np.arange(0.5, 3.6, 1))
ax.set_xticklabels(['CDR','NT','BT','HadISST1'], fontsize=14)
ax.set_yticks(np.arange(0.5, 11.6, 1))
ax.set_yticklabels(month_names_short, fontsize=14)
fig.colorbar(mesh, ax=ax)
ax.set_title('Observational Datasets p-value Eachother', fontsize=16);

## Consistency of $\bar{\sigma}_{mem}$ standard deviations
**Color scale of above/below and hatching for too far away**

In [4]:
# One panel per model: sigma_obs minus (member mean + 2 member standard deviations of sigma_mem),
# hatched where that difference is positive
fig, ax = plt.subplots(1,6,figsize=[20,5])

# the observational side is the same for every model, so it is assembled once
obs_sigmas = sigma_obs_individual.drop('SII').drop('Merged').to_array()

for model_i, model_name in enumerate(model_names):
    panel = ax[model_i]
    upper_bound = sigma_mem_individual[model_name].mean('member') + 2*sigma_mem_individual[model_name].std('member')
    data_to_plot = (obs_sigmas - upper_bound).transpose()
    reversed_months = data_to_plot.month.values[::-1]   # drawn bottom-up, so January ends on the top row

    plot_mesh = panel.pcolor(data_to_plot.reindex(month=reversed_months), cmap='RdBu_r', edgecolor='0.5', lw=2, vmin=-0.05, vmax=0.05)

    # transparent overlay whose only visible effect is the hatching on cells with a positive difference
    panel.pcolor(data_to_plot.where(data_to_plot>0).reindex(month=reversed_months), hatch='//', alpha=0.)

    panel.set_xticks(np.arange(0.5, 3.6, 1));
    panel.set_yticks(np.arange(0.5, 11.6, 1));
    panel.set_xticklabels(['CDR','NT','BT','Had'], fontsize=14, rotation=0)

    # only the left-most panel carries month labels
    month_labels = month_names_short[::-1] if model_i == 0 else ['']*12
    panel.set_yticklabels(month_labels, fontsize=14)

    panel.set_title(model_print_names[model_i], fontsize=16);

# shared horizontal colorbar under the panels, taken from the last (sixth) panel's mesh
cbar_ax = fig.add_axes([0.15, 0, 0.7, 0.05])
cbar = fig.colorbar(plot_mesh, cax=cbar_ax, ticks=np.arange(-0.05,0.06,0.01), orientation='horizontal')
cbar.ax.set_xlabel(r'$\sigma_{obs} - (\bar{\sigma}_{mem} + 2 \sigma)$', fontsize=16)
for t in cbar.ax.get_xticklabels():
    t.set_fontsize(15)

plt.subplots_adjust(wspace=0.05, top=0.85)
plt.suptitle(r'$\sigma_{obs}$ - (Ensemble mean + 2 Standard Deviation of $\sigma_{mem}$)', fontsize=22);

In [3]:
# One panel per model: sigma_obs minus (member mean - 2 member standard deviations of sigma_mem),
# hatched where that difference is negative
fig, ax = plt.subplots(1,6,figsize=[20,5])

# the observational side is the same for every model, so it is assembled once
obs_sigmas = sigma_obs_individual.drop('SII').drop('Merged').to_array()

for model_i, model_name in enumerate(model_names):
    panel = ax[model_i]
    lower_bound = sigma_mem_individual[model_name].mean('member') - 2*sigma_mem_individual[model_name].std('member')
    data_to_plot = (obs_sigmas - lower_bound).transpose()
    reversed_months = data_to_plot.month.values[::-1]   # drawn bottom-up, so January ends on the top row

    plot_mesh = panel.pcolor(data_to_plot.reindex(month=reversed_months), cmap='RdBu_r', edgecolor='0.5', lw=2, vmin=-0.05, vmax=0.05)

    # transparent overlay whose only visible effect is the hatching on cells with a negative difference
    panel.pcolor(data_to_plot.where(data_to_plot<0).reindex(month=reversed_months), hatch='//', alpha=0.)

    panel.set_xticks(np.arange(0.5, 3.6, 1));
    panel.set_yticks(np.arange(0.5, 11.6, 1));
    panel.set_xticklabels(['CDR','NT','BT','Had'], fontsize=14, rotation=0)

    # only the left-most panel carries month labels
    month_labels = month_names_short[::-1] if model_i == 0 else ['']*12
    panel.set_yticklabels(month_labels, fontsize=14)

    panel.set_title(model_print_names[model_i], fontsize=16);

# shared horizontal colorbar under the panels, taken from the last (sixth) panel's mesh
cbar_ax = fig.add_axes([0.15, 0, 0.7, 0.05])
cbar = fig.colorbar(plot_mesh, cax=cbar_ax, ticks=np.arange(-0.05,0.06,0.01), orientation='horizontal')
cbar.ax.set_xlabel(r'$\sigma_{obs} - (\bar{\sigma}_{mem} - 2 \sigma)$', fontsize=16)
for t in cbar.ax.get_xticklabels():
    t.set_fontsize(15)

plt.subplots_adjust(wspace=0.05, top=0.85)
plt.suptitle(r'$\sigma_{obs}$ - (Ensemble mean - 2 Standard Deviation of $\sigma_{mem}$)', fontsize=22);

## Make consistency plots with 'both'

In [2]:
#both: one panel per model; a (month, obs dataset) cell is coloured when sigma_obs falls outside
#member mean +/- 2 member standard deviations of sigma_mem, and left white otherwise
fig, ax = plt.subplots(1,6,figsize=[20,5]) 

# the observational side is identical for every model, so it is assembled once
obs_sigmas = sigma_obs_individual.drop('SII').drop('Merged').to_array()

for model_i, model_name in enumerate(model_names):

    mem_mean = sigma_mem_individual[model_name].mean('member')
    mem_sd   = sigma_mem_individual[model_name].std('member')

    # SD_plus2  > 0 : sigma_obs lies above the upper bound (mean + 2 SD)
    # SD_minus2 < 0 : sigma_obs lies below the lower bound (mean - 2 SD)
    SD_plus2  = (obs_sigmas - (mem_mean + 2*mem_sd)).transpose()
    SD_minus2 = (obs_sigmas - (mem_mean - 2*mem_sd)).transpose()

    # White background with grey cell edges for every cell.
    # This used to be drawn from xr.ufuncs.logical_and(SD_plus2>0, SD_minus2<0); that condition
    # can never hold (a value cannot be above the upper and below the lower bound at once), so
    # the mask was all False and only ever produced this uniform background. Drawing the
    # background directly also removes the dependency on the deprecated xr.ufuncs module.
    data_plot = ax[model_i].pcolor(xr.zeros_like(SD_plus2), cmap='binary', edgecolor='0.5', lw=2, vmin=0, vmax=1)

    # vmin/vmax sit far below every plotted value, so each drawn cell saturates at the
    # top colour of its colormap and the overlays appear as solid red / solid blue

    #unacceptably high (model relative to obs): sigma_obs below the lower bound -> red
    ax[model_i].pcolor(SD_minus2.where(SD_minus2<0),
                       cmap='Reds', edgecolor='0.5', lw=2, vmin=-999, vmax=-998)

    #unacceptably low (model relative to obs): sigma_obs above the upper bound -> blue
    ax[model_i].pcolor(SD_plus2.where(SD_plus2>0),
                       cmap='Blues', edgecolor='0.5', lw=2, vmin=-999, vmax=-998)
  
    ax[model_i].set_xticks(np.arange(0.5, 3.6, 1));
    ax[model_i].set_yticks(np.arange(0.5, 11.6, 1));
    ax[model_i].set_xticklabels(['CDR','NT','BT','Had'], fontsize=14, rotation=0)
    
    # months are not reversed in this figure, so row 0 is January; only the first panel is labelled
    if model_i == 0:
        ax[model_i].set_yticklabels(month_names_short, fontsize=14)
    else:
        ax[model_i].set_yticklabels(['','','','','','','','','','','',''], fontsize=14)
        
    ax[model_i].set_title(model_print_names[model_i], fontsize=16);

plt.subplots_adjust(wspace=0.05, top=0.85)
# plt.suptitle(r'Observational Dataset - (Ensemble mean - 2 Standard Deviations of $\sigma_{mem}$)', fontsize=22);

In [488]:
# Count, for every model and month, how many observational datasets have sigma_obs above
# (SD_plus2_xr) or below (SD_minus2_xr) the member mean of sigma_mem by more than
# n_sd_threshold member standard deviations.
# NOTE: the variable names say "2" for continuity with the cells above, but the bound used
# here is 1.65 SD (presumably the one-sided 95% normal quantile - confirm).
n_sd_threshold = 1.65

# the observational side is identical for every model, so it is assembled once
obs_sigmas = sigma_obs_individual.drop('SII').drop('Merged').to_array()

SD_plus2_xr = []
SD_minus2_xr = []
for model_i, model_name in enumerate(model_names):

    mem_mean = sigma_mem_individual[model_name].mean('member')
    mem_sd   = sigma_mem_individual[model_name].std('member')

    SD_plus2  = (obs_sigmas - (mem_mean + n_sd_threshold*mem_sd)).transpose()
    SD_minus2 = (obs_sigmas - (mem_mean - n_sd_threshold*mem_sd)).transpose()

    # 1 where sigma_obs is above the upper bound, 0 otherwise
    SD_plus2_xr.append((SD_plus2>0).astype(float))

    # 1 where sigma_obs is below the lower bound, 0 otherwise.
    # The flag is now derived from SD_minus2 itself; it was previously built by masking
    # SD_plus2 with the SD_minus2 condition, which gave the same 0/1 values only because
    # SD_plus2 is never zero where SD_minus2 < 0.
    SD_minus2_xr.append((SD_minus2<0).astype(float))
    
# stack the per-model flags, count over the obs datasets ('variable'), and put months on the rows
SD_plus2_xr = xr.concat((SD_plus2_xr), dim='model').sum('variable').transpose()
SD_plus2_xr['model'] = model_names
    
SD_minus2_xr = xr.concat((SD_minus2_xr), dim='model').sum('variable').transpose()
SD_minus2_xr['model'] = model_names

In [1]:
                                                   #white black             red          
# Five evenly spaced colour anchors over vmin=0..vmax=4: 0 -> white, 1-3 -> black, 4 -> red / blue
red = (0.8, 0.1, 0.1)
blue = (0.1, 0.1, 0.8)
cmap_red  = mpl.colors.LinearSegmentedColormap.from_list("", ["1", "0", "0", "0", red])
cmap_blue = mpl.colors.LinearSegmentedColormap.from_list("", ["1", "0", "0", "0", blue])

fig, ax = plt.subplots(figsize=[4.5,5.5])

# months are reversed so the last row drawn is January
reversed_months = SD_plus2_xr.month.values[::-1]

# counts of obs datasets above the upper bound (blue map) and below the lower bound (red map);
# zero counts are masked out of both layers
n_above = SD_plus2_xr.where(SD_plus2_xr>0).reindex(month=reversed_months)
n_below = SD_minus2_xr.where(SD_minus2_xr>0).reindex(month=reversed_months)
ax.pcolor(n_above, cmap=cmap_blue, vmin=0, vmax=4, edgecolor='0.5', lw=2)
ax.pcolor(n_below, cmap=cmap_red, vmin=0, vmax=4, edgecolor='0.5', lw=2)

# cells where both counts are zero get a grey-edged cell from the 'binary' map
n_neither = SD_plus2_xr.where(SD_plus2_xr==0).where(SD_minus2_xr==0).reindex(month=reversed_months)
ax.pcolor(n_neither, cmap='binary', edgecolor='0.5', lw=2)

ax.set_xticks(np.arange(0.5, 5.6, 1))
ax.set_xticklabels(model_print_names, fontsize=14, rotation=90)
ax.set_yticks(np.arange(0.5, 11.6, 1))
ax.set_yticklabels(month_names_short[::-1], fontsize=14);