# Analysis of CMIP6, ERA5, and CloudSat


# Table of Contents
<ul>
<li><a href="#introduction">1. Introduction</a></li>
<li><a href="#data_wrangling">2. Data Wrangling</a></li>
<li><a href="#exploratory">3. Exploratory Data Analysis</a></li>
<li><a href="#conclusion">4. Conclusion</a></li>
<li><a href="#references">5. References</a></li>
</ul>

# 1. Introduction <a id='introduction'></a>


**Questions**
* How is the cloud phase and snowfall 


> **_NOTE:_** .

# 2. Data Wrangling <a id='data_wrangling'></a>


## Organize my data

- Define a prefix for my project (you may need to adjust it for your own usage on your infrastructure).
    - input folder where all the data used as input to my Jupyter Notebook is stored (and eventually shared)
    - output folder where all the results to keep are stored
    - tool folder where all the tools (utility modules such as `imports.py` and `functions.py`) are stored

The ERA5 0.25deg data is located in the folder `/scratch/franzihe/`, CloudSat at ...



In [None]:
import os
import pathlib
import sys
import socket
hostname = socket.gethostname()

# Path(hostname) is a relative, single-component path, so its parent is '.';
# .absolute() therefore yields the current working directory.
abs_path = str(pathlib.Path(hostname).parent.absolute())
# Cut the last two path components (and one separator) off the working
# directory. The result keeps its trailing '/' and is the folder in which
# the `utils` directory is looked up below.
WORKDIR = abs_path[:- (len(abs_path.split('/')[-2] + abs_path.split('/')[-1])+1)]


# Host-specific locations of the data and of the figure output
if "mimi" in hostname:
    print(hostname)
    DATA_DIR = "/scratch/franzihe/"
    # FIG_DIR = "/uio/kant/geo-geofag-u1/franzihe/Documents/Figures/ERA5/"
    FIG_DIR = "/uio/kant/geo-geofag-u1/franzihe/Documents/Python/globalsnow/CloudSat_ERA5_CMIP6_analysis/Figures/CS_ERA5_CMIP6_season_McI/"
elif "glefsekaldt" in hostname:
    DATA_DIR = "/home/franzihe/Data/"
    FIG_DIR = "/home/franzihe/Documents/Figures/ERA5/"
else:
    # Without this branch the cell failed further down with a NameError on
    # DATA_DIR, which hides the actual cause.
    raise RuntimeError(
        f"Unknown host {hostname!r}: add DATA_DIR and FIG_DIR for this machine to the setup cell"
    )

INPUT_DATA_DIR = os.path.join(DATA_DIR, 'input')
OUTPUT_DATA_DIR = os.path.join(DATA_DIR, 'output')
UTILS_DIR = os.path.join(WORKDIR, 'utils')

# make the helper modules (imports.py, functions.py) importable
sys.path.append(UTILS_DIR)

# make figure directory (including missing parents); an already existing
# directory is fine. Creation stays best-effort, but a failure is reported
# instead of being swallowed silently.
try:
    os.makedirs(FIG_DIR, exist_ok=True)
except OSError as err:
    print(f"Could not create figure directory {FIG_DIR}: {err}")

## Import python packages
- `Python` environment requirements: file [requirements_globalsnow.txt](../../requirements_globalsnow.txt) 
- load `python` packages from [imports.py](../../utils/imports.py)
- load `functions` from [functions.py](../../utils/functions.py)


In [None]:
# suppress warnings
import warnings
warnings.filterwarnings('ignore') # don't output warnings

# import packages (re-exported by utils/imports.py, found via UTILS_DIR on sys.path)
from imports import(xr, ccrs, cy, plt, glob, cm, fct, np, pd, Line2D, Patch, r2_score, LinearSegmentedColormap, BoundaryNorm)
# from matplotlib.lines import Line2D
# from matplotlib.patches import Patch
# from sklearn.metrics import r2_score
from cartopy.util import add_cyclic_point


# show xarray objects with the HTML representation in the notebook output
xr.set_options(display_style='html')

In [None]:
# reload imported modules automatically before each cell is executed
%load_ext autoreload
%autoreload 2

## Open variables
Get the data required for the analysis. 



In [None]:
# Folder below the output data directory from which the NetCDF files are
# read (one sub-folder per entry of `variables`, see the loading cell).
dat_in = os.path.join(OUTPUT_DATA_DIR, 'CS_ERA5_CMIP6')

# make output data directory
# try:
#     os.mkdir(dat_out)
# except OSError:
#     pass

In [None]:
# variable_id = ['tas', 'prsn', 'pr', 'lwp', 'clivi', 'areacella']

In [None]:
# Names of all datasets that are loaded: the '<source>_<resolution>' keys
# first, followed by the individual model names.
_resolution_keys = (
    'cloudsat_250', 'cloudsat_500',
    'era_30', 'era_250', 'era_500',
    'cmip_250', 'cmip_500',
)
_model_names = (
    'MIROC6',
    'CanESM5',
    'AWI-ESM-1-1-LR',
    'MPI-ESM1-2-LR',
    'UKESM1-0-LL',
    'HadGEM3-GC31-LL',
    'CNRM-CM6-1',
    'CNRM-ESM2-1',
    'IPSL-CM6A-LR',
    'IPSL-CM5A2-INCA',
)
list_models = [*_resolution_keys, *_model_names]



In [None]:
# One empty {model: xarray.Dataset} mapping per variable group; the groups
# are filled by the loading cell below.
variables = [
    'orig',
    '2t',
    'lcc',
    'lcc_2t',
    'lcc_sf',
    'lcc_2t_days',
    'lcc_2t_sf',
]
ds = dict((group, dict()) for group in variables)
ds

In [None]:
def _add_cyclic_longitude(data):
    """Return a copy of ``data`` with one extra longitude column appended.

    The data values are extended with ``cartopy.util.add_cyclic_point``; the
    new longitude value is the negated first longitude (as in the previous
    implementation), not the value cartopy returns.

    The dimension order is taken from ``data.dims`` instead of being
    hard-coded per array rank, so any combination of dimensions that contains
    'lon' is handled (the old code stored nothing for e.g. a 3-D field
    without 'time' or 'model', which then failed on the attribute copy).
    """
    lon_axis = data.dims.index('lon')
    wrap_data, _ = add_cyclic_point(data, coord=data['lon'], axis=lon_axis)
    cyclic_lon = np.append(data['lon'].values, data['lon'][0].values*(-1))
    coords = {dim: (cyclic_lon if dim == 'lon' else data[dim]) for dim in data.dims}
    return xr.DataArray(data=wrap_data, dims=data.dims, coords=coords)


for model in list_models:

    for var in variables:
        file_pattern = f'{dat_in}/{var}/{model}_*{var}*.nc'
        files = sorted(glob(file_pattern))
        # NOTE(review): every matching file replaces ds[var][model], so if
        # several files match the pattern only the last one is kept.
        for file in files:
            _ds = xr.open_mfdataset(file)
            # [var][model]
            ds[var][model] = xr.Dataset()
            # make the data cyclic going from -180 to 180
            for var_id in _ds.keys():
                data = _ds[var_id]

                # wrap only fields with a longitude axis whose first and last
                # longitude are not already mirror images of each other
                if 'lon' in data.dims and (data['lon'][0] != data['lon'][-1]*(-1)):
                    ds[var][model][var_id] = _add_cyclic_longitude(data)
                else:
                    ds[var][model][var_id] = data

                # building a new DataArray drops the attributes; restore them
                ds[var][model][var_id].attrs = data.attrs
                        

            
# Access the datasets using  ds[var][model]
# For example:
# lcc_2t_days_dataset = ds['lcc_2t_days']['era_30']

In [None]:
# def calc_stats_model_variation(ds):
#     # calculate the model variations
    
#     quantiles = (ds.quantile([0.25, 0.5, 0.75], dim='model', skipna=True, keep_attrs=False)).squeeze()
#     iqr = quantiles.sel(quantile=0.75) - quantiles.sel(quantile=0.25)
#     max_val = (quantiles.sel(quantile=0.75) +1.5*iqr).assign_coords({'quantile':'min'})
#     min_val = (quantiles.sel(quantile=0.25) +1.5*iqr).assign_coords({'quantile':'max'})
#     mean = ds.mean(dim='model', skipna=True, keep_attrs=True).assign_coords({'quantile':'mean'})
#     std = ds.std(dim='model', skipna=True, keep_attrs=True).assign_coords({'quantile':'std'})
#     stats = xr.concat([max_val, quantiles, min_val, mean], dim='quantile')
    
#     stats['areacella'] = stats['areacella'].isel(quantile=0).squeeze()
#     stats['days_season'] = stats['days_season'].isel(quantile=0, lat=0).squeeze()
#     stats['days_month'] = stats['days_month'].isel(quantile=0, lat=0).squeeze()
    
#     return(stats)

In [None]:
# for key in ds.keys():
#     ds[key]['cmip_250_stats'] = calc_stats_model_variation(ds[key]['cmip_250'])
#     ds[key]['cmip_500_stats'] = calc_stats_model_variation(ds[key]['cmip_500'])
    


In [None]:
def calculate_stats(data, weights, lat_slice):
    """Area-weighted statistics of ``data`` over one latitude band.

    Parameters
    ----------
    data : xarray.DataArray
        Field with 'lat' and 'lon' dimensions.
    weights : xarray.DataArray
        Weights handed to ``DataArray.weighted``.
    lat_slice : slice
        Latitude range selected with ``data.sel(lat=lat_slice)``.

    Returns
    -------
    mean, std : xarray.DataArray
        Weighted mean and standard deviation over ('lat', 'lon').
    stats : xarray.DataArray
        Along 'quantile', in this order: upper whisker (Q3 + 1.5*IQR,
        labelled 'max'), the 0.25/0.5/0.75 weighted quantiles, lower whisker
        (Q1 - 1.5*IQR, labelled 'min'). An empty DataArray is returned when
        ``data`` already has a 'quantile' coordinate.

    Notes
    -----
    The whisker labels used to be swapped (upper whisker labelled 'min',
    lower whisker labelled 'max'). Only the labels were corrected; the
    position of the entries along 'quantile' is unchanged.
    """
    weighted_data = data.sel(lat=lat_slice).weighted(weights)
    mean = weighted_data.mean(('lat', 'lon'), skipna=True, keep_attrs=False)
    std = weighted_data.std(('lat', 'lon'), skipna=True, keep_attrs=False)

    # A field that already carries quantiles gets no box-plot statistics
    if 'quantile' in data.coords:
        return mean, std, xr.DataArray()

    quantiles = weighted_data.quantile([0.25, 0.5, 0.75], dim=('lat', 'lon'), skipna=True, keep_attrs=False)
    iqr = quantiles.sel(quantile=0.75) - quantiles.sel(quantile=0.25)
    upper_whisker = (quantiles.sel(quantile=0.75) + 1.5 * iqr).assign_coords({'quantile': 'max'})
    lower_whisker = (quantiles.sel(quantile=0.25) - 1.5 * iqr).assign_coords({'quantile': 'min'})
    stats = xr.concat([upper_whisker, quantiles, lower_whisker], dim='quantile')

    return mean, std, stats

In [None]:
def weighted_average(data, weights):
    """Weighted statistics of ``data`` for both polar latitude bands.

    ``calculate_stats`` is evaluated for 45..90 ('NH') and for -90..-45
    ('SH'); each of its three results is concatenated along a new
    'hemisphere' dimension.

    Returns
    -------
    mean, std, stats : xarray.DataArray
        The per-band results of ``calculate_stats`` stacked along 'hemisphere'.
    """
    bands = (slice(45, 90), slice(-90, -45))  # NH first, then SH

    # one (mean, std, stats) triple per band, regrouped by statistic
    per_band = [calculate_stats(data, weights, band) for band in bands]
    means, stds, band_stats = (list(group) for group in zip(*per_band))

    def stack(parts):
        return xr.concat(parts, pd.Index(['NH', 'SH'], name='hemisphere'))

    mean = stack(means)
    std = stack(stds)
    stats = stack(band_stats)
    return mean, std, stats

In [None]:
# Order of the seasons along the 'season' dimension of the results
_SEASON_NAMES = ['NDJ', 'MJJ', 'FMA', 'ASO']


def _split_by_season(var, month):
    """Split ``var`` along 'time' into the four seasons NDJ, MJJ, FMA, ASO.

    ``month`` holds the month of every time step; ``fct.is_season(month,
    first, last)`` provides the selection (presumably first <= month <= last —
    verify in utils/functions.py).
    """
    def pick(first, last):
        return var.sel(time=fct.is_season(month, first, last))

    return {
        # NDJ crosses the turn of the year, so it is assembled from two parts
        'NDJ': xr.concat([pick(11, 12), pick(1, 1)], dim='time'),
        'MJJ': pick(5, 7),
        'FMA': pick(2, 4),
        'ASO': pick(8, 10),
    }


def _stack_seasons(per_season):
    """Concatenate a {season name: DataArray} mapping along a labelled 'season' dimension."""
    stacked = xr.concat([per_season[name] for name in _SEASON_NAMES], dim='season')
    return stacked.assign_coords({'season': list(_SEASON_NAMES)})


def get_ratios_season_month(var1, var2, weights, stats, out_var,var3=None ):
    """Ratio of ``var1`` to ``var2`` per season, per month and over all time steps.

    Parameters
    ----------
    var1 : xarray.DataArray
        Numerator; needs a 'time' dimension.
    var2 : xarray.DataArray
        Denominator. For ``stats='sum'`` it needs a 'time' dimension.
    weights : xarray.DataArray
        Not used inside this function; kept so existing calls stay valid.
    stats : {'count', 'sum', 'mean'}
        'count': count(var1) / count(var2) of the non-missing time steps;
        'sum'  : sum(var1) / sum(var2) over time;
        'mean' : time mean of var1 / var2.
    out_var : str
        Prefix of the result variables ``<out_var>_season``,
        ``<out_var>_month`` and ``<out_var>_year``.
    var3 : xarray.DataArray, optional
        Only read for ``stats='count'``: with a 'month' dimension it replaces
        the monthly denominator, otherwise with a 'time' dimension its count
        replaces the all-time denominator.

    Returns
    -------
    xarray.Dataset
        The three ratio fields. Values equal to zero are replaced by NaN and,
        unless ``out_var == 'sf_eff'``, the ratios are multiplied by 100.

    Raises
    ------
    ValueError
        For an unknown ``stats`` value, or for ``stats='sum'`` when ``var2``
        has no 'time' dimension.
    """
    ratios = xr.Dataset()

    # `var3 == None` compares element-wise once var3 is a DataArray, which
    # cannot be used as a condition; identity is the correct test.
    if var3 is None:
        var3 = xr.DataArray()

    season_key = out_var + '_season'
    month_key = out_var + '_month'
    year_key = out_var + '_year'

    fseason = _split_by_season(var1, var1['time.month'])
    fmonth = var1.groupby('time.month')

    nseason = nmonth = None
    if 'time' in var2.dims:
        nseason = _split_by_season(var2, var2['time.month'])
        nmonth = var2.groupby('time.month')

    if stats == 'count':
        if nseason is not None:
            # per season
            ratios[season_key] = _stack_seasons({
                name: fseason[name].count(dim='time', keep_attrs=False) / nseason[name].count(dim='time', keep_attrs=False)
                for name in _SEASON_NAMES
            })
            # per month
            ratios[month_key] = fmonth.count(dim='time', keep_attrs=False) / nmonth.count(dim='time', keep_attrs=False)
            # all years
            ratios[year_key] = var1.count(dim='time', keep_attrs=False) / var2.count(dim='time', keep_attrs=False)

        # an explicitly supplied denominator overrides the one from var2
        if 'month' in var3.dims:
            ratios[month_key] = fmonth.count(dim='time', keep_attrs=False) / var3
        elif 'time' in var3.dims:
            ratios[year_key] = var1.count(dim='time', keep_attrs=False) / var3.count(dim='time', keep_attrs=False)

    elif stats == 'sum':
        if nseason is None:
            raise ValueError("stats='sum' requires var2 to have a 'time' dimension")
        # per season
        ratios[season_key] = _stack_seasons({
            name: fseason[name].sum(dim='time', skipna=True, keep_attrs=False) / nseason[name].sum(dim='time', skipna=True, keep_attrs=False)
            for name in _SEASON_NAMES
        })
        # per month
        ratios[month_key] = fmonth.sum(dim='time', skipna=True, keep_attrs=False) / nmonth.sum(dim='time', skipna=True, keep_attrs=False)
        # all years
        ratios[year_key] = var1.sum(dim='time', skipna=True, keep_attrs=False) / var2.sum(dim='time', skipna=True, keep_attrs=False)

    elif stats == 'mean':
        # form the quotient once; seasons are selected with var1's months
        quotient = var1 / var2
        # per season
        ratios[season_key] = _stack_seasons({
            name: part.mean(dim='time', skipna=True, keep_attrs=False)
            for name, part in _split_by_season(quotient, var1['time.month']).items()
        })
        # per month
        ratios[month_key] = quotient.groupby('time.month').mean(dim='time', skipna=True, keep_attrs=False)
        # all years
        ratios[year_key] = quotient.mean(dim='time', skipna=True, keep_attrs=False)

    else:
        raise ValueError(f"stats must be 'count', 'sum' or 'mean', got {stats!r}")

    for key in (season_key, month_key, year_key):
        # ratios that are exactly zero are masked out as missing
        ratios[key] = ratios[key].where(ratios[key] != 0., other = np.nan)
        # everything except the snowfall efficiency is expressed in percent
        if out_var != 'sf_eff':
            ratios[key] = ratios[key]*100

    return(ratios)

In [None]:
# Build, for every dataset, the merged set of ratio fields
# FLCC, FsLCC, FoP, FoS and sf_eff (season / month / year each).
ratios = {}
for model in list_models:
    cell_area = ds['orig'][model]['areacella']

    if model in ('cloudsat_250', 'cloudsat_500'):
        # Assign an all-NaN 'lwp' shaped like 'sf_avg_lcc_snow' so that the
        # 'sf_eff' ratio below can be formed (it is then NaN everywhere).
        snow_rate = ds['lcc_2t_days'][model]['sf_avg_lcc_snow']
        ds['lcc_2t_days'][model]['lwp'] = xr.DataArray(np.nan, coords=snow_rate.coords, dims=snow_rate.dims)

        # (numerator, denominator, statistic, output name)
        ratio_specs = [
            (ds['orig'][model]['n_lcc'], ds['orig'][model]['n_obs'], 'sum', 'FLCC'),
            (ds['lcc_2t'][model]['n_lcc'], ds['orig'][model]['n_obs'], 'sum', 'FsLCC'),
            (ds['lcc_sf'][model]['n_sf_lcc_snow'], ds['orig'][model]['n_lcc'], 'sum', 'FoP'),
            (ds['lcc_2t_sf'][model]['n_sf_lcc_snow'], ds['lcc_2t'][model]['n_lcc'], 'sum', 'FoS'),
            (ds['lcc_2t_days'][model]['sf_avg_lcc_snow'], ds['lcc_2t_days'][model]['lwp'], 'mean', 'sf_eff'),
        ]
    else:
        ## use of 'tas' as denominator as this has values everywhere where data is valid, while 'lwp' or 'prsn' might not have values
        ratio_specs = [
            (ds['lcc'][model]['lwp'], ds['orig'][model]['tas'], 'count', 'FLCC'),
            # sLCC frequency compared to all observations when T<0C
            (ds['lcc_2t'][model]['lwp'], ds['orig'][model]['tas'], 'count', 'FsLCC'),
            (ds['lcc_sf'][model]['prsn'], ds['orig'][model]['tas'], 'count', 'FoP'),
            # relative frequency of snowfall from liquid containing clouds
            (ds['lcc_2t_sf'][model]['prsn'], ds['lcc_2t'][model]['lwp'], 'count', 'FoS'),
            # relative snowfall (precipitation) efficency
            (ds['lcc_2t_days'][model]['prsn'], ds['lcc_2t_days'][model]['lwp'], 'mean', 'sf_eff'),
        ]

    ratios[model] = xr.merge(objects=[
        get_ratios_season_month(var1=numerator, var2=denominator, stats=statistic, out_var=name, weights=cell_area)
        for numerator, denominator, statistic, name in ratio_specs
    ])

In [None]:
def _percent_settings(cb_label):
    """Colour-bar settings shared by all ratios that are given in percent."""
    return {
        'cb_label': cb_label,
        'levels': np.arange(0, 110, 10),
        'vmin': 0,
        'vmax': 100,
        'diff_levels': np.arange(-100, 110, 10),
        'diff_vmin': -100,
        'diff_vmax': 100,
    }


# Plot settings per ratio: colour-bar label, contour levels and value range
# for the absolute fields ('levels', 'vmin', 'vmax') and for the difference
# fields ('diff_*').
d = {
    name: _percent_settings(label)
    for name, label in (
        ('FLCC', 'FLCC (%)'),
        ('FsLCC', 'FsLCC (%)'),
        ('FoP', 'FoP in LCCs (%)'),
        ('FoS', 'FoS in sLCCs (%)'),
    )
}
d['sf_eff'] = {
    'cb_label': 'SE in sLCCs (h$^{-1}$)',
    'levels': np.arange(0, 5.5, .5),
    'vmin': 0,
    'vmax': 5,
    'diff_levels': np.arange(-1.2, 1.4, .2),
    'diff_vmin': -1.2,
    'diff_vmax': 1.2,
}


In [None]:
def get_only_valid_values(ratios, res, out_var):
    """Add mutually masked copies of one ratio to the CloudSat, ERA and CMIP datasets.

    For ``<out_var>_season``, ``<out_var>_month`` and ``<out_var>_year`` the
    fields of ``cloudsat_<res>``, ``era_<res>`` and ``cmip_<res>`` are read,
    zeros are replaced by NaN, and copies restricted to the cells where
    another product is not NaN are stored under the original name plus the
    suffix of the masking product (``_cs``, ``_era``, ``_cmip``). Copies
    masked with the CMIP field are averaged over 'model'.

    Parameters
    ----------
    ratios : dict
        Mapping of dataset key to xarray.Dataset; modified in place.
    res : str
        Resolution suffix of the dataset keys, e.g. '250' or '500'.
    out_var : str
        Name prefix of the ratio. For 'sf_eff' additional ERA/CMIP
        combinations are stored.

    Returns
    -------
    dict
        The same ``ratios`` mapping that was passed in.
    """
    cs_key = f'cloudsat_{res}'
    era_key = f'era_{res}'
    cmip_key = f'cmip_{res}'

    def drop_zeros(field):
        return field.where(field != 0., other = np.nan)

    def keep_where_valid(field, reference):
        return field.where(~np.isnan(reference))

    for period in ('season', 'month', 'year'):
        name = f'{out_var}_{period}'

        cloudsat = drop_zeros(ratios[cs_key][name])
        era = drop_zeros(ratios[era_key][name])
        cmip = drop_zeros(ratios[cmip_key][name])

        # CloudSat on the valid cells of ERA and of CMIP (model mean)
        ratios[cs_key][f'{name}_era'] = keep_where_valid(cloudsat, era)
        ratios[cs_key][f'{name}_cmip'] = keep_where_valid(cloudsat, cmip).mean('model',skipna=True)

        # ERA and CMIP on the valid cells of CloudSat
        ratios[era_key][f'{name}_cs'] = keep_where_valid(era, cloudsat)
        ratios[cmip_key][f'{name}_cs'] = keep_where_valid(cmip, cloudsat)

        if out_var != 'sf_eff':
            continue

        # additional ERA <-> CMIP combinations for the snowfall efficiency
        ratios[era_key][f'{name}_cmip'] = keep_where_valid(era, cmip).mean('model', skipna=True)
        ratios[era_key][f'{name}_era'] = keep_where_valid(era, era)
        ratios[cmip_key][f'{name}_cmip'] = keep_where_valid(cmip, cmip)
        ratios[cmip_key][f'{name}_era'] = keep_where_valid(cmip, era)

    return ratios


In [None]:
# Add the mutually masked fields for every ratio at both resolutions
for var_name in d:
    for resolution in ('250', '500'):
        ratios = get_only_valid_values(ratios, resolution, var_name)

In [None]:
# # Calculate weighted averages

for model in ratios.keys():
    weights = ds['2t'][model]['areacella']
    # Take a snapshot of the variable names first: the loop body adds three
    # new variables per field to the very Dataset that is being iterated.
    # (The loop variable is no longer called `vars`, which shadowed the builtin.)
    for var_key in list(ratios[model].keys()):
        (ratios[model][var_key+'_mean'],
         ratios[model][var_key+'_std'],
         ratios[model][var_key+'_stats']) = weighted_average(ratios[model][var_key], weights)

In [None]:
# Panel labels 'a)' ... 'z)' followed by 'aa)' ... 'zz)'
_panel_letters = [chr(code) for code in range(ord('a'), ord('z') + 1)]
fig_label = [f'{letter})' for letter in _panel_letters] + [f'{letter * 2})' for letter in _panel_letters]



Answer from both of us: you calculate the mean value for each grid cell, right? Just calculate the standard deviation over all models per grid cell as well (sigma), then divide sigma by the square root of the number of models N; that is your standard error, SE = sigma/sqrt(N). Your confidence interval CI is CI = 1.96*SE (for the 95% confidence level), or CI = 2.58*SE (for the 99% confidence level). So if you take the difference with ERA, just check whether this difference is larger than that confidence interval CI to find significant pixels.
3:18
Some background: https://towardsdatascience.com/confidence-intervals-explained-simply-for-data-scientists-8354a6e2266b

In [None]:
def plt_spatial_season(hemisphere,ds, var_name, dict_label,fig_dir, lat_extent):
    """Draw a 3 x 4 panel of polar-stereographic seasonal maps and save it as PNG.

    Rows (one column per season):

    * ``var_name != 'sf_eff'``: CloudSat, CloudSat - ERA5, CloudSat - CMIP6 mean
    * ``var_name == 'sf_eff'``: ERA5, CMIP6 mean, ERA5 - CMIP6 mean

    All fields are read from the 500 km datasets. Every panel shows the
    hemispheric weighted mean in a text box. In the last row, cells whose
    absolute difference is below 1.96 * std / sqrt(number of models) are hatched.

    Parameters
    ----------
    hemisphere : {'NH', 'SH'}
        Selects the projection, the map extent and the latitude band.
    ds : dict
        Mapping of dataset key ('cloudsat_500', 'era_500', 'cmip_500') to the
        xarray.Dataset holding the ratio fields.
    var_name : str
        Prefix of the ratio to plot.
    dict_label : dict
        Plot settings with the keys 'cb_label', 'levels', 'vmin', 'vmax',
        'diff_levels', 'diff_vmin' and 'diff_vmax'.
    fig_dir : str
        Directory the figure is written to.
    lat_extent : number
        Equatorward latitude limit of the map (positive; negated for 'SH').
        The plotted data itself is always cut at 45 degrees.

    Notes
    -----
    Changes compared to the previous version: the hatching and its legend are
    skipped when the CMIP6 field has no 'model' coordinate (``CI`` was
    undefined in that case), the row slice no longer reaches one axis into
    the next row, the output path is built with ``os.path.join``, and masks
    of a field with its own NaNs (no-ops) were removed.
    """
    def _zeros_to_nan(field):
        return field.where(field != 0., other=np.nan)

    if var_name != 'sf_eff':
        val1 = _zeros_to_nan(ds['cloudsat_500'][var_name + '_season'])
        val1_mean = ds['cloudsat_500'][var_name + '_season_mean']

        val1_500 = _zeros_to_nan(ds['cloudsat_500'][var_name + '_season_cmip'])
        val1_500_mean = ds['cloudsat_500'][var_name + '_season_cmip_mean']

        val2_500 = _zeros_to_nan(ds['era_500'][var_name + '_season_cs'])
        val2_500_mean = ds['era_500'][var_name + '_season_cs_mean']

        # difference only where both products are valid
        diff_era = val1_500.where(~np.isnan(val2_500)) - val2_500.where(~np.isnan(val1_500))
        diff_era_mean = val1_500_mean - val2_500_mean

        val3_500 = ds['cmip_500'][var_name + '_season_cs']
        val3_500_mean = ds['cmip_500'][var_name + '_season_cs_mean']

        model_labels = ['CloudSat (500km)', 'CloudSat - ERA5 (500km)', 'CloudSat - CMIP6$_{mean}$ (500km)', ]
    else:
        val1 = _zeros_to_nan(ds['era_500'][var_name + '_season'])
        val1_mean = ds['era_500'][var_name + '_season_mean']

        val1_500 = _zeros_to_nan(ds['era_500'][var_name + '_season_cmip'])
        # NOTE(review): the field above is '_season_cmip' but the mean is read
        # from '_season_mean' (not '_season_cmip_mean') — confirm this is intended.
        val1_500_mean = ds['era_500'][var_name + '_season_mean']

        val2_500 = _zeros_to_nan(ds['cmip_500'][var_name + '_season'])
        val2_500_mean = ds['cmip_500'][var_name + '_season_era_mean']

        # for the snowfall efficiency the second row shows the CMIP6 field
        # itself instead of a difference
        diff_era = val2_500
        diff_era_mean = val2_500_mean

        if 'model' in diff_era.coords:
            # create model mean
            diff_era = _zeros_to_nan(diff_era.mean('model', skipna=True, keep_attrs=False))
            diff_era_mean = diff_era_mean.mean('model',skipna=True, keep_attrs=False)

        val3_500 = _zeros_to_nan(ds['cmip_500'][var_name + '_season_era'])
        val3_500_mean = ds['cmip_500'][var_name + '_season_era_mean']

        model_labels = ['ERA5 (500 km)', 'CMIP6$_{mean}$ (500km)', 'ERA5 - CMIP6$_{mean}$ (500km)', ]

    diff_cmip = val1_500.where(~np.isnan(val3_500)) - val3_500.where(~np.isnan(val1_500))
    diff_cmip_mean = val1_500_mean - val3_500_mean

    # CI stays None when there is no model spread to derive it from
    CI = None
    if 'model' in diff_cmip.coords:
        # create model mean
        diff_cmip = _zeros_to_nan(diff_cmip.mean('model', skipna=True, keep_attrs=False))
        diff_cmip_mean = diff_cmip_mean.mean('model',skipna=True, keep_attrs=False)

        # create model std
        #  https://towardsdatascience.com/confidence-intervals-explained-simply-for-data-scientists-8354a6e2266b
        std = _zeros_to_nan(val3_500.std(dim='model',skipna=True, keep_attrs=False))
        # calculate statistic significance dependend on the model spread
        SE = std/np.sqrt(len(val3_500['model'])) # standard error
        CI = (SE) * 1.96 # for 95% confidence level, or CI=2.58*SE for 99% confidence level

    list1 = [val1, diff_era, diff_cmip]
    list_glob = [val1_mean, diff_era_mean, diff_cmip_mean]

    projections = {'NH': ccrs.NorthPolarStereo(central_longitude=0.0, globe=None),
                   'SH': ccrs.SouthPolarStereo(central_longitude=0.0, globe=None)}
    projection = projections[hemisphere]
    density=4  # number of '/' characters in the hatch pattern

    f, axsm = plt.subplots(nrows=3, ncols=4, subplot_kw={'projection': projection}, figsize=[12, 9], sharex=True, sharey=True)
    coast = cy.feature.NaturalEarthFeature(category='physical', scale='110m', facecolor='none', name='coastline')

    rows = model_labels
    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)

    # row titles, written vertically left of the first column
    for ax, row in zip(axsm[:, 0], rows):
        ax.text(-0.07, 0.55, row, va='bottom', ha='center', rotation='vertical', rotation_mode='anchor', transform=ax.transAxes, fontweight='bold')

    # map extent, coast lines, grid lines and panel letter for every axis
    for ax, panel_letter in zip(axsm.flatten(), fig_label):
        if hemisphere == 'NH':
            ax.set_extent([-180, 180, 90, lat_extent], ccrs.PlateCarree())
        elif hemisphere == 'SH':
            ax.set_extent([-180, 180, -90, -1*(lat_extent)], ccrs.PlateCarree())
        ax.add_feature(coast, alpha=0.5)
        gl = ax.gridlines(draw_labels=True)
        gl.top_labels = False
        gl.right_labels = False
        ax.text(0.05, 0.95, f'{panel_letter}', fontweight='bold', horizontalalignment='left', verticalalignment='top', transform=ax.transAxes)

    for ax, season in zip(axsm.flatten()[:4], val1.season):
        ax.set(title=f'season = {season.values}')

    for i, (value,hemi_glob) in enumerate(zip(list1,list_glob)):
        # colour map, levels and colour-bar axes of the current row
        if i == 0:
            cmap = cm.hawaii_r

            cb_label = dict_label['cb_label']
            levels = dict_label['levels']
            vmin = dict_label['vmin']
            vmax = dict_label['vmax']

            if var_name != 'sf_eff':
                cbaxes = f.add_axes([0.92, 0.65, 0.0125, 0.225])
            else:
                # rows 1 and 2 share this colour bar for the snowfall efficiency
                cbaxes = f.add_axes([0.92, 0.4, 0.0125, 0.45])
        elif i == 1:
            sub_title = ""
            if var_name != 'sf_eff':
                cmap = cm.bam
                levels = dict_label['diff_levels']
                vmin = dict_label['diff_vmin']
                vmax = dict_label['diff_vmax']
            else:
                cmap = cm.hawaii_r
                levels = dict_label['levels']
                vmin = dict_label['vmin']
                vmax = dict_label['vmax']
        elif i == 2:
            cmap = cm.bam
            sub_title = ""
            levels = dict_label['diff_levels']
            vmin = dict_label['diff_vmin']
            vmax = dict_label['diff_vmax']
            if var_name != 'sf_eff':
                cbaxes = f.add_axes([0.92, 0.13, 0.0125, 0.45])
                cb_label = 'CloudSat - Model (%)'
            else:
                cbaxes = f.add_axes([0.92, 0.13, 0.0125, 0.2])
                cb_label = 'ERA5 - Model (h$^{-1}$)'

        for ax, season in zip(axsm.flatten()[i*4: (i+1)*4], val1.season):
            if i == 0:
                sub_title =f'season = {season.values}'

            val = value.sel(lat=slice(45,90)) if hemisphere == 'NH' else value.sel(lat=slice(-90,-45))
            cf = val.sel(season=season).plot(ax=ax, transform=ccrs.PlateCarree(), extend =None, add_colorbar=False, cmap=cmap, levels=levels, vmin=vmin, vmax=vmax)

            # hemispheric weighted mean in the lower left corner
            val2 = hemi_glob.sel(hemisphere=hemisphere, season=season).values.round(2)
            if var_name != 'sf_eff':
                ax.text(0.05, 0.125, f'{val2:.0f}%',transform=ax.transAxes, fontsize=11, verticalalignment='top',bbox=props)
            else:
                ax.text(0.05, 0.125, f'{val2:.1f}'+'h$^{-1}$',transform=ax.transAxes, fontsize=11, verticalalignment='top',bbox=props)
            ax.set_title(sub_title)

            # plot statistic not significant
            # hatch the cells where the difference is lower than the confidence interval CI
            if i == 2:
                if CI is not None:
                    (val.where(np.abs(val) < CI)).sel(season=season).plot.contourf(ax=ax, transform=ccrs.PlateCarree(), colors='none', hatches=[density*'/',density*'/'],add_colorbar=False,)
                ax.set_title('')

        if i == 0 or i == 2:
            cbar = plt.colorbar(cf, cax=cbaxes, shrink=0.5,extend=None, orientation='vertical', label=cb_label)

        if i == 2 and CI is not None:
            # legend entry explaining the hatching, placed in figure coordinates
            bb = [0.95, 0.09, 0.0125, 0.05]
            axsm.flatten()[i].legend([Patch(facecolor='none', edgecolor='k', hatch=density*'/', label='CI < 95%')
                ],
                ['CI < 95%'],
                bbox_to_anchor=bb,loc=8,ncol=1,borderaxespad=0,fancybox=True,bbox_transform=f.transFigure,
            )

    plt.tight_layout(pad=0., w_pad=0., h_pad=0.)

    figname = f'{var_name}_season_{hemisphere}_2007_2010.png'
    plt.savefig(os.path.join(fig_dir, figname), format='png', bbox_inches='tight', transparent=False)

In [None]:
# One seasonal map figure per ratio and hemisphere, cut at 45 degrees latitude
for var_name, label_settings in d.items():
    for hemisphere in ('NH', 'SH'):
        plt_spatial_season(hemisphere, ratios, var_name, label_settings, FIG_DIR, 45)

In [None]:
def plt_monthly_model_variation(ds_dict, var_name, dict_label,fig_dir):
    """Plot monthly and annual hemispheric means of CloudSat, ERA5 and CMIP6.

    Two panels are drawn side by side: 'NH' (titled "Arctic") and 'SH'
    (titled "Antarctic"). The x axis has 13 slots: ticks 1-12 are labelled
    with the month number and tick 13 is labelled 'years' and holds the
    annual value. CloudSat and ERA5 are drawn as scatter markers, the CMIP6
    models as one box per slot.

    Parameters
    ----------
    ds_dict : mapping
        Must contain the entries 'cloudsat_500', 'era_500' and 'cmip_500'.
        From CloudSat the variables ``<var_name>_month_mean`` and
        ``<var_name>_year_mean`` are read. From ERA5 and CMIP6 the variables
        ``<var_name>_month_<suffix>`` / ``<var_name>_year_<suffix>`` are
        read, where the suffix is 'cs_mean' for both, except for
        ``var_name == 'sf_eff'`` where ERA5 uses 'cmip_mean' and CMIP6 uses
        'era_mean'.
    var_name : str
        Name of the diagnostic to plot; also used in the output file name.
    dict_label : mapping
        'cb_label' is used as the y-axis label of the left panel;
        'vmin'/'vmax' are the y-limits unless ``var_name`` is 'sf_eff'
        (which uses the fixed range 0-8).
    fig_dir : str
        Output directory. The file name is appended by plain string
        concatenation, so it has to end with a path separator.

    Notes
    -----
    Uses the notebook-level names ``cm``, ``plt``, ``xr``, ``np``,
    ``Line2D`` and ``fig_label``.

    ``ax.boxplot`` is not given the raw model values. Its sample for each
    slot consists of six summary numbers computed over the 'model'
    dimension: Q3 + 1.5 IQR, Q1, median, Q3, Q1 - 1.5 IQR and the mean.
    """
    colors = cm.hawaii(range(0, 256, int(256 / 3) + 1))
    cmip_key = 'cmip_500'

    # sf_eff uses differently named ERA5/CMIP6 mean variables than the other
    # diagnostics (presumably because it has no CloudSat counterpart to
    # sample against - confirm with the data-preparation code).
    is_sf_eff = var_name == 'sf_eff'
    era_suffix = 'cmip_mean' if is_sf_eff else 'cs_mean'
    cmip_suffix = 'era_mean' if is_sf_eff else 'cs_mean'

    f, axsm = plt.subplots(nrows=1, ncols=2, sharex=True, sharey=True, figsize=[12, 5])
    ax = axsm.flat

    box_artists = None
    for i, hemisphere in enumerate(['NH', 'SH']):
        ax[i].grid(True)

        # CloudSat: 12 monthly values followed by the annual value in slot 13.
        cs_data = xr.concat([ds_dict['cloudsat_500'][var_name + '_month_mean'].sel(hemisphere=hemisphere),
                             ds_dict['cloudsat_500'][var_name + '_year_mean'].sel(hemisphere=hemisphere).assign_coords(coords={'month':13})], dim='month')
        ax[i].scatter(x=np.arange(1,14), y=cs_data, color='k', marker='o',s=50)

        era_data = xr.concat([ds_dict['era_500'][f'{var_name}_month_{era_suffix}'].sel(hemisphere=hemisphere,),
                              ds_dict['era_500'][f'{var_name}_year_{era_suffix}'].sel(hemisphere=hemisphere,).assign_coords(coords={'month':13})], dim='month')
        # The hemisphere of the CMIP6 data is selected further down, after the
        # ensemble statistics have been computed.
        cmip_data = xr.concat([ds_dict[cmip_key][f'{var_name}_month_{cmip_suffix}'],
                               ds_dict[cmip_key][f'{var_name}_year_{cmip_suffix}'].assign_coords(coords={'month':'years'})], dim='month')

        # ERA5 markers are shifted 0.25 to the right of the CloudSat markers.
        ax[i].scatter(x=np.arange(1.25,14.25), y=era_data, color=colors[0], marker="h", s=50)

        color = colors[2]

        # Ensemble summary over the 'model' dimension.
        quantiles = cmip_data.quantile([0.25, 0.5, 0.75], dim=('model'), skipna=True, keep_attrs=False)
        iqr = quantiles.sel(quantile=0.75) - quantiles.sel(quantile=0.25)
        # Upper/lower whisker bounds; the labels now match the values (they
        # were swapped before, which had no effect on the plot because the
        # labels are never read back).
        max_val = (quantiles.sel(quantile=0.75) + 1.5 * iqr).assign_coords({'quantile': 'max'})
        min_val = (quantiles.sel(quantile=0.25) - 1.5 * iqr).assign_coords({'quantile': 'min'})
        means = cmip_data.mean(dim='model', skipna=True).assign_coords({'quantile':'mean'})
        stats = xr.concat([max_val, quantiles, min_val, means], dim='quantile')

        # Boxes are shifted 0.25 to the left of the CloudSat markers.
        positions = np.arange(0.75, 13.75, 1)
        boxplot_data = stats.sel(hemisphere=hemisphere,).transpose('quantile', 'month')
        box_artists = ax[i].boxplot(boxplot_data,  positions=positions, widths=0.4,
                            boxprops=dict(color=color, lw=1.5),
                            medianprops=dict(color=color, lw=1.5),
                            whiskerprops=dict(color=color, lw=1.5),
                            capprops=dict(color=color, lw=1.5),
                            flierprops=dict(marker='+',markeredgecolor=color, markersize=10),
                            showmeans=True, meanprops=dict(marker='D',markerfacecolor=color, markersize=4),
                            patch_artist=True,)

        for patch in box_artists['boxes']:
            patch.set(facecolor=color, alpha=0.5)

        ax[i].set_title('Arctic' if hemisphere == 'NH' else 'Antarctic')
        ax[i].text(0.05, 0.95, f'{fig_label[i]}', fontweight='bold', horizontalalignment='left', verticalalignment='top', transform=ax[i].transAxes)
        # boxplot moved the ticks to the box positions; put them back on 1..13.
        ax[i].set_xticks(np.arange(1,14))
        ax[i].set_xlim([0, 13.5])

        ax[i].set_xticklabels(np.append(np.arange(1,13), 'years'), fontsize=12)
        ax[i].set_xlabel('Month')

        if is_sf_eff:
            ax[i].set_ylim([0,8.])
            ax[i].set_yticks(np.arange(0,8.50,.50))
        else:
            ax[i].set_ylim([dict_label['vmin'],dict_label['vmax']])
            ax[i].set_yticks(np.arange(0,110,10))

        ax[i].set_ylabel(dict_label['cb_label'] if i==0 else '')

    # Legend box spanning the width of the subplot area, anchored in figure
    # coordinates.
    s = f.subplotpars
    bb = [s.left, s.top - 0.92, (s.right - s.left), 0.05]

    # The explicit label list below is what the legend shows; the `label`
    # arguments of the proxy artists are not used.
    ax[1].legend([
            Line2D([0], [0], marker='o', color='w', label='CloudSat (500km)', markersize=10, markerfacecolor='k'),
            Line2D([0], [0], marker='h', color='w', label='ERA5 (30 km)',markersize=10, markerfacecolor=colors[0], ),
            box_artists["boxes"][0],
        ],
        ['CloudSat (500 km)',
            'ERA5$_{mean}$ (500 km)',
         'CMIP6 (500 km)'],
        bbox_to_anchor=bb,loc=8,ncol=3,mode='expand',borderaxespad=0,fancybox=True,bbox_transform=f.transFigure,
    )

    plt.tight_layout(pad=0., w_pad=0., h_pad=0.)

    figname = f'{var_name}_monthly_model_variation_2007_2010.png'
    plt.savefig(fig_dir + figname, format='png', bbox_inches='tight', transparent=False)

In [None]:
# One monthly-cycle figure per variable listed in `d`.
for var_name in d:
    plt_monthly_model_variation(
        ratios, var_name, d[var_name], FIG_DIR
    )

In [None]:
# Stack ERA5 (labelled 'ERA5' on the 'model' coordinate) in front of the CMIP6
# models so that both can be addressed through one 'model' dimension.
ratios['250'] = xr.concat(
    [ratios['era_250'].assign_coords(model='ERA5'), ratios['cmip_250']],
    dim='model',
)
ratios['500'] = xr.concat(
    [ratios['era_500'].assign_coords(model='ERA5'), ratios['cmip_500']],
    dim='model',
)

In [None]:
def calc_linear_regression(df, model, ):
    """Fit ``df[model]`` against ``df['reference']`` with a straight line.

    The fitted values are written into ``df`` as a new column ``'Treg'``
    (inserted at column position 2), so the frame that is passed in is
    modified in place.

    Returns
    -------
    tuple
        ``(R2, coefficients)``: ``coefficients`` is the ``np.polyfit`` result
        for degree 1 (slope first, then intercept) and ``R2`` is
        ``r2_score(df[model], fitted_values)``.
    """
    reference = df['reference']

    # Degree-1 least-squares fit; poly1d turns the coefficients into a callable.
    coefficients = np.polyfit(reference, df[model], 1)
    fitted = np.poly1d(coefficients)(reference)

    # Keep the regression line next to the data it was derived from.
    df.insert(2, 'Treg', fitted)

    score = r2_score(df[model], fitted)
    return (score, coefficients)

def calc_linear_regression_hemisphere(ratios_x, ratios_y, season, model, lat_slice, var_name):
    """Regress one model's seasonal field on the reference field in a latitude band.

    ``ratios_x`` supplies the reference and ``ratios_y`` the model data; from
    both the variable ``f'{var_name}_season'`` is selected for ``season`` and
    ``lat_slice`` (``ratios_y`` additionally for ``model``). Rows containing
    infinities or NaN are dropped before the fit.

    Returns
    -------
    tuple
        ``(frame, R2, coefficients)`` where ``frame`` has the columns
        'reference', ``model`` and the 'Treg' column added by
        ``calc_linear_regression``.
    """
    field = f'{var_name}_season'

    reference_frame = ratios_x[field].sel(season=season, lat=lat_slice).to_dataframe()
    model_frame = ratios_y[field].sel(season=season, model=model, lat=lat_slice).to_dataframe()

    # Column assignment (rather than a dict constructor) keeps the reference
    # index and aligns the model column to it.
    paired = pd.DataFrame()
    paired['reference'] = reference_frame[field]
    paired[model] = model_frame[field]

    # Treat infinities like missing values, then keep complete pairs only.
    paired.replace([np.inf, -np.inf], np.nan, inplace=True)
    paired.dropna(inplace=True)

    score, coefficients = calc_linear_regression(paired, model)
    return (paired, score, coefficients)


def calc_scatter_obs_model(ratios, var_name):
    """Run the reference-vs-model regression for every model, season and hemisphere.

    The models and seasons are taken from ``ratios['500']``. The reference is
    ``ratios['era_500']`` when ``var_name`` is 'sf_eff' and
    ``ratios['cloudsat_500']`` otherwise. The northern band is latitude
    45..90, the southern band -90..-45.

    Returns
    -------
    tuple
        ``(df_NH, R2_NH, d_NH, df_SH, R2_SH, d_SH)``; each element is a
        nested dict indexed as ``[model][season]`` holding the data frame,
        the r2 score and the fit coefficients respectively.
    """
    lat_bands = {'NH': slice(45, 90), 'SH': slice(-90, -45)}
    reference_key = 'era_500' if var_name == 'sf_eff' else 'cloudsat_500'

    frames = {hemi: dict() for hemi in lat_bands}
    scores = {hemi: dict() for hemi in lat_bands}
    coeffs = {hemi: dict() for hemi in lat_bands}

    for model in ratios['500'].model.values:
        for hemi in lat_bands:
            frames[hemi][model] = dict()
            scores[hemi][model] = dict()
            coeffs[hemi][model] = dict()

        for season in ratios['500'].season.values:
            # North first, then south, for each season.
            for hemi, lat_band in lat_bands.items():
                (frames[hemi][model][season],
                 scores[hemi][model][season],
                 coeffs[hemi][model][season]) = calc_linear_regression_hemisphere(
                    ratios[reference_key],
                    ratios['500'],
                    season,
                    model,
                    lat_band, var_name)

    return (frames['NH'], scores['NH'], coeffs['NH'],
            frames['SH'], scores['SH'], coeffs['SH'])


In [None]:
def plt_scatter_obs_model(ratios, var_name, dict_label, fig_dir):
    """Draw reference-vs-model scatter plots with regression lines, one row per model.

    The regression results come from ``calc_scatter_obs_model(ratios, var_name)``.
    The figure has one row per model and four columns that are filled with the
    seasons of each model. Every panel shows the 1:1 line, the NH and SH point
    clouds and their fitted lines; the legend text lists slope, intercept and
    r2 of both fits.

    Parameters
    ----------
    ratios : mapping
        Passed unchanged to ``calc_scatter_obs_model``.
    var_name : str
        Diagnostic to plot. For 'sf_eff' the x axis is labelled 'ERA5', both
        axes are limited to 0..10 and the 'ERA5' entry is removed from the
        models; otherwise the x axis is labelled 'CloudSat' and the limits
        come from ``dict_label``.
    dict_label : mapping
        'cb_label' is used as figure title, 'vmin'/'vmax' as axis limits.
    fig_dir : str
        Output directory; the file name is appended by string concatenation.

    Uses the notebook-level names ``plt``, ``cm`` and ``fig_label``.
    """
    
    df_NH, R2_NH, d_NH, df_SH, R2_SH, d_SH = calc_scatter_obs_model(ratios, var_name)
    
    # Rows are driven by the keys of df_NH, so removing 'ERA5' here is enough
    # to keep it out of the figure even though df_SH still contains it.
    if var_name == 'sf_eff':
        df_NH.pop('ERA5')
        # df_SH.pop('ERA5')
    
    f, axsm = plt.subplots(nrows=len(df_NH.keys()), ncols=4, sharex=True, sharey=True, figsize=[15, 37.5])
    # Three colours sampled from the colormap; index 0 is used for NH, index 2 for SH.
    colors = cm.hawaii(range(0, 256, int(256 / 3) + 1))

    f.suptitle(dict_label['cb_label'], fontsize=15)

    # Panel letters; zip stops at whichever of axes / fig_label is shorter.
    for ax, k in zip(axsm.flatten(), range(len(fig_label))):
        ax.text(0.05, 0.95, f'{fig_label[k]}', fontweight='bold', horizontalalignment='left', verticalalignment='top', transform=ax.transAxes)
        

    for i, model in zip(range(len(df_NH)), df_NH.keys()):
        # NOTE(review): the slice takes five axes (one more than the row has);
        # zip truncates it to the number of seasons, so with four seasons only
        # row i is touched.
        for ax, season in zip(axsm.flatten()[i*4: (i+1)*4+1], df_NH[model].keys()):
                
                
                # Season titles only on the top row.
                if i == 0:
                    ax.set_title(f'season = {season}')
                    
                # 1:1 reference line.
                ax.axline((0,0), slope=1, color='black',linestyle='--', )
                if var_name != 'sf_eff':
                    ax.set_xlim([dict_label['vmin'],dict_label['vmax']])
                    ax.set_ylim([dict_label['vmin'],dict_label['vmax']])
                if var_name == 'sf_eff':
                    ax.set_xlim([0,10])
                    ax.set_ylim([0,10])
                
                # Northern hemisphere: points, then the fitted line ('Treg' column).
                df_NH[model][season].plot.scatter(ax = ax, x = 'reference',y=model, label = 'NH', color = colors[0].reshape(1,-1), alpha=0.1, legend=False)
                df_NH[model][season].plot(x='reference', y='Treg',color=colors[0].reshape(1,-1),ax=ax, label= f"y$_N$ = {d_NH[model][season][0].round(2)} x + {d_NH[model][season][1].round(2)}" 
                    "\n" 
                    f"r$^2$ = {R2_NH[model][season].round(2)}")
                
                # Southern hemisphere: same two layers in the second colour.
                df_SH[model][season].plot.scatter(ax = ax, x = 'reference',y=model, label = 'SH', color = colors[2].reshape(1,-1), alpha=0.1, legend=False)
                df_SH[model][season].plot(x='reference', y='Treg',color=colors[2].reshape(1,-1),ax=ax, label= f"y$_S$ = {d_SH[model][season][0].round(2)} x + {d_SH[model][season][1].round(2)}" 
                    "\n" 
                    f"r$^2$ = {R2_SH[model][season].round(2)}")
                
                ax.legend(loc='lower right')
                ax.grid(True)
                ax.set_ylabel(model)
                if var_name != 'sf_eff':
                    ax.set_xlabel('CloudSat')
                if var_name == 'sf_eff':
                    ax.set_xlabel('ERA5')
            
                
    f.tight_layout(pad=0., w_pad=0., h_pad=0.)  
    # Leave room at the top for the figure title.
    f.subplots_adjust(top=0.96)
    
    figname = f'{var_name}_season_scatter_2007_2010.png'
    plt.savefig(fig_dir + figname, format='png', bbox_inches='tight', transparent=False)




In [None]:
# One scatter/regression figure per variable listed in `d`.
for var_name in d:
    plt_scatter_obs_model(
        ratios, var_name, d[var_name], FIG_DIR
    )

In [None]:
# Collect the seasonal r2 scores of every variable into one table per
# hemisphere: rows are models, columns are seasons.
df_NH = dict()
df_SH = dict()
df = dict()
for var_name in d.keys():
    _, R2_var_NH, d_var_NH, _, R2_var_SH, d_var_SH = calc_scatter_obs_model(ratios, var_name)

    # The season labels are taken from the regression results themselves, so
    # that every column is labelled with the season it was computed for,
    # whatever order the season coordinate happens to be in.
    df_NH[var_name] = pd.DataFrame(
        {model: np.array(list(scores.values())) for model, scores in R2_var_NH.items()},
        index=[str(season) for season in next(iter(R2_var_NH.values()), {})],
    ).transpose()
    df_SH[var_name] = pd.DataFrame(
        {model: np.array(list(scores.values())) for model, scores in R2_var_SH.items()},
        index=[str(season) for season in next(iter(R2_var_SH.values()), {})],
    ).transpose()

    # df_NH[var_name][""] = np.nan
    # df[var_name] = pd.concat([df_NH[var_name], df_SH[var_name]],axis=1)

In [None]:
# For sf_eff the regression reference is ERA5 itself, so the ERA5 row is not a
# model score: blank it in every season column.
# A single .loc row assignment is used instead of df[col].loc[row] = ...,
# because chained assignment may write to a temporary copy and leave the
# table unchanged.
df_NH['sf_eff'].loc['ERA5'] = np.nan
df_SH['sf_eff'].loc['ERA5'] = np.nan

In [None]:
def plt_R2_heatmap_season(df_NH, df_SH, dict_label, fig_dir):
    """Show the seasonal r2 tables of all variables as a row of heat maps.

    For every variable two neighbouring panels are drawn: the table from
    ``df_NH`` (titled "Arctic") on the even-numbered axes and the table from
    ``df_SH`` (titled "Antarctic") on the odd-numbered axes. All panels share
    one discrete colour scale with ten bins between 0 and 1.

    Parameters
    ----------
    df_NH, df_SH : mapping of str to pandas.DataFrame
        r2 tables per variable name; the frame index is used for the y tick
        labels and the columns for the x tick labels.
    dict_label : mapping
        Its keys select and order the variables of both panel series;
        ``dict_label[var]['cb_label']`` is written below each Arctic panel.
    fig_dir : str
        Output directory; 'R2_season_2007_2010.png' is appended by string
        concatenation.

    Uses the notebook-level names ``cm``, ``plt``, ``np``, ``fig_label``,
    ``LinearSegmentedColormap`` and ``BoundaryNorm``.
    """
    # Rebuild the colormap from its own colour list so it can be paired with a
    # BoundaryNorm below.
    base_cmap = cm.hawaii_r
    cmaplist = [base_cmap(i) for i in range(base_cmap.N)]
    cmap = LinearSegmentedColormap.from_list(
        'Custom cmap', cmaplist, base_cmap.N)

    # Ten equally wide bins between 0 and 1.
    bounds = np.linspace(0, 1, 11)
    norm = BoundaryNorm(bounds, cmap.N)

    f, axsm = plt.subplots(nrows=1, ncols=len(df_NH)*2, sharex=True, sharey=True, figsize=[15, 5])
    panels = axsm.flatten()

    # Arctic panels: every second axis starting with the first.
    for ax, var_name, k in zip(panels[::2], dict_label.keys(), fig_label[::2]):
        im = ax.imshow(df_NH[var_name], cmap=cmap, norm=norm)
        ax.set_title(f'{k} Arctic', )

        # The y axis is shared, so the model names are only set on panel 'a)'.
        if k == 'a)':
            ax.set(yticks=range(len(df_NH[var_name].index)), yticklabels=df_NH[var_name].index)

        # Horizontal offset of the variable label. Any variable other than
        # FLCC/FsLCC uses 0.55, so the offset is always defined.
        x_position = 0.75 if var_name in ('FLCC', 'FsLCC') else 0.55

        plt.figtext(x_position,-0.11, dict_label[var_name]['cb_label'], fontweight='bold', horizontalalignment='left', verticalalignment='center', transform=ax.transAxes)
        ax.set(xticks=range(len(df_NH[var_name].columns)), xticklabels=df_NH[var_name].columns,)

    # Antarctic panels: every second axis starting with the second. The
    # variables come from the dict_label argument, not from a notebook global.
    for ax, var_name, k in zip(panels[1::2], dict_label.keys(), fig_label[1::2]):
        im = ax.imshow(df_SH[var_name], cmap=cmap, norm=norm)
        ax.set_title(f'{k} Antarctic')
        ax.set(xticks=range(len(df_SH[var_name].columns)), xticklabels=df_SH[var_name].columns,)

    # add space for colour bar
    f.subplots_adjust(right=0.85)
    cbar_ax = f.add_axes([1.01, 0.15, 0.0125, 0.7])

    f.colorbar(im, cax=cbar_ax, cmap=cmap, norm=norm,
        spacing='proportional', ticks=bounds, boundaries=bounds, label='r$^2$-values',shrink=0.5)

    f.tight_layout(pad=0., w_pad=0.4, h_pad=0.)
    figname = 'R2_season_2007_2010.png'
    plt.savefig(fig_dir + figname, format='png', bbox_inches='tight', transparent=False)


In [None]:
# Heat maps of the seasonal r2 tables built above, labelled from `d`.
plt_R2_heatmap_season(
    df_NH=df_NH,
    df_SH=df_SH,
    dict_label=d,
    fig_dir=FIG_DIR,
)

In [None]:
def plt_spatial_season_no_diff(hemisphere,ds, var_name, dict_label, fig_dir,lat_extent):
    """Plot seasonal polar maps of CloudSat, ERA5 and the CMIP6 mean (no difference rows).

    The figure has four columns, filled with the seasons of the CloudSat
    field, and one row per data set: CloudSat, ERA5 and CMIP6 for ordinary
    variables, ERA5 and CMIP6 only when ``var_name`` is 'sf_eff'. Each panel
    carries a text box with the corresponding hemispheric seasonal mean.

    Parameters
    ----------
    hemisphere : {'NH', 'SH'}
        Selects the polar stereographic projection and the latitude band
        that is plotted (45..90 or -90..-45).
    ds : mapping
        Must contain 'cloudsat_500', 'era_500' and 'cmip_500'. The map fields
        are read from ``<var_name>_season``. The hemispheric means are read
        from ``<var_name>_season_mean`` (CloudSat) and, for ERA5/CMIP6, from
        ``<var_name>_season_cs_mean``; for 'sf_eff' ERA5 uses
        ``_season_cmip_mean`` and CMIP6 ``_season_era_mean``.
    var_name : str
        Diagnostic to plot; also part of the output file name.
    dict_label : mapping
        'levels', 'vmin', 'vmax' control the colour scale, 'cb_label' is the
        colour-bar label.
    fig_dir : str
        Output directory; the file name is appended by string concatenation.
    lat_extent : number
        Equatorward latitude limit of the map extent (its sign is flipped for
        the southern hemisphere).

    Uses the notebook-level names ``plt``, ``ccrs``, ``cy``, ``np`` and
    ``fig_label``.
    """
    is_sf_eff = var_name == 'sf_eff'

    def _mask_zeros(field):
        # Cells that are exactly zero are treated as missing.
        return field.where(field != 0., other=np.nan)

    val1 = _mask_zeros(ds['cloudsat_500'][var_name + '_season'])
    val1_mean = ds['cloudsat_500'][var_name + '_season_mean']

    era_field = _mask_zeros(ds['era_500'][var_name + '_season'])
    cmip_field = _mask_zeros(ds['cmip_500'][var_name + '_season'])

    if is_sf_eff:
        era_mean = ds['era_500'][var_name + '_season_cmip_mean']
        cmip_mean = ds['cmip_500'][var_name + '_season_era_mean']
    else:
        era_mean = ds['era_500'][var_name + '_season_cs_mean']
        cmip_mean = ds['cmip_500'][var_name + '_season_cs_mean']

    if 'model' in cmip_field.coords:
        # Collapse the ensemble to its multi-model mean.
        cmip_field = _mask_zeros(cmip_field.mean('model', skipna=True, keep_attrs=False))
        cmip_mean = cmip_mean.mean('model', skipna=True, keep_attrs=False)

    projections = {'NH': ccrs.NorthPolarStereo(central_longitude=0.0, globe=None),
                   'SH': ccrs.SouthPolarStereo(central_longitude=0.0, globe=None)}
    projection = projections[hemisphere]

    if is_sf_eff:
        list1 = [era_field, cmip_field]
        list_glob = [era_mean, cmip_mean]
        model_labels = ['ERA5 (500km)', 'CMIP6$_{mean}$ (500km)', ]
        nrows, figsize = 2, [12, 6]
    else:
        list1 = [val1, era_field, cmip_field]
        list_glob = [val1_mean, era_mean, cmip_mean]
        model_labels = ['CloudSat (500km)', 'ERA5 (500km)', 'CMIP6$_{mean}$ (500km)', ]
        nrows, figsize = 3, [12, 9]
    f, axsm = plt.subplots(nrows=nrows, ncols=4, subplot_kw={'projection': projection}, figsize=figsize, sharex=True, sharey=True)

    levels = dict_label['levels']
    vmin = dict_label['vmin']
    vmax = dict_label['vmax']

    coast = cy.feature.NaturalEarthFeature(category='physical', scale='110m', facecolor='none', name='coastline')
    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)

    # Row labels, written vertically left of the first column.
    for ax, row in zip(axsm[:, 0], model_labels):
        ax.text(-0.07, 0.55, row, va='bottom', ha='center', rotation='vertical', rotation_mode='anchor', transform=ax.transAxes, fontweight='bold')

    # Map decoration and panel letters; zip stops at the shorter of the two.
    for ax, panel_letter in zip(axsm.flatten(), fig_label):
        if hemisphere == 'NH':
            ax.set_extent([-180, 180, 90, lat_extent], ccrs.PlateCarree())
        elif hemisphere == 'SH':
            ax.set_extent([-180, 180, -90, -1*lat_extent], ccrs.PlateCarree())
        ax.add_feature(coast, alpha=0.5)
        gl = ax.gridlines(draw_labels=True)
        gl.top_labels = False
        gl.right_labels = False
        ax.text(0.05, 0.95, f'{panel_letter}', fontweight='bold', horizontalalignment='left', verticalalignment='top', transform=ax.transAxes)

    # The seasons come from the data set that was passed in (previously a
    # notebook-global 250 km data set was read here).
    seasons = val1.season

    for ax, season in zip(axsm.flatten()[:4], seasons):
        ax.set(title=f'season = {season.values}')

    for i, (value,hemi_glob) in enumerate(zip(list1,list_glob)):
        # cmap = cm.hawaii_r   # alternative colormap, not in effect
        cmap = 'rainbow'

        # Row i of the axes grid holds exactly the four season panels.
        for ax, season in zip(axsm[i], seasons):
            # Only the top row keeps a season title.
            sub_title = f'season = {season.values}' if i == 0 else ""

            val = value.sel(lat=slice(45,90)) if hemisphere == 'NH' else value.sel(lat=slice(-90,-45))
            cf = val.sel(season=season).plot(ax=ax, transform=ccrs.PlateCarree(), extend =None, add_colorbar=False, cmap=cmap, levels=levels, vmin=vmin, vmax=vmax)

            # Hemispheric seasonal mean shown in the text box of the panel.
            region_mean = hemi_glob.sel(hemisphere=hemisphere, season=season).values.round(2)
            if is_sf_eff:
                ax.text(0.05, 0.125, f'{region_mean:.1f}'+'h$^{-1}$',transform=ax.transAxes, fontsize=11, verticalalignment='top',bbox=props)
            else:
                ax.text(0.05, 0.125, f'{region_mean:.0f}%',transform=ax.transAxes, fontsize=11, verticalalignment='top',bbox=props)
            ax.set_title(sub_title)

        # A single colour bar for the whole figure, taken from the second row.
        if i == 1:
            cbaxes = f.add_axes([1.02, 0.25, 0.0125, 0.45])
            cb_label = dict_label['cb_label']
            plt.colorbar(cf, cax=cbaxes, shrink=0.5,extend=None, orientation='vertical', label=cb_label)

    plt.tight_layout(pad=0., w_pad=0., h_pad=0.)

    figname = f'{var_name}_CS_ERA5_CMIP6_season_{hemisphere}_2007_2010.png'
    plt.savefig(fig_dir + figname, format='png', bbox_inches='tight', transparent=False)

In [None]:
# One map figure per variable in `d` and per hemisphere, with 45 as lat_extent.
for var_name in d:
    for hemisphere in ('NH', 'SH'):
        plt_spatial_season_no_diff(
            hemisphere, ratios, var_name, d[var_name], FIG_DIR, 45
        )

In [None]:
def plt_spatial_season_FLCC_FsLCC(hemisphere,ds, dict_label, fig_dir,lat_extent):
    """Plot seasonal polar maps of CloudSat FLCC, FsLCC and their difference.

    The figure has three rows ('FLCC', 'FsLCC', 'FLCC - FsLCC') and four
    columns that are filled with the seasons of ``ds['cloudsat_500']``. Each
    panel carries a text box with the hemispheric seasonal mean (for the
    third row: the difference of the two means).

    Parameters
    ----------
    hemisphere : {'NH', 'SH'}
        Selects the polar stereographic projection and the latitude band
        that is plotted (45..90 or -90..-45).
    ds : mapping
        Only ``ds['cloudsat_500']`` is used; the variables 'FLCC_season',
        'FsLCC_season', 'FLCC_season_mean' and 'FsLCC_season_mean' are read.
    dict_label : mapping
        'levels', 'vmin', 'vmax' set the colour scale of the first two rows,
        'diff_levels', 'diff_vmin', 'diff_vmax' that of the difference row.
    fig_dir : str
        Output directory; the file name is appended by string concatenation.
    lat_extent : number
        Equatorward latitude limit of the map extent (its sign is flipped for
        the southern hemisphere).

    Uses the notebook-level names ``plt``, ``ccrs``, ``cy``, ``cm``, ``np``
    and ``fig_label``.
    """
    # Cells that are exactly zero are treated as missing; the second where()
    # keeps the already-missing cells missing.
    val1_res  = ds['cloudsat_500']['FLCC' + '_season']
    val1_res  = val1_res.where(val1_res != 0., other=np.nan)
    val1_res  = val1_res.where(~np.isnan(val1_res))
    val1_res_mean = ds['cloudsat_500']['FLCC' + '' +'_season_mean']
    
    
    val2_res = ds['cloudsat_500']['FsLCC' + '_season']
    val2_res = val2_res.where(val2_res != 0., other=np.nan)
    val2_res = val2_res.where(~np.isnan(val2_res))
    val2_res_mean = ds['cloudsat_500']['FsLCC' + '' + '_season_mean']
    
    model_labels = ['FLCC', 'FsLCC', 'FLCC - FsLCC', ]

    
    # Difference only where both fields have data.
    diff_res = val1_res.where(~np.isnan(val2_res)) - val2_res.where(~np.isnan(val1_res))
    list1 = [val1_res, val2_res, (diff_res)]
    list_glob = [val1_res_mean, val2_res_mean, val1_res_mean - val2_res_mean]
    
    projections = {'NH': ccrs.NorthPolarStereo(central_longitude=0.0, globe=None),
    'SH': ccrs.SouthPolarStereo(central_longitude=0.0, globe=None)}

    projection = projections[hemisphere]



    f, axsm = plt.subplots(nrows=3, ncols=4, subplot_kw={'projection': projection}, figsize=[12, 9], sharex=True, sharey=True)
    coast = cy.feature.NaturalEarthFeature(category='physical', scale='110m', facecolor='none', name='coastline')

    rows = model_labels
    # Style of the text box that shows the hemispheric mean.
    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)


    # Row labels, written vertically left of the first column.
    for ax, row in zip(axsm[:, 0], rows):
        ax.text(-0.07, 0.55, row, va='bottom', ha='center', rotation='vertical', rotation_mode='anchor', transform=ax.transAxes, fontweight='bold')

    # Map decoration and panel letters; zip stops at whichever of axes /
    # fig_label is shorter.
    for ax, k in zip(axsm.flatten(), range(len(fig_label))):
        if hemisphere == 'NH':
            ax.set_extent([-180, 180, 90, lat_extent], ccrs.PlateCarree())
        elif hemisphere == 'SH':
            ax.set_extent([-180, 180, -90, -1*lat_extent], ccrs.PlateCarree())
        ax.add_feature(coast, alpha=0.5)
        gl = ax.gridlines(draw_labels=True)
        gl.top_labels = False
        gl.right_labels = False
        ax.text(0.05, 0.95, f'{fig_label[k]}', fontweight='bold', horizontalalignment='left', verticalalignment='top', transform=ax.transAxes)


    # for ax, k, season in zip(axsm.flatten()[:4], range(len(fig_label)), val1_res.season):
    #     ax.set(title=f'season = {season.values}')

    # The colour-scale settings are carried from one row to the next: rows 0
    # and 1 share cmap/levels and the colour-bar axes created at i == 0; row 2
    # replaces all of them with the difference settings.
    for i, (value,hemi_glob) in enumerate(zip(list1,list_glob)):
        if i == 0 or i == 1:
            cmap = cm.hawaii_r
            levels = dict_label['levels']
            vmin = dict_label['vmin']
            vmax = dict_label['vmax']
        if i == 0:
            cbaxes = f.add_axes([1.0, 0.41, 0.0125, 0.45])
            cb_label = '(%)'
        elif i == 1:
            sub_title = ""
        elif i == 2:
            cmap = cm.bam
            sub_title = ""
            levels = dict_label['diff_levels']
            vmin = dict_label['diff_vmin']
            vmax = dict_label['diff_vmax']
            
            cbaxes = f.add_axes([1.0, 0.024, 0.0125, 0.3])
            cb_label = 'FLCC - FsLCC (%)'
            
            
        # NOTE(review): the slice takes five axes (one more than the row has);
        # zip truncates it to the number of seasons, so with four seasons only
        # row i is touched.
        for ax, season in zip(axsm.flatten()[i*4: (i+1)*4+1], ds[f'cloudsat_500'].season):
            # Only the top row gets a season title.
            if i == 0:
                sub_title =f'season = {season.values}'

            val = value.sel(lat=slice(45,90)) if hemisphere == 'NH' else value.sel(lat=slice(-90,-45))
            cf = (val.where(~np.isnan(val))).sel(season=season).plot(ax=ax, transform=ccrs.PlateCarree(), extend =None, add_colorbar=False, cmap=cmap, levels=levels, vmin=vmin, vmax=vmax)

            
            # Hemispheric seasonal mean shown in the text box of the panel.
            val2 = hemi_glob.sel(hemisphere=hemisphere, season=season).values.round(2)
            
            ax.text(0.05, 0.125, f'{val2:.0f}%',transform=ax.transAxes, fontsize=11, verticalalignment='top',bbox=props)
            ax.set_title(sub_title)
            
            
        # Colour bars are drawn after row 1 (shared by rows 0 and 1) and after
        # row 2 (difference), each from the last panel plotted in that row.
        if i == 1 or i == 2:
            cbar = plt.colorbar(cf, cax=cbaxes, shrink=0.5,extend=None, orientation='vertical', label=cb_label)
        
        
            
            
    plt.tight_layout(pad=0., w_pad=0., h_pad=0.)  

    figname = f'FLCC_FsLCC_season_{hemisphere}_2007_2010.png'
    plt.savefig(fig_dir + figname, format='png', bbox_inches='tight', transparent=False)
    
    


In [None]:
# FLCC / FsLCC comparison maps for both hemispheres, using the FLCC colour
# settings from `d` and 45 as lat_extent.
for hemisphere in ('NH', 'SH'):
    plt_spatial_season_FLCC_FsLCC(
        hemisphere, ratios, d['FLCC'], FIG_DIR, 45
    )


In [None]:
def plt_spatial_season_for_model(hemisphere, ds, var_name, dict_label, res, model, fig_dir, lat_extent):
    """Plot seasonal polar maps of a reference product, one model and their difference.

    The figure has three rows and four columns (one column per season):
    row 1 is the reference (ERA5 when ``var_name == 'sf_eff'``, CloudSat
    otherwise), row 2 is ``model`` and row 3 is reference minus model.
    On the difference row, grid cells whose absolute difference is below the
    95 % confidence interval of the multi-model spread are hatched (skipped
    for ``model == 'ERA5'`` or when no model spread is available).
    The figure is saved as ``{model}_{var_name}_season_{hemisphere}_2007_2010.png``.

    Parameters
    ----------
    hemisphere : str
        ``'NH'`` or ``'SH'``; selects the polar stereographic projection.
    ds : mapping
        Indexed with ``f'{res}'`` (models) and ``f'cloudsat_{res}'`` /
        ``f'era_{res}'`` (reference); each entry must provide
        ``{var_name}_season`` plus the matching ``*_season_mean``,
        ``*_season_cs_mean`` or ``*_season_era_mean`` variable.
    var_name : str
        Name of the ratio variable to plot.
    dict_label : mapping
        Plot settings; the keys ``levels``, ``vmin``, ``vmax``,
        ``diff_levels``, ``diff_vmin``, ``diff_vmax`` and ``cb_label`` are read.
    res : int or str
        Resolution tag used to build the dataset keys and the row labels.
    model : str
        Value of the ``model`` coordinate to plot.
    fig_dir : str
        Directory the PNG is written to.
    lat_extent : float
        Positive latitude (degrees) bounding the map extent towards the equator.

    Raises
    ------
    KeyError
        If ``hemisphere`` is neither ``'NH'`` nor ``'SH'``.

    Notes
    -----
    Relies on the notebook-level names ``fig_label``, ``plt``, ``np``,
    ``ccrs``, ``cy``, ``cm``, ``Patch`` and ``os``.
    """

    def _zeros_to_nan(field):
        # Exact zeros are blanked out so that they are not drawn.
        return field.where(field != 0., other=np.nan)

    # sf_eff is compared against ERA5, every other variable against CloudSat.
    if var_name == 'sf_eff':
        ref_name, ref_key, mean_suffix = 'ERA5', f'era_{res}', '_season_era_mean'
    else:
        ref_name, ref_key, mean_suffix = 'CloudSat', f'cloudsat_{res}', '_season_cs_mean'

    val1_res = _zeros_to_nan(ds[ref_key][var_name + '_season'])
    val1_res_mean = ds[ref_key][var_name + '_season_mean']
    val2_res = _zeros_to_nan(ds[f'{res}'][var_name + '_season'])
    val2_res_mean = ds[f'{res}'][var_name + mean_suffix]

    # The difference row is labelled with the reference that is actually used.
    model_labels = [f'{ref_name} ({res}km)', f'{model} ({res}km)', f'{ref_name} - {model}']

    # Stays None when no multi-model spread can be computed, so the hatching
    # step below is skipped instead of failing on an unbound name.
    CI = None
    if 'model' in val2_res.coords:
        if model != 'ERA5':
            # Spread over entries 1..10 of the model dimension (entry 0 is skipped).
            ensemble = val2_res.isel(model=slice(1, 11))
            std = _zeros_to_nan(ensemble.std(dim='model', skipna=True, keep_attrs=False))
            # Standard error uses the number of models that entered the std.
            SE = std / np.sqrt(ensemble.sizes['model'])
            CI = SE * 1.96  # 95% confidence level (2.58 * SE would give 99%)

        # Keep only the requested model.
        val2_res = _zeros_to_nan(val2_res.sel(model=model))
        val2_res_mean = val2_res_mean.sel(model=model)

    diff_res = val1_res.where(~np.isnan(val2_res)) - val2_res.where(~np.isnan(val1_res))
    list1 = [val1_res, val2_res, diff_res]
    list_glob = [val1_res_mean, val2_res_mean, val1_res_mean - val2_res_mean]

    projections = {'NH': ccrs.NorthPolarStereo(central_longitude=0.0, globe=None),
                   'SH': ccrs.SouthPolarStereo(central_longitude=0.0, globe=None)}
    projection = projections[hemisphere]
    density = 4  # number of '/' characters in the hatch pattern

    f, axsm = plt.subplots(nrows=3, ncols=4, subplot_kw={'projection': projection},
                           figsize=[12, 9], sharex=True, sharey=True)
    axes = axsm.flatten()
    coast = cy.feature.NaturalEarthFeature(category='physical', scale='110m',
                                           facecolor='none', name='coastline')
    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)

    # Row labels on the left of the first column.
    for ax, row in zip(axsm[:, 0], model_labels):
        ax.text(-0.07, 0.55, row, va='bottom', ha='center', rotation='vertical',
                rotation_mode='anchor', transform=ax.transAxes, fontweight='bold')

    # Map extent, coastlines, grid lines and panel letter for every panel.
    for ax, panel_label in zip(axes, fig_label):
        if hemisphere == 'NH':
            ax.set_extent([-180, 180, 90, lat_extent], ccrs.PlateCarree())
        elif hemisphere == 'SH':
            ax.set_extent([-180, 180, -90, -1 * lat_extent], ccrs.PlateCarree())
        ax.add_feature(coast, alpha=0.5)
        gl = ax.gridlines(draw_labels=True)
        gl.top_labels = False
        gl.right_labels = False
        ax.text(0.05, 0.95, f'{panel_label}', fontweight='bold', horizontalalignment='left',
                verticalalignment='top', transform=ax.transAxes)

    for ax, season in zip(axes[:4], val1_res.season):
        ax.set(title=f'season = {season.values}')

    seasons = ds[f'cloudsat_{res}'].season
    # Data are always taken poleward of 45 degrees; lat_extent only sets the map extent.
    lat_band = slice(45, 90) if hemisphere == 'NH' else slice(-90, -45)

    for i, (value, hemi_glob) in enumerate(zip(list1, list_glob)):
        is_diff_row = i == 2
        if is_diff_row:
            cmap = cm.bam
            levels = dict_label['diff_levels']
            vmin = dict_label['diff_vmin']
            vmax = dict_label['diff_vmax']
            cbaxes = f.add_axes([0.92, 0.13, 0.0125, 0.2])
        else:
            cmap = cm.hawaii_r
            levels = dict_label['levels']
            vmin = dict_label['vmin']
            vmax = dict_label['vmax']
            if i == 0:
                # One colorbar axis shared by the reference and the model row.
                cbaxes = f.add_axes([0.92, 0.41, 0.0125, 0.45])

        val = value.sel(lat=lat_band)
        # Exactly the four panels of row i.
        for ax, season in zip(axes[i * 4:(i + 1) * 4], seasons):
            cf = val.sel(season=season).plot(ax=ax, transform=ccrs.PlateCarree(), extend=None,
                                             add_colorbar=False, cmap=cmap, levels=levels,
                                             vmin=vmin, vmax=vmax)

            # Hemispheric mean shown in the box at the lower left of the panel.
            val2 = hemi_glob.sel(hemisphere=hemisphere, season=season).values.round(2)
            if var_name == 'sf_eff':
                box_text = f'{val2:.1f}' + 'h$^{-1}$'
            else:
                box_text = f'{val2:.0f}%'
            ax.text(0.05, 0.125, box_text, transform=ax.transAxes, fontsize=11,
                    verticalalignment='top', bbox=props)
            # Only the first row carries the season as title.
            ax.set_title(f'season = {season.values}' if i == 0 else '')

            # Hatch cells where the difference is smaller than the confidence
            # interval, i.e. not significant with respect to the model spread.
            if is_diff_row and CI is not None:
                (val.where(np.abs(val) < CI)).sel(season=season).plot.contourf(
                    ax=ax, transform=ccrs.PlateCarree(), colors='none',
                    hatches=[density * '/', density * '/'], add_colorbar=False)
                ax.set_title('')

        # Both colorbars use dict_label['cb_label']; the row labels identify
        # which row is the difference.
        if i in (1, 2):
            plt.colorbar(cf, cax=cbaxes, shrink=0.5, extend=None, orientation='vertical',
                         label=dict_label['cb_label'])

        if is_diff_row:
            bb = [0.95, 0.09, 0.0125, 0.05]
            axes[i].legend(
                [Patch(facecolor='none', edgecolor='k', hatch=density * '/', label='CI < 95%')],
                ['CI < 95%'],
                bbox_to_anchor=bb, loc=8, ncol=1, borderaxespad=0, fancybox=True,
                bbox_transform=f.transFigure,
            )

    plt.tight_layout(pad=0., w_pad=0., h_pad=0.)

    figname = f'{model}_{var_name}_season_{hemisphere}_2007_2010.png'
    # os.path.join also works when fig_dir has no trailing separator.
    plt.savefig(os.path.join(fig_dir, figname), format='png', bbox_inches='tight', transparent=False)
  
  


In [None]:
# for var_name in d.keys():
#         for model in ratios['500']['model'].values:
#             for hemisphere in ['NH', 'SH']:
#                 if model == 'CanESM5' or model == 'IPSL-CM5A2-INCA':
#                     res = 500
#                 else:
#                     res = 250
#                 plt_spatial_season_for_model(hemisphere,ratios, var_name, d[var_name], res, model, FIG_DIR,45)

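### Restrict the analysis to the area used by McIlhattan et al. (2017)
Only latitudes poleward of 66.91° in both hemispheres (roughly the Arctic and Antarctic circles) are kept.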

In [None]:
# Figures of the McIlhattan et al. (2017) sub-region go into their own sub-folder.
FIG_DIR_mci = os.path.join(FIG_DIR, 'McIlhattan/')

# Create the folder together with any missing parents. An existing folder is
# fine; any other failure (e.g. missing permissions) is reported here instead
# of surfacing later when a figure is saved.
os.makedirs(FIG_DIR_mci, exist_ok=True)

In [None]:
# Keep only the two polar caps (south of -66.91 and north of 66.91 degrees)
# of every dataset.
# NOTE(review): `ds_mci` is the same object as `ds`, not a copy -- the loop
# therefore overwrites the entries of `ds` in place, and every later use of
# `ds` sees the polar subsets only.
ds_mci = ds
for var in ds_mci:
    for model in ds_mci[var]:
        _south_cap = ds[var][model].sel(lat=slice(-90, -66.91))
        _north_cap = ds[var][model].sel(lat=slice(66.91, 90))
        ds_mci[var][model] = xr.concat([_south_cap, _north_cap], dim='lat')

In [None]:
# Build, per product/model, one merged dataset with the five ratios
# FLCC, FsLCC, FoP, FoS and sf_eff from the polar subsets in ds_mci.
ratios_mci = {}
for model in list_models:
    _orig = ds_mci['orig'][model]
    _lcc_2t = ds_mci['lcc_2t'][model]
    _lcc_sf = ds_mci['lcc_sf'][model]
    _lcc_2t_sf = ds_mci['lcc_2t_sf'][model]
    _lcc_2t_days = ds_mci['lcc_2t_days'][model]

    if model in ('cloudsat_250', 'cloudsat_500'):
        # The CloudSat daily data get an all-NaN 'lwp' on the grid of
        # 'sf_avg_lcc_snow' so that the sf_eff call below has a second operand.
        _template = _lcc_2t_days['sf_avg_lcc_snow']
        _lcc_2t_days['lwp'] = xr.DataArray(np.nan, coords=_template.coords, dims=_template.dims)

        # (numerator, denominator, statistic, output name); CloudSat ratios are
        # built from summed counts.
        _ratio_specs = [
            (_orig['n_lcc'], _orig['n_obs'], 'sum', 'FLCC'),
            (_lcc_2t['n_lcc'], _orig['n_obs'], 'sum', 'FsLCC'),
            (_lcc_sf['n_sf_lcc_snow'], _orig['n_lcc'], 'sum', 'FoP'),
            (_lcc_2t_sf['n_sf_lcc_snow'], _lcc_2t['n_lcc'], 'sum', 'FoS'),
            (_lcc_2t_days['sf_avg_lcc_snow'], _lcc_2t_days['lwp'], 'mean', 'sf_eff'),
        ]
    else:
        # 'tas' serves as denominator because it has values everywhere the data
        # are valid, while 'lwp' or 'prsn' might not have values.
        _ratio_specs = [
            (ds_mci['lcc'][model]['lwp'], _orig['tas'], 'count', 'FLCC'),
            # sLCC frequency compared to all observations when T<0C
            (_lcc_2t['lwp'], _orig['tas'], 'count', 'FsLCC'),
            (_lcc_sf['prsn'], _orig['tas'], 'count', 'FoP'),
            # relative frequency of snowfall from liquid containing clouds
            (_lcc_2t_sf['prsn'], _lcc_2t['lwp'], 'count', 'FoS'),
            # relative snowfall (precipitation) efficiency
            (_lcc_2t_days['prsn'], _lcc_2t_days['lwp'], 'mean', 'sf_eff'),
        ]

    # Every ratio is weighted with the grid-cell area of the original dataset.
    ratios_mci[model] = xr.merge(objects=[
        get_ratios_season_month(var1=numerator, var2=denominator, stats=statistic,
                                out_var=name, weights=_orig['areacella'])
        for numerator, denominator, statistic, name in _ratio_specs
    ])

In [None]:
# Apply get_only_valid_values for every variable, first with the '250' and
# then with the '500' tag.
for var_name in d:
    for _res_tag in ('250', '500'):
        ratios_mci = get_only_valid_values(ratios_mci, _res_tag, var_name)

In [None]:
# Calculate weighted averages: for every ratio variable store the three values
# returned by weighted_average as '<name>_mean', '<name>_std' and '<name>_stats'.
for model in ratios_mci:
    # Grid-cell areas come from the polar subset, like everything else in
    # this section.
    weights = ds_mci['2t'][model]['areacella']
    # Iterate over a snapshot of the names: new variables are added to the
    # dataset inside the loop.
    for ratio_name in list(ratios_mci[model].keys()):
        (ratios_mci[model][ratio_name + '_mean'],
         ratios_mci[model][ratio_name + '_std'],
         ratios_mci[model][ratio_name + '_stats']) = weighted_average(ratios_mci[model][ratio_name], weights)

In [None]:
# Seasonal maps of every variable for both hemispheres, saved to the
# McIlhattan figure folder.
for var_name, label_cfg in d.items():
    for hemisphere in ('NH', 'SH'):
        plt_spatial_season(hemisphere, ratios_mci, var_name, label_cfg, FIG_DIR_mci, 66)

In [None]:
# Seasonal FLCC / FsLCC maps of the polar subsets, once per hemisphere.
flcc_labels = d['FLCC']
for hemisphere in ('NH', 'SH'):
    plt_spatial_season_FLCC_FsLCC(hemisphere, ratios_mci, flcc_labels, FIG_DIR_mci, 66)


In [None]:
# One monthly model-variation figure per variable.
for var_name, label_cfg in d.items():
    plt_monthly_model_variation(ratios_mci, var_name, label_cfg, FIG_DIR_mci)

In [None]:
# Per resolution, stack ERA5 (tagged with model='ERA5') in front of the CMIP
# models along the 'model' dimension.
for _res_tag in ('250', '500'):
    _era5 = ratios_mci[f'era_{_res_tag}'].assign_coords(coords={'model': 'ERA5'})
    ratios_mci[_res_tag] = xr.concat([_era5, ratios_mci[f'cmip_{_res_tag}']], dim='model')

In [None]:
# One observation-vs-model scatter figure per variable.
for var_name, label_cfg in d.items():
    plt_scatter_obs_model(ratios_mci, var_name, label_cfg, FIG_DIR_mci)

In [None]:
def _r2_table(r2_by_model):
    """Return a models x seasons DataFrame from a {model: {key: value}} mapping."""
    columns = {name: np.array(list(per_season.values())) for name, per_season in r2_by_model.items()}
    # NOTE(review): the season labels are hard-coded; this assumes every
    # per-model mapping yields its values in exactly this order -- confirm.
    return pd.DataFrame(columns, index=['NDJ', 'MJJ', 'FMA', 'ASO']).transpose()


# One table per variable and hemisphere, built from the second and fifth
# return value of calc_scatter_obs_model.
df_NH_mci = {}
df_SH_mci = {}
df = {}
for var_name in d:
    _, R2_var_NH, d_var_NH, _, R2_var_SH, d_var_SH = calc_scatter_obs_model(ratios_mci, var_name)
    df_NH_mci[var_name] = _r2_table(R2_var_NH)
    df_SH_mci[var_name] = _r2_table(R2_var_SH)

In [None]:
# Set the ERA5 entries of the 'sf_eff' tables to NaN in both hemispheres.
# The assignment goes through a single .loc[row, column] on the DataFrame:
# the chained form df[column].loc[row] = ... writes to a temporary Series and
# leaves the table unchanged under pandas Copy-on-Write.
for season in df_NH_mci['sf_eff'].columns:
    df_NH_mci['sf_eff'].loc['ERA5', season] = np.nan
    df_SH_mci['sf_eff'].loc['ERA5', season] = np.nan

In [None]:
# Pass the NH and SH tables, the label dictionary and the McIlhattan figure
# folder to the heatmap helper (presumably draws the seasonal R2 heatmaps and
# saves them there -- confirm against the helper's definition).
plt_R2_heatmap_season(df_NH_mci, df_SH_mci, d, FIG_DIR_mci)

In [None]:
# Seasonal maps without the difference row, for every variable and both
# hemispheres.
for var_name, label_cfg in d.items():
    for hemisphere in ('NH', 'SH'):
        plt_spatial_season_no_diff(hemisphere, ratios_mci, var_name, label_cfg, FIG_DIR_mci, 66)

In [None]:
# for var_name in d.keys():
#         for model in ratios_mci['500']['model'].values:
#             for hemisphere in ['NH', 'SH']:
#                 if model == 'CanESM5' or model == 'IPSL-CM5A2-INCA':
#                     res = 500
#                 else:
#                     res = 250
#                 plt_spatial_season_for_model(hemisphere,ratios_mci, var_name, d[var_name], res, model, FIG_DIR_mci,66)