# Analysis of CMIP6, ERA5, and CloudSat


# Table of Contents
<ul>
<li><a href="#introduction">1. Introduction</a></li>
<li><a href="#data_wrangling">2. Data Wrangling</a></li>
<li><a href="#exploratory">3. Exploratory Data Analysis</a></li>
<li><a href="#conclusion">4. Conclusion</a></li>
<li><a href="#references">5. References</a></li>
</ul>

# 1. Introduction <a id='introduction'></a>


**Questions**
* How is the cloud phase and snowfall 


> **_NOTE:_** .

# 2. Data Wrangling <a id='data_wrangling'></a>


## Organize my data

- Define a prefix for my project (you may need to adjust it for your own usage on your infrastructure).
    - input folder where all the data used as input to my Jupyter Notebook is stored (and eventually shared)
    - output folder where all the results to keep are stored
    - tool folder where all the tools (the `utils` modules such as `imports.py` and `functions.py`) are stored

The ERA5 0.25deg data is located in the folder `\scratch\franzihe\`, CloudSat at ...



In [None]:
# Threshold used throughout the notebook: it is embedded in the figure
# directory name, selects the input files matching `*LWP<value>*.nc`, and is
# passed on to the plotting helpers.
# NOTE(review): presumably a liquid water path threshold; the unit is not
# stated anywhere in this notebook - confirm.
lwp_threshold = 5

In [None]:
import os
import pathlib
import sys
import socket

# The host name decides which machine-specific data/figure locations are used.
hostname = socket.gethostname()

# Directory the notebook is run from.  (The previous code obtained the same
# value indirectly by treating the host name as a relative path.)
_cwd = pathlib.Path.cwd()
abs_path = str(_cwd)

# Project root: two directory levels above the notebook folder (this is where
# the `utils` folder lives).  The trailing separator is kept because the
# former string slicing produced it as well.
WORKDIR = os.path.join(str(_cwd.parent.parent), '')


if "mimi" in hostname:
    print(hostname)
    DATA_DIR = "/mn/vann/franzihe/"
    # FIG_DIR = "/uio/kant/geo-geofag-u1/franzihe/Documents/Figures/ERA5/"
    FIG_DIR = f"/uio/kant/geo-geofag-u1/franzihe/Documents/Python/globalsnow/CloudSat_ERA5_CMIP6_analysis/Figures/CS_ERA5_CMIP6_{lwp_threshold}/"
elif "glefsekaldt" in hostname:
    DATA_DIR = "/home/franzihe/Data/"
    FIG_DIR = "/home/franzihe/Documents/Figures/ERA5/"
else:
    # Fail right here with a clear message instead of a NameError further down.
    raise RuntimeError(
        f"Unknown host '{hostname}': define DATA_DIR and FIG_DIR for this machine."
    )

INPUT_DATA_DIR = os.path.join(DATA_DIR, 'input')
OUTPUT_DATA_DIR = os.path.join(DATA_DIR, 'output')
UTILS_DIR = os.path.join(WORKDIR, 'utils')
FIG_DIR_mci = os.path.join(FIG_DIR, 'McIlhattan/')

# Make the project's helper modules (imports.py, functions.py) importable.
sys.path.append(UTILS_DIR)

# Make the figure directories.  `makedirs` also creates missing parent folders
# and does not complain when the folder already exists.  Creation stays
# best-effort, but a failure is reported instead of being silently ignored.
for _fig_dir in (FIG_DIR, FIG_DIR_mci):
    try:
        os.makedirs(_fig_dir, exist_ok=True)
    except OSError as err:
        print(f"Could not create figure directory {_fig_dir}: {err}", file=sys.stderr)

## Import python packages
- `Python` environment requirements: file [requirements_globalsnow.txt](../../requirements_globalsnow.txt) 
- load `python` packages from [imports.py](../../utils/imports.py)
- load `functions` from [functions.py](../../utils/functions.py)


In [None]:
# suppress warnings
# NOTE: this silences *all* warnings for the rest of the session, including
# deprecation warnings raised by the packages imported below.
import warnings
warnings.filterwarnings('ignore') # don't output warnings

# import packages
# `imports` is the project's own helper module (utils/imports.py); it is found
# because the utils folder was appended to sys.path in the set-up cell above.
# It re-exports the third-party packages and the project functions (`fct`).
from imports import(xr, ccrs, cy, plt, glob, cm, fct, np, pd, add_cyclic_point)
# from matplotlib.lines import Line2D
# from matplotlib.patches import Patch
# from sklearn.metrics import r2_score


# show xarray objects with the rich HTML representation in the notebook
xr.set_options(display_style='html')

In [None]:
# reload imports
# Enable IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell is executed, so edits to the helper modules take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Open variables
Get the data required for the analysis. 



In [None]:
# Folder (inside the output data directory) from which the ratio files are
# read in the next cell.
dat_in = os.path.join(OUTPUT_DATA_DIR, 'CS_ERA5_CMIP6')

# Disabled: creation of a separate output data directory.
# NOTE(review): `dat_out` is not defined in the visible cells, so this snippet
# cannot simply be re-enabled as it stands.
# make output data directory
# try:
#     os.mkdir(dat_out)
# except OSError:
#     pass

In [None]:
# Collect the files for the chosen LWP threshold and open them lazily as one
# multi-file dataset: first the default set-up ...
_ratio_files = glob(f'{dat_in}/ratios_500/*LWP{lwp_threshold}*.nc')
ratios = xr.open_mfdataset(_ratio_files)

# ... then the "mci" set-up stored in the sibling folder.
_ratio_files_mci = glob(f'{dat_in}/ratios_500_mci/*LWP{lwp_threshold}*.nc')
ratios_mci = xr.open_mfdataset(_ratio_files_mci)

In [None]:
def _percent_entry(cb_label, vmax=100):
    """Return the plot settings shared by all variables given in percent.

    Every call builds fresh arrays, so the entries do not share state.
    NOTE(review): `levels`/`vmin`/`vmax` presumably configure the absolute
    maps and the `diff_*` keys the difference maps - confirm in functions.py.
    """
    return {
        'cb_label': cb_label,
        'levels': np.arange(0, 105., 5.),
        'vmin': 0,
        'vmax': vmax,
        'diff_levels': np.arange(-100, 110, 10),
        'diff_vmin': -100,
        'diff_vmax': 100,
    }


# Plot settings per variable; the keys are the variable names handed to the
# regression and plotting helpers.
dict_label = {
    'FLCC': _percent_entry('FLCC (%)', vmax=100.),
    'FsLCC': _percent_entry('FsLCC (%)'),
    'FoS': _percent_entry('FoS in sLCCs (%)'),
    'FLCC-FsLCC': _percent_entry('FLCC (%), FsLCC (%)'),
    'sf_eff': {
        'cb_label': 'SE in sLCCs (h$^{-1}$)',
        'levels': np.arange(0, 5.5, .5),
        'vmin': 0,
        'vmax': 5,
        'diff_levels': np.arange(-1.2, 1.4, .2),
        'diff_vmin': -1.2,
        'diff_vmax': 1.2,
    },
    # Currently disabled entries, kept for reference:
    # 'FoP'   : {'cb_label':'FoP in LCCs (%)',        'levels':np.arange(0,105.,5.),   'vmin':0, 'vmax':100, 'diff_levels':np.arange(-100,110,10), 'diff_vmin':-100, 'diff_vmax':100},
    # 'pr_eff': {'cb_label':'PE in sLCCs (h$^{-1}$)', 'levels':np.arange(0,550.,50.), 'vmin':0, 'vmax':500, 'diff_levels':np.arange(-120,140,20), 'diff_vmin':-120, 'diff_vmax':120},
    # 'sf_eff': {'cb_label':'SE in sLCCs (h$^{-1}$)', 'levels':np.arange(0,9.5,.5),   'vmin':0, 'vmax':9,   'diff_levels':np.arange(-1.2,1.4,.2),  'diff_vmin':-1.2, 'diff_vmax':1.2},
}


In [None]:
# calculate linear regression and create dataset with values
linregress = dict()
for var_name in (dict_label.keys()):
    linregress[var_name] = fct.get_linear_regression_hemisphere(ratios, var_name)


_ds = list(linregress.values())
_coord = list(linregress.keys())

regression = xr.concat(objs=_ds, dim=_coord).rename({"concat_dim":"variable"})
regression = regression.reindex({'model':['ERA5', 'MIROC6', 'CanESM5', 'AWI-ESM-1-1-LR',
                                          'MPI-ESM1-2-LR', 'UKESM1-0-LL', 'HadGEM3-GC31-LL', 'CNRM-CM6-1',
                                          'CNRM-ESM2-1', 'IPSL-CM6A-LR', 'IPSL-CM5A2-INCA']})

In [None]:
# calculate linear regression and create dataset with values
linregress_mci = dict()
for var_name in (dict_label.keys()):
    linregress_mci[var_name] = fct.get_linear_regression_hemisphere(ratios_mci, var_name)


_ds = list(linregress_mci.values())
_coord = list(linregress_mci.keys())

regression_mci = xr.concat(objs=_ds, dim=_coord).rename({"concat_dim":"variable"})
regression_mci = regression_mci.reindex({'model':['ERA5', 'MIROC6', 'CanESM5', 'AWI-ESM-1-1-LR',
                                          'MPI-ESM1-2-LR', 'UKESM1-0-LL', 'HadGEM3-GC31-LL', 'CNRM-CM6-1',
                                          'CNRM-ESM2-1', 'IPSL-CM6A-LR', 'IPSL-CM5A2-INCA']})

In [None]:
# .sel(model = ['MIROC6', 'CanESM5', 'AWI-ESM-1-1-LR',
#                                                     'MPI-ESM1-2-LR', 'UKESM1-0-LL', 'HadGEM3-GC31-LL', 'CNRM-CM6-1',
#                                                     'CNRM-ESM2-1', 'IPSL-CM6A-LR', 'IPSL-CM5A2-INCA'])
        

In [None]:
# Every figure is produced twice: for the default set-up and for the "mci"
# set-up, whose figures go to the McIlhattan sub-folder.  Each tuple holds
# (ratio data, regression results, figure folder, number passed to the
# seasonal map routine).
# NOTE(review): 45 and 66 look like latitude bounds in degrees - confirm in
# functions.py.
_plot_cases = (
    (ratios, regression, FIG_DIR, 45),
    (ratios_mci, regression_mci, FIG_DIR_mci, 66),
)

for var_name, _var_cfg in dict_label.items():
    # to plot a subset only, iterate over e.g. ['FLCC',] instead
    # (available: 'FLCC-FsLCC', 'FsLCC', 'FoS', 'sf_eff')

    # seasonal maps
    for _data, _reg, _fig_dir, _bound in _plot_cases:
        fct.plt_spatial_season_var(_data, var_name, dict_label, _fig_dir, _bound, lwp_threshold)

    # plot monthly model variation
    for _data, _reg, _fig_dir, _bound in _plot_cases:
        fct.plt_monthly_model_variation(_data, var_name, _var_cfg, _fig_dir, lwp_threshold)

    # monthly interannual variation
    for _data, _reg, _fig_dir, _bound in _plot_cases:
        fct.plt_monthly_interannual_variation(_data, var_name, lwp_threshold, _fig_dir, _var_cfg)

    # seasonal percent difference
    for _data, _reg, _fig_dir, _bound in _plot_cases:
        fct.plt_percent_difference_season(_data, var_name, lwp_threshold, _var_cfg, _fig_dir)

    # plot scatter CloudSat vs model
    for _data, _reg, _fig_dir, _bound in _plot_cases:
        fct.plt_scatter_obs_model(_data, _reg, var_name, _var_cfg, _fig_dir, lwp_threshold)

    # The maps for the individual models (fct.plt_spatial_season_all_models)
    # are not drawn here; they are produced in a separate cell.

# plot R2 values for all values and both hemispheres
for _data, _reg, _fig_dir, _bound in _plot_cases:
    fct.plt_R2_heatmap_season(_reg, dict_label, _fig_dir, lwp_threshold)


In [None]:
# Variables for which the per-model seasonal maps are drawn.
# Other candidates: 'sf_eff', 'FLCC', 'FLCC-FsLCC', 'FsLCC'
_selected_vars = ['FoS',]

for var_name in _selected_vars:
    # plot variable for individual model, first for the default set-up and
    # then for the "mci" set-up (figures go to the McIlhattan sub-folder)
    for _data, _fig_dir, _bound in ((ratios, FIG_DIR, 45), (ratios_mci, FIG_DIR_mci, 66)):
        fct.plt_spatial_season_all_models(_data, var_name, dict_label, _fig_dir, _bound)