# Dataframe creation

In [1]:
import xarray as xr
import numpy as np
import pandas as pd
import glob

import sys
sys.path.append('../..')
import scripts.icon_mcrph_func as imf


In [2]:
# Open the tendency ("wrapper") files and the meteorology files as one chunked dataset
datapath = '/low_clouds_datasets/polar_day/'

tendency_files = glob.glob(datapath+"LLC_wrapper_tend_*_ICONv1_v6.nc")
meteo_files = glob.glob(datapath+"LLC_meteo*_ICONv1_v6.nc")
ds = xr.open_mfdataset(
    tendency_files + meteo_files,
    combine='by_coords',
    engine='netcdf4',
    parallel=True,
    chunks={'time': 'auto'},
)

# Keep only the levels from index `level_idx` onward
# (per the original note these are the levels below 2.5 km).
# The last `height` entry is dropped as well, unlike `height_2`.
level_idx = 111
ds_levsel = ds.isel(height_2=slice(level_idx, None), height=slice(level_idx, -1))
# W is defined on `height`; interpolate it onto the `height_2` coordinate values
ds_levsel['W'] = ds_levsel['W'].interp(height=ds_levsel['height_2'])

# Replace every variable by NaN wherever the summed hydrometeor mass
# (cloud, rain, hail, ice, snow, graupel) is not above 1e-8
hydrometeor_total = (ds_levsel['QC'] + ds_levsel['QR'] + ds_levsel['QH']
                     + ds_levsel['QI'] + ds_levsel['QS'] + ds_levsel['QG'])
ds_levsel = ds_levsel.where(hydrometeor_total > 1e-8, np.nan)


In [3]:
# Add saturation with respect to ice (`sat_i`) and to liquid water (`sat_w`)
# to ds_levsel, using the project helpers in `imf`.

# Load the inputs into memory once instead of once per helper call.
# T is still in its original unit here; it is converted to Celsius at the end of the cell.
temperature = ds_levsel['T'].values
qv = ds_levsel['QV'].values
rho = ds_levsel['RHO'].values

# NOTE(review): T is taken one time step later (index 1:) than QV and RHO
# (index :-1). The offset is kept exactly as in the original code; confirm
# that it is intentional.
sat_fields = {'sat_i': imf.saturation_ice, 'sat_w': imf.saturation_water}
for sat_name, saturation_func in sat_fields.items():
    # Initialise with NaN rather than 0: the last time step has no shifted T
    # partner and is never filled, so it must read as "missing" and not as a
    # physical saturation value of 0 in the exported dataframes.
    sat_values = np.full(qv.shape, np.nan)
    sat_values[:-1, :] = saturation_func(temperature[1:, :], qv[:-1, :], rho[:-1, :])
    ds_levsel[sat_name] = xr.DataArray(sat_values, dims=('time', 'height_2'))

# convert T to Celsius
# (not idempotent: re-running this cell subtracts 273.15 again)
ds_levsel['T'] = ds_levsel['T'] - 273.15

In [4]:
# Split every combined deposition/sublimation ("depsub") tendency by sign:
# strictly positive values are stored as deposition_*, strictly negative values
# as sublimation_*; all other entries become NaN through `.where`.
for category in ('ice', 'snow', 'graupel', 'hail', 'atmo'):
    depsub = ds_levsel['depsub_' + category + '_tend']
    ds_levsel['deposition_' + category + '_tend'] = depsub.where(depsub > 0)
    ds_levsel['sublimation_' + category + '_tend'] = depsub.where(depsub < 0)

# Same sign split for the saturation-adjustment ("satad") tendencies:
# positive -> condensation_*, negative -> evaporation_*
for category in ('cloud', 'atmo'):
    satad = ds_levsel['satad_' + category + '_tend']
    ds_levsel['condensation_' + category + '_tend'] = satad.where(satad > 0)
    ds_levsel['evaporation_' + category + '_tend'] = satad.where(satad < 0)


In [5]:

# Variable names that mention a frozen category (ice, snow, graupel, hail),
# leaving out 'total' entries and the unsplit 'depsub' tendencies
frozen_tend = [
    tend for tend in ds_levsel.data_vars
    if any(key in tend for key in ('ice', 'snow', 'graupel', 'hail'))
    and not any(skip in tend for skip in ('total', 'depsub'))
]
print(frozen_tend)

# Same for liquid water (rain and cloud), leaving out 'total' and the unsplit 'satad'
liquid_tend = [
    tend for tend in ds_levsel.data_vars
    if any(key in tend for key in ('rain', 'cloud'))
    and not any(skip in tend for skip in ('total', 'satad'))
]
print(liquid_tend)

# Names mentioning 'atmo', leaving out 'total', 'satad' and 'depsub'
atmo_tend = [
    tend for tend in ds_levsel.data_vars
    if 'atmo' in tend
    and not any(skip in tend for skip in ('total', 'satad', 'depsub'))
]
print(atmo_tend)

['s_eva_snow_tend', 'g_eva_graupel_tend', 'h_eva_hail_tend', 'c_homfr_ice_tend', 'i_homhet_ice_tend', 'rime_ice_tend', 'rime_snow_tend', 'rime_graupel_tend', 'rime_hail_tend', 'fr_col_ice_tend', 'fr_col_snow_tend', 'fr_col_graupel_tend', 'g_to_h_hail_tend', 'g_to_h_graupel_tend', 'h_col_ice_tend', 'h_col_hail_tend', 'h_col_snow_tend', 'r_freeze_ice_tend', 'r_freeze_hail_tend', 'r_freeze_graupel_tend', 'i_melt_ice_tend', 's_melt_snow_tend', 'g_melt_graupel_tend', 'h_melt_hail_tend', 'deposition_ice_tend', 'sublimation_ice_tend', 'deposition_snow_tend', 'sublimation_snow_tend', 'deposition_graupel_tend', 'sublimation_graupel_tend', 'deposition_hail_tend', 'sublimation_hail_tend']
['auSB_cloud_tend', 'auSB_rain_tend', 'acSB_cloud_tend', 'acSB_rain_tend', 'r_eva_rain_tend', 'c_homfr_cloud_tend', 'i_homhet_cloud_tend', 'rime_cloud_tend', 'rime_rain_tend', 'r_freeze_rain_tend', 'i_melt_rain_tend', 'i_melt_cloud_tend', 's_melt_rain_tend', 'g_melt_rain_tend', 'h_melt_rain_tend', 'ccn_act_cloud

In [6]:

def _names_containing(names, token):
    """Return the entries of `names` that contain the substring `token`, in order."""
    return [name for name in names if token in name]


# frozen processes
deposition_fr_vars = _names_containing(frozen_tend, 'deposition')
sublimation_fr_vars = _names_containing(frozen_tend, 'sublimation')
rime_fr_vars = _names_containing(frozen_tend, 'rime')
homhet_fr_vars = _names_containing(frozen_tend, 'homhet')
c_homfr_fr_vars = _names_containing(frozen_tend, 'c_homfr')
# 'col_' matches both the fr_col_* and the h_col_* tendencies
fr_col_fr_vars = _names_containing(frozen_tend, 'col_')
fr_eva_fr_vars = _names_containing(frozen_tend, 'eva')
g_to_h_fr_vars = _names_containing(frozen_tend, 'g_to_h')
r_freeze_fr_vars = _names_containing(frozen_tend, 'r_freeze')
melt_fr_vars = _names_containing(frozen_tend, 'melt')

# liquid processes
au_li_vars = _names_containing(liquid_tend, 'auSB')
ac_li_vars = _names_containing(liquid_tend, 'acSB')
# 'eva' picks up rain evaporation as well as the cloud evaporation that was
# split off from the saturation adjustment
eva_li_vars = _names_containing(liquid_tend, 'eva')
c_homfr_li_vars = _names_containing(liquid_tend, 'homfr')
homhet_li_vars = _names_containing(liquid_tend, 'homhet')
rime_li_vars = _names_containing(liquid_tend, 'rime')
r_freeze_li_vars = _names_containing(liquid_tend, 'r_freeze')
melt_li_vars = _names_containing(liquid_tend, 'melt')
cond_li_vars = _names_containing(liquid_tend, 'condensation')
ccn_act_li_vars = _names_containing(liquid_tend, 'ccn_act')

In [7]:
# Hydrometeor mass per phase. Nothing is selected here; `total_mass` is what
# the later cells compare against 1e-8 to pick the rows of the dataframes.
frozen_mass = ds_levsel['QI']
for species in ('QS', 'QG', 'QH'):
    frozen_mass = frozen_mass + ds_levsel[species]
liquid_mass = ds_levsel['QC'] + ds_levsel['QR']
total_mass = frozen_mass + liquid_mass

In [8]:
# Collect the summed process tendencies in pandas dataframes, one row per
# (time, level) point whose total hydrometeor mass is at least 1e-8.

# Evaluate the mask once; the original re-evaluated `total_mass >= 1e-8`
# for every single column.
cloudy_points = (total_mass >= 1e-8).values


def _summed_tendency(var_names, magnitude=False):
    """Sum the listed ds_levsel tendencies and keep the cloudy points.

    var_names: names of the ds_levsel variables to add up.
    magnitude: if True, return the absolute value of the sum.
    Returns a 1-D numpy array with one entry per True element of
    `cloudy_points` (boolean indexing already yields a flat array).
    """
    summed = ds_levsel[var_names].to_array().sum(dim="variable").values[cloudy_points]
    return np.abs(summed) if magnitude else summed


# frozen hydrometeors; loss terms (sublimation, evaporation, melting) are stored as magnitudes
df_fr_tends = pd.DataFrame({
    'deposition_fr': _summed_tendency(deposition_fr_vars),
    'sublimation_fr': _summed_tendency(sublimation_fr_vars, magnitude=True),
    'rime_fr': _summed_tendency(rime_fr_vars),
    'homhet_fr': _summed_tendency(homhet_fr_vars),
    'c_homfr_fr': _summed_tendency(c_homfr_fr_vars),
    'fr_col': _summed_tendency(fr_col_fr_vars),
    'fr_eva_fr': _summed_tendency(fr_eva_fr_vars, magnitude=True),
    'g_to_h': _summed_tendency(g_to_h_fr_vars),
    'r_freeze_fr': _summed_tendency(r_freeze_fr_vars),
    'melt_fr': _summed_tendency(melt_fr_vars, magnitude=True),
})

# row-wise sum over all frozen process columns
df_fr_tends['total_fr'] = df_fr_tends.sum(axis=1)
display(df_fr_tends.describe())

# liquid water; terms stored as magnitudes are evaporation, freezing and riming
df_li_tends = pd.DataFrame({
    'auSB': _summed_tendency(au_li_vars),
    'acSB': _summed_tendency(ac_li_vars),
    'evaporation_li': _summed_tendency(eva_li_vars, magnitude=True),
    'c_homfr_li': _summed_tendency(c_homfr_li_vars, magnitude=True),
    'homhet_li': _summed_tendency(homhet_li_vars, magnitude=True),
    'rime_li': _summed_tendency(rime_li_vars, magnitude=True),
    'r_freeze_li': _summed_tendency(r_freeze_li_vars, magnitude=True),
    'melt_li': _summed_tendency(melt_li_vars),
    'condensation_li': _summed_tendency(cond_li_vars),
    'ccn_act_li': _summed_tendency(ccn_act_li_vars),
})

# row-wise sum over all liquid process columns
df_li_tends['total_li'] = df_li_tends.sum(axis=1)


Unnamed: 0,deposition_fr,sublimation_fr,rime_fr,homhet_fr,c_homfr_fr,fr_col,fr_eva_fr,g_to_h,r_freeze_fr,melt_fr,total_fr
count,5745699.0,5745699.0,5745699.0,5745699.0,5745699.0,5745699.0,5745699.0,5745699.0,5745699.0,5745699.0,5745699.0
mean,5.942106e-10,9.624927e-10,1.739451e-10,1.927201e-15,0.0,5.29181e-26,2.783697e-12,0.0,1.391021e-12,3.533301e-10,2.088155e-09
std,4.525915e-09,1.866228e-08,6.693553e-09,1.16922e-13,0.0,1.168749e-22,1.424589e-10,0.0,2.984849e-11,9.911468e-09,2.340637e-08
min,0.0,0.0,0.0,0.0,0.0,-1.161692e-20,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.397145e-24
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.380609e-15
75%,1.176885e-12,0.0,0.0,0.0,0.0,0.0,7.432085e-34,0.0,0.0,2.534821e-24,2.908991e-11
max,2.442119e-07,1.58071e-06,1.043216e-06,1.453253e-10,0.0,1.211241e-20,4.993368e-08,0.0,3.155673e-09,2.066405e-06,2.071629e-06


In [9]:
# Specific humidity (QV) tendencies, selected by name from `atmo_tend`
# rain evaporation and cloud evaporation both (so includes the satad-derived part)
eva_li_qv_vars = [var for var in atmo_tend if 'r_eva' in var or 'evaporation' in var]
# evaporation of snow, graupel and hail, plus sublimation
eva_fr_qv_vars = [var for var in atmo_tend
                  if 's_eva' in var or 'g_eva' in var or 'h_eva' in var or 'sublimation' in var]
sublimation_qv_vars = [var for var in atmo_tend if 'sublimation' in var]
deposition_qv_vars = [var for var in atmo_tend if 'deposition' in var]
condensation_qv_vars = [var for var in atmo_tend if 'condensation' in var]
homhet_qv_vars = [var for var in atmo_tend if 'homhet' in var]
rime_qv_vars = [var for var in atmo_tend if 'rime' in var]
fr_col_qv_vars = [var for var in atmo_tend if 'fr_col' in var]
ccn_act_qv_vars = [var for var in atmo_tend if 'ccn_act' in var]

# Evaluate the total-mass mask once instead of once per column
qv_rows = (total_mass >= 1e-8).values


def _summed_qv_tendency(var_names, magnitude=False):
    """Sum the listed ds_levsel tendencies and keep the points selected by `qv_rows`.

    var_names: names of the ds_levsel variables to add up.
    magnitude: if True, return the absolute value of the sum.
    Returns a 1-D numpy array (boolean indexing already yields a flat array).
    """
    summed = ds_levsel[var_names].to_array().sum(dim="variable").values[qv_rows]
    return np.abs(summed) if magnitude else summed


# for qv dataframe
df_qv_tends = pd.DataFrame({
    'eva_li_qv': _summed_qv_tendency(eva_li_qv_vars),
    'eva_fr_qv': _summed_qv_tendency(eva_fr_qv_vars),
    'sublimation_qv': _summed_qv_tendency(sublimation_qv_vars),
    'deposition_qv': _summed_qv_tendency(deposition_qv_vars, magnitude=True),
    'condensation_qv': _summed_qv_tendency(condensation_qv_vars, magnitude=True),
    'homhet_qv': _summed_qv_tendency(homhet_qv_vars, magnitude=True),
    'rime_qv': _summed_qv_tendency(rime_qv_vars),
    'fr_col_qv': _summed_qv_tendency(fr_col_qv_vars),
    'ccn_act_qv': _summed_qv_tendency(ccn_act_qv_vars),
})

# total tendency
# NOTE(review): every name containing 'sublimation' is selected by both
# eva_fr_qv_vars and sublimation_qv_vars, so it enters this row sum twice
# (once through each column) - confirm that this is intended.
df_qv_tends['total_qv'] = df_qv_tends.sum(axis=1)

In [10]:
# Add T, W, QV and the two saturation fields to all three dataframes,
# restricted to the same points (total hydrometeor mass >= 1e-8) as their rows.
# The mask is evaluated once and every field is loaded once, instead of once
# per dataframe as before.
state_rows = (total_mass >= 1e-8).values
for column in ('T', 'W', 'QV', 'sat_i', 'sat_w'):
    # boolean indexing already returns a flat 1-D array
    column_values = ds_levsel[column].values[state_rows]
    for frame in (df_qv_tends, df_fr_tends, df_li_tends):
        frame[column] = column_values


In [11]:
# Write each dataframe to its CSV file, without the row index
csv_targets = (
    (df_fr_tends, '../../../data/processed/df_fr_tends_MJJA_S21.csv'),
    (df_li_tends, '../../../data/processed/df_li_tends_MJJA_S21.csv'),
    (df_qv_tends, '../../../data/processed/df_qv_tends_MJJA_S21.csv'),
)
for frame, csv_path in csv_targets:
    frame.to_csv(csv_path, index=False)

In [12]:
# Shape of W as (time, level); read from the array metadata so the data
# itself does not have to be loaded just to inspect its dimensions
ds_levsel.W.shape

(291011, 39)