# Create selection of data for hydrometeor analysis

In [4]:
import xarray as xr
import numpy as np
import pandas as pd
import glob

In [5]:
# find all directories which have the same name in the folder in exp_path/icon_burga_v1 and exp_path/icon_burga_v0 and store the first 8 digits of the names in a list 
#exp_path = '/data/inscape/icon/experiments/nyalesund/testbed/'
exp_path='../../data/'
exp_name_v1 = 'exp_data_v1/'
exp_name_v0 = 'exp_data_v0/'

# find all directories which have the same name in the folder in exp_path/icon_burga_v1 and exp_path/icon_burga_v0 and store the first 8 digits of the names in a list
exp_dirs_v1 = glob.glob(exp_path+exp_name_v1+'*')
exp_dirs_v0 = glob.glob(exp_path+exp_name_v0+'*')
print(exp_dirs_v1)
# sort the lists
exp_dirs_v1.sort()
exp_dirs_v0.sort()

# create a list with the first 8 digits of the names which come after the last / in the directory names
exp_names_v1 = [exp_dir.split('/')[-1][19:27] for exp_dir in exp_dirs_v1]
exp_names_v0 = [exp_dir.split('/')[-1][19:27] for exp_dir in exp_dirs_v0]
print(exp_names_v1)

# compare the lists and find the common elements
exp_dates = list(set(exp_names_v1) & set(exp_names_v0))

print(len(exp_dates))

# only select the elements which are between 20210701 and 202112031 to decrease data size and to 
#exp_dates = [exp_date for exp_date in exp_dates if int(exp_date) >= 20210801 and int(exp_date) <= 20210805]
print(len(exp_dates))
print(exp_dates)
# delete 20210905 because the Meteogram doesn't exist for this date
#exp_dates.remove('20210905')

['../../data/exp_data_v1/METEOGRAM_patch001_20211026_awipev.nc', '../../data/exp_data_v1/METEOGRAM_patch001_20211031_awipev.nc', '../../data/exp_data_v1/METEOGRAM_patch001_20211001_awipev.nc', '../../data/exp_data_v1/METEOGRAM_patch001_20211019_awipev.nc', '../../data/exp_data_v1/METEOGRAM_patch001_20211024_awipev.nc', '../../data/exp_data_v1/METEOGRAM_patch001_20211007_awipev.nc', '../../data/exp_data_v1/METEOGRAM_patch001_20211018_awipev.nc', '../../data/exp_data_v1/METEOGRAM_patch001_20211008_awipev.nc', '../../data/exp_data_v1/METEOGRAM_patch001_20211014_awipev.nc', '../../data/exp_data_v1/METEOGRAM_patch001_20211009_awipev.nc', '../../data/exp_data_v1/METEOGRAM_patch001_20211015_awipev.nc', '../../data/exp_data_v1/METEOGRAM_patch001_20211010_awipev.nc', '../../data/exp_data_v1/METEOGRAM_patch001_20211012_awipev.nc', '../../data/exp_data_v1/METEOGRAM_patch001_20211013_awipev.nc', '../../data/exp_data_v1/METEOGRAM_patch001_20211027_awipev.nc', '../../data/exp_data_v1/METEOGRAM_patch

In [7]:
# open all the files which are in the common elements list exp_dates
exp_dir_names_v0 = [exp_path+exp_name_v0+'/METEOGRAM_patch001_'+exp_date+'_awipev.nc' for exp_date in exp_dates]
exp_dir_names_v1 = [exp_path+exp_name_v1+'/METEOGRAM_patch001_'+exp_date+'_awipev.nc' for exp_date in exp_dates]
print(exp_dir_names_v0)


ds_v1 = xr.open_mfdataset(exp_dir_names_v1, combine='by_coords', parallel=True)
ds_v0 = xr.open_mfdataset(exp_dir_names_v0, combine='by_coords', parallel=True)

['../../data/exp_data_v0//METEOGRAM_patch001_20211027_awipev.nc', '../../data/exp_data_v0//METEOGRAM_patch001_20211016_awipev.nc', '../../data/exp_data_v0//METEOGRAM_patch001_20211006_awipev.nc', '../../data/exp_data_v0//METEOGRAM_patch001_20211024_awipev.nc', '../../data/exp_data_v0//METEOGRAM_patch001_20211005_awipev.nc', '../../data/exp_data_v0//METEOGRAM_patch001_20211031_awipev.nc', '../../data/exp_data_v0//METEOGRAM_patch001_20211029_awipev.nc', '../../data/exp_data_v0//METEOGRAM_patch001_20211011_awipev.nc', '../../data/exp_data_v0//METEOGRAM_patch001_20211022_awipev.nc', '../../data/exp_data_v0//METEOGRAM_patch001_20211001_awipev.nc', '../../data/exp_data_v0//METEOGRAM_patch001_20211026_awipev.nc', '../../data/exp_data_v0//METEOGRAM_patch001_20211009_awipev.nc', '../../data/exp_data_v0//METEOGRAM_patch001_20211010_awipev.nc', '../../data/exp_data_v0//METEOGRAM_patch001_20211028_awipev.nc', '../../data/exp_data_v0//METEOGRAM_patch001_20211025_awipev.nc', '../../data/exp_data_v0/

In [8]:
# remove the first three hours each day
ds_v1 = ds_v1.where(ds_v1.time.dt.hour >= 3, drop=True)
ds_v0 = ds_v0.where(ds_v0.time.dt.hour >= 3, drop=True)

# select only the first 100 layers of height and 99 layers of height_2 
ds_v1 = ds_v1.isel(height=slice(50,len(ds_v1.height)), height_2=slice(50,len(ds_v1.height_2)))
ds_v0 = ds_v0.isel(height=slice(50,len(ds_v0.height)), height_2=slice(50,len(ds_v0.height_2)))


In [9]:
# create a dataframe containing QNC, QNI, QNS, QNR, QNG, QNH
df_QN_v1 = pd.DataFrame({'QNC': ds_v1['QNC'].values.flatten(),
                        'QNI': ds_v1['QNI'].values.flatten(),
                        'QNS': ds_v1['QNS'].values.flatten(),
                        'QNR': ds_v1['QNR'].values.flatten(),
                        'QNG': ds_v1['QNG'].values.flatten(),
                        'QNH': ds_v1['QNH'].values.flatten()},
                        columns=['QNC', 'QNI', 'QNS', 'QNR', 'QNG', 'QNH']
                        )


df_QN_v0 = pd.DataFrame({'QNC': ds_v0['QNC'].values.flatten(),
                        'QNI': ds_v0['QNI'].values.flatten(),
                        'QNS': ds_v0['QNS'].values.flatten(),
                        'QNR': ds_v0['QNR'].values.flatten(),
                        'QNG': ds_v0['QNG'].values.flatten(),
                        'QNH': ds_v0['QNH'].values.flatten()},
                        columns=['QNC', 'QNI', 'QNS', 'QNR', 'QNG', 'QNH']
                        )

In [10]:
df_Q_v1 = pd.DataFrame({'QC': ds_v1['QC'].values.flatten(),
                        'QI': ds_v1['QI'].values.flatten(),
                        'QS': ds_v1['QS'].values.flatten(),
                        'QR': ds_v1['QR'].values.flatten(),
                        'QG': ds_v1['QG'].values.flatten(),
                        'QH': ds_v1['QH'].values.flatten()},
                        columns=['QC', 'QI', 'QS', 'QR', 'QG', 'QH']
                        )


df_Q_v0 = pd.DataFrame({'QC': ds_v0['QC'].values.flatten(),
                        'QI': ds_v0['QI'].values.flatten(),
                        'QS': ds_v0['QS'].values.flatten(),
                        'QR': ds_v0['QR'].values.flatten(),
                        'QG': ds_v0['QG'].values.flatten(),
                        'QH': ds_v0['QH'].values.flatten()},
                        columns=['QC', 'QI', 'QS', 'QR', 'QG', 'QH']
                        )


In [11]:
# only select values where there are clouds using sum of df_Q variables
df_QN_cloudy_v1 = df_QN_v1[(df_Q_v1['QC']+df_Q_v1['QI']+df_Q_v1['QS']+df_Q_v1['QR']+df_Q_v1['QG']+df_Q_v1['QH']) >= 1e-8]
df_Q_cloudy_v1 = df_Q_v1[(df_Q_v1['QC']+df_Q_v1['QI']+df_Q_v1['QS']+df_Q_v1['QR']+df_Q_v1['QG']+df_Q_v1['QH']) >= 1e-8]

df_QN_cloudy_v0 = df_QN_v0[(df_Q_v0['QC']+df_Q_v0['QI']+df_Q_v0['QS']+df_Q_v0['QR']+df_Q_v0['QG']+df_Q_v0['QH']) >= 1e-8]
df_Q_cloudy_v0 = df_Q_v0[(df_Q_v0['QC']+df_Q_v0['QI']+df_Q_v0['QS']+df_Q_v0['QR']+df_Q_v0['QG']+df_Q_v0['QH']) >= 1e-8]


In [12]:
#store the dataframe in a csv file
df_QN_cloudy_v1.to_csv('../../data/processed/df_QN_v1_Oct2021.csv')
df_QN_cloudy_v0.to_csv('../../data/processed/df_QN_v0_Oct2021.csv')


In [None]:
df_Q_cloudy_v1.to_csv('../../data/processed/df_Q_v1_Oct2021.csv')
df_Q_cloudy_v0.to_csv('../../data/processed/df_Q_v0_Oct2021.csv')

In [13]:
# close all open datasets
ds_v1.close()
ds_v0.close()
