# Import

In [1]:
import load_ppe_fun as lp
from tqdm import tqdm
import numpy as np
import pandas as pd
import os
import pickle
import re
import netCDF4 as nc
import itertools

In [27]:
# --- Run selection ---------------------------------------------------------
vnum = '0001'                          # stored as file_info['version_number'] when loading
nikki = '2025-05-27'                   # run date; also the output sub-directory name
sim_config = 'rainshaft_Vx_r1'         # PPE simulation configuration
target_simconfig = 'rainshaft_dt60'    # configuration of the BIN_TAU target runs
l_cic = True                           # forwarded to lp.get_mps
n_init = 1                             # forwarded to lp.get_dics; saved as a global attribute

# Make sure the netCDF output directory exists (no-op when it already does).
os.makedirs(lp.nc_dir, exist_ok=True)

# Initial-condition string groups of the target runs, and the PPE member
# directory names sorted by their numeric part.
vars_strs = lp.get_dics(lp.output_dir, 'target', target_simconfig, n_init)
mps, nmp = lp.get_mps(lp.output_dir, nikki, sim_config, l_cic, vars_strs)
mps = lp.sort_strings_by_number(mps)

# --- Diagnostics to export -------------------------------------------------
# Indices into lp.indvar_ename_set / lp.indvar_units_set.  The commented
# lines are alternative selections kept for reference.
# condevp
# snapshot_var_idx = [4, 86, 119, 120]
# snapshot_var_idx = [115, 116, 117, 118]
# snapshot_var_idx = [135, 136, 137, 138]
# summary_var_idx = [95, 107, 121, 122]
# snapshot_var_idx = [21, 97, 98, 99]
summary_var_idx = [131, 132, 133, 134]
snapshot_var_idx = [135, 136, 137, 138]
# summary_var_idx = [108, 95, 107, 121, 122]

# Reading output

In [28]:
# Every diagnostic index to load: snapshot fields first, then summaries.
var_interest = snapshot_var_idx + summary_var_idx

# Short name of each initial-condition variable: the leading run of capital
# letters followed by lowercase letters of the first string in each group.
vars_vn = [re.search(r'^[A-Z]*[a-z]*', istr[0])[0] for istr in vars_strs]

file_info = {'dir': lp.output_dir, 
             'date': nikki, 
             'version_number': vnum,
             'vars_vn': vars_vn}

nc_dict = {}
data_range = {}
ppe_var = {}
target_var = {}

# Load the PPE data from BOSS, or reuse the pickle cache of a previous run.
# The cache name embeds the list of requested variable indices.
nc_summary_pkl_fn = lp.output_dir + nikki + '/' + sim_config + '_ncs_' + str(var_interest) + '.pkl'
if os.path.isfile(nc_summary_pkl_fn):
    # NOTE: unpickling executes arbitrary code; only load caches written by
    # this notebook.
    with open(nc_summary_pkl_fn, 'rb') as file:
        nc_dict = pickle.load(file)
else:
    file_info.update({'sim_config': sim_config})
    for mp in tqdm(mps, desc='loading PPEs'):
        file_info['mp_config'] = mp
        nc_dict = lp.load_KiD(file_info, var_interest, nc_dict, data_range, 
                              continuous_ic=True, set_OOB_as_NaN=False, set_NaN_to_0=True)[0]

    # load BIN_TAU: one target run per combination of initial-condition strings
    for combo in itertools.product(*vars_strs):
        file_info.update({'sim_config': target_simconfig,
                          'vars_str': list(combo),
                          'date': 'target',
                          'mp_config': 'BIN_TAU'})
        nc_dict, lin_or_log, data_range = lp.load_KiD(file_info, var_interest, nc_dict, data_range, 
                                                      continuous_ic=False, set_OOB_as_NaN=False, set_NaN_to_0=True)

    # Write to a temporary file and move it into place, so an interrupted
    # dump never leaves a truncated cache that later runs would try to load.
    tmp_pkl_fn = nc_summary_pkl_fn + '.tmp'
    with open(tmp_pkl_fn, 'wb') as file:
        pickle.dump(nc_dict, file)
    os.replace(tmp_pkl_fn, nc_summary_pkl_fn)

loading PPEs: 100%|██████████| 10000/10000 [00:24<00:00, 405.38it/s]


# Saving output

## prepping

In [29]:
# Sampling intervals of the exported diagnostics along time and height.
diag_dt = 300
diag_dz = 100

time_axis = nc_dict['time']
z_axis = nc_dict['z']

# Native grid spacing and size of the loaded model output.
dt = time_axis[1] - time_axis[0]
dz = z_axis[1] - z_axis[0]
nt = len(time_axis)
nz = len(z_axis)

# Keep the grid points (first one excluded) that are exact multiples of the
# diagnostic intervals.
diag_ts = [t for t in time_axis[1:] if t % diag_dt == 0]
diag_zs = [z for z in z_axis[1:] if z % diag_dz == 0]
# diag_zs = np.concatenate((np.arange(0,3000,100), np.arange(3000,max(nc_dict['z'])+1, 250)))

# Number of target cases = product of the sizes of the initial-condition groups.
ncase_respective = [len(group) for group in vars_strs]
# ncase_respective = [11, 3]
ncase = int(np.prod(ncase_respective))

# netCDF dimension sizes.
dims = dict(
    ntime=len(diag_ts),
    nz=len(diag_zs),
    scalar_var=1,
    ncase=ncase,
    nppe=len(mps),
)

# Variables to be written; each entry carries its data, dimensions and units.
ncvars = dict(
    diag_ts=dict(data=diag_ts, dims=('ntime',), units='s'),
    diag_zs=dict(data=diag_zs, dims=('nz',), units='m'),
)

# File-level attributes.
global_attrs = dict(
    description=f'PPE data for {sim_config}',
    date_simulated=nikki,
)



## load BOSS

In [30]:
# Top-level nc_dict key under which the PPE members are stored
# (nc_dict['cic'][mp][...]).
ic_str = 'cic'
n_ppe = len(mps)


def _params_csv(mp_name):
    """Return the path of the params.csv file of PPE member `mp_name`."""
    return lp.output_dir + nikki + '/' + sim_config + '/' + mp_name + '/' + 'params.csv'


# Initial condition of every PPE member, one variable per initial-condition name.
for var_vn in vars_vn:
    ic_values = np.zeros((n_ppe,))
    for imp, mp in enumerate(mps):
        ic_values[imp] = nc_dict[ic_str][mp][var_vn]
    ncvars[var_vn + '_PPE'] = {
        'data': ic_values,
        'dims': ('nppe',),
        'units': '1/cc',
    }

# store the parameter data into the netCDF file
# The column header is read from the first member explicitly instead of
# relying on a loop variable leaked by an earlier loop.
# NOTE(review): assumes every member's params.csv has the same columns.
header = pd.read_csv(_params_csv(mps[0])).columns
param_names = np.array([a.strip() for a in header])
dims['nparams'] = len(header)

ncvars['param_names'] = {
    'dims': ('nparams',),
    'data': param_names,
    'units': '',
}

params_ppe = np.zeros((n_ppe, dims['nparams']))
for imp, mp in enumerate(tqdm(mps, desc='loading params')):
    params_ppe[imp, :] = np.array(pd.read_csv(_params_csv(mp)))
ncvars['params_PPE'] = {
    'dims': ('nppe', 'nparams',),
    'units': '',
    'data': params_ppe,
}

# Summary diagnostics: one value per PPE member.
# Only the 'boss_' entries are created here.  The 'bin_' entries belong to the
# BIN cell; resetting them here would wipe its data whenever this cell is re-run.
for isumm in summary_var_idx:
    var_ename = lp.indvar_ename_set[isumm]
    var_units = lp.indvar_units_set[isumm]
    if var_units != '':
        # NOTE(review): presumably the unit strings look like ' [unit]'; this
        # drops the first two and the last character - confirm against lp.
        var_units = var_units[2:-1]
    ncvars['boss_' + var_ename] = {
        'data': np.array([nc_dict[ic_str][mp][var_ename] for mp in mps]),
        'dims': ('nppe',),
        'units': var_units,
    }

# Grid indices of the diagnostic times and heights.  They are the same for
# every member and variable, so they are computed once.
it_idx = [int(diag_t / dt) for diag_t in diag_ts]
iz_idx = [int(diag_z / dz) - 1 for diag_z in diag_zs]

# Snapshot diagnostics: sub-sampled on the diagnostic time (and height) grid.
for isnap in snapshot_var_idx:
    var_ename = lp.indvar_ename_set[isnap]
    var_units = lp.indvar_units_set[isnap]
    if var_units != '':
        var_units = var_units[2:-1]

    # A leading axis of length nz marks a (z, time) field; anything else is
    # treated as a time series.
    if nc_dict[ic_str][mps[0]][var_ename].shape[0] == nz:
        snap_dims = ('nppe', 'nz', 'ntime',)
        snap_data = np.zeros((n_ppe, len(diag_zs), len(diag_ts)))
    else:
        snap_dims = ('nppe', 'ntime',)
        snap_data = np.zeros((n_ppe, len(diag_ts)))

    for imp, mp in enumerate(tqdm(mps, desc='processing PPEs - ' + var_ename)):
        field = nc_dict[ic_str][mp][var_ename]
        if field.shape[0] == nz:
            # Outer product of the selected rows (z) and columns (time).
            snap_data[imp] = field[np.ix_(iz_idx, it_idx)]
        else:
            snap_data[imp] = field[it_idx]

    ncvars['boss_' + var_ename] = {
        'dims': snap_dims,
        'data': snap_data,
        'units': var_units,
    }

loading params: 100%|██████████| 10000/10000 [00:07<00:00, 1348.00it/s]
processing PPEs - V_nc: 100%|██████████| 10000/10000 [00:10<00:00, 955.06it/s]
processing PPEs - V_qc: 100%|██████████| 10000/10000 [00:10<00:00, 948.97it/s]
processing PPEs - V_qx: 100%|██████████| 10000/10000 [00:10<00:00, 949.02it/s]
processing PPEs - V_qy: 100%|██████████| 10000/10000 [00:10<00:00, 950.74it/s]


## load BIN

In [31]:
# Keys of the target runs in nc_dict: the initial-condition strings of each
# combination joined together, in itertools.product order.
case_strs = ["".join(combo) for combo in itertools.product(*vars_strs)]

# Grid indices of the diagnostic times and heights (same for every case).
it_idx = [int(diag_t / dt) for diag_t in diag_ts]
iz_idx = [int(diag_z / dz) - 1 for diag_z in diag_zs]

# Summary diagnostics: one value per target case.
for isumm in summary_var_idx:
    var_ename = lp.indvar_ename_set[isumm]
    var_units = lp.indvar_units_set[isumm]
    if var_units != '':
        # NOTE(review): presumably the unit strings look like ' [unit]'; this
        # drops the first two and the last character - confirm against lp.
        var_units = var_units[2:-1]
    bin_summary = np.zeros(ncase)
    for icase, case_str in enumerate(case_strs):
        bin_summary[icase] = nc_dict[case_str]['BIN_TAU'][var_ename]
    ncvars['bin_' + var_ename] = {
        'dims': ('ncase',),
        'units': var_units,
        'data': bin_summary,
    }

# Snapshot diagnostics: sub-sampled on the diagnostic time (and height) grid.
for isnap in snapshot_var_idx:
    var_ename = lp.indvar_ename_set[isnap]
    var_units = lp.indvar_units_set[isnap]
    if var_units != '':
        var_units = var_units[2:-1]

    # The layout is decided from an explicitly chosen case rather than from a
    # loop variable left over by the summary loop, so this also works when
    # summary_var_idx is empty.
    if nc_dict[case_strs[0]]['BIN_TAU'][var_ename].shape[0] == nz:
        snap_dims = ('ncase', 'nz', 'ntime',)
        snap_data = np.zeros((ncase, len(diag_zs), len(diag_ts)))
    else:
        snap_dims = ('ncase', 'ntime',)
        snap_data = np.zeros((ncase, len(diag_ts)))

    for icase, case_str in enumerate(case_strs):
        field = nc_dict[case_str]['BIN_TAU'][var_ename]
        if field.shape[0] == nz:
            # Outer product of the selected rows (z) and columns (time).
            snap_data[icase] = field[np.ix_(iz_idx, it_idx)]
        else:
            snap_data[icase] = field[it_idx]

    ncvars['bin_' + var_ename] = {
        'dims': snap_dims,
        'data': snap_data,
        'units': var_units,
    }

# Numeric initial-condition value of every case, parsed from the number
# embedded in each initial-condition string.
for i_init, var_vn in enumerate(vars_vn):
    ncvars['case_' + var_vn] = {
        'data': [float(re.search(r'[+-]?\d*\.?\d+', combo[i_init]).group())
                 for combo in itertools.product(*vars_strs)],
        'dims': ('ncase',),
        # Explicit empty unit, so the writer does not need an error fallback.
        # ncvars['case_' + var_vn]['units'] = '1/cc'
        'units': '',
    }

In [32]:
# Per-variable thresholds, filled below in the same order as var_constraints.
thresholds = global_attrs['thresholds_eff0'] = []

# Constrained variables: summaries first, then snapshots.
constraint_idx = summary_var_idx + snapshot_var_idx
var_constraints = [lp.indvar_ename_set[ivar] for ivar in constraint_idx]
global_attrs['var_constraints'] = np.array(var_constraints)
global_attrs['init_var'] = np.array(vars_vn)
global_attrs['n_init'] = n_init

for ivar in constraint_idx:
    var_ename = lp.indvar_ename_set[ivar]
    boss_data = ncvars['boss_' + var_ename]['data']
    bin_data = ncvars['bin_' + var_ename]['data']

    # Negative values are set to zero in place, for both BOSS and BIN.
    boss_data[boss_data < 0] = 0.
    bin_data[bin_data < 0] = 0.

    # Threshold: 1st percentile of the strictly positive BOSS values.
    value_greater_0 = boss_data[boss_data > 0]
    thresholds.append(np.nanpercentile(value_greater_0, 1))

In [33]:
# Display the thresholds (1st percentile of the strictly positive BOSS values),
# one per constrained variable: summaries first, then snapshots.
global_attrs['thresholds_eff0']

[0.0005624346894522508,
 0.016197180191675823,
 0.034984125336011265,
 0.040090908726056446,
 0.14394813805818557,
 1.011835870742798,
 1.3606694531440735,
 1.1478587901592254]

# write data into netcdf

In [34]:
# Output file: <nc_dir><sim_config>_N<number of PPE members>_dt<diag_dt>.nc
nc_path = lp.nc_dir + sim_config + '_N' + str(len(mps)) + '_dt' + str(diag_dt) + '.nc'

# 'r+' appends to an existing file but fails when the file is missing, so the
# file is created with 'w' on the first run.
nc_mode = 'r+' if os.path.isfile(nc_path) else 'w'
nc_file = nc.Dataset(nc_path, nc_mode, format='NETCDF4')

In [35]:
# Create the dimensions that the file does not have yet.
for dim_name, dim_size in dims.items():
    if dim_name not in nc_file.dimensions:
        nc_file.createDimension(dim_name, dim_size)

# Save the global attributes; Python lists are stored as arrays.
for attr_name, attr_value in global_attrs.items():
    if isinstance(attr_value, list):
        attr_value = np.array(attr_value)
    nc_file.setncattr(attr_name, attr_value)

# Create and fill every variable that is not in the file yet.  Variables that
# already exist are left untouched.
outnc_dict = {}
for var_name, var in ncvars.items():
    if var_name in nc_file.variables:
        continue

    var_data = var['data']
    # Data whose items are all strings is stored as a string variable,
    # everything else as float64.
    is_text = all(isinstance(item, str) for item in var_data)
    nc_var = nc_file.createVariable(var_name, str if is_text else np.float64, var['dims'])
    nc_var[:] = var_data
    # A missing 'units' entry is written as an empty unit.  Any other failure
    # now surfaces instead of being swallowed by a bare except.
    nc_var.units = var.get('units', '')
    outnc_dict[var_name] = nc_var

In [36]:
# Close the dataset opened above.
nc_file.close()