In [1]:
import load_ppe_fun as lp
from tqdm import tqdm
import numpy as np
import pandas as pd
import os
import pickle
import re
import netCDF4 as nc

In [None]:
# ---- Run configuration -------------------------------------------------
# Identifiers used to locate the simulation output on disk.
vnum = '0001'                           # stored as file_info['version_number']
nikki = '2025-04-22'                    # date string; also the output sub-directory name
sim_config = 'condevp_withcoal'         # configuration of the PPE runs
target_simconfig = 'condevp_withcoal'   # configuration of the target runs

# Directory that receives the summary netCDF file.
nc_dir = 'summary_ncs/'
# exist_ok avoids the check-then-create race and makes the cell safely re-runnable.
os.makedirs(nc_dir, exist_ok=True)
l_cic = True

var1_strs, var2_strs = lp.get_dics(lp.output_dir, 'target', target_simconfig)
mps, nmp = lp.get_mps(lp.output_dir, nikki, sim_config, l_cic)
mps = lp.sort_strings_by_number(mps)

# Indices into lp.indvar_ename_set / lp.indvar_units_set of the variables to export.
# Alternative snapshot selection for the plain 'condevp' configuration:
# snapshot_var_idx = [4, 86, 119, 120]
snapshot_var_idx = [115, 116, 117, 118]
summary_var_idx = [95, 107, 121, 122]

# Reading output

In [3]:
# Variables to load: snapshot variables first, then summary variables.
var_interest = snapshot_var_idx + summary_var_idx

file_info = {'dir': lp.output_dir, 
             'date': nikki, 
             'version_number': vnum}

# Spacing of the diagnostic samples written to the summary file
# (later exported with units 's' and 'm' respectively).
diag_dt = 60
diag_dz = 100

nc_dict = {}
data_range = {}
ppe_var = {}
target_var = {}

var1_str = var1_strs[0]
var2_str = var2_strs[0]

# load PPE data from BOSS
# The loaded dictionary is cached as a pickle keyed by configuration and variable list.
nc_summary_pkl_fn = lp.output_dir + nikki + '/' + sim_config + '_ncs_' + str(var_interest) + '.pkl'
if os.path.isfile(nc_summary_pkl_fn):
    # NOTE(security): pickle.load can execute arbitrary code; only load cache
    # files that were produced by this notebook on a trusted machine.
    with open(nc_summary_pkl_fn, 'rb') as file:
        nc_dict = pickle.load(file)
else:
    file_info.update({'sim_config': sim_config, 
                      'var1_str': var1_str, 
                      'var2_str': var2_str})
    for mp in tqdm(mps, desc='loading PPEs'):
        file_info['mp_config'] = mp
        nc_dict = lp.load_KiD(file_info, var_interest, nc_dict, data_range, 
                              continuous_ic=True, set_OOB_as_NaN=False, set_NaN_to_0=True)[0]

    # load BIN_TAU
    for var1_str in var1_strs:
        for var2_str in var2_strs:
            mp = 'BIN_TAU'
            file_info.update({'sim_config': target_simconfig, 
                              'date': 'target',
                              'mp_config': mp,
                              'var1_str': var1_str, 
                              'var2_str': var2_str})
            nc_dict = lp.load_KiD(file_info, var_interest, nc_dict, data_range, 
                                  continuous_ic=False, set_OOB_as_NaN=False, set_NaN_to_0=True)[0]

    # Write the cache to a temporary file and rename it into place, so that an
    # interrupted dump can never leave a truncated pickle that a later run
    # would mistake for a valid cache.
    tmp_pkl_fn = nc_summary_pkl_fn + '.tmp'
    with open(tmp_pkl_fn, 'wb') as file:
        pickle.dump(nc_dict, file)
    os.replace(tmp_pkl_fn, nc_summary_pkl_fn)

loading PPEs: 100%|██████████| 10000/10000 [01:12<00:00, 138.36it/s]


# Saving output

## prepping

In [4]:
# Coordinate axes of the loaded output.
time_axis = nc_dict['time']
height_axis = nc_dict['z']

nt = len(time_axis)
nz = len(height_axis)

# Grid spacing, taken from the first two coordinate values.
dt = time_axis[1] - time_axis[0]
dz = height_axis[1] - height_axis[0]

# Diagnostic sample points: every coordinate after the first one that is an
# exact multiple of the diagnostic interval.
diag_ts = [t for t in time_axis[1:] if t % diag_dt == 0]
diag_zs = [h for h in height_axis[1:] if h % diag_dz == 0]

# Dimension sizes of the output file.
dims = dict(
    ntime=len(diag_ts),
    nz=len(diag_zs),
    scalar_var=1,
    ncase=len(var1_strs) * len(var2_strs),
    nppe=len(mps),
    # NOTE(review): hard-coded; presumably the number of columns in each
    # member's params.csv - confirm.
    nparams=40,
)

# Output variables, keyed by name; each entry carries 'data', 'dims' and 'units'.
ncvars = {}
ncvars['diag_ts'] = {'data': diag_ts, 'dims': ('ntime',), 'units': 's'}
ncvars['diag_zs'] = {'data': diag_zs, 'dims': ('nz',), 'units': 'm'}

# File-level attributes.
global_attrs = dict(
    description='PPE data for ' + sim_config,
    date_created=nikki,
)



## load BOSS

In [5]:
# Collect the BOSS PPE members into `ncvars`:
#   * per-member initial conditions (Na, w),
#   * per-member parameter values and the parameter names,
#   * summary variables (one value per member),
#   * snapshot variables sampled at the diagnostic times (and heights).
ic_str = 'cic'
n_ppe = len(mps)

# dims/units do not depend on the member, so they are set once up front
# (this also keeps the entries complete when `mps` is empty).
ncvars['Na_PPE'] = {
    'dims': ('nppe',),
    'units': '1/cc',
    'data': np.zeros((n_ppe,)),
}
ncvars['w_PPE'] = {
    'dims': ('nppe',),
    'units': 'm/s',
    'data': np.zeros((n_ppe,)),
}
for imp, mp in enumerate(mps):
    ncvars['Na_PPE']['data'][imp] = nc_dict[ic_str][mp]['Na']
    ncvars['w_PPE']['data'][imp] = nc_dict[ic_str][mp]['w']

# store the parameter data into the netCDF file
ncvars['params_PPE'] = {
    'dims': ('nppe', 'nparams',),
    'units': '',
    'data': np.zeros((n_ppe, dims['nparams'])),
}
for imp, mp in enumerate(tqdm(mps, desc='loading params')):
    param_df = pd.read_csv(lp.output_dir + nikki + '/' + sim_config + '/' + mp + '/' + 'params.csv')
    ncvars['params_PPE']['data'][imp, :] = np.array(param_df)

# Parameter names come from the header of the last member's params.csv, which
# is still held in `param_df`; there is no need to read the file a second time
# through the leaked loop variable `mp`.
param_names = np.array([name.strip() for name in param_df.columns])

ncvars['param_names'] = {
    'dims': ('nparams',),
    'data': param_names,
    'units': '',
}

# Summary variables: one value per PPE member.
for isumm in summary_var_idx:
    var_ename = lp.indvar_ename_set[isumm]
    var_units = lp.indvar_units_set[isumm]
    if var_units != '':
        # Drop the first two characters and the last one
        # (presumably the ' [' / ']' wrapping of the unit label - confirm).
        var_units = var_units[2:-1]
    ncvars['boss_' + var_ename] = {
        'data': np.array([nc_dict[ic_str][mp][var_ename] for mp in mps]),
        'dims': ('nppe',),
        'units': var_units,
    }
    # The matching 'bin_' entry is created by the BIN cell; resetting it here
    # would wipe the BIN data whenever this cell is re-run afterwards.

# Indices of the diagnostic samples on the model grid. round() is used instead
# of bare int() truncation because float round-off in the division can land
# just below the intended integer (e.g. int(0.3 / 0.1) == 2).
# Assumes a uniform time axis starting at 0 and a height axis starting at dz.
diag_it = [int(round(diag_t / dt)) for diag_t in diag_ts]
diag_iz = [int(round(diag_z / dz)) - 1 for diag_z in diag_zs]

# Snapshot variables: sampled at the diagnostic times (and heights for profiles).
for isnap in snapshot_var_idx:
    var_ename = lp.indvar_ename_set[isnap]
    var_units = lp.indvar_units_set[isnap]
    ncvars['boss_' + var_ename] = {}

    if var_units != '':
        var_units = var_units[2:-1]
    # A leading axis of length nz marks a (z, time) profile; anything else is
    # treated as a time series.
    if nc_dict[ic_str][mps[0]][var_ename].shape[0] == nz:
        ncvars['boss_' + var_ename]['dims'] = ('nppe','nz','ntime',)
        ncvars['boss_' + var_ename]['data'] = np.zeros((n_ppe, len(diag_zs), len(diag_ts)))
    else:
        ncvars['boss_' + var_ename]['dims'] = ('nppe','ntime',)
        ncvars['boss_' + var_ename]['data'] = np.zeros((n_ppe, len(diag_ts)))
    ncvars['boss_' + var_ename]['units'] = var_units

    out_data = ncvars['boss_' + var_ename]['data']
    for imp, mp in enumerate(tqdm(mps, desc='processing PPEs - ' + var_ename)):
        field = nc_dict[ic_str][mp][var_ename]
        if field.shape[0] == nz:
            for idgt, it in enumerate(diag_it):
                for idgz, iz in enumerate(diag_iz):
                    out_data[imp, idgz, idgt] = field[iz, it]
        else:
            for idgt, it in enumerate(diag_it):
                out_data[imp, idgt] = field[it]

loading params: 100%|██████████| 10000/10000 [00:06<00:00, 1445.24it/s]
processing PPEs - LWP: 100%|██████████| 10000/10000 [00:00<00:00, 18482.84it/s]
processing PPEs - LNP: 100%|██████████| 10000/10000 [00:00<00:00, 18555.53it/s]
processing PPEs - liq_M3_path: 100%|██████████| 10000/10000 [00:00<00:00, 18140.60it/s]
processing PPEs - liq_M4_path: 100%|██████████| 10000/10000 [00:00<00:00, 17911.68it/s]


## load BIN

In [6]:
# Collect the BIN_TAU target runs into `ncvars`, one entry per
# (var1_str, var2_str) case, in the same layout as the 'boss_' variables.

# Every case in a fixed order; the position in this list is the 'ncase' index.
cases = [(v1, v2) for v1 in var1_strs for v2 in var2_strs]
n_case = len(var1_strs) * len(var2_strs)

# Indices of the diagnostic samples on the model grid. round() is used instead
# of bare int() truncation because float round-off in the division can land
# just below the intended integer (e.g. int(0.3 / 0.1) == 2).
# Assumes a uniform time axis starting at 0 and a height axis starting at dz.
diag_it = [int(round(diag_t / dt)) for diag_t in diag_ts]
diag_iz = [int(round(diag_z / dz)) - 1 for diag_z in diag_zs]

# Summary variables: one value per case.
for isumm in summary_var_idx:
    var_ename = lp.indvar_ename_set[isumm]
    var_units = lp.indvar_units_set[isumm]
    if var_units != '':
        # Drop the first two characters and the last one
        # (presumably the ' [' / ']' wrapping of the unit label - confirm).
        var_units = var_units[2:-1]
    ncvars['bin_' + var_ename] = {
        'dims': ('ncase',),
        'units': var_units,
        'data': np.zeros((n_case)),
    }
    for icase, (var1_str, var2_str) in enumerate(cases):
        ic_str = var1_str + var2_str
        ncvars['bin_' + var_ename]['data'][icase] = nc_dict[ic_str]['BIN_TAU'][var_ename]

# Snapshot variables: sampled at the diagnostic times (and heights for profiles).
for isnap in snapshot_var_idx:
    var_ename = lp.indvar_ename_set[isnap]
    var_units = lp.indvar_units_set[isnap]
    if var_units != '':
        var_units = var_units[2:-1]
    ncvars['bin_' + var_ename] = {}
    # The first case decides the layout: a leading axis of length nz marks a
    # (z, time) profile, anything else is treated as a time series.
    ic_str = var1_strs[0] + var2_strs[0]
    if nc_dict[ic_str]['BIN_TAU'][var_ename].shape[0] == nz:
        ncvars['bin_' + var_ename]['dims'] = ('ncase','nz','ntime',)
        ncvars['bin_' + var_ename]['data'] = np.zeros((n_case, len(diag_zs), len(diag_ts)))
    else:
        ncvars['bin_' + var_ename]['dims'] = ('ncase','ntime',)
        ncvars['bin_' + var_ename]['data'] = np.zeros((n_case, len(diag_ts)))
    ncvars['bin_' + var_ename]['units'] = var_units

    out_data = ncvars['bin_' + var_ename]['data']
    for icase, (var1_str, var2_str) in enumerate(cases):
        ic_str = var1_str + var2_str
        field = nc_dict[ic_str]['BIN_TAU'][var_ename]
        if field.shape[0] == nz:
            for idgt, it in enumerate(diag_it):
                for idgz, iz in enumerate(diag_iz):
                    out_data[icase, idgz, idgt] = field[iz, it]
        else:
            for idgt, it in enumerate(diag_it):
                out_data[icase, idgt] = field[it]

# Numeric Na / w value of every case, parsed from the first number that
# appears in the case strings.
ncvars['case_Na'] = {
    'data': [],
    'units': '1/cc',
    'dims': ('ncase',),
}
ncvars['case_w'] = {
    'data': [],
    'units': 'm/s',
    'dims': ('ncase',),
}
for var1_str, var2_str in cases:
    ncvars['case_Na']['data'].append(float(re.search(r'[+-]?\d*\.?\d+', var1_str).group()))
    ncvars['case_w']['data'].append(float(re.search(r'[+-]?\d*\.?\d+', var2_str).group()))

In [15]:
# Constrained variables, summary variables first, then snapshot variables.
constraint_idx = summary_var_idx + snapshot_var_idx

global_attrs['thresholds_eff0'] = []
global_attrs['var_constraints'] = np.array([lp.indvar_ename_set[ivar] for ivar in constraint_idx])

for ivar in constraint_idx:
    name = lp.indvar_ename_set[ivar]

    # Clamp negative values to zero, in place, for both BOSS and BIN data.
    for prefix in ('boss_', 'bin_'):
        field = ncvars[prefix + name]['data']
        field[field < 0] = 0.

    # Threshold: 10th percentile (NaN-aware) of the strictly positive BOSS values.
    boss_field = ncvars['boss_' + name]['data']
    positive_values = boss_field[boss_field > 0]
    global_attrs['thresholds_eff0'].append(np.nanpercentile(positive_values, 10))

In [16]:
# Show the thresholds_eff0 list: one 10th-percentile value per constrained
# variable (summary variables first, then snapshot variables).
global_attrs['thresholds_eff0']

[0.0009198112060650008,
 6788507643.723516,
 2.893820765810066e-14,
 1.741725177486439e-22,
 0.006989605719340425,
 294982.8814359918,
 2.2990520010753964e-19,
 9.868453027736005e-33]

# write data into netcdf

In [17]:
# Open the summary file for update. Mode 'r+' requires the file to exist
# already, so fall back to 'w' (create) on the first run; otherwise a fresh
# kernel with an empty output directory fails here.
nc_path = nc_dir + sim_config + '.nc'
nc_mode = 'r+' if os.path.isfile(nc_path) else 'w'
nc_file = nc.Dataset(nc_path, nc_mode, format='NETCDF4')

In [18]:
# Create dimensions (None for unlimited dimension, e.g., time)
# Create dimensions (None for unlimited dimension, e.g., time)
# Dimensions already present in the file are left untouched.
for dim_name, dim_size in dims.items():
    if dim_name not in nc_file.dimensions:
        nc_file.createDimension(dim_name, dim_size)

# save global attributes
# Python lists are converted to numpy arrays before being stored.
for attr_name, attr_value in global_attrs.items():
    if isinstance(attr_value, list):
        attr_value = np.array(attr_value)
    nc_file.setncattr(attr_name, attr_value)

# Create and fill every variable that is not in the file yet; variables that
# already exist are skipped and keep their current contents.
outnc_dict = {}
for var_name, var in ncvars.items():
    if var_name in nc_file.variables:
        continue
    # Look the data up once, so the "no data" guard below is actually
    # effective (previously var['data'] was dereferenced before the guard).
    data = var.get('data')
    # Data made up only of str items becomes a string variable; everything
    # else is stored as float64.
    is_text = data is not None and all(isinstance(item, str) for item in data)
    var_type = str if is_text else np.float64
    outnc_dict[var_name] = nc_file.createVariable(var_name, var_type, var['dims'])
    if data is not None:
        outnc_dict[var_name][:] = data
    outnc_dict[var_name].units = var['units']

In [19]:
# Close the netCDF dataset handle that was opened earlier in the notebook.
nc_file.close()