### Run these cells one by one interactively or, to process everything as a batch, run `../scripts/general_postproc.py`

In [None]:
import sys
import pandas as pd
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt
sys.path.insert(0,'../scripts/')
from postprocessing import setup_postproc, check_pdc, plot_phi, get_obs_and_noise, get_pars, plot_group, plot_pars_group

In [None]:
# --- run configuration ---
# Model to post-process; uncomment one of the alternatives to switch
curr_model = '01473000'
# curr_model = '05431486'
# curr_model = '09112500'
curr_run_root = 'prior_mc_reweight'
eval_pdc = True
unzip_dirs = True

# Every model / run-root pair starts with an effectively unbounded PHI cutoff
phi_cutoffs = {
    model: dict.fromkeys(('ies', 'prior_mc_reweight'), 9e99)
    for model in ('01473000', '05431486', '09112500')
}

# Paths, observation data and control-file object for the selected run
(pstdir, results_file, tmp_res_path,
 fig_dir, obs, pst) = setup_postproc(curr_model, curr_run_root, unzip_dirs)

### check out PDC

In [None]:
# Display pst.nnz_obs_groups (presumably the observation groups that carry
# non-zero weight — confirm against the pst object's documentation)
pst.nnz_obs_groups

In [None]:
# Run the PDC check only when eval_pdc is enabled, and print what it returns
if eval_pdc:
    pdc = check_pdc(tmp_res_path, curr_run_root, pst, obs)
    print(pdc)

### look at PHI history

In [None]:
# Keep the value returned by plot_phi: later cells index it by iteration
# (fig_dir is presumably where the PHI history figure is saved — confirm)
phi = plot_phi(tmp_res_path, curr_run_root, curr_model, fig_dir)

### Truncate PHI at a threshold

In [None]:
# Display the PHI table returned by plot_phi
phi

In [None]:
# Iteration whose PHI values and ensemble files are used by the cells below.
# NOTE(review): both paths assign 0, so the 'prior' check currently has no
# effect — presumably a placeholder for picking a later iteration on
# non-prior runs; confirm the intended value.
best_iter = 0
if 'prior' in curr_run_root:
    best_iter = 0
best_iter

## Now apply rejection sampling to drop realizations with outlier PHI values

In [None]:
# Take the PHI row for the chosen iteration and skip its first five entries.
# NOTE(review): assumes those five leading columns are summary fields rather
# than realizations — confirm against the PHI file layout.
orgphi = phi.loc[best_iter].iloc[5:].copy()
ax = orgphi.hist(bins=50)
# Remember the x-range so the truncated histogram can be drawn on the same scale
lims = ax.get_xlim()

In [None]:
# catalog of cutoffs heuristically determined
# Heuristically determined PHI cutoffs, one entry per model and run root
phi_cutoffs['01473000'].update({'prior_mc_reweight': 3.8e7, 'ies': 1.08e9})
phi_cutoffs['05431486'].update({'prior_mc_reweight': .6e12, 'ies': 7.5e8})
phi_cutoffs['09112500'].update({'prior_mc_reweight': 1.2e9, 'ies': 8.675e8})

In [None]:
# Look up the cutoff for the model / run root selected in the configuration cell
phi_too_high = phi_cutoffs[curr_model][curr_run_root]
phi_too_high

In [None]:
# Keep only the realizations whose PHI does not exceed the cutoff; their
# labels (reals) identify the ensemble members carried forward
phi = orgphi[orgphi <= phi_too_high]
reals = phi.index

fig, ax = plt.subplots(1, 2)

# Left panel: full distribution with the cutoff marked
orgphi.hist(bins=50, ax=ax[0])
ax[0].axvline(phi_too_high, color='orange')
ax[0].set_title(f'Original PHI: {len(orgphi)} reals')

# Right panel: truncated distribution on the original x-range for comparison
phi.hist(bins=50, ax=ax[1])
ax[1].set_xlim(lims)
ax[1].set_title(f'Truncated PHI: {len(phi)} reals')

plt.savefig(fig_dir / 'phi_histogram.pdf')


### Now read in the ensemble files and trim them to the retained realizations for the hotstart

In [None]:
# Load the parameter ensemble for the chosen iteration and keep only the rows
# labelled by the retained realizations (reals)
parens = pd.read_csv(tmp_res_path / f'{curr_run_root}.{best_iter}.par.csv',
                         index_col=0, low_memory=False).loc[reals]

In [None]:
# Renumber the retained realizations consecutively from 0 (as strings).
# The 'base' label is kept only on the row that actually carries it: if the
# PHI cutoff rejected 'base', unconditionally naming the last survivor 'base'
# would pass an ordinary realization off as the base one. When 'base' is
# present and last, the labels are exactly '0'..'n-2', 'base' as before.
# An empty ensemble now yields an empty index instead of a length mismatch.
_next_label = iter(range(len(parens)))
parens.index = ['base' if str(label) == 'base' else str(next(_next_label))
                for label in parens.index]

In [None]:
# Write the trimmed, relabelled parameter ensemble into the pst directory
# (the '.zip' suffix makes pandas infer zip compression from the path)
parens.to_csv(pstdir / 'hotstart.starting_pars.csv.zip')

In [None]:
# Simulated-observation ensemble for the chosen iteration, retained rows only
obens = pd.read_csv(
    tmp_res_path / f'{curr_run_root}.{best_iter}.obs.csv',
    index_col=0,
    low_memory=False,
).loc[reals]

# Observation-plus-noise ensemble, trimmed to the same realizations
obens_noise = pd.read_csv(
    tmp_res_path / f'{curr_run_root}.obs+noise.csv',
    index_col=0,
    low_memory=False,
).loc[reals]

# Give both ensembles the same row labels as the parameter ensemble
obens.index = parens.index
obens_noise.index = parens.index

In [None]:
# Write both trimmed observation ensembles next to the parameter ensemble
# (zip compression is inferred from the '.zip' suffix)
obens.to_csv(pstdir / 'hotstart.starting_obs.csv.zip')
obens_noise.to_csv(pstdir / 'hotstart.starting_obs+noise.csv.zip')


In [None]:
# Point the control file at the trimmed ensembles for the hotstart run.
# NOTE(review): the file names below omit the '.zip' suffix used when the
# ensembles were written — presumably the archives are unzipped before the
# run; confirm.
pst.control_data.noptmax=2
# the ensemble size must match the number of realizations kept after truncation
pst.pestpp_options["ies_num_reals"] = len(reals)
pst.pestpp_options["ies_observation_ensemble"] = 'hotstart.starting_obs+noise.csv'
pst.pestpp_options["ies_restart_observation_ensemble"] = 'hotstart.starting_obs.csv'
pst.pestpp_options["ies_parameter_ensemble"] = 'hotstart.starting_pars.csv'

In [None]:
# Display the options to verify the hotstart settings before writing
pst.pestpp_options

In [None]:
# Write the modified control file as 'ies_hot.pst' in the pst directory
pst.write(str(pstdir / 'ies_hot.pst'), version=2)

In [None]:
# Display the directory the hotstart files were written to
pstdir