In [None]:
import sys
sys.path.append('../dependencies/')
import pandas as pd
import pyemu
import numpy as np
import pathlib as pl

### choose a cutout from {01473000, 05431486, 09112500}

In [None]:
# Working directory of the selected cutout; the final path component is the
# cutout ID, so change that component to switch cutouts.
wkdir = pl.Path('../NHM_extractions/20230110_pois_haj/09112500/')

### get the PST object 

In [None]:
# Load the existing control file from the working directory.
pst = pyemu.Pst(str(wkdir.joinpath('prior_mc.pst')))

In [None]:
# Short handles on the parameter and observation tables of the control file.
# NOTE(review): presumably these are references to the tables held by `pst`
# (the later edits to `pars` rely on that) - confirm.
pars, obs = pst.parameter_data, pst.observation_data

In [None]:
# Display the observation groups reported by the control file.
pst.obs_groups

### Read in the base localization matrix

In [None]:
# Read the base localization matrix; the first CSV column becomes the row index.
base_loc = pd.read_csv(filepath_or_buffer='../Supporting_information/localization_groups.csv', index_col=0)

In [None]:
# Trim out obs groups that aren't present in the PST file but are in the base localization matrix

In [None]:
# Display the base localization matrix as read from the CSV.
base_loc

In [None]:
# Keep only the rows labelled with an observation group of the control file.
# Label-based selection raises KeyError if a control-file group has no row here.
base_loc = base_loc.loc[pst.obs_groups, :]
base_loc

### find the unique combinations of observations

In [None]:
# get a little squirrelly with transposes and add a row with the combos of obs

In [None]:
# Transpose so that each row is a parameter type and each column an obs group.
# NOTE: this rebinds base_loc, so the cell is not idempotent; re-run from the
# cell that reads the CSV before executing it again.
base_loc = base_loc.T
# For every parameter type (row), collect the set of obs groups flagged with 1.
# Walking the rows directly avoids re-transposing the frame for each entry.
base_loc['par_obs_combo'] = [set(flags.index[flags == 1]) for _, flags in base_loc.iterrows()]
# Sets are not hashable, so de-duplicate through frozensets and cast back to sets.
# Sorting by the sorted member names fixes the order of all_combos: plain set
# iteration order for strings can change between interpreter runs (hash
# randomization), which would reshuffle the obs_combo_N numbering built from it.
all_combos = [
    set(combo)
    for combo in sorted(set(map(frozenset, base_loc['par_obs_combo'])), key=sorted)
]
all_combos

### now just make par group names according to combinations of obs

In [None]:
# Name each unique obs combination obs_combo_1, obs_combo_2, ... in list order.
group_lookup = {
    f'obs_combo_{number}': combo
    for number, combo in enumerate(all_combos, start=1)
}

In [None]:
# Display the mapping from group name to its set of obs groups.
group_lookup

### assign the group names to the parameter base types according to the cols of the base localization matrix

In [None]:
# Label every row with the name of the first group whose obs set equals the row's own.
base_loc['par_obs_group'] = [
    next(name for name, members in group_lookup.items() if members == combo)
    for combo in base_loc['par_obs_combo']
]

### now we have a list of groups for parameters

In [None]:
# Map each row label of base_loc (one per parameter type) to its new group name.
new_par_groups = base_loc['par_obs_group'].to_dict()

### and we can cast the base_loc matrix back to original orientation and drop these names

In [None]:
# Remove the two helper columns, then flip the frame back to its original orientation.
base_loc = base_loc.drop(['par_obs_combo', 'par_obs_group'], axis=1).transpose()

### so, update the parameter groupnames

In [None]:
# Move every parameter whose name starts with a parameter-type label into that
# type's new group.
# NOTE(review): matching is by name prefix, so if one type label is a prefix of
# another, the entry iterated later overwrites the earlier assignment - confirm
# the labels cannot collide this way.
for par_type, group_name in new_par_groups.items():
    is_par_type = pars.parnme.str.startswith(par_type)
    pars.loc[is_par_type, 'pargp'] = group_name

In [None]:
# Display the distinct parameter group names after the reassignment.
pars.pargp.unique()


### make sure we didn't miss any parameters in the groupings

In [None]:
# Fail if any parameter still carries the group name 'pargp'.
# NOTE(review): presumably 'pargp' is the placeholder group the parameters had
# before regrouping; the check only catches misses if that holds - confirm.
assert not pars.pargp.eq('pargp').any()

In [None]:
# Display the column labels of base_loc.
base_loc.columns

### make the final localization matrix

In [None]:
# All-zero frame: rows follow base_loc's index, columns are the obs_combo_* group names.
locmat = pd.DataFrame(data=0, index=base_loc.index, columns=group_lookup.keys())

### loop over the groups and assign 1s where obs line up with par groups

In [None]:
# Set a 1 at every (obs group, parameter group) cell where the obs group is a
# member of that parameter group's combination.
for par_group, obs_group in (
    (name, member) for name, members in group_lookup.items() for member in members
):
    locmat.loc[obs_group, par_group] = 1.0

In [None]:
# Display the finished localization matrix.
locmat

### finally save it out to a text format

In [None]:
# Convert the frame to a pyemu Matrix and write it as ASCII to loc.mat in the working directory.
pyemu.Matrix.from_dataframe(locmat).to_ascii(str(wkdir.joinpath('loc.mat')))

### and refer to it in the PST file (the updated PST file is written out below)

In [None]:
# Register the localization matrix under the `ies_localizer` PEST++ option.
# NOTE(review): a bare file name is used - presumably resolved relative to the
# control file's location; confirm.
pst.pestpp_options["ies_localizer"] = "loc.mat"


In [None]:
# Save the updated PEST++ control file (.pst) under a new name in the working directory
pst.write(str(wkdir.joinpath('prior_mc_loc.pst')), version=2)

# NOTE: this file will have to be tracked, and a bunch of other files may need to be tracked too