In [None]:
import pandas as pd
import sys
import numpy as np
import pathlib as pl
sys.path.append('../dependencies/')
import pyemu
from datetime import datetime as dt
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

In [None]:
# Identifier of the model extraction to post-process; it names the working folder below.
curr_model = '01473000'
# Working directory of that extraction (the control file is read from here and
# the figures are written beneath it).
wkdir = pl.Path('../NHM_extractions/20230110_pois_haj') / curr_model
# Folder holding the prior Monte Carlo result CSVs read later in the notebook.
results_dir = pl.Path('../example_data/')

In [None]:
# Output folder for the prior-ensemble figures, nested under the working directory.
fig_dir = wkdir / 'figures' / 'prior'
# exist_ok=True makes the cell safe to re-run and avoids the check-then-create race
# of a separate exists() test.
fig_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# strptime patterns for the date token embedded in observation names:
# year_month for monthly groups, year_month_day for daily groups.
datfmtmon, datfmtdaily = '%Y_%m', '%Y_%m_%d'

## Load the PEST control (.pst) file to get the observation data

In [None]:
# Control file of the prior Monte Carlo run, located in the working directory.
pst_file = wkdir / 'prior_mc_better.pst'
# pyemu.Pst is handed a plain string path here rather than a Path object.
pst = pyemu.Pst(str(pst_file))

In [None]:
# Observation table from the control file. Later cells read its `obsnme`,
# `obgnme` and `obsval` columns and join on its index.
# NOTE(review): this looks like a reference to the table held by `pst`, not a copy,
# so the in-place edit in the next cell presumably changes `pst` too -- confirm.
obs = pst.observation_data

In [None]:
# Rows whose observed value is the -9999 placeholder are set to NaN in place,
# so they leave gaps in the plots instead of being drawn as real values.
is_placeholder = obs['obsval'] == -9999
obs.loc[is_placeholder, 'obsval'] = np.nan

## look at a histogram of PHI

In [None]:
# Read the measurement-phi table and transpose it so each CSV column becomes a row.
phi_csv = results_dir / 'prior_mc_better.phi.meas.csv'
phi_transposed = pd.read_csv(phi_csv).T
# Drop the first six rows of the transposed table.
# NOTE(review): presumably these are the leading summary columns of the phi CSV
# (not per-realization values) -- confirm against the file header.
phi = phi_transposed.iloc[6:]

In [None]:
# Histogram of the phi values using 50 bins (one panel per column of `phi`).
phi.hist(bins=50)

In [None]:
# NOTE(review): leftover scratch expression -- it only displays the bound
# `str.endswith` method and changes no state; consider deleting this cell.
'actet_mean_mon'.endswith

In [None]:
# Display the observation group names; the final cell iterates over this same
# list to produce one set of plots per group.
pst.obs_groups

## read in the observations ensemble from the Prior MC

In [None]:
# Observation ensemble written by the prior Monte Carlo run. The first CSV column
# becomes the index and the remaining columns are later selected by observation name.
obens_csv = results_dir / 'prior_mc_better.0.obs.csv'
obens = pd.read_csv(obens_csv, index_col=0, low_memory=False)

In [None]:
# Display the ensemble transposed, i.e. with the CSV's columns shown as rows.
obens.T

In [None]:
def _write_ensemble_pages(outpdf, frame, page_col, index_col, real_cols,
                          title_label, plot_lw, plot_alpha):
    """Append one PDF page per unique value of `page_col` in `frame`.

    Each page shows the ensemble columns (`real_cols`) as thin grey lines and the
    `obsval` column as an orange line, both plotted against `index_col`.

    Parameters
    ----------
    outpdf : PdfPages
        Open multi-page PDF that the pages are appended to.
    frame : pandas.DataFrame
        Rows to plot; must contain `page_col`, `index_col`, `obsval` and `real_cols`.
    page_col : str
        Column whose unique values define the pages (one page per value).
    index_col : str
        Column used as the x axis; rows are sorted by it before plotting.
    real_cols : list
        Labels of the ensemble columns to draw in grey.
    title_label : str
        Text placed before the page key in the title, e.g. ``'location'``.
    plot_lw, plot_alpha : float
        Line width and transparency of the grey ensemble lines.
    """
    for page_key, page_df in frame.groupby(page_col):
        indexed = page_df.sort_values(by=index_col).set_index(index_col)
        # Explicit figure/axes: nothing stray is created and only this figure is closed.
        fig, ax = plt.subplots()
        indexed[real_cols].plot(ax=ax, legend=False, linewidth=plot_lw,
                                color='grey', alpha=plot_alpha)
        indexed['obsval'].plot(ax=ax, color='orange')
        ax.set_title(f'{title_label} = {page_key}')
        outpdf.savefig(fig)
        plt.close(fig)


def plot_o_matic(obs_df, obens_df, curr_group, fig_dir):
    """Write PDF plots of the ensemble against the observed values for one group.

    Observation names are split on ``':'``: the second token is used as the date
    (or month / year) string and the last token as the location. The substrings
    found in `curr_group` select the layout, checked in this order:

    * ``'mon'`` without ``'mean'``: ``<curr_group>.pdf``, one page per location,
      x axis parsed with the notebook-level ``datfmtmon`` format.
    * ``'mean_mon'``: ``<curr_group>.pdf``, one page per location, x axis is the month.
    * ``'ann'``: ``<curr_group>.pdf``, one page per location, x axis is the year.
    * ``'daily'``: one PDF per location named ``<group without '_daily'>_<location>.pdf``
      with one page per year, x axis parsed with the notebook-level ``datfmtdaily``.

    A group that matches none of these is reported and skipped.

    Parameters
    ----------
    obs_df : pandas.DataFrame
        Observation table with `obsnme`, `obgnme` and `obsval` columns. Its index
        must hold the observation names because `obsval` is joined index-on-index.
    obens_df : pandas.DataFrame
        Ensemble with one column per observation name; each row is drawn as one
        grey line.
    curr_group : str
        Observation group to plot (matched against `obs_df.obgnme`).
    fig_dir : path-like
        Existing directory that receives the PDF files.

    Returns
    -------
    None
    """
    # some metadata
    plot_lw = 0.01
    plot_alpha = 0.15
    fig_dir = pl.Path(fig_dir)

    ### TODO: purge the zero padding garbage once we put it in the original construction code!!!!
    print(f'postprocessing group: {curr_group}')
    # get a list of all the obs names in the group
    curr_obs = obs_df.loc[obs_df.obgnme == curr_group, 'obsnme'].to_list()
    # truncate the ensemble data to this group: rows = observations, columns = ensemble members
    curr_obs_df = obens_df[curr_obs].T.copy()
    # remember the ensemble columns now, before metadata columns are appended,
    # so the plots never depend on how many helper columns exist
    real_cols = list(curr_obs_df.columns)
    # parse the obs names for useful metadata
    curr_obs_df['obs_location'] = [i.split(':')[-1] for i in curr_obs_df.index]
    curr_obs_df['datestring'] = [i.split(':')[1] for i in curr_obs_df.index]
    # get the obsval for plotting
    curr_obs_df = curr_obs_df.merge(obs_df['obsval'], left_index=True, right_index=True)

    # parse based on the meas freq
    if ('mon' in curr_group) and ('mean' not in curr_group):
        curr_obs_df['datestring'] = [f'{int(i.split("_")[0]):4d}_{int(i.split("_")[1]):02d}'
                                     for i in curr_obs_df['datestring']]
        curr_obs_df['datetime'] = [dt.strptime(i, datfmtmon) for i in curr_obs_df['datestring']]
        with PdfPages(fig_dir / f'{curr_group}.pdf') as outpdf:
            _write_ensemble_pages(outpdf, curr_obs_df, 'obs_location', 'datetime',
                                  real_cols, 'location', plot_lw, plot_alpha)

    elif 'mean_mon' in curr_group:
        curr_obs_df['month'] = [int(i) for i in curr_obs_df['datestring']]
        with PdfPages(fig_dir / f'{curr_group}.pdf') as outpdf:
            _write_ensemble_pages(outpdf, curr_obs_df, 'obs_location', 'month',
                                  real_cols, 'location', plot_lw, plot_alpha)

    elif 'ann' in curr_group:
        curr_obs_df['year'] = [int(i) for i in curr_obs_df['datestring']]
        with PdfPages(fig_dir / f'{curr_group}.pdf') as outpdf:
            _write_ensemble_pages(outpdf, curr_obs_df, 'obs_location', 'year',
                                  real_cols, 'location', plot_lw, plot_alpha)

    elif 'daily' in curr_group:
        curr_obs_df['datestring'] = [f'{int(i.split("_")[0]):4d}_{int(i.split("_")[1]):02d}_{int(i.split("_")[2]):02d}'
                                     for i in curr_obs_df['datestring']]
        curr_obs_df['datetime'] = [dt.strptime(i, datfmtdaily) for i in curr_obs_df['datestring']]
        curr_obs_df['year'] = [int(i.split("_")[0]) for i in curr_obs_df.datestring]
        group_root = curr_group.replace('_daily', '')
        for cn, cg in curr_obs_df.groupby('obs_location'):
            print(f'subpostprocessing location: {cn}')
            # one file per location, one page per year (groupby yields the years in sorted order)
            with PdfPages(fig_dir / f'{group_root}_{cn}.pdf') as outpdf:
                _write_ensemble_pages(outpdf, cg, 'year', 'datetime',
                                      real_cols, 'year', plot_lw, plot_alpha)

    else:
        # make the skip visible instead of silently producing nothing
        print(f'no plotting rule matches group: {curr_group} -- skipped')

In [None]:
# run them all at once: one call per observation group in the control file.
# A plain loop is used because the calls are made only for their side effects
# (writing PDFs); there is no result list to build or suppress.
for curr_group in pst.obs_groups:
    plot_o_matic(obs, obens, curr_group, fig_dir)