In [1]:
import sys
import os
import shutil
import warnings
warnings.filterwarnings("ignore")
warnings.filterwarnings("ignore", category=DeprecationWarning) 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt;
# for this course we use locally stored version of pyemu and flopy to avoid version conflicts
import pyemu
import flopy

sys.path.insert(0,"..")
import herebedragons as hbd

In [2]:
# folder (one level above this notebook) that holds the PEST control file
t_d = os.path.join(os.pardir, 'peterson_template')

In [3]:
# file name of the PEST control file inside t_d
pst_file = "peterson_mf6.pst"
# load the control file into a pyemu Pst object
pst = pyemu.Pst(os.path.join(t_d, pst_file))

In [4]:
# obs_df is the control file's observation table itself, not a copy: the weight
# and obsval edits made through obs_df later on are what pst.nnz_obs reports
obs_df = pst.observation_data
obs_df.head()

Unnamed: 0_level_0,obsnme,obsval,weight,obgnme,oname,otype,time,usecol
obsnme,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
oname:hds_otype:lst_usecol:fountainno.1_time:10013,oname:hds_otype:lst_usecol:fountainno.1_time:10013,1680.910414,1.0,oname:hds_otype:lst_usecol:fountainno.1,hds,lst,10013,fountainno.1
oname:hds_otype:lst_usecol:fountainno.1_time:10043,oname:hds_otype:lst_usecol:fountainno.1_time:10043,1680.906893,1.0,oname:hds_otype:lst_usecol:fountainno.1,hds,lst,10043,fountainno.1
oname:hds_otype:lst_usecol:fountainno.1_time:1005,oname:hds_otype:lst_usecol:fountainno.1_time:1005,1681.077101,1.0,oname:hds_otype:lst_usecol:fountainno.1,hds,lst,1005,fountainno.1
oname:hds_otype:lst_usecol:fountainno.1_time:10074,oname:hds_otype:lst_usecol:fountainno.1_time:10074,1680.910079,1.0,oname:hds_otype:lst_usecol:fountainno.1,hds,lst,10074,fountainno.1
oname:hds_otype:lst_usecol:fountainno.1_time:10105,oname:hds_otype:lst_usecol:fountainno.1_time:10105,1680.612831,1.0,oname:hds_otype:lst_usecol:fountainno.1,hds,lst,10105,fountainno.1


In [5]:
# count how many observations carry each distinct weight value
obs_df.weight.value_counts()

weight
1.0    66454
Name: count, dtype: int64

In [6]:
# give every observation zero weight; weights are switched back on further
# down only for the observations that have a measured value
obs_df.loc[:, 'weight'] = 0

# confirm that zero is now the only weight value present
obs_df.weight.unique()

array([0.])

### Measured Data

In [7]:
# measured heads: a 'time' column plus one column per observation site
obs = pd.read_csv('../input-files/data-files/flow-observed-heads.csv')
# table whose 'time' column supplies the times the measurements are snapped to
# NOTE(review): presumably the model's simulated head output - confirm
mod = pd.read_csv('../output-files/head_obs.csv')

In [8]:
def find_closest_time(time, mod_times):
    """Return the entry of ``mod_times`` that lies nearest to ``time``.

    Parameters
    ----------
    time : number
        The time to be matched.
    mod_times : iterable of numbers
        Candidate times, e.g. the ``time`` column of the model output table.

    Returns
    -------
    The candidate with the smallest absolute difference to ``time``.  When
    several candidates are equally close, the one that comes first wins.

    Raises
    ------
    ValueError
        If ``mod_times`` is empty.
    """
    candidates = list(mod_times)
    gaps = [abs(candidate - time) for candidate in candidates]
    # min() over the positions returns the first position with the smallest gap
    nearest_pos = min(range(len(gaps)), key=gaps.__getitem__)
    return candidates[nearest_pos]

# Snap every observed time onto the nearest time found in mod and use the
# snapped time as the index of obs.
# The guard keeps this cell re-runnable: after the first execution 'time' is
# the index rather than a column, so running the cell again would otherwise
# fail with a KeyError.
if obs.index.name != 'time':
    model_times = mod['time'].tolist()  # looked up once instead of once per row
    obs['time'] = obs['time'].apply(lambda x: find_closest_time(x, model_times))
    obs = obs.set_index('time')

In [9]:
# inspect the measured-head table: rows are indexed by time, one column per site
obs

Unnamed: 0_level_0,FOUNTAINNO.1,SC01406518ACDT02-MW006,SC01406518CAAMW1-1,SC01406519ABDT04-MW004,SC01406519BBBT01-MW002,SC01406520CDBMW2-4,SC01406527ACB1,SC01406527DDB1BANNING-LEWISRANCHES,SC01406527DDB2,SC01406527DDC1,...,SCO1506602BBBTH-18,SCO1506602BDC,SCO1506602CAA,SCO1506603AAD,SCO1506603BAC,SCO1506604AAA,SCO1506604AABTH-49,SCO1506604ABA2TH-50,SCO1506610BAA,SCO1506611ABCTH-47
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
59.0,,,,,,,,,,,...,,,,,,,,,,
89.0,,,,,,,,,,,...,,,,,,,,,,
454.0,,,,,,,,,,,...,,,,,,,,,,
639.0,,,,,,,,,,,...,,,,,,,,,,
789.0,,,,,,,,,1802.757888,1803.123648,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18077.0,,,1885.504992,1866.912192,,,,,,,...,,,,,,,,,,
18139.0,,,1886.068872,,,,,,,,...,,,,,,,,,,
18169.0,,,1885.861608,,,,,,,,...,,,,,,,,,,
18200.0,,,1885.633008,,,,,,,,...,,,,,,,,,,


In [10]:
# Reshape the wide table (indexed by time, one column per site) into long form
# with one row per (site, time) pair.  melt stacks the site columns in their
# original order and numbers the rows 0..n-1, which is exactly what building
# one frame per site and concatenating them with ignore_index=True produces.
reshaped_df = (
    obs.reset_index()
    .melt(id_vars='time', var_name='site', value_name='value')
    [['site', 'time', 'value']]
)

# keep only the (site, time) pairs that actually have a measurement
reshaped_df = reshaped_df.dropna()
obs_data = reshaped_df
obs_data.iloc[:5]

Unnamed: 0,site,time,value
34,FOUNTAINNO.1,5753.0,1682.462472
156,FOUNTAINNO.1,17865.0,1682.026608
324,SC01406518ACDT02-MW006,17896.0,1881.350568
327,SC01406518ACDT02-MW006,17986.0,1881.594408
333,SC01406518ACDT02-MW006,18230.0,1881.307896


In [11]:
# second name for obs_data; no copy is made, so both names refer to one frame
pest_obs_df= obs_data

In [12]:
# Make the site labels comparable: both frames use lower-case names
obs_df['usecol'] = obs_df['usecol'].str.lower()
pest_obs_df['site'] = pest_obs_df['site'].str.lower()

# Render the measurement times as text so they can be compared with
# obs_df['time'].  Only a trailing '.0' is removed (5753.0 -> '5753'); a plain
# substring replace would also eat a '.0' in the middle of a fractional time
# (10.05 -> '105') and silently produce wrong matches.
pest_obs_df['time'] = pest_obs_df['time'].astype(str).str.replace(r'\.0$', '', regex=True)

# Index the measurements by (site, time).  setdefault keeps the first value
# when a pair occurs more than once, i.e. the row a filtered lookup would
# return first.
measured = {}
for site, time_label, value in zip(pest_obs_df['site'], pest_obs_df['time'], pest_obs_df['value']):
    measured.setdefault((site, time_label), value)

# One dictionary lookup per PEST observation instead of scanning the whole
# measurement table for every row.
obs_keys = list(zip(obs_df['usecol'], obs_df['time']))
has_measurement = [key in measured for key in obs_keys]

# Observations with a measurement take the measured value and weight 1,
# all others keep their obsval and get weight 0.
if any(has_measurement):
    obs_df.loc[has_measurement, 'obsval'] = [
        measured[key] for key, hit in zip(obs_keys, has_measurement) if hit
    ]
obs_df['weight'] = [1.0 if hit else 0.0 for hit in has_measurement]

In [13]:
# inspect the observation table after the measured values and weights were assigned
obs_df

Unnamed: 0_level_0,obsnme,obsval,weight,obgnme,oname,otype,time,usecol
obsnme,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
oname:hds_otype:lst_usecol:fountainno.1_time:10013,oname:hds_otype:lst_usecol:fountainno.1_time:10013,1680.910414,0.0,oname:hds_otype:lst_usecol:fountainno.1,hds,lst,10013,fountainno.1
oname:hds_otype:lst_usecol:fountainno.1_time:10043,oname:hds_otype:lst_usecol:fountainno.1_time:10043,1680.906893,0.0,oname:hds_otype:lst_usecol:fountainno.1,hds,lst,10043,fountainno.1
oname:hds_otype:lst_usecol:fountainno.1_time:1005,oname:hds_otype:lst_usecol:fountainno.1_time:1005,1681.077101,0.0,oname:hds_otype:lst_usecol:fountainno.1,hds,lst,1005,fountainno.1
oname:hds_otype:lst_usecol:fountainno.1_time:10074,oname:hds_otype:lst_usecol:fountainno.1_time:10074,1680.910079,0.0,oname:hds_otype:lst_usecol:fountainno.1,hds,lst,10074,fountainno.1
oname:hds_otype:lst_usecol:fountainno.1_time:10105,oname:hds_otype:lst_usecol:fountainno.1_time:10105,1680.612831,0.0,oname:hds_otype:lst_usecol:fountainno.1,hds,lst,10105,fountainno.1
...,...,...,...,...,...,...,...,...
oname:hds_otype:lst_usecol:sco1506611abcth-47_time:9862,oname:hds_otype:lst_usecol:sco1506611abcth-47_time:9862,1747.745193,0.0,oname:hds_otype:lst_usecol:sco1506611abcth-47,hds,lst,9862,sco1506611abcth-47
oname:hds_otype:lst_usecol:sco1506611abcth-47_time:9893,oname:hds_otype:lst_usecol:sco1506611abcth-47_time:9893,1747.743447,0.0,oname:hds_otype:lst_usecol:sco1506611abcth-47,hds,lst,9893,sco1506611abcth-47
oname:hds_otype:lst_usecol:sco1506611abcth-47_time:9921,oname:hds_otype:lst_usecol:sco1506611abcth-47_time:9921,1743.791280,1.0,oname:hds_otype:lst_usecol:sco1506611abcth-47,hds,lst,9921,sco1506611abcth-47
oname:hds_otype:lst_usecol:sco1506611abcth-47_time:9952,oname:hds_otype:lst_usecol:sco1506611abcth-47_time:9952,1747.740859,0.0,oname:hds_otype:lst_usecol:sco1506611abcth-47,hds,lst,9952,sco1506611abcth-47


In [14]:
# number of observations in the control file that now carry a non-zero weight
pst.nnz_obs

2747

In [15]:
# observation groups that contain at least one non-zero weighted observation
pst.nnz_obs_groups

['oname:hds_otype:lst_usecol:fountainno.1',
 'oname:hds_otype:lst_usecol:sc01406518acdt02-mw006',
 'oname:hds_otype:lst_usecol:sc01406518caamw1-1',
 'oname:hds_otype:lst_usecol:sc01406519abdt04-mw004',
 'oname:hds_otype:lst_usecol:sc01406519bbbt01-mw002',
 'oname:hds_otype:lst_usecol:sc01406520cdbmw2-4',
 'oname:hds_otype:lst_usecol:sc01406527acb1',
 'oname:hds_otype:lst_usecol:sc01406527ddb1banning-lewisranches',
 'oname:hds_otype:lst_usecol:sc01406527ddb2',
 'oname:hds_otype:lst_usecol:sc01406527ddc1',
 'oname:hds_otype:lst_usecol:sc01406528bbc03-002',
 'oname:hds_otype:lst_usecol:sc01406529bcbt07-mw004',
 'oname:hds_otype:lst_usecol:sc01406529cddpafb07mw03',
 'oname:hds_otype:lst_usecol:sc01406530dbdt07-mw006',
 'oname:hds_otype:lst_usecol:sc01406531bbct13-mw004',
 'oname:hds_otype:lst_usecol:sc01406531cbc04-009',
 'oname:hds_otype:lst_usecol:sc01406531dact11-mw001',
 'oname:hds_otype:lst_usecol:sc01406531dcct12-mw003',
 'oname:hds_otype:lst_usecol:sc01406534aac1',
 'oname:hds_otype

In [16]:
# same check as before: count of non-zero weighted observations
pst.nnz_obs

2747

In [17]:
# show the NOPTMAX value currently stored in the control data
pst.control_data.noptmax

0

In [18]:
# NOTE(review): NOPTMAX = -1 is understood to make pestpp-glm compute the
# Jacobian and stop without upgrading parameters - confirm in the PEST++ manual
pst.control_data.noptmax = -1

In [19]:
# write the edited control file back to the path it was loaded from
# (os.path.join(t_d, pst_file)), replacing the file on disk
# NOTE(review): version=2 presumably selects pyemu's newer control-file format - confirm
pst.write(os.path.join(t_d,pst_file),version=2)

noptmax:-1, npar_adj:4, nnz_obs:2747


In [20]:
import os
import psutil
import pyemu
import time
import glob
import subprocess



In [21]:
# display a summary table of the parameter groups
# NOTE(review): filename="none" presumably suppresses writing the table to disk - confirm in the pyemu docs
pst.write_par_summary_table(filename="none")

Unnamed: 0,type,transform,count,initial value,lower bound,upper bound,standard deviation
rch_inst:0,rch_inst:0,log,4,-1,-3,-0.69897,0.575257


In [22]:
# psutil.cpu_count(logical=False) returns None when the number of physical
# cores cannot be determined; fall back to the logical count and, failing
# that, to a single worker so that a usable integer is always passed on.
num_workers = psutil.cpu_count(logical=False) or os.cpu_count() or 1 # update according to your available resources!

In [23]:
# number of adjustable parameters in the control file
pst.npar_adj

4

In [24]:
# master directory for the parallel run, one level above this notebook;
# the path components are passed separately so os.path.join supplies the separator
m_d = os.path.join('..', 'master_glm_1')

In [25]:
# Run pestpp-glm in parallel: t_d is the directory the workers are created
# from, worker_root is where the worker directories are placed and m_d is the
# master directory.  pst_file is reused so the control file name is defined
# in a single place.
pyemu.os_utils.start_workers(
    t_d,
    "pestpp-glm",
    pst_file,
    num_workers=num_workers,
    worker_root="..",
    master_dir=m_d,
)