In [2]:
# standard python utilities
import os
import sys
from os.path import basename, dirname, join, exists, expanduser
import glob
import shutil
import pandas as pd
import numpy as np
import time
from scipy.stats import gmean, hmean

# standard geospatial python utilities
# import pyproj # for converting proj4string
import shapely
import shapefile
import geopandas as gpd
from osgeo import gdal
import rasterio

# import flopy

In [3]:
usr_dir = expanduser('~')
doc_dir = join(usr_dir, 'Documents')
# dir of all gwfm data
gwfm_dir = join(usr_dir, 'Box/research_cosumnes/GWFlowModel')
# dir of stream level data for seepage study

sfr_dir = join(gwfm_dir,'SFR_data/')

In [4]:
def add_path(fxn_dir):
    """ Insert fxn directory into first position on path so local functions supercede the global"""
    if fxn_dir not in sys.path:
        sys.path.insert(0, fxn_dir)
# flopy github path - edited
add_path(doc_dir+'/GitHub/flopy')
import flopy 

# other functions
py_dir = join(doc_dir,'GitHub/CosumnesRiverRecharge/python_utilities')
add_path(py_dir)

from mf_utility import get_layer_from_elev
# functions like ghb_df must have all variables fed in directly (no using global variables)
# in a case like the ghb it might make more sense to make an actual class
from map_cln import gdf_bnds, plt_cln

In [6]:
ext_dir = 'F:/WRDAPP'
c_dir = 'C:/WRDAPP'

if os.path.exists(ext_dir):
    loadpth = ext_dir 
elif os.path.exists(c_dir):
    loadpth = c_dir 

loadpth = loadpth +'/GWFlowModel/Cosumnes/Regional/'
base_model_ws = loadpth+'historical_simple_geology_reconnection'
upscale = 8
all_model_ws = join(loadpth, 'parallel_realizations')

m = flopy.modflow.Modflow.load('MF.nam', model_ws= base_model_ws, 
                                exe_name='mf-owhm.exe', version='mfnwt')
print(m.dis.nlay)

19


In [7]:
delr = m.dis.delr[0]
delc = m.dis.delc[0]
nrow = m.dis.nrow
ncol = m.dis.ncol
nlay = m.dis.nlay
nper = m.dis.nper
nlay_tprogs = nlay - 3

strt_date = pd.to_datetime(m.dis.start_datetime)
end_date = (strt_date + pd.Series(m.dis.perlen.array.sum()-1).astype('timedelta64[D]'))[0]



In [8]:
# adjusters for boundary condition input
if not m.dis.steady.array[0]:
    time_tr0 = 0  
    nper_tr = nper 
else:
    time_tr0 = 1
    nper_tr = nper-1
print('NPER ', nper, 'NPER_TR ',nper_tr)

NPER  2192 NPER_TR  2192


In [9]:

botm = m.dis.botm.array
# num_tprogs = 120 (max available below levelling), upscaling
#     max_num_layers =148 # based on thickness from -6m (1 m below DEM min) to -80m
#     num_tprogs = int(max_num_layers/upscale)
dem_data = np.loadtxt(gwfm_dir+'\DIS_data\dem_52_9_200m_mean.tsv')


  dem_data = np.loadtxt(gwfm_dir+'\DIS_data\dem_52_9_200m_mean.tsv')


In [15]:
# only going to present the top 10 realizations
proj_dir = join(gwfm_dir, 'Regional')
best10 = pd.read_csv(join(proj_dir,'top_10_accurate_realizations.csv'))


# Copy files independent of geology

In [10]:
# directly copy files not impacted by changing geology
# pks = ['nam','dis','nwt','bas','oc','evt', 'gage', 'hob', 'tab','wel','bath']
# pks = ['input_data/*csv']
# pks
# files = [glob.glob(base_model_ws+'/*'+p, recursive=True)[0] for p in pks]
# files

In [12]:
# copy mf files except cbc and hds
mf_files = pd.Series(glob.glob(base_model_ws+'/MF.*'))
pks_rem = 'cbc|hds|list|.hob.out|.sfr.out|upw|sfr|lak'
# mf_files = mf_files[~mf_files.str.contains(pks_rem).values].tolist()
# pks_keep = 'wel|evt'
pks_keep = 'rch'
mf_files = mf_files[mf_files.str.contains(pks_keep).values].tolist()

# jtfs = glob.glob(base_model_ws+'/*.jtf')
# run = glob.glob(base_model_ws+'/*py*')

files = pd.Series(glob.glob(base_model_ws+'/**/*.csv', recursive=True))
f_keep = 'ghb'
files = files[files.str.contains(f_keep).values].tolist()

# files = mf_files+jtfs+run
# files = mf_files + files
files = mf_files

files

['F:/WRDAPP/GWFlowModel/Cosumnes/Regional/historical_simple_geology_reconnection\\MF.rch']

In [19]:
# for n in ['0']:
# for n in np.arange(0,100).astype(str):
for n in best10.realization.values.astype(str):
    if (int(n) % 10) == 0:
        print(n,end=',')
    for f in files:
        folder = '/realization'+ n.zfill(3)+'/'
        os.makedirs(all_model_ws+folder,exist_ok=True)
        shutil.copy(f, all_model_ws+folder)

# Create files dependent on geology

In [20]:
tprogs_id=''
mf_tprogs_dir = gwfm_dir+'/UPW_data/tprogs_final'+tprogs_id+'/'
tprogs_files = glob.glob(mf_tprogs_dir+'*')


In [21]:


tprogs_fxn_dir = doc_dir +'/GitHub/CosumnesRiverRecharge/tprogs_utilities'
if tprogs_fxn_dir not in sys.path:
    sys.path.append(tprogs_fxn_dir)
# import cleaning functions for tprogs
import tprogs_cleaning as tc


In [22]:
grid_p = gpd.read_file(gwfm_dir+'/DIS_data/grid/grid.shp')
m_domain = gpd.GeoDataFrame(pd.DataFrame([0]), geometry = [grid_p.unary_union], crs=grid_p.crs)

In [23]:
import h5py
uzf_dir = join(gwfm_dir,'UZF_data')
nrow_p, ncol_p = (100,230)
ss_strt = pd.to_datetime('2010-10-01')

def load_perc(strt_date, end_date):
    nper_tr = (end_date-strt_date).days+1
    # years and array index 
    years = pd.date_range(strt_date,end_date,freq='AS-Oct')
    yr_ind = (years-strt_date).days
    perc = np.zeros((nper_tr, nrow_p,ncol_p))
    # need separte hdf5 for each year because total is 300MB
    for n in np.arange(0,len(yr_ind)-1):
        fn = join(uzf_dir, 'basic_soil_budget',"percolation_WY"+str(years[n].year+1)+".hdf5")
        with h5py.File(fn, "r") as f:
            arr = f['array']['WY'][:]
            perc[yr_ind[n]:yr_ind[n+1]] = arr
    return(perc)

finf_in = load_perc(strt_date, end_date)
ss_finf_in = load_perc(ss_strt, strt_date)
ss_ndays = ss_finf_in.shape[0]

# subset data to local model
# finf_local_in = np.zeros((nper_tr, nrow, ncol))
# finf_local_in[:, grid_match.row-1, grid_match.column-1] = finf[:,grid_match.p_row-1, grid_match.p_column-1]
# ss_finf_local_in = np.zeros((ss_ndays, nrow, ncol))
# ss_finf_local_in[:, grid_match.row-1, grid_match.column-1] = ss_finf[:,grid_match.p_row-1, grid_match.p_column-1]


In [24]:
# join top and botm for easier array referencing for elevations
top_botm = np.zeros((m.dis.nlay+1,m.dis.nrow,m.dis.ncol))
top_botm[0,:,:] = m.dis.top.array
top_botm[1:,:,:] = m.dis.botm.array
botm = m.dis.botm.array
# load deep geology definition
deep_geology = np.loadtxt(base_model_ws+'/input_data/deep_geology.tsv', delimiter ='\t')
deep_geology = np.reshape(deep_geology, (m.dis.nlay,m.dis.nrow,m.dis.ncol))

# load pre-processed GHB dataframes
# df_mon = pd.read_csv(base_model_ws+'/input_data/ghb_general.csv', index_col='date', parse_dates=['date'])
# ghb_ss = df_mon.loc[strt_date].groupby(['row','column']).mean().reset_index()
# ghbdelta_spd = pd.read_csv(base_model_ws+'/input_data/ghbdelta_spd.csv')
# month intervals for organizing GHB
months = pd.date_range(strt_date,end_date, freq="MS")
month_intervals = (months-strt_date).days + time_tr0 # stress period for each month
# color id for facies
gel_color = pd.read_csv(join(gwfm_dir,'UPW_data', 'mf_geology_color_dict.csv'), comment='#')
# gel_color.geology = gel_color.geology.str.lower()

In [25]:
# only need to write geologic paramters once in case others change then could just reload
# initial guess for hydraulic parameters
params = pd.read_csv(base_model_ws+'/ZonePropertiesInitial.csv', index_col='Zone')
# convert from m/s to m/d
params['K_m_d'] = params.K_m_s * 86400  
# results from permeameter test
eff_K = pd.read_csv(join(gwfm_dir, "UPW_data", 'permeameter_regional.csv'))

In [37]:
bc_params = pd.read_csv(join(base_model_ws,'BC_scaling.csv'))
bc_params = bc_params.set_index('ParamName')
strhc_scale = bc_params.loc['strhc_scale', 'StartValue']


## Write out packages

In [38]:
t0 = time.time()
tprogs_info = [80, -80, 320]

# for t in np.arange(0, 100): #100
for t in best10.realization.values:
# for t in [82]:
    folder = 'realization'+ str(t).zfill(3)
    # update model workspace so outputs to right directory
    model_ws = join(all_model_ws, folder)
    m.change_model_ws(model_ws)
    print(folder, end=' ')
    ###############################################################################
    ## LPF Package ##

    # load TPROGs data
    tprogs_line = np.loadtxt(tprogs_files[t])
    masked_tprogs= tc.tprogs_cut_elev(tprogs_line, dem_data, tprogs_info)
    K, Sy, Ss, porosity = tc.int_to_param(masked_tprogs, params, porosity=True)
    # save tprogs facies array as input data for use during calibration
    tprogs_dim = masked_tprogs.shape
    np.savetxt(model_ws+'/tprogs_facies_array.tsv', np.reshape(masked_tprogs, (tprogs_dim[0]*nrow,ncol)), delimiter='\t')

    hk = np.zeros(botm.shape)
    vka = np.zeros(botm.shape)
    sy = np.zeros(botm.shape)
    ss = np.zeros(botm.shape)
    por = np.zeros(botm.shape)
    
    top = np.copy(botm[0,:,:]) # bottom of levelling layer
    bot1 = np.copy(botm[-3,:,:]) # top of laguna

    # I need to verify if a flattening layer is needed (e.g., variable thickness to maintain TPROGs connectivity)
    # pull out the TPROGS data for the corresponding depths
    K_c = tc.get_tprogs_for_elev(K, top, bot1, tprogs_info)
    Ss_c = tc.get_tprogs_for_elev(Ss, top, bot1, tprogs_info)
    Sy_c = tc.get_tprogs_for_elev(Sy, top, bot1, tprogs_info)
    n_c = tc.get_tprogs_for_elev(porosity, top, bot1, tprogs_info)

    # upscale as preset
    for kt, k in enumerate(np.arange(1,nlay_tprogs+1)):
        hk[k,:] = np.mean(K_c[upscale*kt:upscale*(kt+1)], axis=0)
        vka[k,:] = hmean(K_c[upscale*kt:upscale*(kt+1)], axis=0)
        ss[k,:] = np.mean(Ss_c[upscale*kt:upscale*(kt+1)], axis=0)
        sy[k,:] = np.mean(Sy_c[upscale*kt:upscale*(kt+1)], axis=0)
        por[k,:] = np.mean(n_c[upscale*kt:upscale*(kt+1)], axis=0)
    
    top = m.dis.top.array
    bot1 = m.dis.botm.array[0,:,:]
    # set parameters based on upscaled unsaturated zone
    hk[0,:,:] = np.mean(tc.get_tprogs_for_elev(K, top, bot1,tprogs_info),axis=0)
    vka[0,:,:] = hmean(tc.get_tprogs_for_elev(K, top, bot1,tprogs_info),axis=0)
    sy[0,:,:] = np.mean(tc.get_tprogs_for_elev(Sy, top, bot1,tprogs_info),axis=0)
    ss[0,:,:] = np.mean(tc.get_tprogs_for_elev(Ss, top, bot1,tprogs_info),axis=0)

    # check proportions of hydrofacies in TPROGs realization
    tprogs_vals = np.arange(1,5)
    tprogs_hist = np.histogram(masked_tprogs, np.append([0],tprogs_vals+0.1))[0]    
    tprogs_hist = tprogs_hist/np.sum(tprogs_hist)
    tprogs_quants = 1 - np.append([0], np.cumsum(tprogs_hist)/np.sum(tprogs_hist))
    vka_quants = pd.DataFrame(tprogs_quants[1:], columns=['quant'], index=tprogs_vals)
    # dataframe summarizing dominant facies based on quantiles
    vka_quants['vka_min'] = np.quantile(vka, tprogs_quants[1:])
    vka_quants['vka_max'] = np.quantile(vka, tprogs_quants[:-1])
    vka_quants['facies'] = params.loc[tprogs_vals].Lithology.values
    # scale vertical conductivity with a vertical anisotropy factor based
    # on quantiles in the upscaled tprogs data
    for p in tprogs_vals:
        vka[(vka<vka_quants.loc[p,'vka_max'])&(vka>vka_quants.loc[p,'vka_min'])] /= params.vani[p]

    # set values for second to bottom layer, Laguna formation
    hk[-2,:,:] = params.loc[5,'K_m_d']
    vka[-2,:,:] = params.loc[5,'K_m_d']/params.loc[5,'vani'] 
    sy[-2,:,:] = params.loc[5,'Sy']
    ss[-2,:,:] = params.loc[5,'Ss']

    # set values for bottom layer, Mehrten formation
    hk[-1,:,:] = params.loc[6,'K_m_d']
    vka[-1,:,:] = params.loc[6,'K_m_d']/params.loc[6,'vani'] 
    sy[-1,:,:] = params.loc[6,'Sy']
    ss[-1,:,:] = params.loc[6,'Ss']
    
    # rather than use a variable deep geology array which is complicated to determine local effects
    # use the mean column for each layer to define a block of Low K to correct gradient in the foothill
    adj_lowK = pd.DataFrame(np.transpose(np.where(deep_geology>0)), columns=['k','i','j'])
    # the mean didn't quite extend far enough or wasn't low enough K
    # adj_lowK = adj_lowK.groupby('k').mean()['j'].astype(int)
    # trying near minimum to extend further, manually adjusted to 0.15 to align with dem_data>56
    adj_lowK = adj_lowK.groupby('k').quantile(0.15)['j'].astype(int)
    adj_lowK_arr = np.zeros((nlay,nrow,ncol))
    for k in adj_lowK.index:
        adj_lowK_arr[k, :, adj_lowK.loc[k]:] = 1
    # don't want to adjust deepest two layers?
    # this doesn't make as much sense geologically
    # adj_lowK_arr[-1] = 0
#     adj_lowK_arr[-2:] = 0
    # this is causing potentially high water levels in the foothills
    # the deep_geology array shows where the mehrten formation comes out of the surface
    hk[adj_lowK_arr.astype(bool)] = params.loc[7,'K_m_d']
    vka[adj_lowK_arr.astype(bool)] = params.loc[7,'K_m_d']*10/params.loc[7,'vani']
    sy[adj_lowK_arr.astype(bool)] = params.loc[7,'Sy']
    ss[adj_lowK_arr.astype(bool)] = params.loc[7,'Ss']
    
    # reduce sand/gravel vka for seepage in LAK/SFR assuming some fining
    seep_vka = np.copy(vka)
    # coarse cutoff was 2 m/day with sand vka_min, increased to use average of vka_min and vka_max
    coarse_cutoff = vka_quants.loc[2,['vka_min','vka_max']].mean() #vka_quants.loc[2,'vka_min'] # sand minimum
    seep_vka[seep_vka > coarse_cutoff] /= bc_params.loc['coarse_scale', 'StartValue']
    
    # apply a uniform scaling to seep_vka in tprogs area
    seep_vka[~adj_lowK_arr.astype(bool)] /= bc_params.loc['seep_vka','StartValue']
    # apply additional scaling factors by breaking columns into 5 groups
    # stp = int(ncol/5)
    # for n in np.arange(0, 5):
    #     seep_vka[:, :, n*stp:(n+1)*stp] /= bc_params.loc['seep_vka'+str(n+1), 'StartValue']

    np.savetxt(model_ws+'/porosity_arr.tsv', np.reshape(por, (nlay*nrow,ncol)),delimiter='\t')
    # layvka 0 means vka is vert K, non zero means its the anisotropy ratio between horiz and vert
    layvka = 0
    # LAYTYP MUST BE GREATER THAN ZERO WHEN IUZFOPT IS 2
    # 0 is confined, >0 convertible, <0 convertible unless the THICKSTRT option is in effect
    # try making first 20 m convertible/ unconfined, 
    num_unconf = nlay
    laytyp = np.append(np.ones(num_unconf), np.zeros(nlay-num_unconf))
    # Laywet must be 0 if laytyp is confined laywet = [1,1,1,1,1]
    laywet = np.zeros(len(laytyp))
    laywet[laytyp==1] = 1
    #ipakcb = 55 means cell-by-cell budget is saved because it is non zero (default is 53)
    gel = flopy.modflow.ModflowUpw(model = m, hk =hk, layvka = layvka, vka = vka, 
                                   sy=sy, ss=ss,
                                laytyp=laytyp, laywet = 0, ipakcb=55) # laywet must be 0 for UPW

    print('UPW done', end=' ')
    #################################################################
    ## SFR K update ##
    sfr = m.sfr
    # update VKA
    zero_cond = (sfr.reach_data.strhc1 ==0)
    sfr.reach_data.strhc1 = seep_vka[sfr.reach_data.k, sfr.reach_data.i, sfr.reach_data.j]/strhc_scale
    # make sure segments for routing have zero conductance
    sfr.reach_data.strhc1[zero_cond] = 0
    
    print('SFR done', end=' ')

    # save dataframe of stream reach data
    sfrdf = pd.DataFrame(sfr.reach_data)
    grid_sfr = grid_p.set_index(['row','column']).loc[list(zip(sfrdf.i+1,sfrdf.j+1))].reset_index(drop=True)
    grid_sfr = pd.concat((grid_sfr,sfrdf),axis=1)
    # group sfrdf by vka quantiles
    sfr_vka = vka[grid_sfr.k, grid_sfr.i, grid_sfr.j]
    grid_sfr['facies'] = ''
    for p in vka_quants.index:
        facies = vka_quants.loc[p]
        grid_sfr.loc[(sfr_vka< facies.vka_max)&(sfr_vka>= facies.vka_min),'facies'] = facies.facies
    #     # add color for facies plots
    grid_sfr = grid_sfr.join(gel_color.set_index('geology')[['color']], on='facies')
    grid_sfr.to_csv(model_ws+'/grid_sfr.csv')
    
    ###############################################################################
    ## Update LAK Package ##
    lak = m.lak
    lakarr = lak.lakarr.array[0,:] # first stress period
    # set Ksat same as vertical conductivity, 
    lkbd_thick = 2
    lkbd_K = np.copy(vka) # switched to vka instead to avoid mixing variable impact
    # lkbd_K = np.copy(seep_vka)
    lkbd_K[lak.lakarr==0] = 0 # where lake cells don't exist set K as 0
    # leakance is K/lakebed thickness
    # bdlknc = lkbd_K/lkbd_thick
    bdlknc = (lkbd_K/lkbd_thick)/bc_params.loc['bdlknc_scale', 'StartValue'] #, accounted for in seep_vka

    # have to use util_array function or flopy throws an error
    lak.bdlknc = flopy.utils.util_array.Transient3d(m, (nlay,nrow,ncol),
                                       np.float32, bdlknc, name ='bdlknc')

#     print('LAK done', end=' ')
    ###############################################################################
    ## write files ##
    gel.write_file()
    sfr.write_file()
    lak.write_file()

    ## Run the model ##
    print('.... \n')
    # run the modflow model
#     success, buff = m.run_model()
t1 = time.time()
print('Total run time %.2f hrs' % ((t1-t0)/3600))

realization005 UPW done SFR done .... 

realization087 UPW done SFR done .... 

realization071 UPW done SFR done .... 

realization059 UPW done SFR done .... 

realization089 UPW done SFR done .... 

realization094 UPW done SFR done .... 

realization021 UPW done SFR done .... 

realization081 UPW done SFR done .... 

realization095 UPW done SFR done .... 

realization082 UPW done SFR done .... 

Total run time 0.10 hrs
