In [1]:
# standard python utilities
import os
import sys
from os.path import basename, dirname, join, exists
import glob
import pandas as pd
import numpy as np
import time
from scipy.stats import gmean


# standard geospatial python utilities
# import pyproj # for converting proj4string
import shapely
import shapefile
import geopandas as gpd
from osgeo import gdal
import rasterio

# import flopy

In [2]:
# walk up from the working directory until the user's Documents folder is found
doc_dir = os.getcwd()
while os.path.basename(doc_dir) != 'Documents':
    parent_dir = os.path.dirname(doc_dir)
    # dirname() of a filesystem root returns the root itself, so without this
    # check the loop never ends when the notebook is run outside Documents
    if parent_dir == doc_dir:
        raise FileNotFoundError("No 'Documents' folder found above " + os.getcwd())
    doc_dir = parent_dir
# dir of all gwfm data
gwfm_dir = os.path.dirname(doc_dir)+'/Box/research_cosumnes/GWFlowModel'
# dir of stream level data for seepage study
proj_dir = gwfm_dir + '/Oneto_Denier/'
dat_dir = proj_dir+'Stream_level_data/'

sfr_dir = gwfm_dir+'/SFR_data/'

In [3]:
# put the local flopy checkout at the front of the module search path so it is
# the copy picked up by the import below
flopy_dir = doc_dir+'/GitHub/flopy'
if sys.path.count(flopy_dir) == 0:
    sys.path.insert(0, flopy_dir)

import flopy

In [4]:
# model files live on the external drive when it is attached, else on C:
ext_dir = 'F:/WRDAPP'
c_dir = 'C:/WRDAPP'
if os.path.exists(ext_dir):
    loadpth = ext_dir
elif os.path.exists(c_dir):
    loadpth = c_dir
else:
    # previously fell through and failed later with a NameError on loadpth
    raise FileNotFoundError('Neither '+ext_dir+' nor '+c_dir+' exists')
loadpth +=  '/GWFlowModel/Cosumnes/Stream_seepage'
# all_model_ws = join(loadpth, 'parallel_oneto_denier')
# model_nam = 'inset_oneto_denier'

# base model that is loaded, and the folder holding one sub-folder per realization
model_nam = 'oneto_denier_2014_2018'
all_model_ws = join(loadpth, 'parallel_oneto_denier_2014_2018')

base_model_ws = join(loadpth,model_nam)

m = flopy.modflow.Modflow.load('MF.nam', model_ws= base_model_ws,
                                exe_name='mf-owhm.exe', version='mfnwt')


In [5]:
# grid dimensions of the loaded model (first entry of delr/delc is taken as the
# cell size)
delr = m.dis.delr[0]
delc = m.dis.delc[0]
nrow = m.dis.nrow
ncol = m.dis.ncol
nlay = m.dis.nlay

strt_date = pd.to_datetime(m.dis.start_datetime)
# total simulated length in whole days added to the start date; built directly as
# a Timedelta because Series.astype('timedelta64[D]') is not accepted by pandas 2.x
# (only s/ms/us/ns resolutions). int() truncates like the old cast did.
end_date = strt_date + pd.Timedelta(days=int(m.dis.perlen.array.sum()))

time_tr0 = 1 # if a steady state period exists then offset

# Copy files independent of geology

In [9]:
# directly copy files not impacted by changing geology
# pks = ['nam','dis','nwt','bas','oc','evt', 'gage', 'hob', 'tab','wel','bath']
# pks = ['input_data/*csv']
# pks
# files = [glob.glob(base_model_ws+'/*'+p, recursive=True)[0] for p in pks]
# files

In [14]:
# MODFLOW package files that do not depend on geology (currently only WEL)
# dtype=object keeps the .str accessor usable even when the glob finds nothing
mf_files = pd.Series(glob.glob(base_model_ws+'/MF.*'), dtype=object)
# pks_rem = 'cbc|hds|upw|sfr|ghb|lak'
# mf_files = mf_files[~mf_files.str.contains(pks_rem).values].tolist()
pks_keep = 'wel'
mf_files = mf_files[mf_files.str.contains(pks_keep).values].tolist()

# csv inputs shared by every realization: model root plus one folder below it
# (previously `files` was only defined by an earlier, since-edited run of the cell,
# and the filtered mf_files was overwritten by a second glob)
csv_files = glob.glob(base_model_ws+'/*.csv') + glob.glob(base_model_ws+'/*/*.csv')

files = mf_files + csv_files
files

['F:/WRDAPP/GWFlowModel/Cosumnes/Stream_seepage\\oneto_denier_2014_2018\\MF.wel',
 'F:/WRDAPP/GWFlowModel/Cosumnes/Stream_seepage\\oneto_denier_2014_2018\\04_XSg_filled.csv',
 'F:/WRDAPP/GWFlowModel/Cosumnes/Stream_seepage\\oneto_denier_2014_2018\\grid_sfr.csv',
 'F:/WRDAPP/GWFlowModel/Cosumnes/Stream_seepage\\oneto_denier_2014_2018\\gwl_long.csv',
 'F:/WRDAPP/GWFlowModel/Cosumnes/Stream_seepage\\oneto_denier_2014_2018\\ZonePropertiesInitial.csv',
 'F:/WRDAPP/GWFlowModel/Cosumnes/Stream_seepage\\oneto_denier_2014_2018\\input_data\\ghbdelta_spd.csv',
 'F:/WRDAPP/GWFlowModel/Cosumnes/Stream_seepage\\oneto_denier_2014_2018\\input_data\\ghbnw_spd.csv',
 'F:/WRDAPP/GWFlowModel/Cosumnes/Stream_seepage\\oneto_denier_2014_2018\\input_data\\ghbse_spd.csv',
 'F:/WRDAPP/GWFlowModel/Cosumnes/Stream_seepage\\oneto_denier_2014_2018\\input_data\\ghbup_spd.csv',
 'F:/WRDAPP/GWFlowModel/Cosumnes/Stream_seepage\\oneto_denier_2014_2018\\input_data\\ghb_general.csv']

In [15]:
import shutil

# give each of the 100 realizations its own folder with a copy of the shared inputs
for n in np.arange(0,100).astype(str):
    folder = '/realization'+ n.zfill(3)+'/'
    # the folder does not depend on the file being copied: create it once
    os.makedirs(all_model_ws+folder,exist_ok=True)
    for f in files:
        shutil.copy(f, all_model_ws+folder)

# Create files dependent on geology

In [6]:
tprogs_id=''
mf_tprogs_dir = gwfm_dir+'/UPW_data/tprogs_final'+tprogs_id+'/'
# glob returns files in arbitrary order, but the list is later indexed by
# realization number, so sort it to keep the file <-> realization pairing stable
# NOTE(review): sorting is lexicographic - confirm the file names are zero-padded
tprogs_files = sorted(glob.glob(mf_tprogs_dir+'*'))


In [7]:


# folder with the project's TPROGS helper module; added to the module search
# path (once) so the import below resolves
tprogs_fxn_dir = doc_dir +'/GitHub/CosumnesRiverRecharge/tprogs_utilities'
if sys.path.count(tprogs_fxn_dir) == 0:
    sys.path.append(tprogs_fxn_dir)
# cleaning functions for tprogs
import tprogs_cleaning as tc


In [8]:
# reference between regional and local grids; the realization loop reads its
# row/column (local) and p_row/p_column (regional) fields and subtracts 1 from
# each before using them as array indices
# NOTE(review): presumably each record pairs a local cell with the regional cell
# it overlies - confirm against the shapefile
grid_match = gpd.read_file(join(proj_dir, 'GIS','grid_match.shp'))

In [9]:
# local model grid polygons
grid_dir = join(gwfm_dir, 'DIS_data/streambed_seepage/grid')
grid_fn = join(grid_dir,  'inset_oneto_denier','rm_only_grid.shp')
grid_p = gpd.read_file(grid_fn)
# label the grid as EPSG:32610 without reprojecting; set_crs with allow_override
# is the supported way to do this when the file already carries a CRS
grid_p = grid_p.set_crs('epsg:32610', allow_override=True)
# single-row frame whose geometry is the union of all grid cells
m_domain = gpd.GeoDataFrame(pd.DataFrame([0]), geometry = [grid_p.unary_union], crs=grid_p.crs)

In [10]:
# m = flopy.modflow.Modflow.load('MF.nam', model_ws=base_model_ws, 
#                                 exe_name='mf-owhm.exe', version='mfnwt')
nrow = m.dis.nrow
ncol = m.dis.ncol
nlay = m.dis.nlay

botm = m.dis.botm.array
# num_tprogs = 120 (max available below levelling), upscaling
#     max_num_layers =148 # based on thickness from -6m (1 m below DEM min) to -80m
# number of TPROGS layers averaged into one model layer
upscale = 8
#     num_tprogs = int(max_num_layers/upscale)
# build the path with join: the old '\DIS_data\dem_...' literal relied on invalid
# escape sequences (Python warns about them) and only worked on Windows
dem_data_p = np.loadtxt(join(gwfm_dir, 'DIS_data', 'dem_52_9_200m_mean.tsv'))


  dem_data_p = np.loadtxt(gwfm_dir+'\DIS_data\dem_52_9_200m_mean.tsv')


In [11]:
def get_layer_from_elev(elev, botm_slice, nlay):
    """  Return uppermost model layer occupied at least partly by some elevation data
    Parameters
    ----------
    elev: 1D array (n) with elevations matching model elevation units
    botm_slice: 2D array (nlay, n) with layer bottom elevations of the model at the
        same x,y locations as elev
    nlay: number of model layers

    Returns
    -------
    1D integer array (n) of zero-based layer indices. An elevation at or above the
    bottom of layer 0 gives 0. The bottom of the deepest layer is never tested, so
    the result never exceeds nlay-1.
    """
    elev = np.asarray(elev)
    if nlay < 2:
        # no layer bottom is tested, every elevation stays in layer 0
        return np.zeros(len(elev)).astype(int)
    # True where the bottom of layer k (k = 0 .. nlay-2) lies above the elevation
    above_elev = np.asarray(botm_slice)[:nlay-1] > elev
    # weight each True by k+1 and keep the largest: one past the deepest layer
    # whose bottom is above the elevation (0 when there is none)
    layer_below = np.arange(1, nlay)[:, np.newaxis] * above_elev
    return layer_below.max(axis=0).astype(int)

def ghb_df(rows, cols, ghb_hd, distance):
    """ Given rows and columns create GHB based on interpolated head levels

    Parameters
    ----------
    rows, cols: 1D sequences of equal length used both as keys into ghb_hd and as
        indices into the model arrays
    ghb_hd: DataFrame indexed by (row, column) with a 'value' column holding the
        boundary head of each cell
    distance: divisor used in the conductance calculation
        NOTE(review): presumably the distance to the head source in model length
        units - confirm

    Returns
    -------
    DataFrame with columns k, i, j, bhead, cond: one record for every layer from
    the layer holding the boundary head down to the deepest layer of each cell.

    Reads the notebook-level names m, botm, hk, top_botm and delr.
    """
    rows = np.asarray(rows)
    cols = np.asarray(cols)
    n_layers = m.dis.nlay
    # pull out head for rows and columns
    head = ghb_hd.loc[list(zip(rows.tolist(), cols.tolist()))].value.values
    ghb_lay = get_layer_from_elev(head, botm[:,rows, cols], n_layers)

    # every cell contributes the layers ghb_lay .. n_layers-1; build all k, i, j
    # indices at once instead of filling the frame one row at a time
    n_active = n_layers - ghb_lay
    if len(ghb_lay) > 0:
        lay = np.concatenate([np.arange(first, n_layers) for first in ghb_lay]).astype(int)
    else:
        lay = np.zeros(0, dtype=int)
    row = np.repeat(rows, n_active).astype(int)
    col = np.repeat(cols, n_active).astype(int)

    # conductance = K * layer thickness * cell width / distance
    cond = hk[lay, row, col]*(top_botm[lay, row, col]-top_botm[lay +1 , row, col])*delr/distance
    if len(lay) > 0:
        bhead = ghb_hd.loc[list(zip(row.tolist(), col.tolist()))].value.values
    else:
        bhead = np.zeros(0)
    # column order matters: callers pass .values straight to the GHB package
    df = pd.DataFrame({'k': lay, 'i': row, 'j': col, 'bhead': bhead, 'cond': cond})
    # drop cells where the head is below the deepest cell?
    return(df)

In [12]:
# stack the model top on the layer bottoms so top_botm[k] and top_botm[k+1] are
# the upper and lower elevation of layer k; shape (nlay+1, nrow, ncol), float64
top_botm = np.vstack((m.dis.top.array[np.newaxis, :, :], m.dis.botm.array)).astype(np.float64)

# pre-processed GHB dataframes written alongside the base model
ghb_input_dir = f'{base_model_ws}/input_data'
df_mon = pd.read_csv(f'{ghb_input_dir}/ghb_general.csv', index_col='date', parse_dates=['date'])
# boundary values at the model start date, averaged per (row, column)
ghb_ss = df_mon.loc[strt_date].groupby(['row','column']).mean()
ghbdelta_spd = pd.read_csv(f'{ghb_input_dir}/ghbdelta_spd.csv')
# first day of every month in the simulation and the matching stress period
months = pd.date_range(strt_date,end_date, freq="MS")
month_intervals = (months-strt_date).days + time_tr0 # stress period for each month
# facies colour lookup, with lower-cased geology names
gel_color = pd.read_csv(join(gwfm_dir,'UPW_data', 'mf_geology_color_dict.csv'), comment='#')
gel_color['geology'] = gel_color['geology'].str.lower()

In [212]:
# for t in np.arange(0, 100):
#     folder = 'realization'+ str(t).zfill(3)
#     # update model workspace so outputs to right directory
#     model_ws = join(all_model_ws, folder)
#     m.change_model_ws(model_ws)
#     tfn = join(model_ws, 'tprogs_local.csv')
#     os.remove(tfn)

In [16]:
t0 = time.time()
# TPROGS grid description handed to the tprogs_cleaning helpers; the last entry is
# used below as the number of TPROGS layers
# NOTE(review): the first two entries look like top/bottom elevations - confirm
tprogs_info = [80, -80, 320]
# hmean is not among the notebook's top-level imports; import it once, not per loop
from scipy.stats import hmean

# model top and deepest layer bottom are the same for every realization
top = np.copy(m.dis.top.array)
bot1 = np.copy(botm[-1,:,:])

for t in np.arange(0, 100): #100
    folder = 'realization'+ str(t).zfill(3)
    # update model workspace so outputs to right directory
    model_ws = join(all_model_ws, folder)
    m.change_model_ws(model_ws)
    print(folder, end=' ')
    ###############################################################################
    ## LPF Package ##
    # only need to write geologic paramters once in case others change then could just reload
    # initial guess for hydraulic parameters
    params = pd.read_csv(model_ws+'/ZonePropertiesInitial.csv', index_col='Zone')
    # convert from m/s to m/d
    params['K_m_d'] = params.K_m_s * 86400

    # load TPROGs data (cached per realization as tprogs_local.csv)
    tfn = join(model_ws, 'tprogs_local.csv')
    if not exists(tfn):
        tprogs_line = np.loadtxt(tprogs_files[t])
        # filter elevation by regional model
        masked_tprogs= tc.tprogs_cut_elev(tprogs_line, dem_data_p, tprogs_info)
        # subset masked data to local model
        masked_tprogs_local = np.zeros((tprogs_info[2], nrow, ncol))
        masked_tprogs_local[:, grid_match.row-1, grid_match.column-1] = masked_tprogs[:,grid_match.p_row-1, grid_match.p_column-1]
        np.savetxt(tfn, np.reshape(masked_tprogs_local, (tprogs_info[-1]*nrow, ncol)))
    else:
        masked_tprogs_local = np.reshape(np.loadtxt(tfn),(tprogs_info[-1], nrow, ncol))
    # convert from facies to real values
    K, Sy, Ss,porosity = tc.int_to_param(masked_tprogs_local, params, porosity=True)

    hk = np.zeros(botm.shape)
    vka = np.zeros(botm.shape)
    sy = np.zeros(botm.shape)
    ss = np.zeros(botm.shape)
    por = np.zeros(botm.shape)

    # I need to verify if a flattening layer is needed (e.g., variable thickness to maintain TPROGs connectivity)
    # pull out the TPROGS data for the corresponding depths
    K_c = tc.get_tprogs_for_elev(K, top, bot1,tprogs_info)
    Ss_c = tc.get_tprogs_for_elev(Ss, top, bot1,tprogs_info)
    Sy_c = tc.get_tprogs_for_elev(Sy, top, bot1,tprogs_info)
    n_c = tc.get_tprogs_for_elev(porosity, top, bot1,tprogs_info)

    # upscale as preset: each model layer aggregates `upscale` TPROGS layers
    # (arithmetic mean for everything except vka, which uses the harmonic mean)
    for k in np.arange(0,nlay):
        hk[k,:] = np.mean(K_c[upscale*k:upscale*(k+1)], axis=0)
        vka[k,:] = hmean(K_c[upscale*k:upscale*(k+1)], axis=0)
        ss[k,:] = np.mean(Ss_c[upscale*k:upscale*(k+1)], axis=0)
        sy[k,:] = np.mean(Sy_c[upscale*k:upscale*(k+1)], axis=0)
        por[k,:] = np.mean(n_c[upscale*k:upscale*(k+1)], axis=0)

    np.savetxt(model_ws+'/porosity_arr.tsv', np.reshape(por, (nlay*nrow,ncol)),delimiter='\t')
    # check proportions of hydrofacies in TPROGs realization
    tprogs_hist = np.histogram(masked_tprogs_local, [0,1.1,2.1,3.1,4.1])[0]
    tprogs_hist = tprogs_hist/np.sum(tprogs_hist)
    # descending quantile levels: 1, 1-p1, 1-p1-p2, ..., 0
    tprogs_quants = 1 - np.append([0], np.cumsum(tprogs_hist)/np.sum(tprogs_hist))
    # scale vertical conductivity with a vertical anisotropy factor based
    # on quantiles in the upscaled tprogs data (facies 1 takes the highest vka band)
    # NOTE(review): the strict comparisons skip cells exactly equal to a band edge,
    # and later bands are computed on already-scaled values - confirm this is intended
    for n, p in enumerate(np.arange(1,5)):
#         vka[vka >np.quantile(vka, (1-tprogs_hist[n]))] /= params.vani[p]
        vmax = np.quantile(vka, tprogs_quants[n])
        vmin = np.quantile(vka, tprogs_quants[n+1])
        vka[(vka<vmax)&(vka>vmin)] /= params.vani[p]
    # reduce sand/gravel vka for seepage in LAK/SFR assuming some fining
    seep_vka = np.copy(vka)
    coarse_cutoff = np.quantile(seep_vka, 1-tprogs_hist.cumsum()[1])
    seep_vka[seep_vka > coarse_cutoff] /= 10

    # layvka 0 means vka is vert K, non zero means its the anisotropy ratio between horiz and vert
    layvka = 0
    # LAYTYP MUST BE GREATER THAN ZERO WHEN IUZFOPT IS 2
    # 0 is confined, >0 convertible, <0 convertible unless the THICKSTRT option is in effect
    # try making first 5 layers convertible/ unconfined, 
    num_unconf = 5
    laytyp = np.append(np.ones(num_unconf), np.zeros(nlay-num_unconf))
    #ipakcb = 55 means cell-by-cell budget is saved because it is non zero (default is 53)
    gel = flopy.modflow.ModflowUpw(model = m, hk =hk, layvka = layvka, vka = vka,
                                   sy=sy, ss=ss,
                                   laytyp=laytyp, laywet = 0, ipakcb=55) # laywet must be 0 for UPW

    gel.write_file()
    print('UPW done', end=' ')
    #################################################################
    ## SFR K update ##
    sfr = m.sfr
    # update VKA
    zero_cond = (sfr.reach_data.strhc1 ==0)
    sfr.reach_data.strhc1 = seep_vka[sfr.reach_data.k, sfr.reach_data.i, sfr.reach_data.j]
    # make sure segments for routing have zero conductance
    sfr.reach_data.strhc1[zero_cond] = 0

    sfr.write_file()
    print('SFR done', end=' ')

    # save dataframe of stream reach data
    sfrdf = pd.DataFrame(sfr.reach_data)
    grid_sfr = grid_p.set_index(['row','column']).loc[list(zip(sfrdf.i+1,sfrdf.j+1))].reset_index(drop=True)
    grid_sfr = pd.concat((grid_sfr,sfrdf),axis=1)
    # group sfrdf by vka quantiles; the thresholds must ascend from mud to gravel,
    # so flip the descending quantile levels (with descending thresholds nearly
    # every reach ended up labelled 'gravel')
    vka_quants = pd.Series(np.quantile(vka, np.flip(tprogs_quants)[1:]))
    vka_quants.index=['mud','sandy mud','sand','gravel']
    grid_sfr['facies'] = 'mud'
    for n in np.arange(0,len(vka_quants)-1):
        grid_sfr.loc[vka[grid_sfr.k, grid_sfr.i, grid_sfr.j] > vka_quants.iloc[n],'facies'] = vka_quants.index[n+1]
#     # add color for facies plots
    grid_sfr = grid_sfr.join(gel_color.set_index('geology')[['color']], on='facies')
    grid_sfr.to_csv(model_ws+'/grid_sfr.csv')

    ###############################################################################
    ## GHB Package ##

    ghb_dict = {}
    # set steady state period
    ghb_all_ss = ghb_df(ghb_ss.index.get_level_values('row'),ghb_ss.index.get_level_values('column'),
                        ghb_ss, distance = 500)
    ghb_dict[0] = pd.concat((ghb_all_ss, ghbdelta_spd)).values

    # one GHB record set per month, keyed by the stress period the month starts in
    for n in np.arange(0, len(months)):
        df_spd = df_mon.loc[months[n]]
        spd = month_intervals[n]
        ghb_gen = ghb_df(df_spd.row, df_spd.column, df_spd.set_index(['row','column']), distance = 500)
        ghb_dict[spd] = pd.concat((ghb_gen, ghbdelta_spd)).values

    # create GHB for flopy
    ghb = flopy.modflow.ModflowGhb(model = m,stress_period_data =  ghb_dict, ipakcb=55)
    # overwrite the previous ghb file with updated version
    ghb.write_file()

    print('GHB done', end=' ')

    ###############################################################################
    ## Update LAK Package ##
    lak = m.lak
    lakarr = lak.lakarr.array[0,:] # first stress period
    # set Ksat same as vertical conductivity, 
    lkbd_thick = 2
    lkbd_K = np.copy(seep_vka)
    # where lake cells don't exist set K as 0; compare the extracted array, not the
    # flopy object (lak.lakarr == 0 is a single bool, so the old mask selected nothing)
    lkbd_K[lakarr==0] = 0
    # leakance is K/lakebed thickness
    bdlknc = lkbd_K/lkbd_thick
    # have to use util_array function or flopy throws an error
    lak.bdlknc = flopy.utils.util_array.Transient3d(m, (nlay,nrow,ncol),
                                       np.float32, bdlknc, name ='bdlknc')
    lak.write_file()

    print('LAK done', end=' ')
    ###############################################################################
    ## Run the model ##
    print('.... \n')
    # run the modflow model
#     success, buff = m.run_model()
t1 = time.time()
print('Total run time %.2f hrs' % ((t1-t0)/3600))

realization000 UPW done SFR done GHB done LAK done .... 

realization001 UPW done SFR done GHB done LAK done .... 

realization002 UPW done SFR done GHB done LAK done .... 

realization003 UPW done SFR done GHB done LAK done .... 

realization004 UPW done SFR done GHB done LAK done .... 

realization005 UPW done SFR done GHB done LAK done .... 

realization006 UPW done SFR done GHB done LAK done .... 

realization007 UPW done SFR done GHB done LAK done .... 

realization008 UPW done SFR done GHB done LAK done .... 

realization009 UPW done SFR done GHB done LAK done .... 

realization010 UPW done SFR done GHB done LAK done .... 

realization011 UPW done SFR done GHB done LAK done .... 

realization012 UPW done SFR done GHB done LAK done .... 

realization013 UPW done SFR done GHB done LAK done .... 

realization014 UPW done SFR done GHB done LAK done .... 

realization015 UPW done SFR done GHB done LAK done .... 

realization016 UPW done SFR done GHB done LAK done .... 

realization017

In [197]:
# Rebuild grid_sfr.csv (stream reaches with a facies label and plot colour) for
# every realization from its cached tprogs_local.csv, without rewriting packages
top = np.copy(m.dis.top.array)
bot1 = np.copy(botm[-1,:,:])
# tprogs_info = ()
from scipy.stats import hmean, gmean

# colour lookup for facies plots is identical for every realization: read it once
gel_color = pd.read_csv(join(gwfm_dir,'UPW_data', 'mf_geology_color_dict.csv'), comment='#')
gel_color.geology = gel_color.geology.str.lower()

for t in np.arange(0, 100): #100
    folder = 'realization'+ str(t).zfill(3)
    # update model workspace so outputs to right directory
    model_ws = join(all_model_ws, folder)
    ## set up grouping to classify each stream segment as gravel through mud
    params = pd.read_csv(model_ws+'/ZonePropertiesInitial.csv', index_col='Zone')
    # convert from m/s to m/d
    params['K_m_d'] = params.K_m_s * 86400
    tfn = join(model_ws, 'tprogs_local.csv')
    masked_tprogs_local = np.reshape(np.loadtxt(tfn),(tprogs_info[-1], nrow, ncol))
    # convert from facies to real values
    K, Sy, Ss,porosity = tc.int_to_param(masked_tprogs_local, params, porosity=True)

    hk = np.zeros(botm.shape)
    vka = np.zeros(botm.shape)
    # I need to verify if a flattening layer is needed (e.g., variable thickness to maintain TPROGs connectivity)
    # pull out the TPROGS data for the corresponding depths
    K_c = tc.get_tprogs_for_elev(K, top, bot1,tprogs_info)
    # upscale as preset
    for k in np.arange(0,nlay):
        hk[k,:] = np.mean(K_c[upscale*k:upscale*(k+1)], axis=0)
        vka[k,:] = hmean(K_c[upscale*k:upscale*(k+1)], axis=0)

    # check proportions of hydrofacies in TPROGs realization
    tprogs_hist = np.histogram(masked_tprogs_local, [0,1.1,2.1,3.1,4.1])[0]
    tprogs_hist = tprogs_hist/np.sum(tprogs_hist)
    # descending quantile levels: 1, 1-p1, 1-p1-p2, ..., 0
    tprogs_quants = 1 - np.append([0], np.cumsum(tprogs_hist)/np.sum(tprogs_hist))
    # apply the vertical anisotropy exactly as in the loop that wrote the UPW/SFR
    # packages, so strhc1 saved here matches the SFR file on disk (the previous
    # statement selected the cells but never divided them)
    # NOTE(review): confirm the anisotropy scaling was not dropped on purpose
    for n, p in enumerate(np.arange(1,5)):
        vmax = np.quantile(vka, tprogs_quants[n])
        vmin = np.quantile(vka, tprogs_quants[n+1])
        vka[(vka<vmax)&(vka>vmin)] /= params.vani[p]
    # reduce sand/gravel vka for seepage in LAK/SFR assuming some fining
    seep_vka = np.copy(vka)
    coarse_cutoff = np.quantile(seep_vka, 1-tprogs_hist.cumsum()[1])
    seep_vka[seep_vka > coarse_cutoff] /= 10
    sfr = m.sfr
    # update VKA
    zero_cond = (sfr.reach_data.strhc1 ==0)
    sfr.reach_data.strhc1 = seep_vka[sfr.reach_data.k, sfr.reach_data.i, sfr.reach_data.j]
    # make sure segments for routing have zero conductance
    sfr.reach_data.strhc1[zero_cond] = 0
    # save dataframe of stream reach data
    sfrdf = pd.DataFrame(sfr.reach_data)
    grid_sfr = grid_p.set_index(['row','column']).loc[list(zip(sfrdf.i+1,sfrdf.j+1))].reset_index(drop=True)
    grid_sfr = pd.concat((grid_sfr,sfrdf),axis=1)
    # group sfrdf by vka quantiles, thresholds ascending from mud to gravel
    vka_quants = pd.Series(np.quantile(vka, np.flip(tprogs_quants)[1:]))
    vka_quants.index=['mud','sandy mud','sand','gravel']
    grid_sfr['facies'] = 'mud'
    for n in np.arange(0,len(vka_quants)-1):
        grid_sfr.loc[vka[grid_sfr.k, grid_sfr.i, grid_sfr.j] > vka_quants.iloc[n],'facies'] = vka_quants.index[n+1]
#     # add color for facies plots
    grid_sfr = grid_sfr.join(gel_color.set_index('geology')[['color']], on='facies')
    grid_sfr.to_csv(model_ws+'/grid_sfr.csv')