In [None]:
# standard python utilities
import os
from os.path import basename, dirname, join, exists
import sys
import glob
import pandas as pd
import numpy as np
import calendar
import time

# standard python plotting utilities
import matplotlib as mpl
import matplotlib.pyplot as plt

# standard geospatial python utilities
import pyproj # for converting proj4string
import shapely
import geopandas as gpd
import rasterio

# mapping utilities
import contextily as ctx
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import matplotlib.font_manager as fm



In [None]:
# walk up from the working directory until the 'Documents' folder is reached
doc_dir = os.getcwd()
while basename(doc_dir) != 'Documents':
    parent_dir = dirname(doc_dir)
    # dirname() of a filesystem root returns the root itself, so without this
    # check the loop never terminates when no 'Documents' ancestor exists
    if parent_dir == doc_dir:
        raise FileNotFoundError("No 'Documents' directory found above "+os.getcwd())
    doc_dir = parent_dir

# dir of all gwfm data
gwfm_dir = join(dirname(doc_dir),'Box/research_cosumnes/GWFlowModel')

# put the local flopy checkout first on sys.path so it is the copy that gets imported
flopy_dir = doc_dir+'/GitHub/flopy'
if flopy_dir not in sys.path:
    sys.path.insert(0, flopy_dir)
import flopy 

import flopy.utils.binaryfile as bf


In [None]:
# set box directory for output figures and data
# root Box folder for every figure and data product of the setback analysis
box_dir = f'{gwfm_dir}/Levee_setback/levee_setback_distance_analysis/'

# suffix selecting the TPROGs variant
# ('' would be the original tprogs with conditioning data in output tsim)
tprogs_id = '_no_conditioning'

# figures and tabular output are kept per TPROGs variant
fig_dir = f'{box_dir}{tprogs_id}/figures/'
data_dir = f'{box_dir}{tprogs_id}/data_output/'
# channel data and its GIS sub-folder sit directly under box_dir
chan_dir = f'{box_dir}channel_data/'
gis_dir = f'{chan_dir}GIS/'

# switch for the shifted-stream verification further down the notebook
check_stream_loc = False


## Plotting head raster and contours

In [None]:
# candidate roots for the model run folders; the F: drive is preferred when present
ext_dir = 'F:/WRDAPP'
c_dir = 'C:/WRDAPP'

if os.path.exists(ext_dir):
    loadpth = ext_dir 
elif os.path.exists(c_dir):
    loadpth = c_dir 
else:
    # without this branch loadpth stays undefined and the next line fails with a NameError
    raise FileNotFoundError('Neither '+ext_dir+' nor '+c_dir+' exists; cannot locate the model workspace')

loadpth = loadpth +'/GWFlowModel/Cosumnes/levee_setback/setback_distance_analysis/'
# workspace of the permeameter runs for the selected TPROGs variant
model_ws = loadpth+'Permeameter_for_velocity' + tprogs_id

In [None]:

# name file of the model to load (alternative kept for reference: 'MF_child.nam')
name = 'MF.nam'
m = flopy.modflow.Modflow.load(
    name,
    model_ws=model_ws,
    exe_name='mf2005',
    version='mf2005',
)


In [None]:
# cell-by-cell budget file of the base run
cbb = flopy.utils.CellBudgetFile(f'{model_ws}/MF.cbc')

# head file, together with the output times and (kstp, kper) pairs it reports
hdobj = flopy.utils.HeadFile(f'{model_ws}/MF.hds')
times = hdobj.get_times()
spd_stp = hdobj.get_kstpkper()


In [None]:
# create dates to match totim
# strt_date = pd.to_datetime(m.dis.start_datetime)
# end_date = strt_date+pd.DateOffset(months=m.dis.nper-1)
# dates = strt_date+pd.to_timedelta(zb_df.index-1, unit = 'days')

In [None]:
# create dataframe referencing between date, totim and kstp,kper
# time_spd = pd.DataFrame(np.transpose(np.array([times,spd_stp],dtype='object')),columns = ['totim','spd_stp'])
# time_spd.index = dates
# time_spd.totim = pd.to_numeric(time_spd.totim)

## Load SFR grid for buffering

In [None]:
# dem data for cropping above land surface
dem_data = np.loadtxt(gwfm_dir+'/DIS_data/dem_52_9_200m_linear.tsv')

In [None]:
# full model grid and the subset of grid cells that carry the stream (SFR)
grid_p = gpd.read_file(gwfm_dir+'/DIS_data/grid/grid.shp')
grid_sfr = gpd.read_file(gwfm_dir+'/SFR_data/final_grid_sfr/grid_sfr.shp')
# local setback rectangles, keeping only records without missing values
local_setback = gpd.read_file(
    gwfm_dir+'/levee_setback/local_levee_setback_rectangles/local_levee_setback_rectangles.shp'
).dropna()

In [None]:


# Optional verification run: move the stream cells 15 rows up the grid and redirect
# all output to separate folders so the shifted results do not overwrite the real ones.
if check_stream_loc == True:
    # offset river by 3000 m (15 cells northwest) to verify analysis
    # look up the grid cell at (row-15, column) for every stream cell
    new_sfr_geom = grid_p.set_index(['row','column']).loc[list(zip(grid_sfr.row-15, grid_sfr.column))].reset_index()
    # visual check: shifted cells and original stream cells on the same axes
    fig,ax=plt.subplots()
    new_sfr_geom.plot(ax=ax)
    grid_sfr.plot(ax=ax)
    # m_domain.plot(ax=ax)
    # analysis checking results when channel is moved
    # NOTE(review): these paths drop the tprogs_id sub-folder used by the default fig_dir/data_dir - confirm intended
    fig_dir = box_dir+'figures/north_shifted_stream_distance_analysis/'
    data_dir = box_dir+'data_output/north_shifted_stream_distance_analysis/'

    # overwrite the stream cell location columns with the shifted cells
    # NOTE(review): assumes new_sfr_geom holds exactly these four columns in this order - verify
    grid_sfr[['row','column','node','geometry']] = new_sfr_geom

In [None]:
# define regional zones by splitting up grid into 3 sections (upper, middle and lower cosumnes)
nmx=3
local_setback = grid_p.copy()
local_setback['id'] = 1
for n in np.arange(1,nmx):
    local_setback.loc[local_setback.column > n*grid_p.column.max()/nmx, 'id'] = n+1
# dissolve grid regions into group polygons
local_setback = local_setback.dissolve('id').reset_index()
# match previous structure for easier code reuse
local_setback['location'] = ['Lower','Middle','Upper']
local_setback = local_setback.drop(columns=['node','row','column'])

In [None]:
# random puts them too close together 
# grid_sfr.loc[np.random.randint(0,len(grid_sfr),size=(5))].plot()
# better to pick 5 unique locations based on regional setting
# Oneto-Denier (~200), Mahon Ranch (150), Teichert (100), Rooney (80), Michigan Bar (10)
fig,ax=plt.subplots(figsize=(6,6))
# grid_sfr.loc[[10, 80, 100, 150, 200]].plot('reach',ax=ax, legend=True)
local_setback.plot(color="None",edgecolor='black', ax=ax)
ctx.add_basemap(source = ctx.providers.Esri.WorldImagery, crs = 'epsg:26910', attribution=False,ax=ax)

plt.ticklabel_format(style='plain')

### Create 3D array of cells included in river setback

In [None]:
## instead of trying to crop the shapefile with the straightlines, just join the local rectangle to the model grid
## then after calculating all the levee setbacks simply apply a logical array based on where the rectangles are
local_setback_grid = gpd.overlay(grid_p, local_setback)

In [None]:
buf_sfr = grid_sfr.copy()
# buffer distances applied around the stream cells: 0, 200, ..., 3200
setbacks = np.arange(0, 3400,200)
# one 2D layer per setback distance: 1 where a grid cell lies within the buffered stream
str_setbacks = np.zeros((len(setbacks),m.dis.nrow,m.dis.ncol))
# same layers, but holding the zone id of the cell instead of 1
str_setbacks_local = np.zeros((len(setbacks),m.dis.nrow,m.dis.ncol))
# take the zone ids from the zone table itself instead of assuming exactly three zones
zone_ids = local_setback.id.unique()

# grid_sfr.plot()
for n in np.arange(0,len(setbacks)):
    buf_sfr.geometry = grid_sfr.buffer(setbacks[n])
    grid_sfr_buf = gpd.sjoin(grid_p,buf_sfr, how='right', lsuffix = 'grid', rsuffix = 'sfr',predicate='within')
    # a grid cell can fall within several overlapping buffers; keep it once
    grid_sfr_buf = grid_sfr_buf.drop_duplicates('node_grid')
    # clip to local setback sites
    grid_sfr_buf_clipped = gpd.sjoin(grid_sfr_buf,local_setback, predicate='intersects',how='right')
    # individually identify local setback sites in arrays
    for zone_id in zone_ids:
        clip_vals = grid_sfr_buf_clipped.loc[grid_sfr_buf_clipped.id==zone_id,:]
        # row/column are 1-based in the shapefile, the arrays are 0-based
        str_setbacks_local[n,clip_vals.row_grid.values-1,clip_vals.column_grid.values-1] = zone_id
    str_setbacks[n, grid_sfr_buf.row_grid.values-1,grid_sfr_buf.column_grid.values-1] = 1


In [None]:
# pull out local sites only for setback analysis
local_grid_bool = np.zeros((m.dis.nrow,m.dis.ncol))
for n,s in enumerate(local_setback_grid.location.unique()):
    df = local_setback_grid[local_setback_grid.location==s]
    local_grid_bool[df.row-1, df.column-1] = n+1
# crop setbacks to the polygon widths
local_str_setbacks = str_setbacks*local_grid_bool


In [None]:
# local_grid_bool = np.zeros((m.dis.nrow,m.dis.ncol))
# local_grid_bool[local_setback_grid.row-1, local_setback_grid.column-1] = 1
plt.imshow(local_str_setbacks[16,:,:])
plt.colorbar(shrink=0.5)
plt.show()

In [None]:
# code to set bounds of raster, but need to adjust to account for angle
#doesn't work with angle
# xmin, ymin, xmax, ymax = grid_p.geometry.total_bounds
# xmax, ymax = grid_p.geometry.bounds.max().loc[['maxx','maxy']]
# xmin, ymin = grid_p.geometry.bounds.min().loc[['maxx','maxy']]


In [None]:
def plt_setbacks_labeled(str_setbacks, label):
    """
    Collapse the stack of setback layers into one 2D id raster, plot it and save it.

    str_setbacks : 3D array (setback, row, column), non-zero where a cell is in the setback
    label : text inserted in the output file names (png under fig_dir, tsv under chan_dir)

    Relies on the notebook-level variables setbacks, fig_dir and chan_dir.
    Returns nothing; the figure and the tsv are written to disk.
    """
    str_setbacks_plt = np.copy(str_setbacks)
    # make non-setback cells masked
    str_setbacks_plt = np.ma.masked_where(str_setbacks_plt==0, str_setbacks_plt)
    # the sum over layers is large for cells close to the stream (present in many setbacks);
    # flipping it around len(setbacks)+1 gives small ids near the stream and large ids far away,
    # e.g. for 0/1 layers a cell in every setback becomes 1 and a cell only in the widest becomes len(setbacks)
    str_setbacks_plt = (str_setbacks_plt.sum(axis=0)-(len(setbacks)+1))*-1

    # plot setbacks labeled 
    fig,ax = plt.subplots(figsize=(12,6))
    # one discrete colour per possible id value
    cmap = plt.get_cmap('viridis',len(setbacks)+1)
    im=ax.imshow(str_setbacks_plt,  cmap=cmap, origin='upper',aspect=1)
    plt.colorbar(mappable = im, ax = ax, ticks = np.arange(0,len(setbacks)+1), shrink=0.7)
    plt.savefig(fig_dir+'setback_distances_raster_'+label+'.png',dpi=600)
    # NOTE(review): str_setbacks_plt is a masked array here; confirm what value savetxt writes for masked cells
    np.savetxt(chan_dir+ label+'_str_setback_id_arr.tsv', str_setbacks_plt, delimiter='\t')

plt_setbacks_labeled(str_setbacks,'regional')
# plt_setbacks_labeled(local_str_setbacks, 'local')

In [None]:
import h5py
# convert arrays of setback locations to hdf5 files 
# the context manager closes the file even when one of the writes raises
with h5py.File(join(chan_dir, 'setback_locs.hdf5'), "w") as f:
    grp = f.require_group('setbacks') # makes sure group exists
    grp.attrs['description'] = 'Arrays identifying the cells included in each setback broken by layer'
    # both arrays are stored the same way: 32-bit float, gzip level 4
    for dset_name, setback_arr in (('local', local_str_setbacks), ('regional', str_setbacks)):
        dset = grp.require_dataset(dset_name, setback_arr.shape, dtype='f', compression="gzip", compression_opts=4)
        dset[:] = setback_arr

## Vertical Groundwater Velocity Analysis
Two methods:
1. Simply count the number of cells (area/volume) at land surface within the setback
2. Count the total number of groups of cells (requires upscaling?)

In [None]:
# fig,ax = plt.subplots(figsize=(8,8))

# mapview = flopy.plot.PlotMapView(model=m,ax=ax)
# linecollection = mapview.plot_grid(linewidths = 0.1)

extcbb = flopy.utils.postprocessing.get_extended_budget(cbb)

(qx, qy, qz) = flopy.utils.postprocessing.get_specific_discharge(vectors = extcbb, model=m)
# mapview.plot_vector(qx, qy, istep=10, jstep=10)
qz *= -1
qy *= -1
qx *= -1

In [None]:
# get percentiles to determine which to plot
qz_quants = np.percentile(qz, [0,25,50,75,99,99.9])
qz_quants

In [None]:
# Add local path to sys.path for importing scripts
# sys.path.append(git_dir+'/01_python_scripts')
# sys.path

In [None]:
def elev_to_tprogs_layers(elev, tprogs_top_elev, tprogs_bot_elev, num_lays):
    """
    Convert elevations to zero-based TPROGs layer indices, with index 0 at the top.

    Parameters
    ----------
    elev : scalar or array-like of elevations (same units as the TPROGs bounds)
    tprogs_top_elev : elevation of the top of the TPROGs model
    tprogs_bot_elev : elevation of the bottom of the TPROGs model
    num_lays : number of equally thick layers between top and bottom

    Returns
    -------
    integer array (numpy integer for scalar input) shaped like elev, every value
    within 0 .. num_lays-1 so it can be used directly as a layer index

    Example
    With top=80, bottom=-80 and 320 layers (0.5 thick):
    80 -> 0, 79.5 -> 1, 0 -> 160, -79.5 -> 319; anything above the top maps to 0
    and anything at or below the bottom maps to 319.
    """
    lay_thick = (tprogs_top_elev - tprogs_bot_elev)/num_lays
    elev = np.asarray(elev, dtype=float)
    elev_round = np.round((elev) * (1/lay_thick)) / (1/lay_thick) # dem rounded to the layer thickness
    # number of layers between the top and the rounded elevation; rint guards against
    # float error (e.g. 158.999...) being truncated to the wrong layer by the int cast
    elev_indices = np.rint(tprogs_top_elev/lay_thick - elev_round*(1/lay_thick))
    # elevations above the top go to the first layer, elevations at/below the bottom to the last
    elev_indices = np.clip(elev_indices, 0, num_lays-1)
    return(elev_indices.astype(int))

# tprogs layer index that sits at land surface for every dem cell
tprogs_lay = elev_to_tprogs_layers(elev=dem_data,tprogs_top_elev=80, tprogs_bot_elev=-80, num_lays=320)
# row and column index of every cell of the grid, flattened in row-major order
rows, cols = np.nonzero(np.ones(tprogs_lay.shape))

Using the 50th percentile means 50% of cells would be connected, and the 75th would mean 25% are connected. However, sand and gravel take up 24% of the domain volume (by TPROGs), so this should be the upper limit of cells connected; at least the 76th percentile must be used. According to the paper by Thomas on 3D connectivity, only 13% are needed for connection, so the actual fraction of connected cells should be less than 24% but greater than 13%. The 85th percentile seems to be a good alternative for isolating the truly highest flow. Alisha suggested reviewers will ask why a certain percentile was used, so I should either apply the 87th to align with Thomas or use the absolute maximum of the 99th per Alisha's suggestion.

The mean lengths for gravel and sand are 1300 and 1100 m in the X direction and 450 m in the Y (general setback) direction. Initial plotting for 1 realization seems to show an increase in slope after 400 m, suggesting that mean lengths are a good indicator for setback distances and that the setback should be at least the mean length at the 85th percentile. Increasing the percentile to the 95th showed a bigger increase in slope at 1000 m. It may be worth testing this hypothesis for the 85th and 95th percentiles.

In [None]:
folder = '/realization'+ str(0).zfill(3)+'/'
run_ws = model_ws+folder

# iterable part
cbb = flopy.utils.CellBudgetFile(run_ws+'/MF.cbc')
# load velocity in z direction
extcbb = flopy.utils.postprocessing.get_extended_budget(cbb)

(qx, qy, qz) = flopy.utils.postprocessing.get_specific_discharge(vectors = extcbb, model=m)
# convert flow to positive as it is all moving in the downward, -z direction
# q = qz * -1 # not a good indicator at all
# much better to use magntiude of velocity vector
q = np.sqrt(qx**2 + qy**2 + qz**2)

In [None]:
colors = ['lightgray', 'blue']
scale = [0, 1]
cmap=mpl.colors.ListedColormap(colors)
norm=mpl.colors.BoundaryNorm(scale, len(colors))


In [None]:
# get high conductivity at ground surface
q_plt = np.zeros((100,230))
q_plt[rows,cols] = q[tprogs_lay[rows,cols],rows,cols] 


fig, ax = plt.subplots(3,1,figsize=(10,6), sharex=True)
fig.tight_layout(h_pad=2)
# cmap = mpl.cm.get_cmap('viridis', 2)    # 2 discrete colors

percentile_check = [87, 95, 99]
for n,i in enumerate(percentile_check):
    # split cells into low and high conductivity, based on chosen flow percentile
    q_lay = np.zeros((100,230))
    q_lay[q_plt >= np.percentile(q_plt,i)] = 1
    im = ax[n].imshow(q_lay,cmap=cmap)
    ax[n].set_title(str(i)+'th Percentile')

# color bar on all rows, with two discrete labels
cbar=plt.colorbar(mappable = im, ax=ax,  ticks = [0.25,.75], shrink= 0.7)
cbar.ax.set_yticklabels(['Low flow facies', 'High flow facies'])
# fig.tight_layout()
plt.savefig(fig_dir+'Comparison of high flow facies at ground surface with 87, 95, 99th percentiles.png',dpi=600)

In [None]:
flow_percentile = 95

# split cells into low (0) and high (1) flow, based on the chosen flow percentile;
# shapes are taken from q itself rather than hard-coded as (320,100,230)
q_lay = np.zeros(q.shape)
q_lay[q >= np.percentile(q,flow_percentile)] = 1

# keep, for every cell, the high flow flag of the layer found at ground surface
q_plt = np.zeros(q.shape[1:])
q_plt[rows,cols] = q_lay[tprogs_lay[rows,cols],rows,cols] 



In [None]:

def plt_high_recharge_setback(qz_lay, str_setbacks,label):
    """
    Draw a grid of maps, one per setback, showing the high flow cells inside that setback.

    qz_lay : 2D array (row, column) of high flow flags at ground surface
    str_setbacks : 3D array (setback, row, column), non-zero inside the setback
    label : only used by the commented-out savefig call at the end

    Relies on the notebook-level variables setbacks, grid_sfr, m and cmap.
    Panels are laid out 5 per row and only the first nx*ny setbacks are drawn.
    """
    nx = 5
    ny = int(len(setbacks)/nx)
    # squeeze=False keeps ax two-dimensional even when a single row of panels is needed
    fig, ax = plt.subplots(ny,nx,figsize=(10,6), sharex=True,sharey=True, squeeze=False)
    fig.tight_layout(h_pad=1) # space between plots

    # the dissolved stream geometry is identical for every panel, so build it once;
    # its CRS is taken from grid_sfr instead of being asserted as a fixed EPSG code
    sfr_outline = grid_sfr.unary_union
    sfr_crs = grid_sfr.crs

    n=0
    for x in np.arange(0,ny):
        for y in np.arange(0,nx):
            # keep the flags inside the setback, zero elsewhere
            in_setback = str_setbacks[n,:,:].astype('bool')
            temp = np.zeros(qz_lay.shape)
            temp[in_setback] = qz_lay[in_setback]
            mapview = flopy.plot.PlotMapView(model=m,ax=ax[x,y])
            im = mapview.plot_array(temp, cmap=cmap)
            ax[x,y].ticklabel_format(style='plain')
            # NOTE(review): the title is 200+200*n while the outline below uses setbacks[n] - confirm the offset is intended
            ax[x,y].set_title(str(200+200*n)+'m Setback')
            grid_sfr.plot(ax=ax[x,y], color='black')
            # outline of the stream buffered by this setback distance
            sfr_union = gpd.GeoDataFrame(pd.DataFrame([0]), geometry = [sfr_outline], crs=sfr_crs)
            sfr_union.geometry = sfr_union.buffer(setbacks[n])
            sfr_union.plot(color="None", edgecolor='black',ax=ax[x,y], linewidth = 0.2)
            n+=1

    # color bar on all rows, with two discrete labels
    cbar=plt.colorbar(mappable = im, ax=ax,  ticks = [0.25,.75], shrink= 0.7)
    cbar.ax.set_yticklabels(['Low flow facies', 'High flow facies'])

#     plt.savefig(fig_dir+'Comparison of ' + str(flow_percentile)+'th flow facies for '+ label+' setback distances.png',dpi=600)
#     plt.savefig(fig_dir+'Comparison of ' + str(flow_percentile)+'th flow facies for '+ label+' setback distances.png',dpi=600)
plt_high_recharge_setback(q_plt, str_setbacks,label='regional')
# plt_high_recharge_setback(q_plt, local_str_setbacks,label='local')

The plots of setback distance and high flow cells at ground surface make the 99th percentile look very sparse and the 95th percentile look more reasonable.

We want to vectorize the raster to find:
1. The number of unique high flow groups
2. The area of each high flow group
3. The mean, max, min, median and variance of area
4. Location? Not needed as it will be summed anyway  

In a dataframe there will be one row for each combination of realization and setback, with columns for the mean, median, max, min, and variance of the area of high flow cell groups.
We need to iterate over (1) realizations and (2) setbacks, to adjust which cells are considered high flow, and then (3) count the groups and area of high flow cells. Groups must be counted separately for each setback because, unlike in the cumulative area analysis, a setback may split a group into two or cut off part of a group.

In [None]:
#             qz_lay_setback = np.zeros((100,230)) # overlap high flow cells with setback distance
#             qz_lay_setback[str_setbacks[0,:,:].astype('bool')] = qz_lay[str_setbacks[0,:,:].astype('bool')]
# #             hf = qz_lay[str_setbacks[n,:,:].astype('bool')]
#             hf_tot[r,n] = qz_lay_setback.sum() # hf.sum()
#             qz_lay_setback_local = np.zeros((100,230)) # overlap high flow cells with local setback distance
#             qz_lay_setback_local[local_str_setbacks[0,:,:].astype('bool')] = qz_lay[local_str_setbacks[0,:,:].astype('bool')]
#             #hf_local = qz_lay[local_str_setbacks[n,:,:].astype('bool')]
#             hf_tot_local[r,n] = qz_lay_setback_local.sum()#hf_local.sum()

In [None]:
def highflow_at_groundsurface(run_ws, flow_percentile):
    ''' take Cell by Cell budget file and finds high flow cells by percentile
    then finds those that outcrop at ground surface

    run_ws : folder containing the MF.cbc output of one run
    flow_percentile : percentile (0-100) of flow magnitude at or above which a cell counts as high flow

    Relies on the notebook-level variables m, rows, cols and tprogs_lay.
    Returns a 2D array (row, column) holding 1 where the cell at ground surface is high flow, else 0.
    '''
    cbb = flopy.utils.CellBudgetFile(run_ws+'/MF.cbc')
    try:
        # load the face flows as arrays
        extcbb = flopy.utils.postprocessing.get_extended_budget(cbb)
    finally:
        # this function runs once per realization; release the file handle each time
        cbb.close()
    (qx, qy, qz) = flopy.utils.postprocessing.get_specific_discharge(vectors = extcbb, model=m)
    # q = qz * -1 # not a good indicator at all
    # much better to use magnitude of velocity vector
    q = np.sqrt(qx**2 + qy**2 + qz**2)
    # split cells into low (0) and high (1) flow, based on chosen flow percentile;
    # shapes follow q instead of being hard-coded as (320,100,230)
    q_lay = np.zeros(q.shape)
    q_lay[q >= np.percentile(q,flow_percentile)] = 1

    # keep, for every cell, the flag of the layer found at ground surface
    q_plt = np.zeros(q.shape[1:])
    q_plt[rows,cols] = q_lay[tprogs_lay[rows,cols],rows,cols] 

    return(q_plt)

In [None]:
def overlay_hf_setback(q_lay, str_setbacks):
    """
    Keep the values of q_lay that fall inside a setback and zero everything else.

    q_lay : 2D array (row, column) of high flow flags
    str_setbacks : 2D array of the same shape, non-zero where the cell is inside the setback

    Returns a new float array shaped like q_lay; the inputs are not modified.
    """
    in_setback = np.asarray(str_setbacks).astype('bool')
    # size the output from the input instead of assuming a (100,230) grid
    q_lay_setback = np.zeros(np.shape(q_lay))
    q_lay_setback[in_setback] = q_lay[in_setback]
    return(q_lay_setback)


In [None]:
# Polygon is defined in shapely.geometry; shapely.ops only exposes it incidentally
from shapely.geometry import Polygon
from rasterio.features import shapes, rasterize
# from shapely.geometry import MultiPoint
#, LineString, linemerge, polygonize, unary_union
# import pprint

def calc_area_stats(r, s, l, q_lay, stat_cols):
    ''' for a given realization and setback, find unique cell groups and take summary stats

    r : realization number stored in the Realization column
    s : setback number stored in the Setback column
    l : site id stored in the Location column (only when 'Location' is in stat_cols)
    q_lay : 2D array, non-zero where a cell is high flow
    stat_cols : column names of the returned one-row DataFrame

    Returns a one-row DataFrame; the statistics stay 0 when there is no high flow cell.
    '''
    # The function shapes from rasterio requires uint8 format
    q_lay_uint = q_lay.astype(rasterio.uint8)
    # 'Values of False or 0 will be excluded from feature generation'
    out = shapes(q_lay_uint, mask=q_lay.astype(bool), connectivity = 8)
    # iterate over all high flow cell groups
    grp_areas = []
    for geom, _value in out:
        # coordinates are in terms of row and column number; the first ring is the
        # outline of the group, any further rings are holes enclosed by it
        rings = geom.get('coordinates')
        # subtracting the holes makes the polygon area equal the number of cells in the group
        grp_areas.append(Polygon(rings[0], rings[1:]).area)
    num_cells = np.array(grp_areas, dtype=float)

    cell_stats = pd.DataFrame(np.zeros((1,len(stat_cols))), columns=stat_cols)
    # identify the row even when no group is found, so empty results are not all filed under 0/0
    cell_stats = cell_stats.assign(Realization = r, Setback = s)
    if len(num_cells)!=0: # if a realization has no cells then leave the statistics as zeros
        # calculate statistics for given realization, setback
        cell_stats = cell_stats.assign(Num_Grps = len(num_cells), Mean = num_cells.mean(), Median = np.median(num_cells), 
                                       Min = num_cells.min(),Max = num_cells.max(), Variance = num_cells.var())
    if 'Location' in stat_cols:
        cell_stats['Location'] = l
    return(cell_stats)

In [None]:

def high_flow_count(flow_percentile, str_setbacks, local_str_setbacks, n_realizations=100):
    """
    Count high flow cells at ground surface and summarise their groups for every
    realization and setback, for the regional setbacks and for each local site.

    flow_percentile : percentile passed to highflow_at_groundsurface
    str_setbacks : 3D array (setback, row, column), non-zero inside the regional setback
    local_str_setbacks : same shape, holding the site id of the cell (0 outside any site)
    n_realizations : number of realization folders to process (realization000, 001, ...)

    Relies on the notebook-level variables setbacks, model_ws and data_dir.
    Returns nothing; five files are written under data_dir:
    the per-realization surface map (tsv), the regional and local cell counts (csv)
    and the regional and local group statistics (csv).
    """
    tic = time.time()
    nrow, ncol = str_setbacks.shape[1:]
    # site ids are the values above the smallest one (assumed to be the 0 background);
    # np.unique scans the whole 3D array, so evaluate it once rather than inside the loops
    site_ids = np.unique(local_str_setbacks)[1:].astype(int)
    num_sites = len(site_ids)

    # will count total number of cells for each setback distance and for all realizations
    hf_tot = np.zeros((n_realizations,len(setbacks)))
    # layer for each local setback
    hf_tot_local = np.zeros((num_sites, n_realizations,len(setbacks)))
    hf_all = np.zeros((n_realizations, nrow, ncol)) # map high flow for each realization

    # dataframe for grouping and area analysis
    stat_cols = ['Num_Grps','Mean','Median','Min','Max','Variance','Realization', 'Setback']
    cell_stats_all = pd.DataFrame(np.zeros((n_realizations*len(str_setbacks),len(stat_cols))), columns=stat_cols)
    local_cols = stat_cols+['Location']
    cell_stats_all_local = pd.DataFrame(np.zeros((num_sites*n_realizations*len(str_setbacks),len(local_cols))), columns=local_cols)

    k=0 # counter 
    kl = 0 # local counter
    for r in np.arange(0,n_realizations):
        print('Realization', r, ' time since start ',(time.time()-tic)/60)
        folder = '/realization'+ str(r).zfill(3)+'/'
        run_ws = model_ws+folder

        q_lay = highflow_at_groundsurface(run_ws, flow_percentile)
        hf_all[r,:] = np.copy(q_lay)
        # complete analysis for regional and local setbacks
        for n in np.arange(0,len(setbacks)):
            # overlay high flow cells with setback distance
            q_lay_setback = overlay_hf_setback(q_lay, str_setbacks[n,:,:])
            # calculate total cells in each setback
            hf_tot[r,n] = q_lay_setback.sum()
            # calculate high flow groups and summary statistics (one-row frame -> plain row of values)
            cell_stats_all.iloc[k] = calc_area_stats(r,n, 0, q_lay_setback, stat_cols).to_numpy()[0]
            # iterate over local setbacks
            for l in site_ids:
                # 0/1 mask of the cells of this site within this setback
                arr = np.zeros(local_str_setbacks[n,:,:].shape)
                arr[local_str_setbacks[n,::]==l] = 1
                q_lay_setback_local = overlay_hf_setback(q_lay, arr)
                hf_tot_local[l-1,r,n] = q_lay_setback_local.sum()
                cell_stats_all_local.iloc[kl] = calc_area_stats(r,n,l, q_lay_setback_local, local_cols).to_numpy()[0]
                kl+=1
            k +=1
    hf_tot_df = pd.DataFrame(hf_tot, columns = setbacks)
    # stack the sites on top of each other: one block of n_realizations rows per site
    hf_tot_local = np.reshape(hf_tot_local, (num_sites*n_realizations,len(setbacks)))
    hf_tot_local_df = pd.DataFrame(hf_tot_local, columns = setbacks)
    # stack the realization maps row-wise so they fit in a 2D text file
    hf_all_out = np.reshape(hf_all, (n_realizations*nrow, ncol))
    np.savetxt(data_dir+'surface_highflow_by_realization_'+str(flow_percentile)+'.tsv', hf_all_out, delimiter = '\t')

    # save counted high flow cells to a csv
    hf_tot_df.to_csv(data_dir+'surface_highflow_by_distance_regional_'+str(flow_percentile)+'.csv', index=False)
    hf_tot_local_df.to_csv(data_dir+'surface_highflow_by_distance_local_'+str(flow_percentile)+'.csv', index=False)
    # save grouping analysis and area statistics
    cell_stats_all.to_csv(data_dir+'surface_highflow_cells_statistics_regional'+str(flow_percentile)+'.csv', index=False)
    cell_stats_all_local.to_csv(data_dir+'surface_highflow_cells_statistics_local'+str(flow_percentile)+'.csv', index=False)

    toc = time.time()
    print('Total time was', (toc-tic)/60, 'minutes')

In [None]:
model_ws 

In [None]:

high_flow_count(95, str_setbacks, local_str_setbacks)

In [None]:
# Check impact of lower hydraulic gradient 0.01 instead of 0.1 on flow result

In [None]:
# reload the outputs saved by high_flow_count for the 95th percentile
flow_percentile=95
percentile = 95
label='regional'
hf_tot_1 = pd.read_csv(f'{data_dir}surface_highflow_by_distance_{label}_{percentile}.csv')

hf_all_in =  np.loadtxt(f'{data_dir}surface_highflow_by_realization_{flow_percentile}.tsv', delimiter = '\t')
# undo the row-wise stacking and keep the map of the first realization only
hf_all1 = hf_all_in.reshape((100, 100, 230))[0,:]

In [None]:

tic = time.time()
# total number of high flow cells for each setback distance, single run
hf_tot = np.zeros((len(setbacks)))

# dataframe for grouping and area analysis
stat_cols = ['Num_Grps','Mean','Median','Min','Max','Variance','Realization', 'Setback']
cell_stats_all = pd.DataFrame(np.zeros((len(str_setbacks),len(stat_cols))), columns=stat_cols)

# only one run is compared, stored as realization 0
r = 0
print('Realization', r, ' time since start ',(time.time()-tic)/60)
run_ws = model_ws+ '_VHG_0.01'+'/'

q_lay = highflow_at_groundsurface(run_ws, flow_percentile)
hf_all = np.copy(q_lay) # map of high flow at ground surface for this run
# regional setbacks only; the former per-site loop built masks that were never used
for n in np.arange(0,len(setbacks)):
    # overlay high flow cells with setback distance
    q_lay_setback = overlay_hf_setback(q_lay, str_setbacks[n,:,:])
    # calculate total cells in each setback
    hf_tot[n] = q_lay_setback.sum()
    # calculate high flow groups and summary statistics (one-row frame -> plain row of values)
    cell_stats_all.iloc[n] = calc_area_stats(r,n, 0, q_lay_setback, stat_cols).to_numpy()[0]
# hf_tot_df = pd.DataFrame(hf_tot, columns = setbacks)

toc = time.time()
print('Total time was', (toc-tic)/60, 'minutes')

In [None]:
# no difference between cumulative high flow cells
hf_tot-hf_tot_1.iloc[0]

In [None]:
# no difference between ground surface array
plt.imshow(hf_all-hf_all1)