In [2]:
#### import modules
import numpy as np
import arcpy
import os
import sys
from arcpy.sa import *
import pandas as pd
#import gdal
from arcpy import env
import shutil
import numpy.ma as ma
import netCDF4 as nc
import subprocess
import re
%matplotlib notebook
# this is a list of additional functions to load up, as to not clutter the script
%run ../../Std_input/COMMON/plot_and_table_functions

# set properties
arcpy.env.overwriteOutput = True # make sure overwrite files is on
# projection definition 
sr_project = arcpy.SpatialReference(32702)   # Project dataset into WGS84
cel_size = 100     # in m 
Control_File_Name = 'Tutuila_cal_controlFile.ctl'


#### General coverages and paths. More, basic model setup.
GIS_FOLDER = os.path.join('..', '..', 'Raw_GIS_Data')
STD_INPUT_FOLDER = os.path.join('..', '..', 'Std_input')
# path to the grid bound
Grid_shp = os.path.join(GIS_FOLDER, 'grid_bound.shp')

if not os.path.exists(os.path.join('..', 'output//')):
    os.makedirs(os.path.join('..', 'output//'))
                        
        
input_folder = os.path.join("..", "..", "Model_workspace", "input") 

# Move in other standard input files and modify control file to the shape of the current run
# modify the control file grid for the given run   (note this uses dimensions from the rainfall adjustment grid in april)
with open(os.path.join(input_folder, 'RF_adj_grids', 'rfadj_apr.asc'), 'r') as dims_file:   # open an ASC file and get the dimenstions out of it 
    dimsfile1 = dims_file.read().splitlines(True)
    x_dim = float(re.findall('\d+', dimsfile1[0])[-1])    
    y_dim = float(re.findall('\d+', dimsfile1[1])[-1])

with open(os.path.join('.', Control_File_Name), 'r') as fin:   # open file 
    data = fin.read().splitlines(True)
with open(os.path.join('.', Control_File_Name), 'w') as fout:     # delete first line
    fout.writelines(data[1:])
new_first = 'GRID {} {} 515000. 8409000. {} '.format(x_dim, y_dim, cel_size)  # new first line 
with open(os.path.join('.', Control_File_Name), 'r+') as file:                # add in new first line and save file  
    file_data = file.read()
    file. seek(0, 0)
    file. write(new_first + '\n' + file_data)

# land use lookup file
shutil.copy2(os.path.join(STD_INPUT_FOLDER, 'Landuse_lookup_maui_mod5.txt') ,os.path.join(input_folder))    

# Simple RO : RF ratios file
shutil.copy2(os.path.join(GIS_FOLDER, 'Runofftorainfall2\\RO_Rf_ratios_real_monthly3_2000_2010.txt') ,os.path.join(input_folder)) # note this is from the simplified version with zone IDs starting at 1

# Rain Fragments file
shutil.copy2(os.path.join(STD_INPUT_FOLDER, "Fragments", 'Rainfall_fragments_2001.prn') ,os.path.join(input_folder))  

#  Fragments sequence file
shutil.copy2(os.path.join(STD_INPUT_FOLDER, "Fragments", 'Sequence_file_2002.prn') ,os.path.join(input_folder)) 

# need to run this before the 1st model run to re-fresh the direct net infiltration coverage to not include the MFR. 
arcpy.Plus_3d(os.path.join(input_folder,  'Direct_infiltration',  "A_WL_Rast.asc"), os.path.join(input_folder,  'Direct_infiltration', "OSDS_inlf_in.asc"), os.path.join(input_folder,  'Direct_infiltration', "temprast"))
arcpy.RasterToASCII_conversion(os.path.join(input_folder,  'Direct_infiltration', "temprast"), os.path.join(input_folder,  'Direct_infiltration', "Total_inlf_in.asc"))


# RUN Da MODEL (with no MFR) 
os.chdir(os.path.join("..", "run"))
# Executable and control file copies
shutil.copy2(os.path.join("." , 'swb2.exe') ,os.path.join('..', 'output')) 
shutil.copy2(os.path.join("." , Control_File_Name) ,os.path.join('..', 'output')) 

os.chdir(os.path.join("..", "output"))
subprocess.call('swb2.exe {}'.format(Control_File_Name), shell=True)
os.chdir(os.path.join("..", "run"))

### Post process da files
outspace = os.path.join('..', "output", 'post_prcessed_no_MFR')
if not os.path.exists(outspace):
    os.makedirs(outspace)
    
# Parameters
Desired_files = ['actual_et',  'direct_net_infiltation', 'direct_soil_moisture',
             'interception', 'net_infiltration', 'rainfall', 'runoff'] # 'delta_soil_storage',  'irrigation', 
XLLCORNER =      515000.000
YLLCORNER =      8409000.000
CELLSIZE  =      cel_size

# functions
def create_file_reference( component_name ):
    '''
    This is a simple convenience function that will form a path and filename to a
    given water budget component
    '''
    # specify the prefix, path to SWB2 output, timeframe, and resolution
    #output_path = os.path.join(os.getcwd(), "output")
    #prefix      = '\\'
    start_year  = '2000-01-01'
    end_year    = '2009-12-31'
    ncol        = str(int(x_dim))
    nrow        = str(int(y_dim))
    return(  component_name + '__' + start_year + '_' 
          + end_year + '__' + nrow + '_by_' + ncol + '.nc' )

# some other functions to post process stuff

def ncdump(nc_fid):
    '''ncdump outputs dimensions, variables and their attribute information of netCDF4 files'''
    nc_attrs = nc_fid.ncattrs()
    nc_dims = [dim for dim in nc_fid.dimensions]  
    nc_vars = [var for var in nc_fid.variables] 
    return nc_attrs, nc_dims, nc_vars

def writeArrayToArcGrid(arr,filename,xll,yll,cellsize,no_data_val):
    """ this takes a 2d numpy array and turns it into an .asc file """
    arr                = np.copy(arr)
    arr[np.isnan(arr)] = no_data_val
    headerstring       = bytes('NCOLS %d\nNROWS %d\nXLLCENTER %f\nYLLCENTER %f\nCELLSIZE %f\nNODATA_value %f\n' % 
        (arr.shape[1], arr.shape[0], xll, yll, cellsize, no_data_val), 'UTF-8')

    with open(filename,'wb') as fout:
        fout.write(headerstring)
        np.savetxt(fout,arr,'%5.2f')
        

# post process the whole model domain
os.chdir(os.path.join("..", 'output'))  # difficulty in making the path to the file so need to change into the output directory
var = []; tot = []; nclist = []

# Step 1: make list of files that you wish to process 
for i in Desired_files:
    Da_file = create_file_reference(i)
    nclist.append(Da_file)
    
# Step 2 average the daily dimension (len(t) is # of days in the run) to annual 
for i, f in enumerate(nclist):
    nc_data = nc.Dataset(nclist[i])
    nc_attrs, nc_dims, nc_vars = ncdump(nc_data)
    nc_var = nc_vars[3]
    t = nc_data.variables['time'][:]
    y = nc_data.variables['y'][:]
    x = nc_data.variables['x'][:]
    nt = len(t)
    nrow = len(y)
    ncol = len(x)
    rd = np.zeros((nrow, ncol))  # create 0 array of the proper shape
    for day in range(nt):
        r_temp = nc_data.variables[str(nc_var)][day, :, :]
        r_filled = np.ma.filled(r_temp, fill_value=0)    # fills in missing values with 0s (i think) 
        rd = rd+r_filled                                 # sequentially add each day's value in each cell to the empty frame  
    r = rd/nt*365 # to create a one year average from all the years in model.  if want to add leap years add 0.25 
    
    # step 3: write each yearly average array to a .asc file
    keyname = Desired_files[i] 
    writeArrayToArcGrid(r, os.path.join(outspace, "{}_annual.asc".format(keyname)), XLLCORNER, YLLCORNER, CELLSIZE, -999)
    
    # Step 4: calculate total amounts of water in cubic meters per day and create statistics dataframe
    m3pd = ((cel_size**2)*r.sum()*.0254)/365 
    print("{} total  {} [m3/d]".format(keyname, '%.1f' % m3pd))
    var.append(keyname) ; tot.append(m3pd)     # make lists to populate pandas dataframe
    
    nc_data.close()          # make sure to close the nc file so it doesnt stay open

stat_frame = pd.DataFrame({'Variable' : var, 'total_[m3pd]': tot})    #in case you want the max and min#, "Max_[in]": mx, "Min_[in]":mn})
stat_frame["total_[MGD]"] = stat_frame["total_[m3pd]"]/3785.41178       # put things in MGD if interested, 3785.41178 is number of gal in m3      
Precip = list(stat_frame[stat_frame['Variable'] == 'rainfall']['total_[m3pd]'])[0]   # define the amount of calculated Precip
Dir_net_inf = list(stat_frame[stat_frame['Variable'] == 'direct_net_infiltation']['total_[m3pd]'])[0]   # define the amount of calculated infiltration
WB_ins = Precip + Dir_net_inf
stat_frame['pct_of_pcip'] = stat_frame["total_[m3pd]"]/WB_ins
stat_frame.to_csv(os.path.join(outspace, "stats_run7_{}m_cells.csv".format(cel_size)))

# how does the model balance? 
print("WATER BALANCE ratio: outs over ins water budget balanece =  {} % ".format(stat_frame['pct_of_pcip'].sum()-1))   # check water balance

os.chdir(os.path.join("..", 'run'))  # then back out to the home directory

# calculate statistics for individual watersheds
# note, for some reason will not overwrite csvs need to clear them out or recode to make this issue not an issue
#create workspace
outspace_table = os.path.join('..', 'output', 'post_prcessed_no_MFR', "tables")
if not os.path.exists(outspace_table):
    os.makedirs(outspace_table)
sheds = (os.path.join(GIS_FOLDER, 'Watersheds\\Runoff_zones_sheds_WGS2S_clip.shp'))

# process each raster layer into a table
for i in (os.listdir(outspace)):
    if i.endswith('.asc'):
        outZSaT = ZonalStatisticsAsTable(sheds, "SHED_NAME", os.path.join(outspace, i), os.path.join(outspace_table, "temptab.dbf"))  # in arc format
        arcpy.TableToTable_conversion(outZSaT, outspace_table, "Table_{}_1.csv".format(i))                                            # take table out of stupid arc format and put into csv format 
        
# this block takes each of the csvs, reads them and calculates water volumnes (m3/d) for each watershed
templist = []
for c in (os.listdir(os.path.join(outspace, "tables"))):
    if c.endswith('.csv'):
        data = pd.read_csv(os.path.join(outspace, "tables", c))
        keyname = c.split("Table_")[1].split("_annual")[0]                   # parameter being worked on
        data[keyname] = (data['MEAN']*.0254/365) * data['AREA'] 
        temp_frame = data[["SHED_NAME", keyname]]
        templist.append(temp_frame)
        
summarry_frame1 = data[['SHED_NAME']]                                        # this is just sticking them all together into one dataframe
for i in templist:
    summarry_frame1 = summarry_frame1.merge(i, on ='SHED_NAME', how='outer')
                          

# that was in actual volumns, now to convert each component into a fraction of the rainfall...
templist2 = []
summarry_frame2 = data[['SHED_NAME']]
for i in summarry_frame1.columns[1:]:
    temp_frame = data[['SHED_NAME']] ; temp_frame[i.split("-")[0]] = summarry_frame1[i]/summarry_frame1['rainfall']
    templist2.append(temp_frame)
    
summarry_frame3 = data[['SHED_NAME']]
for i in templist2:
    summarry_frame3 = summarry_frame3.merge(i, on ='SHED_NAME', how='outer')
                          
summarry_frame_4000 = summarry_frame1.set_index('SHED_NAME')
summarry_frame_4 = summarry_frame_4000.select_dtypes(exclude=['object'])*264.172/1000000   # convert to million gallons per day
    
summarry_frame3.to_csv(os.path.join(outspace, "watershed_summary_stats_percentages.csv"))
summarry_frame1.to_csv(os.path.join(outspace, "watershed_summary_stats_volume_m3pd.csv"))
summarry_frame_4.to_csv(os.path.join(outspace, "watershed_summary_stats_volumes_MGD.csv"))


### MFR calculations      
outspace = os.path.join('..', "output", 'post_prcessed_no_MFR')
if not os.path.exists(outspace):
    os.makedirs(outspace)

# caclulate how much runoff to dump into the MFR area
outspace_table = os.path.join('..', 'output', 'MFR_calcs', "tables")
if not os.path.exists(outspace_table):
    os.makedirs(outspace_table)
    
Contributing_area_leo = (os.path.join(GIS_FOLDER, 'MFR\\Contributing_MRF_Areas_leone.shp'))
Contributing_area_taf = (os.path.join(GIS_FOLDER, 'MFR\\Contributing_MRF_Areas_tafuna.shp'))

outZSaT_leo = ZonalStatisticsAsTable(Contributing_area_leo, "SHED_NAME", os.path.join(outspace, "runoff_annual.asc"), os.path.join(outspace_table, "temptab_leo.dbf"))  # in arc format
arcpy.TableToTable_conversion(outZSaT_leo, outspace_table, "runoff_MFR_leo.csv")                                            # take table out of stupid arc format and put into csv format 
outZSaT_leo = ZonalStatisticsAsTable(Contributing_area_taf, "SHED_NAME", os.path.join(outspace, "runoff_annual.asc"), os.path.join(outspace_table, "temptab_taf.dbf"))  # in arc format
arcpy.TableToTable_conversion(outZSaT_leo, outspace_table, "runoff_MFR_taf.csv") 

data_leo = pd.read_csv(os.path.join(outspace_table, "runoff_MFR_leo.csv"))
data_taf = pd.read_csv(os.path.join(outspace_table, "runoff_MFR_taf.csv"))

data_leo["AreaRunoff_m3pd"] = (data_leo['MEAN']*.0254/365) * data_leo['AREA']    # this is how much runoff is in each MFR contributionzone
data_taf["AreaRunoff_m3pd"] = (data_taf['MEAN']*.0254/365) * data_taf['AREA']    # this is how much runoff is in each MFR contributionzone
tot_MFR_leo = sum(data_leo['AreaRunoff_m3pd'])
tot_MFR_taf = sum(data_taf['AreaRunoff_m3pd'])

# calculate the MFR area and prepare input files
workspace = os.path.join(input_folder,  'MFR')
if not os.path.exists(workspace):
    os.makedirs(workspace)

arcpy.Project_management(os.path.join(GIS_FOLDER, 'MFR\\MFR_infiltration_area_leone.shp'),  os.path.join(workspace, 'MFR_infiltration_area_leone_projected.shp'), sr_project) 
arcpy.AddField_management(os.path.join(workspace, 'MFR_infiltration_area_leone_projected.shp'), "MFR_inch", "DOUBLE")    # add Active cell unit field
arcpy.AddGeometryAttributes_management(os.path.join(workspace, 'MFR_infiltration_area_leone_projected.shp'), "AREA")
Total_MFR_area_leo = 0                                                                                                        # stupid block just to calculate the total area
with arcpy.da.SearchCursor(os.path.join(workspace, 'MFR_infiltration_area_leone_projected.shp'), "POLY_AREA") as cursor:
    for row in cursor:
        Total_MFR_area_leo = Total_MFR_area_leo + row[0]

arcpy.Project_management(os.path.join(GIS_FOLDER, 'MFR\\MFR_infiltration_area_tafuna.shp'),  os.path.join(workspace, 'MFR_infiltration_area_tafuna_projected.shp'), sr_project) 
arcpy.AddField_management(os.path.join(workspace, 'MFR_infiltration_area_tafuna_projected.shp'), "MFR_inch", "DOUBLE")    # add Active cell unit field
arcpy.AddGeometryAttributes_management(os.path.join(workspace, 'MFR_infiltration_area_tafuna_projected.shp'), "AREA")
Total_MFR_area_taf = 0                                                                                                        # stupid block just to calculate the total area
with arcpy.da.SearchCursor(os.path.join(workspace, 'MFR_infiltration_area_tafuna_projected.shp'), "POLY_AREA") as cursor:
    for row in cursor:
        Total_MFR_area_taf = Total_MFR_area_taf + row[0]
        
Inches_of_MFR_across_leo = (tot_MFR_leo/Total_MFR_area_leo/0.0254) * 0.75   # note this 75% number if directly from Izuka 2007
Inches_of_MFR_across_taf = (tot_MFR_taf/Total_MFR_area_taf/0.0254) * 0.75   # note this 75% number if directly from Izuka 2007

arcpy.CalculateField_management(os.path.join(workspace, 'MFR_infiltration_area_leone_projected.shp'), "MFR_inch", "!MFR_inch! + {}".format(Inches_of_MFR_across_leo), "PYTHON3") # calculate the appropriate amount of infitration in inches spread over all MFR zone
arcpy.CalculateField_management(os.path.join(workspace, 'MFR_infiltration_area_tafuna_projected.shp'), "MFR_inch", "!MFR_inch! + {}".format(Inches_of_MFR_across_taf), "PYTHON3") # calculate the appropriate amount of infitration in inches spread over all MFR zone

arcpy.Erase_analysis(Grid_shp, os.path.join(workspace, 'MFR_infiltration_area_leone_projected.shp'),  os.path.join(workspace, 'MFR_infiltration_area_leone_bound.shp'))
arcpy.Erase_analysis(Grid_shp, os.path.join(workspace, 'MFR_infiltration_area_tafuna_projected.shp'),  os.path.join(workspace, 'MFR_infiltration_area_tafuna_bound.shp'))

arcpy.Merge_management([os.path.join(workspace, 'MFR_infiltration_area_leone_bound.shp'), os.path.join(workspace, 'MFR_infiltration_area_leone_projected.shp')], os.path.join(workspace, 'MFR_infiltration_area_leone_ready.shp'))
arcpy.Merge_management([os.path.join(workspace, 'MFR_infiltration_area_tafuna_bound.shp'), os.path.join(workspace, 'MFR_infiltration_area_tafuna_projected.shp')], os.path.join(workspace, 'MFR_infiltration_area_tafuna_ready.shp'))

arcpy.PolygonToRaster_conversion(os.path.join(workspace, 'MFR_infiltration_area_leone_ready.shp'), "MFR_inch", os.path.join(workspace, "MFR_Rast_L"), cell_assignment="MAXIMUM_AREA",  cellsize=cel_size)
arcpy.PolygonToRaster_conversion(os.path.join(workspace, 'MFR_infiltration_area_tafuna_ready.shp'), "MFR_inch", os.path.join(workspace, "MFR_Rast_T"), cell_assignment="MAXIMUM_AREA",  cellsize=cel_size)

arcpy.RasterToASCII_conversion(os.path.join(workspace, "MFR_Rast_L"), os.path.join(workspace, "MFR_Rast_L.asc"))
arcpy.RasterToASCII_conversion(os.path.join(workspace, "MFR_Rast_T"), os.path.join(workspace, "MFR_Rast_T.asc"))

arcpy.Delete_management(os.path.join(workspace, 'MFR_infiltration_area_leone_projected.shp'))
arcpy.Delete_management(os.path.join(workspace, 'MFR_infiltration_area_tafuna_projected.shp'))
arcpy.Delete_management(os.path.join(workspace, 'MFR_infiltration_area_leone_bound.shp'))
arcpy.Delete_management(os.path.join(workspace, 'MFR_infiltration_area_tafuna_bound.shp'))
arcpy.Delete_management(os.path.join(workspace, 'MFR_infiltration_area_leone_ready.shp'))
arcpy.Delete_management(os.path.join(workspace, 'MFR_infiltration_area_tafuna_ready.shp'))
arcpy.Delete_management(os.path.join(workspace, 'MFR_Rast_L'))
arcpy.Delete_management(os.path.join(workspace, 'MFR_Rast_T'))

# now combine the MFR raster into the other direct infiltration rasters
arcpy.Plus_3d(os.path.join(input_folder,  'MFR', "MFR_Rast_L.asc"), os.path.join(input_folder,  'Direct_infiltration', "WLOSDrast"), os.path.join(input_folder,  'Direct_infiltration', "temprast2"))
arcpy.Plus_3d(os.path.join(input_folder,  'MFR', "MFR_Rast_T.asc"), os.path.join(input_folder,  'Direct_infiltration', "temprast2"), os.path.join(input_folder,  'Direct_infiltration', "temprast3"))
arcpy.RasterToASCII_conversion(os.path.join(input_folder,  'Direct_infiltration', "temprast3"), os.path.join(input_folder,  'Direct_infiltration', "Total_inlf_in.asc"))

print('MFR leo in MGD is {}'.format(tot_MFR_leo*264.172/1000000))
print('MFR taf in MGD is {}'.format(tot_MFR_taf*264.172/1000000))
print('MFR total in MGD is {}'.format((tot_MFR_leo+tot_MFR_taf)*264.172/1000000))

# Run da Model again, this time including the MFR
# Executable and control file copies
shutil.copy2(os.path.join("." , 'swb2.exe') ,os.path.join('..', 'output')) 
shutil.copy2(os.path.join("." , Control_File_Name) ,os.path.join('..', 'output')) 

os.chdir(os.path.join("..", "output"))
subprocess.call('swb2.exe {}'.format(Control_File_Name), shell=True)
os.chdir(os.path.join("..", "run"))

# Post process the files again, this time with the MFR added 
outspace = os.path.join('..', "output", 'post_prcessed_with_MFR')
if not os.path.exists(outspace):
    os.makedirs(outspace)

# post process the whole model domain
os.chdir(os.path.join("..", 'output'))  # difficulty in making the path to the file so need to change into the output directory
var = []; tot = []; nclist = []

# Step 1: make list of files that you wish to process 
for i in Desired_files:
    Da_file = create_file_reference(i)
    nclist.append(Da_file)
    
# Step 2 average the daily dimension (len(t) is # of days in the run) to annual 
for i, f in enumerate(nclist):
    nc_data = nc.Dataset(nclist[i])
    nc_attrs, nc_dims, nc_vars = ncdump(nc_data)
    nc_var = nc_vars[3]
    t = nc_data.variables['time'][:]
    y = nc_data.variables['y'][:]
    x = nc_data.variables['x'][:]
    nt = len(t)
    nrow = len(y)
    ncol = len(x)
    rd = np.zeros((nrow, ncol))  # create 0 array of the proper shape
    for day in range(nt):
        r_temp = nc_data.variables[str(nc_var)][day, :, :]
        r_filled = np.ma.filled(r_temp, fill_value=0)    # fills in missing values with 0s (i think) 
        rd = rd+r_filled                                 # sequentially add each day's value in each cell to the empty frame  
    r = rd/nt*365 # to create a one year average from all the years in model.  if want to add leap years add 0.25 
    
    # step 3: write each yearly average array to a .asc file
    keyname = Desired_files[i] 
    writeArrayToArcGrid(r, os.path.join(outspace, "{}_annual.asc".format(keyname)), XLLCORNER, YLLCORNER, CELLSIZE, -999)
    
    # Step 4: calculate total amounts of water in cubic meters per day and create statistics dataframe
    m3pd = ((cel_size**2)*r.sum()*.0254)/365 
    print("{} total  {} [m3/d]".format(keyname, '%.1f' % m3pd))
    var.append(keyname) ; tot.append(m3pd)     # make lists to populate pandas dataframe
    
    nc_data.close()          # make sure to close the nc file so it doesnt stay open

stat_frame = pd.DataFrame({'Variable' : var, 'total_[m3pd]': tot})    #in case you want the max and min#, "Max_[in]": mx, "Min_[in]":mn})
stat_frame["total_[MGD]"] = stat_frame["total_[m3pd]"]/3785.41178       # put things in MGD if interested, 3785.41178 is number of gal in m3      
Precip = list(stat_frame[stat_frame['Variable'] == 'rainfall']['total_[m3pd]'])[0]   # define the amount of calculated Precip
Dir_net_inf = list(stat_frame[stat_frame['Variable'] == 'direct_net_infiltation']['total_[m3pd]'])[0]   # define the amount of calculated infiltration
WB_ins = Precip + Dir_net_inf
stat_frame['pct_of_pcip'] = stat_frame["total_[m3pd]"]/WB_ins
stat_frame.to_csv(os.path.join(outspace, "stats_run7_{}m_cells.csv".format(cel_size)))

# how does the model balance? 
print("WATER BALANCE ratio: outs over ins water budget balanece =  {} % ".format(stat_frame['pct_of_pcip'].sum()-1))   # check water balance

os.chdir(os.path.join("..", 'run'))  # then back out to the home directory

# calculate statistics for individual watersheds
# note, for some reason will not overwrite csvs need to clear them out or recode to make this issue not an issue
#create workspace
outspace_table = os.path.join('..', 'output', 'post_prcessed_with_MFR', "tables")
if not os.path.exists(outspace_table):
    os.makedirs(outspace_table)
sheds = (os.path.join(GIS_FOLDER, 'Watersheds\\Runoff_zones_sheds_WGS2S_clip.shp'))

# process each raster layer into a table
for i in (os.listdir(outspace)):
    if i.endswith('.asc'):
        outZSaT = ZonalStatisticsAsTable(sheds, "SHED_NAME", os.path.join(outspace, i), os.path.join(outspace_table, "temptab.dbf"))  # in arc format
        arcpy.TableToTable_conversion(outZSaT, outspace_table, "Table_{}_1.csv".format(i))                                            # take table out of stupid arc format and put into csv format 
        
# this block takes each of the csvs, reads them and calculates water volumnes (m3/d) for each watershed
templist = []
for c in (os.listdir(os.path.join(outspace, "tables"))):
    if c.endswith('.csv'):
        data = pd.read_csv(os.path.join(outspace, "tables", c))
        keyname = c.split("Table_")[1].split("_annual")[0]                   # parameter being worked on
        data[keyname] = (data['MEAN']*.0254/365) * data['AREA'] 
        temp_frame = data[["SHED_NAME", keyname]]
        templist.append(temp_frame)
        
summarry_frame1 = data[['SHED_NAME']]                                        # this is just sticking them all together into one dataframe
for i in templist:
    summarry_frame1 = summarry_frame1.merge(i, on ='SHED_NAME', how='outer')
                          

# that was in actual volumns, now to convert each component into a fraction of the rainfall...
templist2 = []
summarry_frame2 = data[['SHED_NAME']]
for i in summarry_frame1.columns[1:]:
    temp_frame = data[['SHED_NAME']] ; temp_frame[i.split("-")[0]] = summarry_frame1[i]/summarry_frame1['rainfall']
    templist2.append(temp_frame)
    
summarry_frame3 = data[['SHED_NAME']]
for i in templist2:
    summarry_frame3 = summarry_frame3.merge(i, on ='SHED_NAME', how='outer')
                          
summarry_frame_4000 = summarry_frame1.set_index('SHED_NAME')
summarry_frame_4 = summarry_frame_4000.select_dtypes(exclude=['object'])*264.172/1000000   # convert to million gallons per day
    
summarry_frame3.to_csv(os.path.join(outspace, "watershed_summary_stats_percentages.csv"))
summarry_frame1.to_csv(os.path.join(outspace, "watershed_summary_stats_volume_m3pd.csv"))
summarry_frame_4.to_csv(os.path.join(outspace, "watershed_summary_stats_volumes_MGD.csv"))

actual_et total  221019.0 [m3/d]
direct_net_infiltation total  36247.6 [m3/d]
direct_soil_moisture total  1944.4 [m3/d]
interception total  127397.1 [m3/d]
net_infiltration total  909699.3 [m3/d]
rainfall total  1486348.1 [m3/d]
runoff total  277173.3 [m3/d]
WATER BALANCE ratio: outs over ins water budget balanece =  1.0096134155207488 % 


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


MFR leo in MGD is 1.2435263472140021
MFR taf in MGD is 5.23504963553931
MFR total in MGD is 6.478575982753312
actual_et total  221019.0 [m3/d]
direct_net_infiltation total  54222.5 [m3/d]
direct_soil_moisture total  1944.4 [m3/d]
interception total  127397.1 [m3/d]
net_infiltration total  927674.3 [m3/d]
rainfall total  1486348.1 [m3/d]
runoff total  277173.3 [m3/d]
WATER BALANCE ratio: outs over ins water budget balanece =  1.0095012487368313 % 


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [13]:


def run_model(name):
    print(name)

    #### import modules
    import numpy as np
    import arcpy
    import os
    import sys
   # from arcpy.sa import *
    import pandas as pd
    #import gdal
    from arcpy import env
    import shutil
    import numpy.ma as ma
    import netCDF4 as nc
    import subprocess
    import re
    # this is a list of additional functions to load up, as to not clutter the script
   # %run ../../Std_input/COMMON/plot_and_table_functions

    # set properties
    arcpy.env.overwriteOutput = True # make sure overwrite files is on
    # projection definition 
    sr_project = arcpy.SpatialReference(32702)   # Project dataset into WGS84
    cel_size = 100     # in m 
    Control_File_Name = 'Tutuila_cal_controlFile.ctl'


    #### General coverages and paths. More, basic model setup.
    GIS_FOLDER = os.path.join('..', '..', 'Raw_GIS_Data')
    STD_INPUT_FOLDER = os.path.join('..', '..', 'Std_input')
    # path to the grid bound
    Grid_shp = os.path.join(GIS_FOLDER, 'grid_bound.shp')

    if not os.path.exists(os.path.join('..', 'output//')):
        os.makedirs(os.path.join('..', 'output//'))


    input_folder = os.path.join("..", "..", "Model_workspace", "input") 

    # Move in other standard input files and modify control file to the shape of the current run
    # modify the control file grid for the given run   (note this uses dimensions from the rainfall adjustment grid in april)
    with open(os.path.join(input_folder, 'RF_adj_grids', 'rfadj_apr.asc'), 'r') as dims_file:   # open an ASC file and get the dimenstions out of it 
        dimsfile1 = dims_file.read().splitlines(True)
        x_dim = float(re.findall('\d+', dimsfile1[0])[-1])    
        y_dim = float(re.findall('\d+', dimsfile1[1])[-1])

    with open(os.path.join('.', Control_File_Name), 'r') as fin:   # open file 
        data = fin.read().splitlines(True)
    with open(os.path.join('.', Control_File_Name), 'w') as fout:     # delete first line
        fout.writelines(data[1:])
    new_first = 'GRID {} {} 515000. 8409000. {} '.format(x_dim, y_dim, cel_size)  # new first line 
    with open(os.path.join('.', Control_File_Name), 'r+') as file:                # add in new first line and save file  
        file_data = file.read()
        file. seek(0, 0)
        file. write(new_first + '\n' + file_data)

    # land use lookup file
    shutil.copy2(os.path.join(STD_INPUT_FOLDER, 'Landuse_lookup_maui_mod5.txt') ,os.path.join(input_folder))    

    # Simple RO : RF ratios file
    shutil.copy2(os.path.join(GIS_FOLDER, 'Runofftorainfall2\\RO_Rf_ratios_real_monthly3_2000_2010.txt') ,os.path.join(input_folder)) # note this is from the simplified version with zone IDs starting at 1

    # Rain Fragments file
    shutil.copy2(os.path.join(STD_INPUT_FOLDER, "Fragments", 'Rainfall_fragments_2001.prn') ,os.path.join(input_folder))  

    #  Fragments sequence file
    shutil.copy2(os.path.join(STD_INPUT_FOLDER, "Fragments", 'Sequence_file_2002.prn') ,os.path.join(input_folder)) 

    # need to run this before the 1st model run to re-fresh the direct net infiltration coverage to not include the MFR. 
    arcpy.Plus_3d(os.path.join(input_folder,  'Direct_infiltration',  "A_WL_Rast.asc"), os.path.join(input_folder,  'Direct_infiltration', "OSDS_inlf_in.asc"), os.path.join(input_folder,  'Direct_infiltration', "temprast"))
    arcpy.RasterToASCII_conversion(os.path.join(input_folder,  'Direct_infiltration', "temprast"), os.path.join(input_folder,  'Direct_infiltration', "Total_inlf_in.asc"))


    # RUN Da MODEL (with no MFR) 
    os.chdir(os.path.join("..", "run"))
    # Executable and control file copies
    shutil.copy2(os.path.join("." , 'swb2.exe') ,os.path.join('..', 'output')) 
    shutil.copy2(os.path.join("." , Control_File_Name) ,os.path.join('..', 'output')) 

    os.chdir(os.path.join("..", "output"))
    subprocess.call('swb2.exe {}'.format(Control_File_Name), shell=True)
    os.chdir(os.path.join("..", "run"))

    ### Post process da files
    outspace = os.path.join('..', "output", 'post_prcessed_no_MFR')
    if not os.path.exists(outspace):
        os.makedirs(outspace)

    # Parameters
    Desired_files = ['actual_et',  'direct_net_infiltation', 'direct_soil_moisture',
                 'interception', 'net_infiltration', 'rainfall', 'runoff'] # 'delta_soil_storage',  'irrigation', 
    XLLCORNER =      515000.000
    YLLCORNER =      8409000.000
    CELLSIZE  =      cel_size

    # functions
    def create_file_reference( component_name ):
        '''
        This is a simple convenience function that will form a path and filename to a
        given water budget component
        '''
        # specify the prefix, path to SWB2 output, timeframe, and resolution
        #output_path = os.path.join(os.getcwd(), "output")
        #prefix      = '\\'
        start_year  = '2000-01-01'
        end_year    = '2009-12-31'
        ncol        = str(int(x_dim))
        nrow        = str(int(y_dim))
        return(  component_name + '__' + start_year + '_' 
              + end_year + '__' + nrow + '_by_' + ncol + '.nc' )

    # some other functions to post process stuff

    def ncdump(nc_fid):
        '''ncdump outputs dimensions, variables and their attribute information of netCDF4 files'''
        nc_attrs = nc_fid.ncattrs()
        nc_dims = [dim for dim in nc_fid.dimensions]  
        nc_vars = [var for var in nc_fid.variables] 
        return nc_attrs, nc_dims, nc_vars

    def writeArrayToArcGrid(arr,filename,xll,yll,cellsize,no_data_val):
        """ this takes a 2d numpy array and turns it into an .asc file """
        arr                = np.copy(arr)
        arr[np.isnan(arr)] = no_data_val
        headerstring       = bytes('NCOLS %d\nNROWS %d\nXLLCENTER %f\nYLLCENTER %f\nCELLSIZE %f\nNODATA_value %f\n' % 
            (arr.shape[1], arr.shape[0], xll, yll, cellsize, no_data_val), 'UTF-8')

        with open(filename,'wb') as fout:
            fout.write(headerstring)
            np.savetxt(fout,arr,'%5.2f')


    # post process the whole model domain
    os.chdir(os.path.join("..", 'output'))  # difficulty in making the path to the file so need to change into the output directory
    var = []; tot = []; nclist = []

    # Step 1: make list of files that you wish to process 
    for i in Desired_files:
        Da_file = create_file_reference(i)
        nclist.append(Da_file)

    # Step 2 average the daily dimension (len(t) is # of days in the run) to annual 
    for i, f in enumerate(nclist):
        nc_data = nc.Dataset(nclist[i])
        nc_attrs, nc_dims, nc_vars = ncdump(nc_data)
        nc_var = nc_vars[3]
        t = nc_data.variables['time'][:]
        y = nc_data.variables['y'][:]
        x = nc_data.variables['x'][:]
        nt = len(t)
        nrow = len(y)
        ncol = len(x)
        rd = np.zeros((nrow, ncol))  # create 0 array of the proper shape
        for day in range(nt):
            r_temp = nc_data.variables[str(nc_var)][day, :, :]
            r_filled = np.ma.filled(r_temp, fill_value=0)    # fills in missing values with 0s (i think) 
            rd = rd+r_filled                                 # sequentially add each day's value in each cell to the empty frame  
        r = rd/nt*365 # to create a one year average from all the years in model.  if want to add leap years add 0.25 

        # step 3: write each yearly average array to a .asc file
        keyname = Desired_files[i] 
        writeArrayToArcGrid(r, os.path.join(outspace, "{}_annual.asc".format(keyname)), XLLCORNER, YLLCORNER, CELLSIZE, -999)

        # Step 4: calculate total amounts of water in cubic meters per day and create statistics dataframe
        m3pd = ((cel_size**2)*r.sum()*.0254)/365 
        print("{} total  {} [m3/d]".format(keyname, '%.1f' % m3pd))
        var.append(keyname) ; tot.append(m3pd)     # make lists to populate pandas dataframe

        nc_data.close()          # make sure to close the nc file so it doesnt stay open

    stat_frame = pd.DataFrame({'Variable' : var, 'total_[m3pd]': tot})    #in case you want the max and min#, "Max_[in]": mx, "Min_[in]":mn})
    stat_frame["total_[MGD]"] = stat_frame["total_[m3pd]"]/3785.41178       # put things in MGD if interested, 3785.41178 is number of gal in m3      
    Precip = list(stat_frame[stat_frame['Variable'] == 'rainfall']['total_[m3pd]'])[0]   # define the amount of calculated Precip
    Dir_net_inf = list(stat_frame[stat_frame['Variable'] == 'direct_net_infiltation']['total_[m3pd]'])[0]   # define the amount of calculated infiltration
    WB_ins = Precip + Dir_net_inf
    stat_frame['pct_of_pcip'] = stat_frame["total_[m3pd]"]/WB_ins
    stat_frame.to_csv(os.path.join(outspace, "stats_run7_{}m_cells.csv".format(cel_size)))

    # how does the model balance? 
    print("WATER BALANCE ratio: outs over ins water budget balanece =  {} % ".format(stat_frame['pct_of_pcip'].sum()-1))   # check water balance

    os.chdir(os.path.join("..", 'run'))  # then back out to the home directory

    # calculate statistics for individual watersheds
    # note, for some reason will not overwrite csvs need to clear them out or recode to make this issue not an issue
    #create workspace
    outspace_table = os.path.join('..', 'output', 'post_prcessed_no_MFR', "tables")
    if not os.path.exists(outspace_table):
        os.makedirs(outspace_table)
    sheds = (os.path.join(GIS_FOLDER, 'Watersheds\\Runoff_zones_sheds_WGS2S_clip.shp'))

    # process each raster layer into a table
    for i in (os.listdir(outspace)):
        if i.endswith('.asc'):
            outZSaT = ZonalStatisticsAsTable(sheds, "SHED_NAME", os.path.join(outspace, i), os.path.join(outspace_table, "temptab.dbf"))  # in arc format
            arcpy.TableToTable_conversion(outZSaT, outspace_table, "Table_{}_1.csv".format(i))                                            # take table out of stupid arc format and put into csv format 

    # this block takes each of the csvs, reads them and calculates water volumnes (m3/d) for each watershed
    templist = []
    for c in (os.listdir(os.path.join(outspace, "tables"))):
        if c.endswith('.csv'):
            data = pd.read_csv(os.path.join(outspace, "tables", c))
            keyname = c.split("Table_")[1].split("_annual")[0]                   # parameter being worked on
            data[keyname] = (data['MEAN']*.0254/365) * data['AREA'] 
            temp_frame = data[["SHED_NAME", keyname]]
            templist.append(temp_frame)

    summarry_frame1 = data[['SHED_NAME']]                                        # this is just sticking them all together into one dataframe
    for i in templist:
        summarry_frame1 = summarry_frame1.merge(i, on ='SHED_NAME', how='outer')


    # that was in actual volumns, now to convert each component into a fraction of the rainfall...
    templist2 = []
    summarry_frame2 = data[['SHED_NAME']]
    for i in summarry_frame1.columns[1:]:
        temp_frame = data[['SHED_NAME']] ; temp_frame[i.split("-")[0]] = summarry_frame1[i]/summarry_frame1['rainfall']
        templist2.append(temp_frame)

    summarry_frame3 = data[['SHED_NAME']]
    for i in templist2:
        summarry_frame3 = summarry_frame3.merge(i, on ='SHED_NAME', how='outer')

    summarry_frame_4000 = summarry_frame1.set_index('SHED_NAME')
    summarry_frame_4 = summarry_frame_4000.select_dtypes(exclude=['object'])*264.172/1000000   # convert to million gallons per day

    summarry_frame3.to_csv(os.path.join(outspace, "watershed_summary_stats_percentages.csv"))
    summarry_frame1.to_csv(os.path.join(outspace, "watershed_summary_stats_volume_m3pd.csv"))
    summarry_frame_4.to_csv(os.path.join(outspace, "watershed_summary_stats_volumes_MGD.csv"))


    ### MFR calculations      
    outspace = os.path.join('..', "output", 'post_prcessed_no_MFR')
    if not os.path.exists(outspace):
        os.makedirs(outspace)

    # caclulate how much runoff to dump into the MFR area
    outspace_table = os.path.join('..', 'output', 'MFR_calcs', "tables")
    if not os.path.exists(outspace_table):
        os.makedirs(outspace_table)

    Contributing_area_leo = (os.path.join(GIS_FOLDER, 'MFR\\Contributing_MRF_Areas_leone.shp'))
    Contributing_area_taf = (os.path.join(GIS_FOLDER, 'MFR\\Contributing_MRF_Areas_tafuna.shp'))

    outZSaT_leo = ZonalStatisticsAsTable(Contributing_area_leo, "SHED_NAME", os.path.join(outspace, "runoff_annual.asc"), os.path.join(outspace_table, "temptab_leo.dbf"))  # in arc format
    arcpy.TableToTable_conversion(outZSaT_leo, outspace_table, "runoff_MFR_leo.csv")                                            # take table out of stupid arc format and put into csv format 
    outZSaT_leo = ZonalStatisticsAsTable(Contributing_area_taf, "SHED_NAME", os.path.join(outspace, "runoff_annual.asc"), os.path.join(outspace_table, "temptab_taf.dbf"))  # in arc format
    arcpy.TableToTable_conversion(outZSaT_leo, outspace_table, "runoff_MFR_taf.csv") 

    data_leo = pd.read_csv(os.path.join(outspace_table, "runoff_MFR_leo.csv"))
    data_taf = pd.read_csv(os.path.join(outspace_table, "runoff_MFR_taf.csv"))

    data_leo["AreaRunoff_m3pd"] = (data_leo['MEAN']*.0254/365) * data_leo['AREA']    # this is how much runoff is in each MFR contributionzone
    data_taf["AreaRunoff_m3pd"] = (data_taf['MEAN']*.0254/365) * data_taf['AREA']    # this is how much runoff is in each MFR contributionzone
    tot_MFR_leo = sum(data_leo['AreaRunoff_m3pd'])
    tot_MFR_taf = sum(data_taf['AreaRunoff_m3pd'])

    # calculate the MFR area and prepare input files
    workspace = os.path.join(input_folder,  'MFR')
    if not os.path.exists(workspace):
        os.makedirs(workspace)

    arcpy.Project_management(os.path.join(GIS_FOLDER, 'MFR\\MFR_infiltration_area_leone.shp'),  os.path.join(workspace, 'MFR_infiltration_area_leone_projected.shp'), sr_project) 
    arcpy.AddField_management(os.path.join(workspace, 'MFR_infiltration_area_leone_projected.shp'), "MFR_inch", "DOUBLE")    # add Active cell unit field
    arcpy.AddGeometryAttributes_management(os.path.join(workspace, 'MFR_infiltration_area_leone_projected.shp'), "AREA")
    Total_MFR_area_leo = 0                                                                                                        # stupid block just to calculate the total area
    with arcpy.da.SearchCursor(os.path.join(workspace, 'MFR_infiltration_area_leone_projected.shp'), "POLY_AREA") as cursor:
        for row in cursor:
            Total_MFR_area_leo = Total_MFR_area_leo + row[0]

    arcpy.Project_management(os.path.join(GIS_FOLDER, 'MFR\\MFR_infiltration_area_tafuna.shp'),  os.path.join(workspace, 'MFR_infiltration_area_tafuna_projected.shp'), sr_project) 
    arcpy.AddField_management(os.path.join(workspace, 'MFR_infiltration_area_tafuna_projected.shp'), "MFR_inch", "DOUBLE")    # add Active cell unit field
    arcpy.AddGeometryAttributes_management(os.path.join(workspace, 'MFR_infiltration_area_tafuna_projected.shp'), "AREA")
    Total_MFR_area_taf = 0                                                                                                        # stupid block just to calculate the total area
    with arcpy.da.SearchCursor(os.path.join(workspace, 'MFR_infiltration_area_tafuna_projected.shp'), "POLY_AREA") as cursor:
        for row in cursor:
            Total_MFR_area_taf = Total_MFR_area_taf + row[0]

    Inches_of_MFR_across_leo = (tot_MFR_leo/Total_MFR_area_leo/0.0254) * 0.75   # note this 75% number if directly from Izuka 2007
    Inches_of_MFR_across_taf = (tot_MFR_taf/Total_MFR_area_taf/0.0254) * 0.75   # note this 75% number if directly from Izuka 2007

    arcpy.CalculateField_management(os.path.join(workspace, 'MFR_infiltration_area_leone_projected.shp'), "MFR_inch", "!MFR_inch! + {}".format(Inches_of_MFR_across_leo), "PYTHON3") # calculate the appropriate amount of infitration in inches spread over all MFR zone
    arcpy.CalculateField_management(os.path.join(workspace, 'MFR_infiltration_area_tafuna_projected.shp'), "MFR_inch", "!MFR_inch! + {}".format(Inches_of_MFR_across_taf), "PYTHON3") # calculate the appropriate amount of infitration in inches spread over all MFR zone

    arcpy.Erase_analysis(Grid_shp, os.path.join(workspace, 'MFR_infiltration_area_leone_projected.shp'),  os.path.join(workspace, 'MFR_infiltration_area_leone_bound.shp'))
    arcpy.Erase_analysis(Grid_shp, os.path.join(workspace, 'MFR_infiltration_area_tafuna_projected.shp'),  os.path.join(workspace, 'MFR_infiltration_area_tafuna_bound.shp'))

    arcpy.Merge_management([os.path.join(workspace, 'MFR_infiltration_area_leone_bound.shp'), os.path.join(workspace, 'MFR_infiltration_area_leone_projected.shp')], os.path.join(workspace, 'MFR_infiltration_area_leone_ready.shp'))
    arcpy.Merge_management([os.path.join(workspace, 'MFR_infiltration_area_tafuna_bound.shp'), os.path.join(workspace, 'MFR_infiltration_area_tafuna_projected.shp')], os.path.join(workspace, 'MFR_infiltration_area_tafuna_ready.shp'))

    arcpy.PolygonToRaster_conversion(os.path.join(workspace, 'MFR_infiltration_area_leone_ready.shp'), "MFR_inch", os.path.join(workspace, "MFR_Rast_L"), cell_assignment="MAXIMUM_AREA",  cellsize=cel_size)
    arcpy.PolygonToRaster_conversion(os.path.join(workspace, 'MFR_infiltration_area_tafuna_ready.shp'), "MFR_inch", os.path.join(workspace, "MFR_Rast_T"), cell_assignment="MAXIMUM_AREA",  cellsize=cel_size)

    arcpy.RasterToASCII_conversion(os.path.join(workspace, "MFR_Rast_L"), os.path.join(workspace, "MFR_Rast_L.asc"))
    arcpy.RasterToASCII_conversion(os.path.join(workspace, "MFR_Rast_T"), os.path.join(workspace, "MFR_Rast_T.asc"))

    arcpy.Delete_management(os.path.join(workspace, 'MFR_infiltration_area_leone_projected.shp'))
    arcpy.Delete_management(os.path.join(workspace, 'MFR_infiltration_area_tafuna_projected.shp'))
    arcpy.Delete_management(os.path.join(workspace, 'MFR_infiltration_area_leone_bound.shp'))
    arcpy.Delete_management(os.path.join(workspace, 'MFR_infiltration_area_tafuna_bound.shp'))
    arcpy.Delete_management(os.path.join(workspace, 'MFR_infiltration_area_leone_ready.shp'))
    arcpy.Delete_management(os.path.join(workspace, 'MFR_infiltration_area_tafuna_ready.shp'))
    arcpy.Delete_management(os.path.join(workspace, 'MFR_Rast_L'))
    arcpy.Delete_management(os.path.join(workspace, 'MFR_Rast_T'))

    # now combine the MFR raster into the other direct infiltration rasters
    arcpy.Plus_3d(os.path.join(input_folder,  'MFR', "MFR_Rast_L.asc"), os.path.join(input_folder,  'Direct_infiltration', "WLOSDrast"), os.path.join(input_folder,  'Direct_infiltration', "temprast2"))
    arcpy.Plus_3d(os.path.join(input_folder,  'MFR', "MFR_Rast_T.asc"), os.path.join(input_folder,  'Direct_infiltration', "temprast2"), os.path.join(input_folder,  'Direct_infiltration', "temprast3"))
    arcpy.RasterToASCII_conversion(os.path.join(input_folder,  'Direct_infiltration', "temprast3"), os.path.join(input_folder,  'Direct_infiltration', "Total_inlf_in.asc"))

    print('MFR leo in MGD is {}'.format(tot_MFR_leo*264.172/1000000))
    print('MFR taf in MGD is {}'.format(tot_MFR_taf*264.172/1000000))
    print('MFR total in MGD is {}'.format((tot_MFR_leo+tot_MFR_taf)*264.172/1000000))

    # Run da Model again, this time including the MFR
    # Executable and control file copies
    shutil.copy2(os.path.join("." , 'swb2.exe') ,os.path.join('..', 'output')) 
    shutil.copy2(os.path.join("." , Control_File_Name) ,os.path.join('..', 'output')) 

    os.chdir(os.path.join("..", "output"))
    subprocess.call('swb2.exe {}'.format(Control_File_Name), shell=True)
    os.chdir(os.path.join("..", "run"))

    # Post process the files again, this time with the MFR added 
    outspace = os.path.join('..', "output", 'post_prcessed_with_MFR')
    if not os.path.exists(outspace):
        os.makedirs(outspace)

    # post process the whole model domain
    os.chdir(os.path.join("..", 'output'))  # difficulty in making the path to the file so need to change into the output directory
    var = []; tot = []; nclist = []

    # Step 1: make list of files that you wish to process 
    for i in Desired_files:
        Da_file = create_file_reference(i)
        nclist.append(Da_file)

    # Step 2 average the daily dimension (len(t) is # of days in the run) to annual 
    for i, f in enumerate(nclist):
        nc_data = nc.Dataset(nclist[i])
        nc_attrs, nc_dims, nc_vars = ncdump(nc_data)
        nc_var = nc_vars[3]
        t = nc_data.variables['time'][:]
        y = nc_data.variables['y'][:]
        x = nc_data.variables['x'][:]
        nt = len(t)
        nrow = len(y)
        ncol = len(x)
        rd = np.zeros((nrow, ncol))  # create 0 array of the proper shape
        for day in range(nt):
            r_temp = nc_data.variables[str(nc_var)][day, :, :]
            r_filled = np.ma.filled(r_temp, fill_value=0)    # fills in missing values with 0s (i think) 
            rd = rd+r_filled                                 # sequentially add each day's value in each cell to the empty frame  
        r = rd/nt*365 # to create a one year average from all the years in model.  if want to add leap years add 0.25 

        # step 3: write each yearly average array to a .asc file
        keyname = Desired_files[i] 
        writeArrayToArcGrid(r, os.path.join(outspace, "{}_annual.asc".format(keyname)), XLLCORNER, YLLCORNER, CELLSIZE, -999)

        # Step 4: calculate total amounts of water in cubic meters per day and create statistics dataframe
        m3pd = ((cel_size**2)*r.sum()*.0254)/365 
        print("{} total  {} [m3/d]".format(keyname, '%.1f' % m3pd))
        var.append(keyname) ; tot.append(m3pd)     # make lists to populate pandas dataframe

        nc_data.close()          # make sure to close the nc file so it doesnt stay open

    stat_frame = pd.DataFrame({'Variable' : var, 'total_[m3pd]': tot})    #in case you want the max and min#, "Max_[in]": mx, "Min_[in]":mn})
    stat_frame["total_[MGD]"] = stat_frame["total_[m3pd]"]/3785.41178       # put things in MGD if interested, 3785.41178 is number of gal in m3      
    Precip = list(stat_frame[stat_frame['Variable'] == 'rainfall']['total_[m3pd]'])[0]   # define the amount of calculated Precip
    Dir_net_inf = list(stat_frame[stat_frame['Variable'] == 'direct_net_infiltation']['total_[m3pd]'])[0]   # define the amount of calculated infiltration
    WB_ins = Precip + Dir_net_inf
    stat_frame['pct_of_pcip'] = stat_frame["total_[m3pd]"]/WB_ins
    stat_frame.to_csv(os.path.join(outspace, "stats_run7_{}m_cells.csv".format(cel_size)))

    # how does the model balance? 
    print("WATER BALANCE ratio: outs over ins water budget balanece =  {} % ".format(stat_frame['pct_of_pcip'].sum()-1))   # check water balance

    os.chdir(os.path.join("..", 'run'))  # then back out to the home directory

    # calculate statistics for individual watersheds
    # note, for some reason will not overwrite csvs need to clear them out or recode to make this issue not an issue
    #create workspace
    outspace_table = os.path.join('..', 'output', 'post_prcessed_with_MFR', "tables")
    if not os.path.exists(outspace_table):
        os.makedirs(outspace_table)
    sheds = (os.path.join(GIS_FOLDER, 'Watersheds\\Runoff_zones_sheds_WGS2S_clip.shp'))

    # process each raster layer into a table
    for i in (os.listdir(outspace)):
        if i.endswith('.asc'):
            outZSaT = ZonalStatisticsAsTable(sheds, "SHED_NAME", os.path.join(outspace, i), os.path.join(outspace_table, "temptab.dbf"))  # in arc format
            arcpy.TableToTable_conversion(outZSaT, outspace_table, "Table_{}_1.csv".format(i))                                            # take table out of stupid arc format and put into csv format 

    # this block takes each of the csvs, reads them and calculates water volumnes (m3/d) for each watershed
    templist = []
    for c in (os.listdir(os.path.join(outspace, "tables"))):
        if c.endswith('.csv'):
            data = pd.read_csv(os.path.join(outspace, "tables", c))
            keyname = c.split("Table_")[1].split("_annual")[0]                   # parameter being worked on
            data[keyname] = (data['MEAN']*.0254/365) * data['AREA'] 
            temp_frame = data[["SHED_NAME", keyname]]
            templist.append(temp_frame)

    summarry_frame1 = data[['SHED_NAME']]                                        # this is just sticking them all together into one dataframe
    for i in templist:
        summarry_frame1 = summarry_frame1.merge(i, on ='SHED_NAME', how='outer')


    # that was in actual volumns, now to convert each component into a fraction of the rainfall...
    templist2 = []
    summarry_frame2 = data[['SHED_NAME']]
    for i in summarry_frame1.columns[1:]:
        temp_frame = data[['SHED_NAME']] ; temp_frame[i.split("-")[0]] = summarry_frame1[i]/summarry_frame1['rainfall']
        templist2.append(temp_frame)

    summarry_frame3 = data[['SHED_NAME']]
    for i in templist2:
        summarry_frame3 = summarry_frame3.merge(i, on ='SHED_NAME', how='outer')

    summarry_frame_4000 = summarry_frame1.set_index('SHED_NAME')
    summarry_frame_4 = summarry_frame_4000.select_dtypes(exclude=['object'])*264.172/1000000   # convert to million gallons per day

    summarry_frame3.to_csv(os.path.join(outspace, "watershed_summary_stats_percentages.csv"))
    summarry_frame1.to_csv(os.path.join(outspace, "watershed_summary_stats_volume_m3pd.csv"))
    summarry_frame_4.to_csv(os.path.join(outspace, "watershed_summary_stats_volumes_MGD.csv"))
    
    return summarry_frame1

In [14]:
run_model("poop")

poop
actual_et total  221019.0 [m3/d]
direct_net_infiltation total  36247.6 [m3/d]
direct_soil_moisture total  1944.4 [m3/d]
interception total  127397.1 [m3/d]
net_infiltration total  909699.3 [m3/d]
rainfall total  1486348.1 [m3/d]
runoff total  277173.3 [m3/d]
WATER BALANCE ratio: outs over ins water budget balanece =  1.0096134155207488 % 


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  geom.getArea("PRESERVE_SHAPE"),


MFR leo in MGD is 1.2435263472140021
MFR taf in MGD is 5.23504963553931
MFR total in MGD is 6.478575982753312
actual_et total  221019.0 [m3/d]
direct_net_infiltation total  54222.5 [m3/d]
direct_soil_moisture total  1944.4 [m3/d]
interception total  127397.1 [m3/d]
net_infiltration total  927674.3 [m3/d]
rainfall total  1486348.1 [m3/d]
runoff total  277173.3 [m3/d]
WATER BALANCE ratio: outs over ins water budget balanece =  1.0095012487368313 % 


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,SHED_NAME,actual_et,direct_net_infiltation,direct_soil_moisture,interception,net_infiltration,rainfall,runoff
0,Fagatuitui - Vaaogeoge,4284.047507,0.0,0.0,6655.140987,35491.663567,54583.048093,9635.006361
1,Vatia,5117.028339,518.633198,37.578082,5953.022336,34942.077169,54466.79973,10293.590099
2,Tula,2305.415338,503.017415,12.526027,1124.294465,5219.943557,8945.615566,847.371835
3,Onenoa,1194.530685,153.388161,8.350685,836.585534,3338.631672,5584.179574,436.06581
4,Afono,3839.50575,377.569253,12.526027,3122.147129,24460.506186,36551.24016,6381.252427
5,Masefau,3878.91403,795.722834,4.175342,3550.794745,22293.9349,33484.089385,5160.806813
6,Aoa,2715.225206,366.699449,8.350685,1823.358139,8390.455062,13876.06871,1480.583391
7,Masausi,1779.336118,470.769852,8.350685,1604.64674,7487.599901,11434.947808,1146.249809
8,Sailele,622.146905,181.425586,8.350685,525.327672,2340.369916,3759.262644,458.717039
9,Alao,2029.912329,482.252044,8.350685,1205.198686,5288.683622,8703.209081,637.693096


In [None]:
Results = os.path.join('..', "Workspace", 'SWB2_results')
if not os.path.exists(Results):
    os.makedirs(Results)
    
Results_table = os.path.join(Results, "tables")
if not os.path.exists(Results_table):
    os.makedirs(Results_table)
    
# process each raster layer into a table of annual statistics  
""" note this is where need to modify to process into monthly statistics """
for i in (os.listdir(SWB_asc_files)):
    if i.endswith('.asc'):
        outZSaT = ZonalStatisticsAsTable(sheds, "Uniqe_ID", os.path.join(SWB_asc_files, i), os.path.join(Results_table, "temptab.dbf"))  # in arc format
        arcpy.TableToTable_conversion(outZSaT, Results_table, "Table_{}_1.csv".format(i)) 

# this block takes the parameters of interests, reads them and calculates water volumnes (m3/d) for each watershed

templist = []
intrest_list = ['Table_netinfiltration_annual.asc_1.csv', 'Table_runoff_annual.asc_1.csv', 'Table_rainfall_annual.asc_1.csv'] 
for c in intrest_list:
    data = pd.read_csv(os.path.join(Results_table, c))
    keyname = c.split("Table_")[1].split("_annual")[0]                   # parameter being worked on
    data[keyname] = (data['MEAN']*.0254/365) * data['AREA'] 
    temp_frame = data[["Uniqe_ID", keyname]]
    templist.append(temp_frame)
        
summarry_frame1 = data[['Uniqe_ID']]                                        # this is just sticking them all together into one dataframe
for i in templist:
    summarry_frame1 = summarry_frame1.merge(i, on ='Uniqe_ID', how='outer')
    
summarry_frame1 = summarry_frame1[summarry_frame1.netinfiltration != 0]   # some of the values turned out tot be 0, this just selects rows that have non-zero values