In [1]:
import arcpy, pandas as pd
from arcpy import env
from arcpy.sa import *
from otherfunctions import folders_exist

In [2]:
# --- Input / output locations -------------------------------------------------
root_folder = r"Z:\PhD_Datasets&Analysis\Info_Inputs"
tam_out_dir = r"Z:\PhD_Datasets&Analysis\Outputs\T&M_WBM"
tc_ds = f"{root_folder}\\TerraClimate"  # TerraClimate dataset folder
out_geotiff = f"{tc_ds}\\GeoTIFF"  # GeoTIFF rasters inside the TerraClimate folder

# --- Variable naming ----------------------------------------------------------
# bands_gee: band names in GEE, kept for comparison with the GEE TerraClimate dataset
# tc_vars:   the same variables, named according to TerraClimate
bands_gee = ["pr", "pet", "ro"]
tc_vars = ["ppt", "pet", "q"]
serial_id = 'grdcno_int'  # field used below to identify each station / drainage area

# --- arcpy environment --------------------------------------------------------
env.overwriteOutput = True
arcpy.CheckOutExtension("spatial")
# A cell size of "MINOF" is deliberately avoided because it produces huge files;
# a TerraClimate raster is used as the cell-size reference instead.
env.cellSize = f"{out_geotiff}\\ppt_2023_1.tif"
env.workspace = r"Z:\PhD_Datasets&Analysis\_ProcessingCache"
env.outputCoordinateSystem = arcpy.SpatialReference("WGS 1984")  # WGS 1984 (4326)

In [3]:
# Read back the output coordinate system configured on the arcpy environment
spatial_ref = env.outputCoordinateSystem

# Report its name and WKID, or say so when nothing is configured
if not spatial_ref:
    print("No spatial reference is set in the current environment.")
else:
    print(f"Spatial Reference Name: {spatial_ref.name}")
    print(f"Spatial Reference WKID: {spatial_ref.factoryCode}")

Spatial Reference Name: GCS_WGS_1984
Spatial Reference WKID: 4326


In [16]:
# Shapefile with the processed drainage areas.
# Every backslash is escaped: the previous literal relied on "\G" and "\C", which are
# invalid escape sequences (SyntaxWarning on recent Python versions). The resulting
# path string is unchanged.
drain_areas = root_folder + "\\Streamflow_Sts_Drainage_Areas\\GRDC_Watersheds\\CSS-WATERSHEDS_FINAL_SELECTION.shp"

# Create the feature layer that later cells select from by station ID
arcpy.MakeFeatureLayer_management(drain_areas, "drain_areas_lyr")

# Collect the station ID (serial_id field) of every row in the feature layer
with arcpy.da.SearchCursor("drain_areas_lyr", [serial_id]) as cursor:
    sts_ids = [row[0] for row in cursor]

sts_ids

[3617110,
 3617811,
 3617812,
 3617814,
 3618051,
 3618052,
 3618500,
 3618700,
 3618711,
 3618720,
 3618721,
 3618722,
 3618950,
 3618951,
 3621200,
 3623100,
 3625000,
 3626000,
 3627000,
 3628400,
 3628701,
 3628900,
 3629001,
 3630600,
 3631100,
 3632400,
 3633120,
 3633123,
 3633160,
 3633180,
 3633301,
 3634150,
 3634160,
 3634320,
 3634340,
 3634350,
 3634360,
 3634370,
 3635030,
 3635035,
 3635040,
 3635041,
 3635301,
 3635310,
 3635360,
 3635402,
 3635408,
 3635430,
 3635440,
 3635451,
 3635600,
 3635650,
 3635651,
 3637180,
 3637380,
 3637771,
 3637772,
 3637773,
 3637774,
 3637775,
 3637790,
 3637810,
 3637910,
 3638050,
 3638051,
 3638700,
 3649010,
 3649030,
 3649130,
 3649150,
 3649151,
 3649210,
 3649211,
 3649240,
 3649251,
 3649310,
 3649311,
 3649320,
 3649321,
 3649325,
 3649409,
 3649411,
 3649412,
 3649413,
 3649416,
 3649418,
 3649419,
 3649420,
 3649421,
 3649422,
 3649424,
 3649440,
 3649450,
 3649455,
 3649459,
 3649460,
 3649510,
 3649511,
 3649610,
 3649614,


In [17]:
# ----------------------------------------------------
# Starting values for the water balance model - T&M
# ----------------------------------------------------

# Simulation period. range() excludes its stop value, so the stops below are one
# past the last year (2023) and the last month (12).
years = range(1958, 2024)  # years with available weather information to run the water balance
months = range(1, 13)

In [18]:
# Water yield folder of the T&M model outputs, handed to folders_exist()
# (presumably creates it when missing; verify against otherfunctions)
wyield_dir = f"{tam_out_dir}\\wyield"
folders_exist([wyield_dir])

# Folder holding the baseflow rasters that result from the model
bflow_dir = f"{tam_out_dir}\\bflow"

In [None]:
def zonal_stastics_iteratively(wyield, year, month):
    """
    Calculate zonal statistics of a water yield raster, one station at a time.

    For every ID in the notebook-level ``sts_ids`` list, the matching feature of the
    "drain_areas_lyr" layer is selected and ``ZonalStatisticsAsTable`` is run on that
    selection only. Each result table is written as a ``.dbf`` named
    ``zonal_wyield_<station>_<year>_<month>.dbf`` and read back into pandas.
    NOTE(review): the per-station selection is presumably needed because drainage
    areas overlap; confirm.

    Relies on the notebook globals ``sts_ids``, ``serial_id`` and on the
    "drain_areas_lyr" feature layer already existing.

    Parameters
    ----------
    wyield : value raster passed to ``ZonalStatisticsAsTable``.
    year, month : written to the YEAR / MONTH columns and used in the table names.

    Returns
    -------
    pandas.DataFrame
        One row per zonal-statistics record, with columns
        ``[serial_id, YEAR, MONTH, COUNT, AREA, MIN, MAX, RANGE, MEAN, STD, SUM,
        MEDIAN, PCT90]``. Empty (but with those columns) when nothing was produced.
    """

    print("\tCalculating zonal statistics of water yield......")

    # Single definition of the statistic fields and of the output column order
    stat_fields = ["COUNT", "AREA", "MIN", "MAX", "RANGE", "MEAN", "STD", "SUM", "MEDIAN", "PCT90"]
    out_columns = [serial_id, "YEAR", "MONTH"] + stat_fields

    # Per-station frames are collected and concatenated once at the end, instead of
    # growing a DataFrame with pd.concat on every iteration (quadratic copying).
    station_frames = []

    try:
        for st in sts_ids:

            # Restrict the feature layer to the current station ID
            arcpy.SelectLayerByAttribute_management("drain_areas_lyr", "NEW_SELECTION", f"{serial_id} = {st}")

            out_table = "zonal_wyield_" + str(st) + "_" + str(year) + "_" + str(month) + ".dbf"

            arcpy.sa.ZonalStatisticsAsTable("drain_areas_lyr", serial_id, wyield, out_table, "DATA", "ALL")

            # Read the output table back as a NumPy array, then as a DataFrame
            array = arcpy.da.TableToNumPyArray(out_table, [serial_id] + stat_fields)
            df_sim = pd.DataFrame(array)

            # Skip empty results so they contribute neither rows nor dtypes to the concat
            if df_sim.empty:
                continue

            df_sim["YEAR"] = year # Assign the year of simulation
            df_sim["MONTH"] = month # Assign the month of simulation

            station_frames.append(df_sim[out_columns]) # Reorder columns
    finally:
        # Do not leave the shared layer filtered to the last station processed
        arcpy.SelectLayerByAttribute_management("drain_areas_lyr", "CLEAR_SELECTION")

    if not station_frames:
        # pd.concat raises on an empty list; return an empty frame with the expected columns
        return pd.DataFrame(columns=out_columns)

    return pd.concat(station_frames, ignore_index=True)

In [None]:
# Test override: restricts the run to two years. Comment this line out to process the
# full period defined earlier in the notebook. It is applied BEFORE the banner so the
# reported period matches what is actually executed.
years = [1958, 1959]

print('\n############################################################')
print('\t\tINITIAL VARIABLES')
print('\tPeriod to be executed: ' + str(years[0]) + '-' + str(years[-1]))
print('############################################################')

# Accumulated results. Monthly frames are collected in a list and concatenated once per
# yearly checkpoint instead of re-copying a growing DataFrame every month.
sts_flows_sim = pd.DataFrame(columns=[serial_id, "YEAR", "MONTH", "COUNT", "AREA", "MIN", "MAX", "RANGE", "MEAN", "STD", "SUM", "MEDIAN", "PCT90"])
monthly_frames = []

# Pair the check-out with the check-in below so this cell can be re-run on its own
arcpy.CheckOutExtension("spatial")
try:
    for year in years:

        print("\n**Executing THORNTHWAITE AND MATHER model for " + str(year) + "**")

        for month in months:

            print("\n\t*Executing water balance for month " + str(month) + "*\n")

            print("\tLoading baseflow and runoff files.....")

            ro_name = tc_vars[2] + "_" + str(year) + "_" + str(month)
            ro = Raster(out_geotiff + "\\" + ro_name + ".tif") # Read runoff raster of the month

            bflow = Float(Raster(bflow_dir + "\\bflow_" + str(year) + "_" + str(month) + ".tif")) # Read baseflow raster of the month

            print("\tCalculating water yield......")
            wyield = ro + bflow # Calculate water yield (runoff + baseflow)
            wyield.save(wyield_dir + "\\wyield_" + str(year) + "_" + str(month) + ".tif")

            # Calculate zonal statistics for the current month and year
            df_sim = zonal_stastics_iteratively(wyield, year, month)

            print("\tConverting zonal statistics results into dataframe......")
            monthly_frames.append(df_sim) # Keep all simulated stream flow station values

        if monthly_frames:
            sts_flows_sim = pd.concat(monthly_frames, ignore_index=True)

        # Save the results to a CSV file every year to avoid processing losses
        sts_flows_sim.to_csv(wyield_dir + "\\wyield_zonal_statistics.csv", index=False)
finally:
    # Release the Spatial Analyst licence even when a step above fails
    arcpy.CheckInExtension("spatial")

# Clear the workspace environment (only after a successful run, so a failed run can be
# retried without re-running the configuration cell)
arcpy.ClearEnvironment("workspace")

print("\nDONE!!")