In [1]:
import arcpy, pandas as pd
from arcpy import env
from arcpy.sa import *
from otherfunctions import folders_exist

In [None]:
# --- Input / output locations ---
root_folder = r"Z:\PhD_Datasets&Analysis\Info_Inputs"
tam_out_dir = r"Z:\PhD_Datasets&Analysis\Outputs\T&M_WBM"
tc_ds = f"{root_folder}\\TerraClimate"
out_geotiff = f"{tc_ds}\\GeoTIFF"
serial_id = 'grdcno_int'  # Attribute field read to obtain the station IDs

# --- arcpy geoprocessing environment ---
arcpy.CheckOutExtension("spatial")
env.overwriteOutput = True
env.outputCoordinateSystem = arcpy.SpatialReference("WGS 1984")  # WGS 1984 (4326)
env.workspace = r"Z:\PhD_Datasets&Analysis\_ProcessingCache"
# Cell size follows a TerraClimate raster; "MINOF" was avoided to prevent huge files
env.cellSize = f"{out_geotiff}\\ppt_2023_1.tif"

In [None]:
# Report the output coordinate system configured in the arcpy environment
spatial_ref = env.outputCoordinateSystem

if not spatial_ref:
    # Nothing has been configured
    print("No spatial reference is set in the current environment.")
else:
    # Show both the name and the factory code (WKID)
    print(f"Spatial Reference Name: {spatial_ref.name}")
    print(f"Spatial Reference WKID: {spatial_ref.factoryCode}")

In [None]:
# Shapefile with the processed drainage areas.
# Every backslash is escaped explicitly so the literal contains no invalid
# escape sequences (such as "\G" or "\C"), which Python warns about.
drain_areas = root_folder + "\\Streamflow_Sts_Drainage_Areas\\GRDC_Watersheds\\CSS-WATERSHEDS_FINAL_SELECTION.shp"

# Create a feature layer; later cells refer to it by the name "drain_areas_lyr"
arcpy.MakeFeatureLayer_management(drain_areas, "drain_areas_lyr")

# Collect the station ID stored in the `serial_id` field of every feature
with arcpy.da.SearchCursor("drain_areas_lyr", [serial_id]) as cursor:
    sts_ids = [row[0] for row in cursor]

sts_ids

In [None]:
######################################################
### Starting values for the water balance model - T&M
######################################################

# Period to process; both bounds are inclusive. Change them to execute the
# notebook for specific years in multiple runs.
first_year, last_year = 1958, 1967
years = range(first_year, last_year + 1)

# Calendar months 1 through 12
months = range(1, 13)

In [None]:
# Folder of the T&M model's water yield (wyield) variable: the monthly rasters
# are read from it and the yearly zonal-statistics CSVs are written to it.
wyield_dir = f"{tam_out_dir}\\wyield"

# NOTE(review): `folders_exist` presumably creates the folder when it is missing - confirm in otherfunctions
folders_exist([wyield_dir])

In [None]:
def zonal_stastics_iteratively(year):
    """
    Calculate zonal statistics of water yield for each station ID and month.

    For every ID in the notebook-level ``sts_ids`` the matching feature of the
    "drain_areas_lyr" layer is selected. Then, for every month in ``months``,
    the raster ``wyield_<year>_<month>.tif`` inside ``wyield_dir`` is
    summarised with ``ZonalStatisticsAsTable`` and the resulting table is read
    into pandas.

    Parameters
    ----------
    year
        Year to process. It is used to build the raster file names and is
        stored in the "YEAR" column (the notebook passes an int).

    Returns
    -------
    pandas.DataFrame
        All rows read from the zonal tables, with the columns ``serial_id``,
        "YEAR", "MONTH", "COUNT", "AREA", "MIN", "MAX", "RANGE", "MEAN", "STD",
        "SUM", "MEDIAN" and "PCT90". A frame with these columns and no rows is
        returned when nothing was produced.

    Notes
    -----
    Relies on the notebook-level names ``serial_id``, ``sts_ids``, ``months``
    and ``wyield_dir``. The selection on "drain_areas_lyr" is cleared before
    returning so the last station's selection does not leak into other cells.
    """

    print("\tCalculating zonal statistics of water yield......")

    stat_fields = ["COUNT", "AREA", "MIN", "MAX", "RANGE", "MEAN", "STD", "SUM", "MEDIAN", "PCT90"]
    out_columns = [serial_id, "YEAR", "MONTH"] + stat_fields

    # Frames are collected here and concatenated once at the end; concatenating
    # inside the loop copies every previous row again on each iteration.
    monthly_frames = []

    try:
        for st in sts_ids:

            print(f"\t\tStation ID: {st}")

            # Select the current station ID in the feature layer
            arcpy.SelectLayerByAttribute_management("drain_areas_lyr", "NEW_SELECTION", f"{serial_id} = {st}")

            for month in months:

                wyield = wyield_dir + "\\wyield_" + str(year) + "_" + str(month) + ".tif"
                out_table = "in_memory\\zonal_wyield_" + str(st) + "_" + str(year) + "_" + str(month)

                arcpy.sa.ZonalStatisticsAsTable("drain_areas_lyr", serial_id, wyield, out_table, "DATA", "ALL")

                try:
                    # Convert the output table to a NumPy array
                    array = arcpy.da.TableToNumPyArray(out_table, [serial_id] + stat_fields)
                finally:
                    # Delete the output table to save space, even if reading it failed
                    arcpy.Delete_management(out_table)

                # Convert the NumPy array to a pandas DataFrame
                df_month = pd.DataFrame(array)
                df_month["YEAR"] = year # Assign the year of simulation
                df_month["MONTH"] = month # Assign the month of simulation

                # Empty tables add no rows; skipping them keeps the dtypes of the result clean
                if not df_month.empty:
                    monthly_frames.append(df_month[out_columns]) # Reorder columns
    finally:
        # Do not leave the shared layer restricted to the last processed station
        arcpy.SelectLayerByAttribute_management("drain_areas_lyr", "CLEAR_SELECTION")

    if not monthly_frames:
        return pd.DataFrame(columns=out_columns)

    return pd.concat(monthly_frames, ignore_index=True)

In [None]:
print('\n############################################################')
print('\t\tINITIAL VARIABLES')
print('\tPeriod to be executed: ' + str(years[0]) + '-' + str(years[-1]))
print('############################################################')

# Check the extension out in this cell as well: the cell checks it back in when
# it finishes, so re-running it alone (e.g. for another span of years) would
# otherwise start without the Spatial Analyst license.
arcpy.CheckOutExtension("spatial")

try:
    for year in years:

        print("\n**Executing calculation for " + str(year) + "**")

        # Calculate zonal statistics for every station and month of the current year
        df_sim = zonal_stastics_iteratively(year)

        print("\tSaving zonal statistics results into CSV......")

        # Save the results to a CSV file every year to avoid processing losses
        df_sim.to_csv(wyield_dir + "\\wyield_zonal_statistics_" + str(year) + ".csv", index=False)
finally:
    # Runs even when a year fails, so the license is always released
    arcpy.CheckInExtension("spatial")

    # Clear the workspace environment
    arcpy.ClearEnvironment("workspace")

print("\nDONE!!")