In [1]:
import arcpy, pandas as pd
from arcpy import env
from arcpy.sa import *
from otherfunctions import folders_exist

In [None]:
# Paths to input datasets
root_folder = r"Z:\PhD_Datasets&Analysis\Info_Inputs"
css_folder = root_folder + "\\Streamflow_Sts_Drainage_Areas\GRDC_Watersheds"
tam_out_dir = r"Z:\PhD_Datasets&Analysis\Outputs\T&M_WBM"
tc_ds = root_folder + "\\TerraClimate"
out_geotiff = tc_ds + "\\GeoTIFF"
serial_id = 'grdcno_int'
tc_vars = ["ppt", "pet", "q"] # variable names according to TerraClimate
wb_var = 'wyield2' # Change this to the variable you want to process, e.g., 'wyield' for water yield

In [11]:
# Check out spatial analyst extension
if arcpy.CheckExtension("spatial") == "Available":
    arcpy.CheckOutExtension("spatial")
else:
    print("Spatial Analyst extension not available")

# Set arcpy environment variables
env.parallelProcessingFactor = "80%"  # 100% uses all available cores
env.overwriteOutput = True
# env.cellSize = "MINOF" # Avoided to prevent huge files
env.cellSize = out_geotiff + "\\ppt_2023_1.tif" # Use TerraClimate resolution as reference for cell size
env.workspace = r"Z:\PhD_Datasets&Analysis\_ProcessingCache"
env.outputCoordinateSystem = arcpy.SpatialReference("WGS 1984") # WGS 1984 (4326)

In [13]:
# Get the current environment's spatial reference
spatial_ref = env.outputCoordinateSystem

# Check if a spatial reference is set
if spatial_ref:
    print(f"Spatial Reference Name: {spatial_ref.name}")
    print(f"Spatial Reference WKID: {spatial_ref.factoryCode}")
else:
    print("No spatial reference is set in the current environment.")

Spatial Reference Name: GCS_WGS_1984
Spatial Reference WKID: 4326


In [15]:
# Read the Shapefile with the processed drainage areas
#drain_areas = root_folder + "\\Streamflow_Sts_Drainage_Areas\GRDC_Watersheds\CSS-WATERSHEDS_FINAL_SELECTION.shp" # Drainage areas shapefile filtered only with daily k recessions
drain_areas = root_folder + "\\Streamflow_Sts_Drainage_Areas\GRDC_Watersheds\CSS-WATERSHEDS-MERGE_FINAL_SELECTION.shp" # Drainage areas shapefile filtered with the merge of monthly and daily k recessions

# Create a feature layer object
arcpy.MakeFeatureLayer_management(drain_areas, "drain_areas_lyr")

# Initialize an empty list to store the station IDs
sts_ids = []

sql_field = "has_monthl" # Field to filter rows with monthly k recessions
#sql_field = "has_daily_" # Field to filter rows with daily k recessions
sql_query = f'{sql_field} = \'Yes\''

# Use a SearchCursor to iterate through the rows of the feature layer with a SQL condition
#with arcpy.da.SearchCursor("drain_areas_lyr", [serial_id]) as cursor:
with arcpy.da.SearchCursor("drain_areas_lyr", [serial_id], sql_query) as cursor:
    for row in cursor:
        sts_ids.append(row[0])

# Display the list of station IDs and its length
print(sts_ids)
print(len(sts_ids))

[3617110, 3617811, 3617812, 3617814, 3618051, 3618052, 3618500, 3618700, 3618711, 3618720, 3618721, 3618722, 3618950, 3618951, 3621200, 3623100, 3625000, 3626000, 3627000, 3628400, 3628701, 3628900, 3629001, 3630600, 3631100, 3632400, 3633120, 3633123, 3633160, 3633180, 3633301, 3634150, 3634160, 3634320, 3634340, 3634350, 3634360, 3634370, 3635030, 3635035, 3635040, 3635041, 3635301, 3635310, 3635360, 3635402, 3635408, 3635430, 3635440, 3635451, 3635600, 3635650, 3635651, 3637180, 3637380, 3637771, 3637772, 3637773, 3637774, 3637775, 3637790, 3637810, 3637910, 3638050, 3638051, 3638700, 3649010, 3649030, 3649050, 3649110, 3649130, 3649150, 3649151, 3649160, 3649210, 3649211, 3649240, 3649251, 3649310, 3649311, 3649320, 3649321, 3649325, 3649409, 3649411, 3649412, 3649413, 3649416, 3649418, 3649419, 3649420, 3649421, 3649422, 3649423, 3649424, 3649440, 3649450, 3649455, 3649459, 3649460, 3649510, 3649511, 3649610, 3649614, 3649617, 3649619, 3649850, 3649855, 3649901, 3649902, 3649904, 

In [17]:
######################################################
### Starting values for the water balance model - T&M
######################################################

# Initial variables
years = range(1958, 2023 + 1) # Years to process. This line can be used to execute this code for specific years in multiple runs.
months = range(1, 12 + 1)

In [None]:
# Folder where the processed water balance model variable was stored
processing_dir = tam_out_dir + '\\' + wb_var

if wb_var in ["bflow2", "wyield2"]:
    # If the variable is 'bflow2' or 'wyield2', we need to remove the last character '2' from the variable name
    wb_var = wb_var[:-1]  # Remove the last character '2' from the variable name

# Folder with drainage areas in raster format resulting from the filtering process
raster_dir = css_folder + '\\Final_Rasters'

In [22]:
def zonal_stastics_iteratively(year):
    """
    Function to calculate zonal statistics iteratively for each station ID.
    """

    print("\tCalculating zonal statistics of '" + wb_var + "'......")

    sts_flows_sim = pd.DataFrame(columns=[serial_id, "YEAR", "MONTH", "COUNT", "MEAN"])  

    total_stations = len(sts_ids)  # Get the total number of stations

    for idx, st in enumerate(sts_ids, start=1):

        print(f"\t\tProcessing station {idx}/{total_stations}: Station ID {st}")
        
        for month in months:

            if wb_var not in tc_vars:
                # If the variable is not in TerraClimate variables, use the processed variable from the T&M model
                processing_var = processing_dir + "\\" + wb_var + "_" + str(year) + "_" + str(month) + ".tif"
            else:
                # If the variable is in TerraClimate variables, use the original GeoTIFF from TerraClimate
                processing_var = out_geotiff + "\\" + wb_var + "_" + str(year) + "_" + str(month) + ".tif"  

            out_table = "in_memory\\zonal_" + wb_var + "_" + str(st) + "_" + str(year) + "_" + str(month)

            arcpy.sa.ZonalStatisticsAsTable(raster_dir + f"\\{st}_DA.tif", "Value", processing_var, out_table, "DATA", "MEAN")

            # Convert the output table to a NumPy array
            array = arcpy.da.TableToNumPyArray(out_table, ["Value", "COUNT", "MEAN"])

            # Convert the NumPy array to a pandas DataFrame
            df_sim = pd.DataFrame(array)

            df_sim["YEAR"] = year # Assign the year of simulation
            df_sim["MONTH"] = month # Assign the month of simulation
            df_sim.rename(columns={"Value": serial_id}, inplace=True) # Rename the column to the station ID
            df_sim = df_sim[[serial_id, "YEAR", "MONTH", "COUNT", "MEAN"]] # Reorder columns

            sts_flows_sim = pd.concat([sts_flows_sim, df_sim], ignore_index=True) # Concat all simulated stream flow station values

            arcpy.Delete_management(out_table) # Delete the output table to save space

    return(sts_flows_sim)

In [None]:
print('\n############################################################')
print('\t\tINITIAL VARIABLES')
print('\tPeriod to be executed: ' + str(years[0]) + '-' + str(years[-1]))
print('############################################################')

for year in years:

    print("\n**Executing calculation for " + str(year) + "**")

    # Calculate zonal statistics for the current month and year
    df_sim = zonal_stastics_iteratively(year)

    print("\tSaving zonal statistics results into CSV......")
 
    # Save the results to a CSV file every year to avoid processing losses
    if wb_var not in tc_vars:
        # If the variable is not in TerraClimate variables, save the results in the processing directory
        df_sim.to_csv(processing_dir + "\\" + wb_var + "_zonal_statistics_" + str(year) + ".csv", index=False)
    else:
        # If the variable is in TerraClimate variables, save the results in the output directory
        df_sim.to_csv(out_geotiff + "\\" + wb_var + "_zonal_statistics_" + str(year) + ".csv", index=False)

arcpy.CheckInExtension("spatial")

# Clear the workspace environment
arcpy.ClearEnvironment("workspace")

print("\nDONE!!")


############################################################
		INITIAL VARIABLES
	Period to be executed: 1958-2023
############################################################

**Executing calculation for 1958**
	Calculating zonal statistics of 'wyield'......
		Processing station 1/808: Station ID 3617110


  sts_flows_sim = pd.concat([sts_flows_sim, df_sim], ignore_index=True) # Concat all simulated stream flow station values
