In [None]:
# If not installed, run this cell. It is needed to read DBF files
#!pip install dbfread
#!pip show dbfread

In [None]:
import arcpy, pandas as pd
from arcpy import env
from arcpy.sa import *
from dbfread import DBF

In [None]:
# --- Input locations -------------------------------------------------------
# Network-drive root and the folders derived from it
root_folder = r"Z:\PhD_Datasets&Analysis\Info_Inputs"
tc_ds = f"{root_folder}\\TerraClimate"
str_flows_folder = f"{root_folder}\\Streamflow_Stations\\_DataFrames"
drain_areas_folder = f"{root_folder}\\Streamflow_Sts_Drainage_Areas"

# Second location; it serves both the streamflow tables and the drainage areas
str_flows_folder2 = r"D:\OneDrive - CGIAR\Documents\PhD_JLU Giessen\Papers\Paper1\Processing"
drain_areas_folder2 = str_flows_folder2

# --- Naming conventions ----------------------------------------------------
bands_gee = ["pr", "pet", "ro"]  # band names in gee
tc_vars = ["ppt", "pet", "q"]  # variable names according to TerraClimate
serial_id = 'gridcode'  # field holding the basin serial id


# --- arcpy environment -----------------------------------------------------
arcpy.CheckOutExtension("spatial")
env.overwriteOutput = True
env.cellSize = "MINOF"
env.workspace = r"Z:\PhD_Datasets&Analysis\_ProcessingCache"

In [None]:
# Dataframe with the streamflow datasets
# (alternative source: str_flows_folder + "\\Joined_Sts_DFs.csv")
str_flows_obs = pd.read_csv(f"{str_flows_folder2}\\Joined_Sts_DFs.csv")
str_flows_obs

In [None]:
# Descriptive statistics of the streamflow table (pandas' default describe() summary)
str_flows_stats = str_flows_obs.describe()
str_flows_stats

In [None]:
# Open the DBF attribute table of the calibration basins
# (alternative source: drain_areas_folder + "\\Calibration_Basins_Final_Generalized.dbf")
drain_areas_tb = DBF(
    f"{drain_areas_folder2}\\Calibration_Basins_Final_Generalized\\Calibration_Basins_Final_Generalized.dbf"
)

# Load its records into a DataFrame, then order the rows by the basin serial id
drain_areas_df = pd.DataFrame(iter(drain_areas_tb))
drain_areas_df = drain_areas_df.sort_values(by=serial_id)
drain_areas_df

In [None]:
# Basin label built by joining the SUFIX and ID columns with an underscore.
# These labels are the values stored in id_dict and the keys of k_dict / ffcb_dict.
drain_areas_df["SUFIX_ID"] = drain_areas_df["SUFIX"] + "_" + drain_areas_df["ID"]
drain_areas_df

In [None]:
# Lookup table: basin serial id -> "SUFIX_ID" label of the basin
id_dict = {
    basin_serial: basin_label
    for basin_serial, basin_label in zip(drain_areas_df[serial_id], drain_areas_df['SUFIX_ID'])
}
id_dict

In [None]:
# Path of the shapefile with the basins
# (alternative source: drain_areas_folder + "\\Calibration_Basins_Final_Generalized.shp")
drain_areas = f"{drain_areas_folder2}\\Calibration_Basins_Final_Generalized\\Calibration_Basins_Final_Generalized.shp"

In [None]:
# Limit the spatial processing extent to the buffered basins
# (alternative source: drain_areas_folder + "\\Calibration_Basins_Final_Generalized_Buffer.shp")
buffer_zoi = (
    f"{drain_areas_folder2}"
    "\\Calibration_Basins_Final_Generalized_Buffer"
    "\\Calibration_Basins_Final_Generalized_Buffer.shp"
)

# The buffer acts as mask and extent, and its spatial reference becomes the output one
env.mask = buffer_zoi
env.extent = buffer_zoi
env.outputCoordinateSystem = arcpy.Describe(buffer_zoi).spatialReference  # WGS 1984 (4326)

In [None]:
# Output spatial reference currently configured in the arcpy environment
spatial_ref = env.outputCoordinateSystem

# Report its name and WKID, or warn when none is configured
if not spatial_ref:
    print("No spatial reference is set in the current environment.")
else:
    print("Spatial Reference Name: {}".format(spatial_ref.name))
    print("Spatial Reference WKID: {}".format(spatial_ref.factoryCode))

In [None]:
# Raster with the basin ids, cast to integer (used later as the Reclassify input)
# NOTE(review): this is read from drain_areas_folder (network drive) while the
# other basin datasets come from drain_areas_folder2 - confirm this is intended.
drain_areas_ids = Int(Raster(f"{drain_areas_folder}\\calib_ba_fin_gen.tif"))

# Water holding capacity raster; divided by 1000 because the raster was originally multiplied by 1000
# NOTE(review): the path uses "...\Inputs\" whereas root_folder points to "...\Info_Inputs" - confirm.
whc = Float(Raster(r"Z:\PhD_Datasets&Analysis\Inputs\WHC_FC33_Final_WGS84.tif")) / 1000

In [None]:
# =====================================================
#  Starting values for the water balance model - T&M
# =====================================================

# Length of the warm-up period, in years
warmup_yrs = 5

# Years with available weather information to run the water balance (1958..2023)
years = range(1958, 2024)

# Calendar months (1..12)
months = range(1, 13)

# First year once the warm-up period is over
ini_yr_wp = years.start + warmup_yrs

# Base flow of the previous month (mm) (bf0)
bflow_ant = 10

In [None]:
# Recession constant (k) per basin, keyed by the basin "SUFIX_ID" label.
# The values were determined from hydrographs derived from the gathered streamflow
# datasets in code "4.recession_constant.ipynb".
# For the rest of the world, k will be defined as 0.5
k_dict = dict([
    ('MISS_07374525', 0.75),
    ('ULUA_23', 0.9),
    ('AMZN_17050001', 0.82),
    ('NILE_1662100', 0.84),
    ('YNGZ_2181900', 0.64),
    ('MUDA_A4261110', 0.7),
    ('DABE_6742900', 0.92),
    ('DABE_42055', 0.89),
])

In [None]:
# Recession constant (k) values to be used for the reclassification process.
# Each row is [basin serial id, k * 1000 as an integer]; the scaling is undone
# after the reclassification (the reclassified raster is divided by 1000).
# The rows are built from the ids actually present in id_dict rather than from
# range(len(id_dict)), so the basin ids do not need to be numbered 0..n-1.
remap_k = RemapValue([
    [int(basin_serial), int(round(k_dict[basin_label] * 1000, 0))]
    for basin_serial, basin_label in id_dict.items()
])
remap_k

In [None]:
# Recession constant (k) raster
# Each basin id in drain_areas_ids is replaced by its scaled k value from remap_k;
# ids missing from remap_k become NoData ("NODATA" option). Dividing by 1000
# undoes the x1000 scaling applied when remap_k was built.
k = Float(Reclassify(drain_areas_ids, "Value", remap_k, "NODATA")) / 1000

In [None]:
# Initial soil water storage expressed as a fraction of water holding capacity [0-1].
# Every basin currently starts from the same fraction.
ffcb_dict = dict.fromkeys(
    (
        'MISS_07374525',
        'ULUA_23',
        'AMZN_17050001',
        'NILE_1662100',
        'YNGZ_2181900',
        'MUDA_A4261110',
        'DABE_6742900',
        'DABE_42055',
    ),
    0.1,
)

In [None]:
# Initial soil water storage fraction (ffcb) values to be used for the reclassification process.
# Each row is [basin serial id, ffcb * 1000 as an integer]; the scaling is undone
# after the reclassification (the reclassified raster is divided by 1000).
# The rows are built from the ids actually present in id_dict rather than from
# range(len(id_dict)), so the basin ids do not need to be numbered 0..n-1.
remap_ffcb = RemapValue([
    [int(basin_serial), int(round(ffcb_dict[basin_label] * 1000, 0))]
    for basin_serial, basin_label in id_dict.items()
])
remap_ffcb

In [None]:
# Initial soil water storage fraction (ffcb) raster
# Each basin id in drain_areas_ids is replaced by its scaled fraction from remap_ffcb;
# ids missing from remap_ffcb become NoData ("NODATA" option). Dividing by 1000
# undoes the x1000 scaling applied when remap_ffcb was built.
ffcb = Float(Reclassify(drain_areas_ids, "Value", remap_ffcb, "NODATA")) / 1000

In [None]:
# Initial soil water storage (st0)
# Water holding capacity multiplied by the initial storage fraction; the water balance
# loop reads it as the previous-month storage and overwrites it after every month.
sstor_ant = whc * ffcb

In [None]:
# raise SystemExit("Please be sure that all the above executions are successful before running the following cell!")

In [None]:
# Thornthwaite & Mather (T&M) monthly water balance.
# For every year/month this cell loads the TerraClimate precipitation (ppt),
# potential evapotranspiration (pet) and runoff (q) layers, updates soil storage,
# percolation and baseflow, and summarises the resulting water yield per basin
# with zonal statistics. The per-month tables are gathered in `sts_flows_sim`.
# Names read from earlier cells: tc_ds, serial_id, drain_areas, whc, k,
# sstor_ant and bflow_ant.
# NOTE: sstor_ant and bflow_ant are overwritten after every month, so re-running
# this cell without re-running the cells that define them continues from the last
# simulated month instead of from the initial conditions.

# NOTE(review): this selection replaces the `years` / `months` ranges defined
# earlier; it looks like a quick-test subset - confirm before a full run.
years = [1980, 1981]
months = [1 , 2]

# The banner is printed after the selection above so it reports the period that is actually run
print('\n############################################################')
print('\t\tINITIAL VARIABLES')
print('\tPeriod to be executed: ' + str(years[0]) + '-' + str(years[-1]))
print('############################################################')

# Statistics fields read back from every zonal statistics table
zonal_fields = ["COUNT", "AREA", "MIN", "MAX", "RANGE", "MEAN", "STD", "SUM", "MEDIAN", "PCT90"]
# Column layout of the final table
sim_columns = [serial_id, "YEAR", "MONTH"] + zonal_fields
# One DataFrame per simulated month; concatenated once at the end instead of
# growing a DataFrame inside the loop
monthly_frames = []

try:
    for year in years:

        print("\n**Executing THORNTHWAITE AND MATHER model for " + str(year) + "**")

        ppt_nc_file = tc_ds + "\\TerraClimate_ppt_" + str(year) + ".nc"
        pet_nc_file  = tc_ds + "\\TerraClimate_pet_" + str(year) + ".nc"
        q_nc_file = tc_ds + "\\TerraClimate_q_" + str(year) + ".nc"

        # The time values are read from the precipitation file and reused for all three variables
        nc_FP = arcpy.NetCDFFileProperties(ppt_nc_file)

        for month in months:

            print("\n\t*Executing water balance for month " + str(month) + "*\n")

            print("\tLoading precipitation, potential evapo., and runoff rasters from TerraClimate NetCDF files.....")

            # Months are 1-based while the index into the "time" dimension is 0-based
            dimension_value = nc_FP.getDimensionValue("time", month-1)

            pr_name = "ppt_" + str(year) + "_" + str(month)
            pet_name = "pet_" + str(year) + "_" + str(month)
            ro_name = "q_" + str(year) + "_" + str(month)

            arcpy.MakeNetCDFRasterLayer_md(ppt_nc_file, "ppt", "lon", "lat", pr_name, "", [["time", dimension_value]], "BY_VALUE")
            arcpy.MakeNetCDFRasterLayer_md(pet_nc_file, "pet", "lon", "lat", pet_name, "", [["time", dimension_value]], "BY_VALUE")
            arcpy.MakeNetCDFRasterLayer_md(q_nc_file, "q", "lon", "lat", ro_name, "", [["time", dimension_value]], "BY_VALUE")

            pr = Raster(pr_name)
            pet = Raster(pet_name) * 0.1  # Scale factor: 0.1
            ro = Raster(ro_name)

            print("\tPrecipitation, potential evapo., and runoff are loaded")

            print("\tCalculating effective precipitation......")
            eprec = pr - ro

            print("\tCalculating soil storage......")
            # eprec <= pet: the previous storage decays exponentially with the deficit;
            # otherwise the surplus refills the storage, capped at whc
            sstor = Con(eprec <= pet, sstor_ant * Exp(-Abs(eprec - pet) / whc), Con(sstor_ant + (eprec - pet) > whc, whc, sstor_ant + (eprec - pet)))

            print("\tCalculating actual evapotranspiration......")
            # Computed as part of the balance; not used further in this cell
            aet = Con(eprec > pet, pet, eprec + sstor - sstor_ant)

            print("\tCalculating percolation......")
            # Only the surplus that exceeds whc after refilling the storage percolates
            perc = Con(eprec <= pet, 0, Con(sstor_ant + (eprec - pet) > whc, sstor_ant + (eprec - pet) - whc, 0))

            print("\tCalculating baseflow......")
            # Weighted mix of the previous baseflow and the percolation, using the recession constant k
            bflow = (k * bflow_ant) + ((1 - k) * perc)

            print("\tCalculating water yield......")
            wyield = ro + bflow

            out_table = "zonal_wyield_" + str(year) + "_" + str(month) + ".dbf"

            print("\tCalculating zonal statistics of water yield......")
            arcpy.sa.ZonalStatisticsAsTable(drain_areas, serial_id, wyield, out_table, "DATA", "ALL")

            print("\tConverting zonal statistics results into dataframe......")
            # Convert the output table to a NumPy array
            array = arcpy.da.TableToNumPyArray(out_table, [serial_id] + zonal_fields)

            # Convert the NumPy array to a pandas DataFrame
            df_sim = pd.DataFrame(array)

            df_sim["YEAR"] = year # Assign the year of simulation
            df_sim["MONTH"] = month # Assign the month of simulation
            df_sim = df_sim[sim_columns] # Reorder columns

            monthly_frames.append(df_sim) # Keep the simulated values of this month

            # The state of this month becomes the "previous month" state of the next iteration
            sstor_ant = sstor
            bflow_ant = bflow

            # Delete variables to save memory
            print("\tDeleting variables......")
            del pr, pet, ro, eprec, sstor, aet, perc, bflow, wyield
finally:
    # Assemble the table even when the loop is interrupted, so the months that
    # were already simulated stay available in sts_flows_sim
    if monthly_frames:
        sts_flows_sim = pd.concat(monthly_frames, ignore_index=True)
    else:
        sts_flows_sim = pd.DataFrame(columns=sim_columns)

arcpy.CheckInExtension("spatial")

# Clear the workspace environment
arcpy.ClearEnvironment("workspace")

print("\nDONE!!")