In [1]:
# If not installed, run this cell. It is needed for access to GEE
#!pip install earthengine-api
#!pip show earthengine-api

In [2]:
# If not installed, run this cell. It helps visualization of data
#!pip install geemap
#!pip show geemap

In [3]:
# If not installed, run this cell. It helps conversion from DBF to DF
#!pip install dbfread
#!pip show dbfread

In [4]:
# Import libraries
import ee, datetime, pandas as pd, geemap
# from IPython.display import Image
from otherfunctions import water_balance
from functools import partial
from dbfread  import DBF
from dateutil import relativedelta

In [5]:
# Trigger the authentication flow.
# NOTE(review): this is called on every run of the cell — confirm whether cached credentials make it a no-op.
ee.Authenticate()

# Initialize the library, binding this session to the 'ee-jvg' project.
ee.Initialize(project='ee-jvg')

In [6]:
# Access the asset containing the calibration drainage areas
# and add it (in blue) to a new interactive map centred on the basins' geometry.
Map = geemap.Map()
calib_basins = ee.FeatureCollection('projects/ee-jvg/assets/calibration_basins')
Map.centerObject(calib_basins.geometry())
Map.addLayer(calib_basins, {"color": 'blue'}, 'Calibration Drainage Areas')

In [7]:
# HydroSHEDS v1 will be used. Specifically, its HydroBASINS (the "hybas_3" collection)
hydrobasins = ee.FeatureCollection('WWF/HydroSHEDS/v1/Basins/hybas_3')
# Presumably the name of the basin identifier property.
# NOTE(review): not referenced in the cells visible here — confirm it is still needed.
hybas_id = "HYBAS_ID"
Map.addLayer(hydrobasins, {}, 'HydroBasins')
# Leaving the map as the last expression renders it as the cell output
Map

Map(center=[6.282893423832647, 19.27832363811588], controls=(WidgetControl(options=['position', 'transparent_b…

In [8]:
# Load streamflow data series for the stations being used for the calibration process
cluster_folder = r"Z:\PhD_Datasets&Analysis\Info_Inputs"
excel_folder = cluster_folder + "//Streamflow_Stations//_DataFrames"
# The file name is a raw string: "\J" is not a valid escape sequence, so the
# non-raw literal triggers an invalid-escape warning on recent Python versions.
# The resulting path is character-for-character the same as before.
df_sts = pd.read_csv(excel_folder + r"\Joined_Sts_DFs.csv", index_col="yr-mth")
df_sts

Unnamed: 0_level_0,MISS_07374525,ULUA_23,AMZN_17050001,NILE_1662100,YNGZ_2181900,MUDA_A4261110,DABE_6742900,DABE_42055
yr-mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1958-1,,,,866.0,8770.0,,5113.0,
1958-2,,,,777.0,9400.0,,4995.0,
1958-3,,,,810.0,12000.0,,8735.0,
1958-4,,,,925.0,22900.0,,10573.0,
1958-5,,,,660.0,45600.0,,11352.0,
...,...,...,...,...,...,...,...,...
2023-8,7495.469295,,,,,449.813,,
2023-9,4442.913232,,,,,335.637,,
2023-10,,,,,,199.097,,
2023-11,4935.626362,,,,,212.866,,


In [9]:
# Load attribute table of shapefile containing the calibration basins
dbf_folder = cluster_folder + "//Streamflow_Sts_Drainage_Areas"
dbf = DBF(dbf_folder + "//Calibration_Basins_Final.dbf")
# Iterate over the DBF reader and build the DataFrame from the records it yields
df_drain_sts = pd.DataFrame(iter(dbf))
df_drain_sts

Unnamed: 0,gridcode,Area_km2,Length_km,ID,STATION,SUFIX,BASIN,LAT,LONG,CONTINENT
0,6,782116,8294,6742900,Ceatal Izmail,DABE,Danube,45.216667,28.716657,eu
1,7,514091,7272,42055,Smederevo,DABE,Danube,44.66678,20.9206,eu
2,0,3194997,18768,07374525,USGS 07374525 Mississippi River at Belle Chass...,MISS,Mississippi,29.857145,-89.97785,na
3,1,20210,943,23,Santiago,ULUA,Ulua,15.297001,-87.928217,na
4,2,4671596,17674,17050001,Obidos,AMZN,Amazon,-1.947222,-55.511108,sa
5,3,2606269,14350,1662100,Dongola,NILE,Nile,19.186,30.4895,af
6,4,1682893,13471,2181900,Datong,YNGZ,Yangtze,30.77,117.62,as
7,5,851359,8473,A4261110,River Murray downstream Morgan (AMTD 316.5km),MUDA,Murray-Darling,-34.055059,139.685774,au


In [10]:
def calculate_months(ini_date, fin_date):
    """Count the months spanned by two "YYYY-MM" strings, both ends included.

    Parameters
    ----------
    ini_date : str
        First month of the span, parsed with the "%Y-%m" format.
    fin_date : str
        Last month of the span, parsed with the "%Y-%m" format.

    Returns
    -------
    int
        Whole-month difference between the two dates plus one, so the same
        month given twice yields 1.
    """
    month_format = "%Y-%m"
    first, last = (
        datetime.datetime.strptime(text, month_format)
        for text in (ini_date, fin_date)
    )

    # relativedelta splits the gap into years and months; fold both back into months
    gap = relativedelta.relativedelta(last, first)
    return gap.years * 12 + gap.months + 1

In [11]:
# Function to determine the simulation period and the reporting period
def determine_period(start_date, end_date, warmup_yrs = 0):
    """Derive the reporting start month and the exclusive end bound of the run.

    Parameters
    ----------
    start_date : str
        First simulated month as "YYYY-MM".
    end_date : str
        Last simulated month as "YYYY-MM".
    warmup_yrs : int, optional
        Years added to the start year to obtain the first reported month.

    Returns
    -------
    tuple of (str, str)
        ``ini_date``: ``start_date`` shifted forward by ``warmup_yrs`` years.
        ``fin_date``: the month following ``end_date``.
        Both are formatted as zero-padded "YYYY-MM", so the December and
        non-December branches produce the same format.

    Raises
    ------
    SystemExit
        When ``ini_date`` is not strictly earlier than ``fin_date``; an
        explanatory message is printed first.
    """
    start_parts = start_date.split("-")
    end_parts = end_date.split("-")
    start_month = int(start_parts[1])
    end_year, end_month = int(end_parts[0]), int(end_parts[1])

    # First reported month: same calendar month, warm-up years later
    ini_year = int(start_parts[0]) + int(warmup_yrs)

    # Month right after end_date, rolling over to January of the next year
    if end_month == 12:
        fin_year, fin_month = end_year + 1, 1
    else:
        fin_year, fin_month = end_year, end_month + 1

    # Always zero-pad the month (previously "2023-08" produced "2023-9")
    ini_date = "{}-{:02d}".format(ini_year, start_month)
    fin_date = "{}-{:02d}".format(fin_year, fin_month)

    if datetime.datetime(ini_year, start_month, 1) >= datetime.datetime(fin_year, fin_month, 1):
        if (warmup_yrs != 0):
            print ("Start date is finally set beyond or same as the end date! Please consider the implications of the warm-up years. Remember that end date is exclusive")
        else:
            print ("Start date is finally set beyond or same as the end date! Please review your input dates. Remember that end date is exclusive")
        raise SystemExit()

    months1 = calculate_months(start_date, end_date)
    months2 = calculate_months(ini_date, end_date)

    print ("Simulation period will be from " + start_date + " to " + end_date + " (" + str(months1) + " months)"
           + "; however, outputs will be provided for " + ini_date + " to " + end_date + " (" + str(months2) + " months)")
    return ini_date, fin_date

In [12]:
## Initial parameters ##

# It is recommended to use at least 5 years of equilibration period (warm-up years). This allows the model to get the water cycling properly.
# When a proper equilibration period is incorporated, the "ffcb" value is not going to impact model results
warmup_yrs = 5
start_date = '2008-01' # Inclusive
end_date = '2023-12' # Inclusive
# ini_date: start_date shifted by the warm-up years; fin_date: the month after end_date
ini_date, fin_date = determine_period(start_date, end_date, warmup_yrs)

Simulation period will be from 2008-01 to 2023-12 (192 months); however, outputs will be provided for 2013-01 to 2023-12 (132 months)


In [13]:
######################################################
### Starting values for the water balance model - T&M
######################################################

# The recession constant (k) will be determined from hydrographs derived from the gathered streamflow datasets. For the rest of the world, k will be defined as 0.5
# It is later wrapped in a constant image (k) that is passed to the water balance function
k_recession = 0.5 # Temporarily defined as such

# Initial soil water storage expressed as a fraction of water holding capacity [0-1]
# It is later multiplied by the whc image to obtain the initial storage (st0)
ffcb = 0.1 # 10%

# Base flow of the previous month (mm)
# It is later turned into a constant image (bf0)
bflow_ant = 10

In [14]:
# Public link to the asset whc: https://code.earthengine.google.com/?asset=projects/ee-jvg/assets/whc_fc33
whc = ee.Image('projects/ee-jvg/assets/whc_fc33').divide(1000) # As the raster was originally multiplied by 1000
# It seems that clipping GEE datasets consumes a lot of memory, so it is more efficient to process all the datasets and clip them to the zoi at the end
# whc = whc.clip(calib_basins)
# The triple-quoted block below is a disabled visualization snippet kept as a string literal.
# Being the last expression of the cell, the string itself is echoed as the cell output.
""" Map.addLayer(whc, {
  'min': 0,
  'max': 1000,
  'palette': [
      '#AFEEEE',
      '#E0FFFF',
      '#00FFFF',
      '#87CEEB',
      '#000080'
  ]
  },
  'Water Holding Capacity') """

" Map.addLayer(whc, {\n  'min': 0,\n  'max': 1000,\n  'palette': [\n      '#AFEEEE',\n      '#E0FFFF',\n      '#00FFFF',\n      '#87CEEB',\n      '#000080'\n  ]\n  },\n  'Water Holding Capacity') "

In [15]:
# TerraClimate layers filtered from "start_date" (inclusive) to "fin_date" (exclusive); the collection is NOT clipped here
terraclimate = ee.ImageCollection('IDAHO_EPSCOR/TERRACLIMATE').filter(ee.Filter.date(start_date, fin_date))
# It seems that clipping GEE default datasets consumes a lot of memory, so it is more efficient to process all the datasets and clip them to the zoi at the end
#terraclimate = ee.ImageCollection('IDAHO_EPSCOR/TERRACLIMATE').filter(ee.Filter.date(start_date, fin_date)).map(lambda image: image.clip(calib_basins))
#num_months = terraclimate.size().getInfo()
#num_months

In [16]:
# Initial images and time to start the water balance
k = ee.Image(k_recession) # Image of recession constant
st0 = whc.multiply(ffcb) # Initial soil water storage
bf0 = ee.Image(bflow_ant) # Previous baseflow
time0 = terraclimate.first().get("system:time_start") # Define the initial time (time0) according to the start of the collection
scale = terraclimate.first().projection().nominalScale() # This is the resolution used to standardize
# NOTE(review): crs is not referenced in the cells visible here — confirm it is still needed
crs = 'EPSG:3857' # WGS 84 / Pseudo-Mercator -- Spherical Mercator, Google Maps, OpenStreetMap, Bing, ArcGIS, ESRI

# Water balance variables
# NOTE(review): the initial image built below starts with "wyield", not "pr" — confirm nothing relies on this order
wb_vars = ["pr", "pet", "ro", "eprec", "aet", "sstor", "perc", "bflow", "wyield"]

In [17]:
# Initialization of ALL bands.
# Each band is stamped with time0, renamed and cast with .float() (the cast is mandatory).

def _as_initial_band(source, band_name):
    """Stamp `source` with time0, rename its first band to `band_name` and cast it to float."""
    stamped = source.set("system:time_start", time0)
    return stamped.select([0], [band_name]).float()

# Fluxes start from a constant zero image
initial_pr = _as_initial_band(ee.Image(0), "pr")      # precipitation
initial_pet = _as_initial_band(ee.Image(0), "pet")    # potential evapotranspiration
initial_ro = _as_initial_band(ee.Image(0), "ro")      # runoff
initial_ep = _as_initial_band(ee.Image(0), "eprec")   # effective precipitation
initial_aet = _as_initial_band(ee.Image(0), "aet")    # actual evapotranspiration

# Soil storage starts as a fraction of WHC (st0)
initial_st = _as_initial_band(st0, "sstor")

initial_pc = _as_initial_band(ee.Image(0), "perc")    # percolation

# Base flow starts from bf0, as defined in Ulmen (2000)
initial_bf = _as_initial_band(bf0, "bflow")

initial_wy = _as_initial_band(ee.Image(0), "wyield")  # water yield

In [18]:
# All initial bands are combined into one "ee.Image" by adding them to the first band (wy)
# Resulting band order: wyield, pr, pet, ro, eprec, aet, sstor, perc, bflow
initial_image = initial_wy.addBands(
    ee.Image([initial_pr, initial_pet, initial_ro, initial_ep, initial_aet, initial_st, initial_pc, initial_bf])
)

# Initialization of the list that will contain new images after each iteration
image_list = ee.List([initial_image])

In [19]:
#########################
# WATER BALANCE EXECUTION
#########################

# Pre-bind the fixed keyword arguments (ee, whc, k) so that iterate() only has to supply the remaining ones.
# NOTE(review): water_balance is imported from otherfunctions and is not visible here — confirm its signature.
partial_function = partial(water_balance, ee = ee, whc = whc, k = k)

# Iterate the user-supplied function to the terraclimate collection
wy_list = terraclimate.iterate(partial_function, image_list)

# Remove the initial image from our list
wy_list = ee.List(wy_list).remove(initial_image)

# Transform the list into an ee.ImageCollection
# and keep only the reporting period: from ini_date up to fin_date
wy_coll = ee.ImageCollection(wy_list).filter(ee.Filter.date(ini_date, fin_date))

In [20]:
# Let's assign year and month to each image of the resulting image collection
def _tag_year_month(image):
    """Return `image` with a "yr-mth" property built as "<year>-<month>" from its date."""
    year_text = ee.Image(image).date().get("year").format()
    month_text = ee.Image(image).date().get("month").format()
    label = year_text.cat(ee.String("-")).cat(month_text)
    return image.set("yr-mth", label)

wy_coll = wy_coll.map(_tag_year_month)

In [22]:
# Keep only the "wyield" band for the zonal statistics computed below
reduced_wy_coll = wy_coll.select("wyield")

# Function to split image collection into smaller chunks
def split_collection(collection, chunk_size):
    """Cut `collection` into consecutive lists of at most `chunk_size` elements.

    The collection size is fetched with getInfo(); each chunk is then obtained
    with collection.toList(chunk_size, offset).

    Returns a Python list with one toList() result per chunk, in order.
    """
    total = collection.size().getInfo()
    pieces = []
    for start in range(0, total, chunk_size):
        pieces.append(collection.toList(chunk_size, start))
    return pieces

# Split the collection into smaller chunks (e.g., 12 images per chunk)
chunk_size = 12
chunks = split_collection(reduced_wy_coll, chunk_size)

# Initial list to be filled with appended results
# NOTE(review): only the commented-out DataFrame path further down appends to it, so it stays empty as the code stands
df_zonal = []

# Define a function to add a new field to each feature
def add_field(feature, image):
    """Return `feature` with its 'yr-mth' property set to the image's "yr-mth" value."""
    year_month = image.get("yr-mth")
    return feature.set('yr-mth', year_month)

# rescale features to help with memory error
# (currently a plain alias of calib_basins: the simplification below is disabled)
calib_basins_scaled = calib_basins
#calib_basins_scaled = calib_basins.map(lambda feature: feature.simplify(100)).limit(1)

# Perform a grouped mean reduction.
# Defined once, outside the loop: it does not depend on the chunk being processed.
def means(image):
    """Reduce `image` to its mean over each calibration basin.

    Returns the reduced features tagged with the image's "yr-mth" value and
    limited to the "ID", "yr-mth" and "mean" properties.
    """
    reduced_fc = image.reduceRegions(
        reducer=ee.Reducer.mean(),
        collection=calib_basins_scaled, # Specify the region's geometry
        scale=scale # Specify the scale in meters
        #tileScale=16
    )

    # Map add_field over the FeatureCollection, with the current image bound
    updated_fc = reduced_fc.map(partial(add_field, image=image))

    # Select only the fields you want to keep
    return updated_fc.select(["ID", "yr-mth", "mean"])

# One export task per chunk; task descriptions are numbered from 1
for i, chunk in enumerate(chunks, start=1):
    # Apply the means function to each image of the chunk and merge the
    # per-image results into a single collection
    with_mean = ee.ImageCollection(chunk).map(means)
    results = with_mean.flatten()
    #df_zonal.append(geemap.ee_to_df(results))

    # Export the FeatureCollection.
    ee.batch.Export.table.toDrive(
        collection=results,
        description='zonal_' + str(i),
        folder='earth_engine_demos',
        fileFormat='CSV',
        selectors=["ID", "yr-mth", "mean"]
    ).start()

# Convert the Earth Engine FeatureCollection to a pandas DataFrame
#df_zonal_final = pd.concat(df_zonal).sort_index()
#df_zonal_final