
# 📍 Make Continuum Point Files (Dams, Lakes, Sections)

<br>
<img style="float: left; padding-right: 15px; padding-left: 0px;" src="../sources/images/logo_continuum.png" width="260px" align="left" >

<div style="text-align: justify">This is a Jupyter Notebook, a web-based interactive development environment for creating and sharing Python code.
This notebook loads its parameters from a JSON configuration file (e.g. `config.json`) saved in the `settings` folder, and performs a set of operations to prepare the static point data for the Continuum model:

- uses the dams extracted from the GRanD database to build the `info_dam.txt` file
- uses the lakes extracted from the HydroSHEDS database to build the `info_lake.txt` file
- uses a set of points to build the `info_section.txt` file

The entire workflow is documented with clear explanations and visual plots to help you understand each step.


## 🔧 Preliminary Setup — Select Config

In [1]:
# Use a single, simple, unified JSON settings file.
# Only the file name goes here; it is looked up inside the project's `settings` folder.
settings_file = "danakil_point.json"  # you can change it here

print("Using settings:", settings_file)


Using settings: danakil_point.json


#### Import libraries

In [2]:
import os, json, logging, math
import numpy as np
import geopandas as gpd
import rasterio as rio
from pathlib import Path


from plot_tools import single_plot
from osgeo import gdal, gdalconst

print("Imports OK")


Imports OK


#### Setup the folders

In [3]:
# Define the project root directory.
# The notebook is expected to run from the `notebook` folder, whose parent is the
# project root. Only the last path component is inspected, so a "notebook"
# substring elsewhere in the path (e.g. ".../notebooks/...") is never altered.
_cwd = Path.cwd()
project_root = str(_cwd.parent if _cwd.name == "notebook" else _cwd)

# Load configuration from the JSON settings file
with open(os.path.join(project_root, "settings", settings_file), 'r') as f:
    cfg = json.load(f)

domain = cfg["general"]["domain"]

# Generate the folder tree based on the configuration: every entry of cfg['path']
# is turned into an absolute path under <root>/projects/<project>/ and created.
path_settings = cfg['path']
for key in list(path_settings):
    path_settings[key] = os.path.join(project_root, "projects", cfg['general']['project'], path_settings[key])
    os.makedirs(path_settings[key], exist_ok=True)

# Input rasters (grid context)
choice_path = os.path.join(path_settings["data"], cfg["data"]["choice"])
pnt_path    = os.path.join(path_settings["data"], cfg["data"]["pnt"])

# Shapefiles
dams_file     = os.path.join(path_settings["data"], cfg["data"]["dam_points"])
lakes_file    = os.path.join(path_settings["data"], cfg["data"]["lake_points"])
sections_file = os.path.join(path_settings["data"], cfg["data"]["section_points"])

# Dams and lakes standard names: attribute-table columns read by the cells below
dams_lut = {
    "name_col": "DAM_NAME",
    "year_col": "YEAR",
    "cap_col": "CAP_MCM",
    "len_col": "DAM_LEN_M",
    "height_col": "DAM_HGT_M",
    "area_col": "AREA_SKM",
    "dis_avg_col": "DIS_AVG_LS",
}
lakes_lut = {
    "name_col": "Lake_name",
    "res_time_col": "Res_time",
    "vol_total_col": "Vol_total",
    "dis_avg_col": "Dis_avg",
}

# Outputs
os.makedirs(path_settings["output"], exist_ok=True)
out_dams    = os.path.join(path_settings["output"], f"{domain}.info_dam.txt")
out_lakes   = os.path.join(path_settings["output"], f"{domain}.info_lake.txt")
out_sects   = os.path.join(path_settings["output"], f"{domain}.info_section.txt")

# Summary of the resolved locations; the hard-coded workdir prefix is stripped
# from each value purely to keep the printed paths short.
_workdir_prefix = "/home/continuumuser/workdir/"
for _label, _value in (
    ("Domain:", domain),
    ("Data:", path_settings["data"]),
    ("Output:", path_settings["output"]),
    ("choice:", choice_path),
    ("pnt   :", pnt_path),
    ("dams  :", dams_file),
    ("lakes :", lakes_file),
    ("sects :", sections_file),
):
    print(_label, _value.replace(_workdir_prefix, ""))

Domain: danakil
Data: projects/training_eth/data
Output: projects/training_eth/output/danakil
choice: projects/training_eth/data/danakil.choice.txt
pnt   : projects/training_eth/data/danakil.pnt.txt
dams  : projects/training_eth/data/dams_in.shp
lakes : projects/training_eth/data/lakes_in.shp
sects : projects/training_eth/data/danakil_river_flow.shp


#### Read the reference grids

In [4]:
# Read the arrays: band 1 of the `choice` grid (also providing the georeferencing)
# and band 1 of the `pnt` grid.
with rio.open(choice_path) as src_choice:
    choice = src_choice.read(1)             # np.ndarray
    tf = src_choice.transform               # affine transform
    left, bottom, right, top = src_choice.bounds

with rio.open(pnt_path) as src_pnt:
    pnt = src_pnt.read(1)                   # np.ndarray
    # NOTE(review): pnt is later indexed with row/column values derived from `tf`,
    # so it presumably shares the choice grid's shape and georeferencing — confirm.

res = abs(tf.a)                             # x pixel size (affine coefficient a), sign dropped
# NOTE(review): rasterio bounds normally already refer to the outer cell edges, so
# shifting them by another half pixel may be unintended — confirm before relying
# on x_ll / y_ul. In this cell they are only printed.
x_ll = float(left - (res / 2))              # min X - half pixel
y_ul = float(top  + (res / 2))              # max Y + half pixel

print("Grid res:", res, " | x_ll:", x_ll, " | y_ul:", y_ul)

Grid res: 0.004496608  | x_ll: 39.354901312111004  | y_ul: 14.955939691819001


#### Define useful functions

In [5]:
def xy_to_hmc(x, y, tf):
    """Convert map coordinates to 1-based (row, column) grid indices.

    Parameters
    ----------
    x, y : float
        Point coordinates, in the same reference system as `tf`.
    tf : affine transform
        Transform of the reference grid (e.g. the `transform` of a rasterio dataset).

    Returns
    -------
    tuple of int
        ``(row, column)`` counted from 1 starting at the upper-left cell.
        No bounds check is done here: a point outside the grid yields indices
        below 1 or beyond the grid size.
    """
    # 0-based indices, consistent with rasterio (floor from the cell edges)
    r0, c0 = rio.transform.rowcol(tf, x, y)
    # 1-based, counted from the upper-left corner
    return int(r0 + 1), int(c0 + 1)
def ensure_on_network(choice_arr, X_HMC, Y_HMC, name, kind):
    """Validate that a 1-based grid cell lies inside the grid and on the network.

    Parameters
    ----------
    choice_arr : np.ndarray
        2D grid; a cell counts as "on the network" when its value is >= 1.
    X_HMC, Y_HMC : int (or integer-valued float)
        1-based row and column of the cell to check. Values are truncated to
        ``int`` so that numpy floating scalars can be used as indices.
    name : str
        Name of the point, used only in the error message.
    kind : str
        Kind of point ("dam", "lake", "section", ...), used only in the message.

    Raises
    ------
    ValueError
        If the cell falls outside the grid or its value is lower than 1.
    """
    row, col = int(X_HMC) - 1, int(Y_HMC) - 1
    n_rows, n_cols = choice_arr.shape[:2]
    # Explicit bounds check: a negative index would otherwise wrap around and
    # silently validate against a cell on the opposite side of the grid.
    if not (0 <= row < n_rows and 0 <= col < n_cols):
        raise ValueError(f"DOMAIN: {domain}. The {kind} '{name}' ({X_HMC}-{Y_HMC}) is outside the grid")
    if choice_arr[row, col] < 1:
        raise ValueError(f"DOMAIN: {domain}. The {kind} '{name}' ({X_HMC}-{Y_HMC}) is not on the network")

#### Load points

In [6]:
def _read_layer_if_present(path):
    """Read the vector layer at `path`, or return None when the file does not exist."""
    return gpd.read_file(path) if os.path.exists(path) else None


def _count_or_missing(layer):
    """Return the number of features in `layer`, or "File not found" for a missing layer."""
    return "File not found" if layer is None else len(layer)


# Each layer is optional: a missing file yields None and the matching output
# cell below is skipped.
gdf_dams = _read_layer_if_present(dams_file)
gdf_lakes = _read_layer_if_present(lakes_file)
gdf_sects = _read_layer_if_present(sections_file)

print("Loaded → dams:", _count_or_missing(gdf_dams),
      "| lakes:", _count_or_missing(gdf_lakes),
      "| sections:", _count_or_missing(gdf_sects))

Loaded → dams: File not found | lakes: File not found | sections: File not found


## 💧 Generate output files

In [7]:
#### Dam file

In [8]:
# Build the <domain>.info_dam.txt file: a two-line header (number of dams, number
# of plants) followed by one fixed-layout record per dam. Nothing is written when
# the dams layer is missing or empty.

# Attribute-table column names, taken from the dams look-up table
DAM_NAME   = dams_lut["name_col"]
DAM_YEAR   = dams_lut["year_col"]
DAM_CAP    = dams_lut["cap_col"]
DAM_LEN    = dams_lut["len_col"]
DAM_HGT    = dams_lut["height_col"]
DAM_AREA   = dams_lut["area_col"]
DAM_DISAVG = dams_lut["dis_avg_col"]

if gdf_dams is not None and len(gdf_dams) > 0:
    gdf = gdf_dams.copy()
    # Normalize names: missing or blank names are replaced by a progressive
    # "Dam_<domain>NNN" code, then the name becomes the index.
    # NOTE(review): names are not checked for uniqueness; with duplicated names
    # gdf.loc[name] would presumably return several rows and break the scalar
    # reads below — confirm the input guarantees unique names.
    unnamed_dam_code = 1
    index = []
    for name in gdf.get(DAM_NAME, [None]*len(gdf)):
        if name is not None and str(name).strip():
            index.append(str(name))
        else:
            index.append(f"Dam_{domain}{str(unnamed_dam_code).zfill(3)}")
            unnamed_dam_code += 1
    gdf[DAM_NAME] = index
    gdf = gdf.set_index(DAM_NAME)

    with open(out_dams, "w") as dam_file:
        # Header: one plant is declared per dam, so both counts are equal
        dam_file.write(f"{len(gdf)}\t#Number of dams\n")
        dam_file.write(f"{len(gdf)}\t#Number of plants\n")
        dam_file.write("##################################################################################################\n")

        for dam_name in gdf.index.values:
            # The year (or -9999 when unavailable) is appended to the name comment
            year_val = gdf.loc[dam_name].get(DAM_YEAR, -9999)
            dam_file.write(f"{dam_name}\t\t\t#Dam name {year_val}\n")

            # 1-based row/column of the dam; must fall on a network cell
            X_HMC, Y_HMC = xy_to_hmc(gdf.loc[dam_name].geometry.x, gdf.loc[dam_name].geometry.y, tf)
            ensure_on_network(choice, X_HMC, Y_HMC, dam_name, "dam")
            dam_file.write(f"{int(X_HMC)} {int(Y_HMC)}\t\t\t#Row and column dam coordinates\n")

            dam_file.write("1\t\t\t#Number of plants downstream the dam\n")
            dam_file.write("-9999\t\t\t#Code of the reservoirs cells of the dam (if point dam set to -9999)\n")

            # Capacity column scaled by 1e6 (column name suggests million m3 — confirm);
            # initial storage is set to 70% of the maximum
            max_storage_m3 = float(gdf.loc[dam_name].get(DAM_CAP,0.0)) * (10**6)
            initial_storage_m3 = max_storage_m3 * 0.7
            dam_file.write(f"{max_storage_m3}\t\t\t#Max storage (m3)\n")
            dam_file.write(f"{initial_storage_m3}\t\t\t#Initial storage (m3)\n")

            dam_file.write("99999\t\t\t#Critical discharge for surface spillway\n")
            # Spillway length is taken as 15% of the dam length; non-positive
            # lengths are written unchanged
            dam_len = float(gdf.loc[dam_name].get(DAM_LEN,0.0))
            spillway_len = 0.15 * dam_len if dam_len > 0 else dam_len
            dam_file.write(f"{spillway_len}\t\t\t#Equivalent length of surface spillway\n")

            # Depth comes from the dam height; when that is missing or negative it
            # is estimated as 2 * capacity / area (0.0 when the area is not positive)
            max_depth = float(gdf.loc[dam_name].get(DAM_HGT, -1.0))
            if max_depth < 0:
                area_skm = float(gdf.loc[dam_name].get(DAM_AREA, 1.0))
                cap_mcm  = float(gdf.loc[dam_name].get(DAM_CAP, 0.0))
                max_depth = round(2 * cap_mcm / area_skm, 0) if area_skm > 0 else 0.0
            dam_file.write(f"{max_depth}\t\t\t#Maximum reservoir depth\n")

            dam_file.write("1e-006\t\t\t#Linear tank coefficient\n")
            dam_file.write("\t\t\t\t#Depth-volume curve file name\n")
            dam_file.write("\t\t\t\t#Turbines discharge file name\n")

            # Outlet cell = the neighbour the `pnt` grid points to from the dam cell.
            # With p = pnt value: row offset = -(int((p-1)/3) - 1) and
            # column offset = p - 5 - 3*(int((p-1)/3) - 1), i.e. a numeric-keypad
            # layout (7 8 9 = row above, 4 5 6 = same row, 1 2 3 = row below).
            # Assumes pnt holds codes 1-9 at this cell — TODO confirm.
            X_HMC_OUT = int(X_HMC) - (int((pnt[int(X_HMC)-1, int(Y_HMC)-1] - 1) / 3) - 1)
            Y_HMC_OUT = int(Y_HMC) + pnt[int(X_HMC)-1, int(Y_HMC)-1] - 5 - 3 * (int((pnt[int(X_HMC)-1, int(Y_HMC)-1] - 1) / 3) - 1)
            ensure_on_network(choice, X_HMC_OUT, Y_HMC_OUT, dam_name, "dam outlet")
            dam_file.write(f"{int(X_HMC_OUT)} {int(Y_HMC_OUT)}\t\t\t#Row and column outlet dam coordinates\n")

            # Maximum plant discharge = 2.5 x average discharge, scaled by 0.001
            # (column name suggests l/s -> m3/s — confirm)
            max_discharge_m3_s =  0.001 * 2.5 * float(gdf.loc[dam_name].get(DAM_DISAVG,0.0))
            dam_file.write("-9999\t\t\t#Plant corrivation time (minutes)\n")
            dam_file.write(f"{max_discharge_m3_s}\t\t\t#Maximum plant discharge (m3/s)\n")
            dam_file.write("1\t\t\t#flag=1 if the plant discharge water\n")
            dam_file.write("##################################################################################################\n")
    print("Written:", out_dams)
else:
    print("No dams layer provided.")


No dams layer provided.


#### Lakes file

In [9]:
# Build the <domain>.info_lake.txt file: a one-line header (number of lakes)
# followed by one fixed-layout record per lake. Nothing is written when the lakes
# layer is missing or empty.

# Attribute-table column names, taken from the lakes look-up table
LAKE_NAME  = lakes_lut["name_col"]
LAKE_REST  = lakes_lut["res_time_col"]
LAKE_VOLT  = lakes_lut["vol_total_col"]
LAKE_DISA  = lakes_lut["dis_avg_col"]

if gdf_lakes is not None and len(gdf_lakes) > 0:
    gdf = gdf_lakes.copy()
    # Missing or blank names are replaced by a progressive "Lake_<domain>NNN"
    # code, then the name becomes the index.
    # NOTE(review): names are not checked for uniqueness; duplicated names would
    # presumably break the per-lake gdf.loc[...] reads below — confirm.
    unnamed_lake_code = 1
    index = []
    for name in gdf.get(LAKE_NAME, [None]*len(gdf)):
        if name is not None and str(name).strip():
            index.append(str(name))
        else:
            index.append(f"Lake_{domain}{str(unnamed_lake_code).zfill(3)}")
            unnamed_lake_code += 1
    gdf[LAKE_NAME] = index
    gdf = gdf.set_index(LAKE_NAME)

    # Lakes with a negative residence time are dropped and reported at the end
    lakes_not_valid = []
    to_drop = gdf.index[gdf.get(LAKE_REST,-1) < 0] if LAKE_REST in gdf.columns else []
    if len(to_drop) > 0:
        lakes_not_valid += [f"{domain} : {i}" for i in to_drop]
        gdf = gdf.drop(to_drop)

    with open(out_lakes, "w") as lake_file:
        lake_file.write(f"{len(gdf)}\t#Number of lakes\n")
        lake_file.write("##################################################################################################\n")

        for lake_name in gdf.index.values:
            lake_file.write(f"{lake_name}\t\t\t#Lake name\n")           
            # 1-based row/column of the lake point; must fall on a network cell
            X_HMC, Y_HMC = xy_to_hmc(gdf.loc[lake_name].geometry.x, gdf.loc[lake_name].geometry.y, tf)
            ensure_on_network(choice, X_HMC, Y_HMC, lake_name, "lake")
            # NOTE(review): the two trailing comments below mention "dam" although
            # this is the lake file — looks like a copy-paste; confirm before changing,
            # since the text is part of the written output.
            lake_file.write(f"{int(X_HMC)} {int(Y_HMC)}\t\t\t#Row and column dam coordinates\n")
            lake_file.write("-9999\t\t\t#Code of the lakes cells of the lake (if point dam set to -9999)\n")

            # Total volume scaled by 1e6 (presumably million m3 -> m3 — confirm);
            # the lake starts full and has no minimum storage.
            vol_tot = float(gdf.loc[lake_name].get(LAKE_VOLT,0.0)) * (10**6)
            # NOTE(review): dis_avg is read but never used in this cell.
            dis_avg = float(gdf.loc[lake_name].get(LAKE_DISA,0.0))
            vol_min = 0.0
            vol_init = vol_tot

            # Lake constant = 1 / (residence time * 24); the residence time is
            # presumably expressed in days (hence the conversion to hours) — confirm.
            # Defaults to 1.0 when the column is absent; a zero time gives 0.0.
            res_time_day = float(gdf.loc[lake_name].get(LAKE_REST,1.0)) if LAKE_REST in gdf.columns else 1.0
            res_time_hr  = res_time_day * 24.0
            lake_const   = 1.0 / res_time_hr if res_time_hr > 0 else 0.0

            lake_file.write(f"{vol_min}\t\t\t#Minimum storage non-null discharge (m3)\n")
            lake_file.write(f"{vol_init}\t\t\t#Initial storage (m3)\n")
            lake_file.write(f"{lake_const}\t\t\t#Lake constant (1/h) \n")
            lake_file.write("##################################################################################################\n")
    if len(lakes_not_valid) > 0:
        print("WARNING! Lakes skipped because invalid residence time:", "; ".join(lakes_not_valid))
    print("Written:", out_lakes)
else:
    print("No lakes layer provided.")

No lakes layer provided.


#### Sections file

In [10]:
# Build the <domain>.info_section.txt file: one whitespace-separated line per
# section with "<row> <column> <river> <section name>". Nothing is written when
# the sections layer is missing or empty.
if gdf_sects is not None and len(gdf_sects) > 0:
    gdf = gdf_sects.copy()
    # Column names from settings (flat JSON)
    name_col  = cfg["settings"]["sections_name_col"]
    river_col = cfg["settings"]["sections_river_col"]

    def _is_blank(value):
        """Return True for None, float NaN, or empty / whitespace-only text."""
        if value is None or (isinstance(value, float) and math.isnan(value)):
            return True
        return not str(value).strip()

    # Missing or blank names get a progressive "Section_<domain>NNN" code (same
    # rule used for dams and lakes). Spaces become underscores because the output
    # file is space-delimited.
    unnamed_code = 1
    names = []
    for name in gdf.get(name_col, [None]*len(gdf)):
        if _is_blank(name):
            names.append(f"Section_{domain}{str(unnamed_code).zfill(3)}")
            unnamed_code += 1
        else:
            names.append(str(name).strip().replace(" ", "_"))
    gdf[name_col] = names
    gdf = gdf.set_index(name_col)

    # When the river column is not available the domain name is used instead
    has_river_col = river_col in gdf.columns

    with open(out_sects, "w") as sec_file:
        # Iterate over rows rather than looking each label up with .loc, so that
        # repeated section names still yield exactly one line per feature.
        for sect_name, sect_row in gdf.iterrows():
            river = str(sect_row[river_col]).replace(" ","_") if has_river_col else domain
            # 1-based row/column of the section; must fall on a network cell
            X_HMC, Y_HMC = xy_to_hmc(sect_row.geometry.x, sect_row.geometry.y, tf)
            ensure_on_network(choice, X_HMC, Y_HMC, sect_name, "section")
            sec_file.write(f"{int(X_HMC)} {int(Y_HMC)} {river} {sect_name}\n")
    print("Written:", out_sects)
else:
    print("No sections layer provided.")


No sections layer provided.
