In [1]:
import pandas as pd
import networkx as nx
import xarray as xr
from pathlib import Path
import numpy as np
from tqdm import tqdm
import geopandas as gpd

In [2]:
from hydro_data.data_io import load_topo
from hydro_data.river_network_definition.river_network_manipulation import segment_river_network, load_and_map_kp, dump_graph
from hydro_data.river_network_definition.river_network_manipulation import *
from hydro_data.river_network_definition.river_network_manipulation import prepare_kp_coord, grid_node_idxs

In [3]:
import rioxarray

# Root of the local data directory, relative to the notebook's working directory.
data_root = Path("../data/")
# Sub-folder from which the processed MERIT rasters (dir.tif / upa.tif) are read below.
merit_out = data_root / "MERIT_processed"

In [4]:
# Open the `dir` and `upa` rasters and load them fully into memory.
# `drop_vars("band")` replaces the deprecated `DataArray.drop("band")`;
# `squeeze()` then removes any length-1 dimension left behind.
fdir_ds = rioxarray.open_rasterio(merit_out / "dir.tif").drop_vars("band").squeeze().load()
upa_ds = rioxarray.open_rasterio(merit_out / "upa.tif").drop_vars("band").squeeze().load()

  fdir_ds = rioxarray.open_rasterio(merit_out / "dir.tif").drop("band").squeeze().load()
  upa_ds = rioxarray.open_rasterio(merit_out / "upa.tif").drop("band").squeeze().load()


### Extract MERIT graphs

Here we should loop over the different graph segmentation configurations.

In [5]:
# Load the key-point table from a local pickle.
# NOTE: unpickling can execute arbitrary code — only read files from a trusted source.
df = pd.read_pickle(data_root / "keypoints/kp3.pkl")

In [6]:
# Prepare the key points against the `upa` raster (see prepare_kp_coord in hydro_data).
kp_df = prepare_kp_coord(df, upa_ds)

# Flag every key point with the same constant values:
# not a dam, is a gauge, has discharge data.
kp_df["is_dam"]=False
kp_df["is_gauge"]=True
kp_df["has_discharge"]=True

In [9]:
# Disabled scratch code, kept for reference: persist the prepared key points,
# or reload the raw key-point table.
#kp_df.to_pickle(data_root / "keypoints/kp4.pkl")
#df = pd.read_pickle(data_root / "keypoints/kp3.pkl")

In [10]:
# Threshold handed to compute_graph as `thr` in the next cell.
# Presumably the minimum `upa` value for a cell to count as river — TODO confirm.
river_upa_thr=10

In [11]:
# Build the full graph from the two rasters; `g_full` is the reference graph
# passed to every simplification / annotation step below.
g_full = compute_graph(fdir_ds, upa_ds, thr=river_upa_thr)

In [12]:
# Segmentation parameters for the step-by-step pipeline below.
# `river_upa_thr` is intentionally not repeated here: it is set once in the
# cell that precedes compute_graph, so the value used to build `g_full`
# cannot drift from a second copy.

# Passed to break_up_large_catchments as `max_area` / `max_chanel_length`.
split_max_area=30
split_max_chanel_length=4
# Passed to simplify_small_catchments as `min_area` / `min_chanel_length`.
simplify_min_area=1
simplify_min_channel_length=20

In [13]:
# Keep only the key points whose index is a node of the full graph.
# NOTE: this overwrites `kp_df`; the unfiltered table is only recoverable by
# re-running the cells above.
kp_df = kp_df.loc[kp_df.index.isin(g_full.nodes)]

In [14]:
# Simplify the full graph while keeping every key-point node.
g = simplify_graph(g_full, nodes_to_keep = set(kp_df.index))

  0%|          | 0/1690690 [00:00<?, ?it/s]

In [15]:
# Attach per-node annotations to the simplified graph.
# NOTE(review): an `annotate_graph` function is also defined further down in
# this notebook; which implementation runs here depends on cell execution order.
g = annotate_graph(g, g_full, kp_df)

  0%|          | 0/30345 [00:00<?, ?it/s]

In [16]:
# Split catchments using the `split_*` limits configured above.
g1 = break_up_large_catchments(g, g_full, kp_df, 
                               max_area=split_max_area, 
                               max_chanel_length=split_max_chanel_length)

  0%|          | 0/4774 [00:00<?, ?it/s]

  0%|          | 0/35119 [00:00<?, ?it/s]

  0%|          | 0/1428 [00:00<?, ?it/s]

  0%|          | 0/36547 [00:00<?, ?it/s]

IOStream.flush timed out


  0%|          | 0/209 [00:00<?, ?it/s]

  0%|          | 0/36756 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/36796 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

  0%|          | 0/36818 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/36829 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/36833 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/36836 [00:00<?, ?it/s]

In [17]:
# Simplify small catchments using the `simplify_*` limits configured above,
# then report on the channels that remain below the minimum size (see output).
g2 = simplify_small_catchments(g1, g_full, kp_df,
                               min_area=simplify_min_area,
                               min_chanel_length=simplify_min_channel_length)
analyze_small(g2)

  0%|          | 0/1824 [00:00<?, ?it/s]

  0%|          | 0/35083 [00:00<?, ?it/s]


    195 chanels are below the minimum size threshold. Among those:
    - 68 are output nodes that can not be merged into successors
    - 86 are key points
    - 41 are small area but long chanels
    
    


In [18]:
# Compute the catchment areas for the final graph from the `dir` raster.
# `catchments` is used as a mapping further down (`catchments.values()`).
basin, catchments = compute_all_catchment_areas(g2, fdir_ds)

  0%|          | 0/35083 [00:00<?, ?it/s]

  0%|          | 0/35083 [00:00<?, ?it/s]

In [19]:
# Hand the graph, basin, catchments and key points to dump_graph under the name "FR_10_base".
dump_graph("FR_10_base", g2, basin, catchments, kp_df)

In [16]:
def annotate_graph(simplified_g, original_g, kp_df=None, check_pathes=True):
    """Return a copy of ``simplified_g`` with per-node annotations attached.

    Parameters
    ----------
    simplified_g : directed networkx graph
        Graph to annotate. It is copied first; the input is left untouched.
    original_g : directed networkx graph
        Graph in which the path from each predecessor to a node of
        ``simplified_g`` is looked up.
    kp_df : pandas.DataFrame, optional
        Key-point table indexed by node. Its columns (except ``lon``, ``lat``,
        ``x`` and ``y``) are copied onto the matching nodes. When it has rows,
        the columns ``is_dam`` and ``is_gauge`` are required. ``None`` (the
        default) means an empty table.
    check_pathes : bool
        When true, assert that the paths from all predecessors of a node have
        the same length.

    Returns
    -------
    The annotated copy. Every node carries:

    - ``is_root`` / ``is_leaf``: no successors / no predecessors;
    - ``drainage_area``: the node's ``upa`` value from ``nodes_df``;
    - ``catchment_area``: that value minus the summed ``upa`` of the direct
      predecessors;
    - ``river_channel``: nodes of ``original_g`` on the shortest path from the
      first predecessor to the node, excluding that predecessor
      (``[node]`` for a leaf);
    - ``out_node``: last element of ``river_channel``;
    - ``channel_length``: length of ``river_channel``;
    - ``is_key_point``, ``is_dam``, ``is_gauge``: ``False`` unless the node is
      in ``kp_df``.

    Key-point nodes additionally get a ``color`` looked up in ``NODE_COLOR``
    by ``(is_dam, is_gauge)``.

    Raises
    ------
    AssertionError
        If ``check_pathes`` is true and the predecessor paths differ in length.
    KeyError
        If an index entry of ``kp_df`` is not a node of ``simplified_g``.
    """
    # A fresh empty frame per call instead of one shared default instance
    # created when the function is defined.
    if kp_df is None:
        kp_df = pd.DataFrame()

    simplified_g = simplified_g.copy()

    # Coordinate columns are not copied onto the nodes.
    kp_df = kp_df.drop(columns=["lon", "lat", "x", "y"], errors="ignore")

    nx.set_node_attributes(simplified_g, kp_df.T.to_dict("dict"))
    # Look the column up once; it is read for every node below.
    upa = nodes_df(simplified_g)["upa"]

    for node in tqdm(simplified_g.nodes):
        attrs = simplified_g.nodes[node]
        successors = list(simplified_g.successors(node))
        predecessors = list(simplified_g.predecessors(node))

        attrs["is_root"] = len(successors) == 0
        attrs["is_leaf"] = len(predecessors) == 0
        attrs["drainage_area"] = upa.loc[node]
        attrs["catchment_area"] = upa.loc[node] - upa.loc[predecessors].sum()

        if predecessors:
            # Drop the first element of each path: it is the predecessor itself.
            pathes = [nx.shortest_path(original_g, p, node)[1:] for p in predecessors]
            if check_pathes:
                expected_len = len(pathes[0])
                assert all(len(p) == expected_len for p in pathes), (
                    f"predecessor paths to node {node!r} differ in length: "
                    f"{[len(p) for p in pathes]}"
                )
            path = pathes[0]
        else:
            # A leaf has no upstream node in the simplified graph.
            path = [node]

        attrs["river_channel"] = path
        attrs["out_node"] = path[-1]
        attrs["channel_length"] = len(path)

        # Key-point defaults; overwritten below for nodes listed in kp_df.
        attrs["is_key_point"] = False
        attrs["is_dam"] = False
        attrs["is_gauge"] = False

    for node in kp_df.index:
        attrs = simplified_g.nodes[node]
        attrs["is_key_point"] = True
        attrs["is_dam"] = kp_df.loc[node, "is_dam"]
        attrs["is_gauge"] = kp_df.loc[node, "is_gauge"]
        attrs["color"] = NODE_COLOR[(attrs["is_dam"], attrs["is_gauge"])]

    return simplified_g

In [None]:
# Single-call segmentation with river_upa_thr=10; presumably wraps the same
# steps as the manual cells above — TODO confirm. The dump is left disabled.
g, basin, catchments = segment_river_network(upa_ds, fdir_ds, kp_df,
                                             river_upa_thr=10, 
                                             split_max_area=30, 
                                             split_max_chanel_length=3,
                                             simplify_min_area=1, 
                                             simplify_min_channel_length=20)
#dump_graph("MERIT_10_base", g, basin, catchments, kp_df)

  0%|          | 0/2804309 [00:00<?, ?it/s]

  0%|          | 0/49786 [00:00<?, ?it/s]

In [None]:
# NOTE(review): leftover scratch cell — it only evaluates the literal 0 and can likely be deleted.
0

In [9]:
# Segmentation with river_upa_thr=20 / split_max_area=40; the result is handed
# to dump_graph under the name "MERIT_20_base".
# NOTE(review): three values are unpacked here, but the JFlow cells further
# down unpack four from the same function — confirm the current signature.
g, basin, catchments = segment_river_network(upa_ds, fdir_ds, kp_df,
                                             river_upa_thr=20, 
                                             split_max_area=40, 
                                             split_max_chanel_length=3,
                                             simplify_min_area=1, 
                                             simplify_min_channel_length=20)
dump_graph("MERIT_20_base", g, basin, catchments, kp_df)

  0%|          | 0/703301 [00:00<?, ?it/s]

  0%|          | 0/11472 [00:00<?, ?it/s]

  0%|          | 0/2963 [00:00<?, ?it/s]

  0%|          | 0/14435 [00:00<?, ?it/s]

  0%|          | 0/1530 [00:00<?, ?it/s]

  0%|          | 0/15965 [00:00<?, ?it/s]

  0%|          | 0/342 [00:00<?, ?it/s]

  0%|          | 0/16307 [00:00<?, ?it/s]

  0%|          | 0/54 [00:00<?, ?it/s]

  0%|          | 0/16361 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/16369 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/16371 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/16372 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/16373 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/16374 [00:00<?, ?it/s]

  0%|          | 0/569 [00:00<?, ?it/s]

  0%|          | 0/15873 [00:00<?, ?it/s]


    338 chanels are below the minimum size threshold. Among those:
    - 68 are output nodes that can not be merged into successors
    - 263 are key points
    - 7 are small area but long chanels
    
    


  0%|          | 0/15873 [00:00<?, ?it/s]

  0%|          | 0/15873 [00:00<?, ?it/s]

In [10]:
# Distinct Python types found among the catchment geometries.
{type(geom) for geom in catchments.values()}

{shapely.geometry.polygon.Polygon}

### Extract JFlow graphs

Here we should loop over the different graph segmentation configurations.

In [6]:
# Load the JFlow rasters. These two calls used to be commented out, which made
# the cell depend on `fdir_j` / `upa_j` left over from an earlier kernel
# session (NameError after Restart & Run All).
fdir_j = load_topo("dir", "JFlow")
upa_j = load_topo("upa", "JFlow")
# From here on `upa_ds` / `fdir_ds` refer to the JFlow rasters, not the ones
# loaded from MERIT_processed at the top of the notebook.
upa_ds, fdir_ds = upa_j, fdir_j
kp_df = load_and_map_kp(upa_j)
# Drop repeated index entries, keeping the first occurrence of each.
kp_df = kp_df[~kp_df.index.duplicated()]

855
855
855
1842
1842
1842


  return GeometryArray(data, crs=_get_common_crs(to_concat))


In [None]:
# JFlow segmentation with river_upa_thr=5; the result is handed to dump_graph
# under the name "JFlow_5_v1", using the returned `kp` instead of `kp_df`.
# NOTE(review): four values are unpacked here, whereas the MERIT cells unpack
# three from the same function — confirm the current signature.
g, kp, basin, catchments = segment_river_network(upa_ds, fdir_ds, kp_df,
                                             river_upa_thr=5, 
                                             split_max_area=15, 
                                             split_max_chanel_length=3,
                                             simplify_min_area=1, 
                                             simplify_min_channel_length=20)
dump_graph("JFlow_5_v1", g, basin, catchments, kp)

In [None]:
# JFlow segmentation with river_upa_thr=10; the result is handed to dump_graph
# under the name "JFlow_10_v1".
# NOTE(review): the captured output below stops after two progress bars and a
# run of "IOStream.flush timed out" messages — this run may not have finished.
g, kp, basin, catchments = segment_river_network(upa_ds, fdir_ds, kp_df,
                                             river_upa_thr=10, 
                                             split_max_area=30, 
                                             split_max_chanel_length=3,
                                             simplify_min_area=2, 
                                             simplify_min_channel_length=40)
dump_graph("JFlow_10_v1", g, basin, catchments, kp)

  0%|          | 0/3133851 [00:00<?, ?it/s]

  0%|          | 0/21999 [00:00<?, ?it/s]

IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out


In [12]:
# JFlow segmentation with river_upa_thr=20; the result is handed to dump_graph
# under the name "JFlow_20_v1".
g, kp, basin, catchments = segment_river_network(upa_ds, fdir_ds, kp_df,
                                             river_upa_thr=20, 
                                             split_max_area=40, 
                                             split_max_chanel_length=3,
                                             simplify_min_area=3, 
                                             simplify_min_channel_length=60)
dump_graph("JFlow_20_v1", g, basin, catchments, kp)

  0%|          | 0/2200297 [00:00<?, ?it/s]

  0%|          | 0/12279 [00:00<?, ?it/s]

IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out


  0%|          | 0/2861 [00:00<?, ?it/s]

  0%|          | 0/15140 [00:00<?, ?it/s]

IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out


  0%|          | 0/1421 [00:00<?, ?it/s]

  0%|          | 0/16561 [00:00<?, ?it/s]

  0%|          | 0/352 [00:00<?, ?it/s]

  0%|          | 0/16913 [00:00<?, ?it/s]

  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/16962 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/16967 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/16080 [00:00<?, ?it/s]


    541 chanels are below the minimum size threshold. Among those:
    - 77 are output nodes that can not be merged into successors
    - 397 are key points
    - 227 are small area but long chanels
    
    


  0%|          | 0/16080 [00:00<?, ?it/s]

  0%|          | 0/16080 [00:00<?, ?it/s]

In [7]:
# JFlow segmentation with river_upa_thr=40; the result is handed to dump_graph
# under the name "JFlow_40_v1".
# Note that simplify_min_channel_length stays at 60, the same value as in the
# river_upa_thr=20 run.
g, kp, basin, catchments = segment_river_network(upa_ds, fdir_ds, kp_df,
                                             river_upa_thr=40, 
                                             split_max_area=80, 
                                             split_max_chanel_length=3,
                                             simplify_min_area=5, 
                                             simplify_min_channel_length=60)
dump_graph("JFlow_40_v1", g, basin, catchments, kp)

IOStream.flush timed out


  0%|          | 0/1553200 [00:00<?, ?it/s]

  0%|          | 0/7541 [00:00<?, ?it/s]

IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out


  0%|          | 0/1198 [00:00<?, ?it/s]

  0%|          | 0/8739 [00:00<?, ?it/s]

IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out


  0%|          | 0/531 [00:00<?, ?it/s]

  0%|          | 0/9270 [00:00<?, ?it/s]

  0%|          | 0/113 [00:00<?, ?it/s]

  0%|          | 0/9383 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/9391 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9392 [00:00<?, ?it/s]

IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out


  0%|          | 0/570 [00:00<?, ?it/s]

  0%|          | 0/8893 [00:00<?, ?it/s]


    383 chanels are below the minimum size threshold. Among those:
    - 51 are output nodes that can not be merged into successors
    - 291 are key points
    - 160 are small area but long chanels
    
    


  0%|          | 0/8893 [00:00<?, ?it/s]

  0%|          | 0/8893 [00:00<?, ?it/s]

### MERIT

- Maybe adjust the channel length parameters based on scale?

In [None]:
# Disabled: switch `upa_ds` / `fdir_ds` / `kp_df` back to the MERIT data.
# NOTE(review): while this stays commented out, the "MERIT_*_latest" cells
# below reuse whatever `upa_ds`, `fdir_ds` and `kp_df` were last assigned — in
# notebook order that is the JFlow data from the previous section. `upa_m` and
# `fdir_m` are also not defined anywhere in this notebook.
#upa_ds, fdir_ds = upa_m, fdir_m
#kp_df = load_and_map_kp(upa_ds, name="merit_new")
#kp_df = kp_df[~kp_df.index.duplicated()]

In [None]:
# Segmentation with river_upa_thr=5; the result is handed to dump_graph under
# the name "MERIT_5_latest". 20//3 evaluates to 6.
# NOTE(review): three values are unpacked here, but the JFlow cells above
# unpack four from the same function — confirm the current signature.
g, basin, catchments = segment_river_network(upa_ds, fdir_ds, kp_df,
                                             river_upa_thr=5, 
                                             split_max_area=15, 
                                             split_max_chanel_length=3,
                                             simplify_min_area=1, 
                                             simplify_min_channel_length=20//3)
dump_graph("MERIT_5_latest", g, basin, catchments, kp_df)

In [None]:
# Segmentation with river_upa_thr=10; the result is handed to dump_graph under
# the name "MERIT_10_latest". 40//3 evaluates to 13.
g, basin, catchments = segment_river_network(upa_ds, fdir_ds, kp_df,
                                             river_upa_thr=10, 
                                             split_max_area=30, 
                                             split_max_chanel_length=3,
                                             simplify_min_area=2, 
                                             simplify_min_channel_length=40//3)
dump_graph("MERIT_10_latest", g, basin, catchments, kp_df)

In [None]:
# Segmentation with river_upa_thr=20; the result is handed to dump_graph under
# the name "MERIT_20_latest". 60//3 evaluates to 20.
g, basin, catchments = segment_river_network(upa_ds, fdir_ds, kp_df,
                                             river_upa_thr=20, 
                                             split_max_area=40, 
                                             split_max_chanel_length=3,
                                             simplify_min_area=3, 
                                             simplify_min_channel_length=60//3)
dump_graph("MERIT_20_latest", g, basin, catchments, kp_df)

In [None]:
# Segmentation with river_upa_thr=40; the result is handed to dump_graph under
# the name "MERIT_40_latest". 60//3 evaluates to 20, the same minimum channel
# length as in the river_upa_thr=20 run.
g, basin, catchments = segment_river_network(upa_ds, fdir_ds, kp_df,
                                             river_upa_thr=40, 
                                             split_max_area=80, 
                                             split_max_chanel_length=3,
                                             simplify_min_area=5, 
                                             simplify_min_channel_length=60//3)
dump_graph("MERIT_40_latest", g, basin, catchments, kp_df)