In [22]:
from pathlib import Path
from tqdm.notebook import tqdm

import numpy as np
import pandas as pd
import torch

import sys
sys.path.insert(0, "./DiffRoute")
sys.path.insert(0, "./DiffHydro")

from diffhydro import LTIStagedRouter, StagedCatchmentInterpolator, TimeSeriesThDF
from diffhydro.utils import Timer, nse_fn
from diffroute.io import read_rapid_graph, read_multiple_rapid_graphs

# Short alias for the CUDA synchronisation call.
# NOTE(review): not used in the cells visible here — presumably kept for timing GPU work; confirm.
sync = torch.cuda.synchronize
# Load the line_profiler IPython extension. When the extension is already loaded
# in the running kernel, this only prints a notice suggesting %reload_ext.
%load_ext line_profiler

The line_profiler extension is already loaded. To reload it, use:
  %reload_ext line_profiler


In [23]:
# --- Graph partitioning thresholds (forwarded to read_rapid_graph) ---
node_thr = 10_000
plength_thr = 10_000

# --- Routing model settings ---
dt = 1 / 24
max_delay = 32
cascade = 1
irf_fn = "muskingum"
sampling_mode = "avg"
block_size = 16
block_f = 128

# --- Experiment selection ---
vpu = "305"
device = "cuda:7"

In [4]:
# Root of the geoflow data tree; every input path below is resolved against it.
root = Path("/data_prediction005/SYSTEM/prediction002/home/tristan/data/geoflow/")

# Inputs stored under <root>/data
runoff_path = root.joinpath("data", "daily_sparse_runoff.feather")
interp_weight_path = root.joinpath("data", "interp_weight.pkl")
vpu_config_path = root.joinpath("data", "configs")

# Per-VPU discharge feather files sit directly under <root>/retro_feather
discharge_path = root.joinpath("retro_feather")

In [5]:
# Runoff table: keep index labels up to "2019", then divide by the number of
# seconds in a day to convert the values to m3 / s.
all_runoff = (
    pd.read_feather(runoff_path)
    .loc[:"2019"]
    .div(3600. * 24)
)

# Interpolation weights, re-indexed by river_id.
all_interp_df = (
    pd.read_pickle(interp_weight_path)
    .set_index("river_id")
)

CPU times: user 57.5 s, sys: 2min 9s, total: 3min 7s
Wall time: 33.2 s
CPU times: user 196 ms, sys: 1.01 s, total: 1.21 s
Wall time: 1.58 s


### Evaluation per VPU against GEOGLOWS

In [18]:
# Discharge table of the selected VPU, wrapped as a TimeSeriesThDF on the
# target device; `lbl` is what the routed output is scored against later.
q = pd.read_feather(discharge_path / f"{vpu}.feather")
lbl = TimeSeriesThDF.from_pandas(q).to(device)

# Graph of the VPU, read with the partitioning thresholds from the config cell.
g = read_rapid_graph(
    vpu_config_path / vpu,
    plength_thr=plength_thr,
    node_thr=node_thr,
).to(device)

# Restrict the interpolation table to this graph's nodes, then restrict the
# runoff table to the pixel columns those rows reference.
interp_df = all_interp_df.loc[g.nodes]
pix_idxs = interp_df["pixel_idx"].unique()
runoff = TimeSeriesThDF.from_pandas(all_runoff[pix_idxs]).to(device)

cat = StagedCatchmentInterpolator(g, runoff, interp_df).to(device)

# NOTE(review): `irf_fn` is set in the config cell but is not passed to the
# router here — confirm this is intended.
model = LTIStagedRouter(
    max_delay=max_delay,
    block_size=block_size,
    block_f=block_f,
    dt=dt,
    cascade=cascade,
    sampling_mode=sampling_mode,
).to(device)

#### Upstream stats computations ... ####


Computing breakpoints:   0%|          | 0/15562 [00:00<?, ?it/s]

#### Segmentation into subgraphs ... ####
Removing edges...


  0%|          | 0/15562 [00:00<?, ?it/s]

Segment graph into connected components....
Build subgraphs for each cluster and node-cluster map...


  0%|          | 0/207 [00:00<?, ?it/s]

Establish dependencies between clusters...


  0%|          | 0/153 [00:00<?, ?it/s]

#### Grouping subgraphs to cluster and infering dependencies ... ####
Initialize dependencies...
Associate clusters for remaining subgraphs...


0it [00:00, ?it/s]

Merging graphs...


  0%|          | 0/19 [00:00<?, ?it/s]

Computing merged graphs node idxs...


  0%|          | 0/19 [00:00<?, ?it/s]

Match breakpoint nodes across clusters...


  0%|          | 0/19 [00:00<?, ?it/s]

  0%|          | 0/19 [00:00<?, ?it/s]

In [21]:
# Score every routed output against the reference discharge `lbl` with nse_fn.
nses = []  # one pd.Series per yielded output, indexed by that output's columns
runoffs = cat.yield_all_runoffs(runoff)

# Evaluation only: run the routing generator and the scoring with autograd
# disabled, so no graph is recorded and the score tensors can be converted to
# NumPy even if the model holds parameters that require gradients.
with torch.no_grad():
    for output in model.route_all_clusters_yield(runoffs, g,
                                                 display_progress=True):
        y = lbl[output.columns]  # reference series for the same columns
        nse = nse_fn(output.values, y.values)
        nses.append(pd.Series(nse.squeeze().cpu().numpy(), index=output.columns))

0it [00:00, ?it/s]