In [1]:
# Reload edited project modules automatically before each cell runs.
%load_ext autoreload
%autoreload 2
# Register the TensorBoard notebook extension (provides the %tensorboard magic).
%load_ext tensorboard

In [2]:
import glob
import gc

import torch
import xarray as xr
from neuralhydrology.nh_run import start_run, eval_run
from scripts.file_manipulator import file_rewriter

from pathlib import Path

import random
# Seed Python's built-in RNG so `random`-based steps in this notebook repeat.
random.seed(42)

# Directory handed to file_rewriter as `ts_dir` in the cells below.
ts_dir = Path('../geo_data/time_series')

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Using device:', device)
print()

# Extra diagnostics for device 0, only meaningful on CUDA.
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    # Both counters are reported in bytes and converted to GiB for display.
    for label, query in (('Allocated:', torch.cuda.memory_allocated),
                         ('Cached:   ', torch.cuda.memory_reserved)):
        print(label, round(query(0) / 1024**3, 1), 'GB')

Using device: cuda

NVIDIA GeForce RTX 3080 Ti
Memory Usage:
Allocated: 0.0 GB
Cached:    0.0 GB


### Run for different configs


#### ERA5 precipitation

##### train

In [7]:
# ERA5 experiment: target 'q_mm_day', predictors t_max_e5 / t_min_e5 / prcp_e5.
# file_rewriter is a project helper; presumably it rewrites the files in
# ts_dir for the selected variables — TODO confirm in scripts.file_manipulator.
file_rewriter(
    q_pathes=glob.glob('../geo_data/great_db/nc_all_q/*.nc'),
    ts_dir=ts_dir,
    hydro_target='q_mm_day',
    meteo_predictors=['t_max_e5', 't_min_e5', 'prcp_e5'],
)

# Training is launched only when a CUDA device is visible; otherwise the cell
# finishes after the rewrite step without starting a run.
# NOTE(review): the other precipitation experiments read their config from
# ./no_static_configs/, and the logged run_dir here is runs_no_static/... —
# confirm that ./static_configs/era5_static_qmm.yml is the intended file.
if torch.cuda.is_available():
    start_run(
        config_file=Path("./static_configs/era5_static_qmm.yml"),
    )


2023-04-06 08:23:15,375: Logging to runs_no_static/era5_prcp_0604_082315/output.log initialized.
2023-04-06 08:23:15,375: ### Folder structure created at runs_no_static/era5_prcp_0604_082315
2023-04-06 08:23:15,376: ### Run configurations for era5_prcp
2023-04-06 08:23:15,376: experiment_name: era5_prcp
2023-04-06 08:23:15,377: run_dir: runs_no_static/era5_prcp_0604_082315
2023-04-06 08:23:15,377: train_basin_file: openf_basins.txt
2023-04-06 08:23:15,377: validation_basin_file: openf_basins.txt
2023-04-06 08:23:15,378: test_basin_file: openf_basins.txt
2023-04-06 08:23:15,378: train_start_date: 2009-01-01 00:00:00
2023-04-06 08:23:15,379: train_end_date: 2016-12-31 00:00:00
2023-04-06 08:23:15,379: validation_start_date: 2017-01-01 00:00:00
2023-04-06 08:23:15,380: validation_end_date: 2018-12-31 00:00:00
2023-04-06 08:23:15,380: test_start_date: 2019-01-01 00:00:00
2023-04-06 08:23:15,380: test_end_date: 2020-12-31 00:00:00
2023-04-06 08:23:15,381: per_basin_train_periods_file: None


  per_basin_target_stds = torch.tensor([np.nanstd(obs, axis=1)], dtype=torch.float32)


100%|██████████| 1106/1106 [00:00<00:00, 1710.54it/s]
2023-04-06 08:23:39,581: Create lookup table and convert to pytorch tensor
100%|██████████| 1106/1106 [00:14<00:00, 77.26it/s] 
# Epoch 1: 100%|██████████| 12624/12624 [05:36<00:00, 37.46it/s, Loss: 0.0001]
2023-04-06 08:29:33,395: Epoch 1 average loss: 0.0012518834520092812
# Epoch 2: 100%|██████████| 12624/12624 [05:49<00:00, 36.13it/s, Loss: 0.0001]
2023-04-06 08:35:22,768: Epoch 2 average loss: 9.055587600550604e-05
# Epoch 3: 100%|██████████| 12624/12624 [05:15<00:00, 40.05it/s, Loss: 0.0001]
2023-04-06 08:40:37,991: Epoch 3 average loss: 7.986388505711523e-05
# Validation: 100%|██████████| 1106/1106 [01:08<00:00, 16.04it/s]
2023-04-06 08:41:47,100: Stored results at runs_no_static/era5_prcp_0604_082315/validation/model_epoch003/validation_results.p
2023-04-06 08:41:47,108: Epoch 3 average validation loss: 0.00011 -- Median validation metrics: NSE: 0.41183, KGE: 0.42898
# Epoch 4: 100%|██████████| 12624/12624 [05:19<00:00, 39.5

##### test

In [None]:
# Evaluate a stored run on the "test" period.
# NOTE(review): this directory (runs_q_mm, stamp 0404_074943) is not the one
# logged by the training cell above (runs_no_static/era5_prcp_0604_082315) —
# confirm which run is meant to be evaluated.
run_dir = Path("./runs_q_mm/era5_prcp_0404_074943/")
eval_run(period="test", run_dir=run_dir)

#### ERA5-Land precipitation

##### train

In [8]:
# ERA5-Land experiment: target 'q_mm_day', predictors
# t_max_e5l / t_min_e5l / prcp_e5l.
# file_rewriter is a project helper; presumably it rewrites the files in
# ts_dir for the selected variables — TODO confirm in scripts.file_manipulator.
file_rewriter(
    q_pathes=glob.glob('../geo_data/great_db/nc_all_q/*.nc'),
    ts_dir=ts_dir,
    hydro_target='q_mm_day',
    meteo_predictors=['t_max_e5l', 't_min_e5l', 'prcp_e5l'],
)

# Training is launched only when a CUDA device is visible.
if torch.cuda.is_available():
    start_run(
        config_file=Path("./no_static_configs/era5Land_qmm.yml"),
    )

2023-04-06 11:13:47,918: Logging to runs_no_static/era5Land_prcp_0604_111347/output.log initialized.
2023-04-06 11:13:47,919: ### Folder structure created at runs_no_static/era5Land_prcp_0604_111347
2023-04-06 11:13:47,919: ### Run configurations for era5Land_prcp
2023-04-06 11:13:47,920: experiment_name: era5Land_prcp
2023-04-06 11:13:47,920: run_dir: runs_no_static/era5Land_prcp_0604_111347
2023-04-06 11:13:47,921: train_basin_file: openf_basins.txt
2023-04-06 11:13:47,921: validation_basin_file: openf_basins.txt
2023-04-06 11:13:47,921: test_basin_file: openf_basins.txt
2023-04-06 11:13:47,922: train_start_date: 2009-01-01 00:00:00
2023-04-06 11:13:47,922: train_end_date: 2016-12-31 00:00:00
2023-04-06 11:13:47,922: validation_start_date: 2017-01-01 00:00:00
2023-04-06 11:13:47,923: validation_end_date: 2018-12-31 00:00:00
2023-04-06 11:13:47,923: test_start_date: 2019-01-01 00:00:00
2023-04-06 11:13:47,923: test_end_date: 2020-12-31 00:00:00
2023-04-06 11:13:47,924: per_basin_train

##### test

In [None]:
# Evaluate a stored run on the "test" period.
# NOTE(review): this directory (runs_q_cms, stamp 2803_125025) is not the one
# logged by the training cell above
# (runs_no_static/era5Land_prcp_0604_111347) — confirm which run is meant.
run_dir = Path("./runs_q_cms/era5_land_prcp_2803_125025")
eval_run(period="test", run_dir=run_dir)

#### GPCP precipitation

##### train

In [9]:
# GPCP experiment: target 'q_mm_day', predictors
# t_max_e5 / t_min_e5 / prcp_gpcp.
# file_rewriter is a project helper; presumably it rewrites the files in
# ts_dir for the selected variables — TODO confirm in scripts.file_manipulator.
file_rewriter(
    q_pathes=glob.glob('../geo_data/great_db/nc_all_q/*.nc'),
    ts_dir=ts_dir,
    hydro_target='q_mm_day',
    meteo_predictors=['t_max_e5', 't_min_e5', 'prcp_gpcp'],
)

# Training is launched only when a CUDA device is visible.
if torch.cuda.is_available():
    start_run(
        config_file=Path("./no_static_configs/gpcp_qmm.yml"),
    )

# Force a garbage-collection pass after the run; as the last expression of
# the cell, its integer return value is what the notebook displays.
gc.collect()

2023-04-06 13:57:41,483: Logging to runs_no_static/gpcp_qmm_0604_135741/output.log initialized.
2023-04-06 13:57:41,484: ### Folder structure created at runs_no_static/gpcp_qmm_0604_135741
2023-04-06 13:57:41,484: ### Run configurations for gpcp_qmm
2023-04-06 13:57:41,485: experiment_name: gpcp_qmm
2023-04-06 13:57:41,485: run_dir: runs_no_static/gpcp_qmm_0604_135741
2023-04-06 13:57:41,486: train_basin_file: openf_basins.txt
2023-04-06 13:57:41,486: validation_basin_file: openf_basins.txt
2023-04-06 13:57:41,486: test_basin_file: openf_basins.txt
2023-04-06 13:57:41,487: train_start_date: 2009-01-01 00:00:00
2023-04-06 13:57:41,487: train_end_date: 2016-12-31 00:00:00
2023-04-06 13:57:41,488: validation_start_date: 2017-01-01 00:00:00
2023-04-06 13:57:41,488: validation_end_date: 2018-12-31 00:00:00
2023-04-06 13:57:41,489: test_start_date: 2019-01-01 00:00:00
2023-04-06 13:57:41,489: test_end_date: 2020-12-31 00:00:00
2023-04-06 13:57:41,490: per_basin_train_periods_file: None
2023-

0

##### test

In [6]:
# Evaluate a stored run on the "test" period.
# NOTE(review): this directory (runs_q_cms, stamp 3003_063422) is not the one
# logged by the training cell above (runs_no_static/gpcp_qmm_0604_135741) —
# confirm which run is meant to be evaluated.
run_dir = Path("./runs_q_cms/gpcp_prcp_3003_063422")
eval_run(period="test", run_dir=run_dir)

# Evaluation: 100%|██████████| 1136/1136 [02:14<00:00,  8.43it/s]


#### IMERG precipitation

##### train

In [10]:
# IMERG experiment: target 'q_mm_day', predictors
# t_max_e5 / t_min_e5 / prcp_imerg.
# file_rewriter is a project helper; presumably it rewrites the files in
# ts_dir for the selected variables — TODO confirm in scripts.file_manipulator.
file_rewriter(
    q_pathes=glob.glob('../geo_data/great_db/nc_all_q/*.nc'),
    ts_dir=ts_dir,
    hydro_target='q_mm_day',
    meteo_predictors=['t_max_e5', 't_min_e5', 'prcp_imerg'],
)

# Training is launched only when a CUDA device is visible.
if torch.cuda.is_available():
    start_run(
        config_file=Path("./no_static_configs/imerg_qmm.yml"),
    )

# Force a garbage-collection pass after the run; as the last expression of
# the cell, its integer return value is what the notebook displays.
gc.collect()


2023-04-06 16:41:49,717: Logging to runs_no_static/imerg_qmm_0604_164149/output.log initialized.
2023-04-06 16:41:49,717: ### Folder structure created at runs_no_static/imerg_qmm_0604_164149
2023-04-06 16:41:49,718: ### Run configurations for imerg_qmm
2023-04-06 16:41:49,718: experiment_name: imerg_qmm
2023-04-06 16:41:49,719: run_dir: runs_no_static/imerg_qmm_0604_164149
2023-04-06 16:41:49,719: train_basin_file: openf_basins.txt
2023-04-06 16:41:49,720: validation_basin_file: openf_basins.txt
2023-04-06 16:41:49,720: test_basin_file: openf_basins.txt
2023-04-06 16:41:49,720: train_start_date: 2009-01-01 00:00:00
2023-04-06 16:41:49,721: train_end_date: 2016-12-31 00:00:00
2023-04-06 16:41:49,721: validation_start_date: 2017-01-01 00:00:00
2023-04-06 16:41:49,722: validation_end_date: 2018-12-31 00:00:00
2023-04-06 16:41:49,722: test_start_date: 2019-01-01 00:00:00
2023-04-06 16:41:49,723: test_end_date: 2020-12-31 00:00:00
2023-04-06 16:41:49,723: per_basin_train_periods_file: None


28

##### test

In [None]:
# Evaluate a stored run on the "test" period.
# NOTE(review): this directory (runs_q_cms, stamp 3003_135405) is not the one
# logged by the training cell above (runs_no_static/imerg_qmm_0604_164149) —
# confirm which run is meant to be evaluated.
run_dir = Path("./runs_q_cms/imerg_prcp_3003_135405")
eval_run(period="test", run_dir=run_dir)

#### MSWEP precipitation

##### train

In [13]:
# MSWEP experiment: target 'q_mm_day', predictors
# t_max_e5 / t_min_e5 / prcp_mswep.
# file_rewriter is a project helper; presumably it rewrites the files in
# ts_dir for the selected variables — TODO confirm in scripts.file_manipulator.
file_rewriter(
    q_pathes=glob.glob('../geo_data/great_db/nc_all_q/*.nc'),
    ts_dir=ts_dir,
    hydro_target='q_mm_day',
    meteo_predictors=['t_max_e5', 't_min_e5', 'prcp_mswep'],
)

# Training is launched only when a CUDA device is visible.
if torch.cuda.is_available():
    start_run(
        config_file=Path("./no_static_configs/mswep_qmm.yml"),
    )

# Force a garbage-collection pass after the run; as the last expression of
# the cell, its integer return value is what the notebook displays.
gc.collect()

2023-04-07 11:35:47,292: Logging to runs_no_static/mswep_qmm_0704_113547/output.log initialized.
2023-04-07 11:35:47,292: ### Folder structure created at runs_no_static/mswep_qmm_0704_113547
2023-04-07 11:35:47,292: ### Run configurations for mswep_qmm
2023-04-07 11:35:47,293: experiment_name: mswep_qmm
2023-04-07 11:35:47,293: run_dir: runs_no_static/mswep_qmm_0704_113547
2023-04-07 11:35:47,293: train_basin_file: openf_basins.txt
2023-04-07 11:35:47,294: validation_basin_file: openf_basins.txt
2023-04-07 11:35:47,294: test_basin_file: openf_basins.txt
2023-04-07 11:35:47,294: train_start_date: 2009-01-01 00:00:00
2023-04-07 11:35:47,295: train_end_date: 2016-12-31 00:00:00
2023-04-07 11:35:47,295: validation_start_date: 2017-01-01 00:00:00
2023-04-07 11:35:47,295: validation_end_date: 2018-12-31 00:00:00
2023-04-07 11:35:47,295: test_start_date: 2019-01-01 00:00:00
2023-04-07 11:35:47,296: test_end_date: 2020-12-31 00:00:00
2023-04-07 11:35:47,297: per_basin_train_periods_file: None


  per_basin_target_stds = torch.tensor([np.nanstd(obs, axis=1)], dtype=torch.float32)


100%|██████████| 1106/1106 [00:00<00:00, 1708.92it/s]
2023-04-07 11:36:11,540: Create lookup table and convert to pytorch tensor
100%|██████████| 1106/1106 [00:13<00:00, 79.16it/s] 


##### test

In [8]:
# Evaluate a stored run on the "test" period.
# NOTE(review): this directory (runs_q_cms, stamp 3103_073833) is not the one
# logged by the training cell above (runs_no_static/mswep_qmm_0704_113547) —
# confirm which run is meant to be evaluated.
run_dir = Path("./runs_q_cms/mswep_prcp_3103_073833")
eval_run(period="test", run_dir=run_dir)

# Evaluation: 100%|██████████| 1136/1136 [02:14<00:00,  8.46it/s]


#### Level

In [None]:
# Level experiment: target 'lvl_sm' read from the nc_all_h files, with
# predictors t_max_e5l / t_min_e5l / prcp_e5l.
# file_rewriter is a project helper; presumably it rewrites the files in
# ts_dir for the selected variables — TODO confirm in scripts.file_manipulator.
file_rewriter(
    q_pathes=glob.glob('../geo_data/great_db/nc_all_h/*.nc'),
    ts_dir=ts_dir,
    hydro_target='lvl_sm',
    meteo_predictors=['t_max_e5l', 't_min_e5l', 'prcp_e5l'],
)

# Training is launched only when a CUDA device is visible.
if torch.cuda.is_available():
    start_run(
        config_file=Path("./configs/e5l_sm.yml"),
    )

# Force a garbage-collection pass after the run; as the last expression of
# the cell, its integer return value is what the notebook displays.
gc.collect()

##### test

### Blind forecast

In [None]:
import geopandas as gpd

# Gauge table read from the partial-discharge GeoPackage; the copy loop in the
# following cell reuses `partial_gauges`.
partial_gauges = gpd.read_file(
    '../geo_data/great_db/geometry/gauges_partial_q.gpkg')

# Write the basin list: one gauge id per line, each cast to int first.
with open('./basins_test.txt', 'w') as the_file:
    the_file.writelines(
        f'{int(gauge_name)}\n' for gauge_name in partial_gauges['gauge_id'])

2023-04-05 06:22:00,239: PROJ: internal_proj_identify: /opt/conda/share/proj/proj.db contains DATABASE.LAYOUT.VERSION.MINOR = 0 whereas a number >= 2 is expected. It comes from another PROJ installation.


In [None]:
# Copy the concatenated NetCDF file of every partially gauged basin into
# ts_dir with its 'gauge_id' variable removed, then rebuild the ERA5-Land
# inputs. Requires `partial_gauges` from the cell that writes basins_test.txt.

# Build the id lookup once instead of re-creating a list for every file.
# Ids are compared as strings so that file-name stems can also match a
# numeric 'gauge_id' column; string ids match exactly as before.
# NOTE(review): the basin-list cell casts ids with int(), so the column may
# not hold plain strings — confirm its dtype against the file names.
partial_gauge_ids = {str(gauge) for gauge in partial_gauges['gauge_id']}

q_pathes = glob.glob('../geo_data/great_db/nc_concat/*.nc')
for file in q_pathes:
    source_path = Path(file)
    gauge_id = source_path.stem  # file name without the '.nc' suffix
    if gauge_id not in partial_gauge_ids:
        continue
    filename = source_path.name
    # The context manager closes the source file even when the copy is skipped.
    with xr.open_dataset(file) as source_ds:
        try:
            # drop_vars replaces the deprecated Dataset.drop; it raises
            # ValueError when 'gauge_id' is not present in the file.
            source_ds.drop_vars('gauge_id').to_netcdf(ts_dir / filename)
        except ValueError as error:
            # Best effort, as before: leave this file out, but say so.
            print(f'Skipping {filename}: {error}')

# file_rewriter is a project helper; presumably it rewrites the files in
# ts_dir for the selected variables — TODO confirm in scripts.file_manipulator.
file_rewriter(q_pathes=glob.glob('../geo_data/great_db/nc_all_q/*.nc'),
              ts_dir=ts_dir,
              hydro_target='q_mm_day',
              meteo_predictors=['t_max_e5l', 't_min_e5l', 'prcp_e5l'])

In [None]:
# Launch the blind-forecast training run; skipped when no CUDA device is
# visible.
if torch.cuda.is_available():
    start_run(
        config_file=Path("./configs/e5l_mbs.yml"),
    )

# Force a garbage-collection pass after the run; as the last expression of
# the cell, its integer return value is what the notebook displays.
gc.collect()

### Bad regions