In [1]:
# Load the autoreload extension; mode 2 reloads imported modules before each
# cell executes, so edits to project modules are picked up without a restart.
%load_ext autoreload
%autoreload 2
# Load the TensorBoard notebook extension.
%load_ext tensorboard

In [2]:
import gc
import glob
import random
from pathlib import Path

import pandas as pd
import torch
import xarray as xr
from neuralhydrology.nh_run import start_run, eval_run

from scripts.file_manipulator import file_rewriter
# Seed Python's built-in RNG so `random`-based choices repeat between runs.
random.seed(42)

# Use the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Using device:', device)
print()

# Extra diagnostics for CUDA device 0: its name and current memory figures in GB.
if device.type == 'cuda':
    bytes_per_gb = 1024**3
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    allocated_gb = round(torch.cuda.memory_allocated(0) / bytes_per_gb, 1)
    print('Allocated:', allocated_gb, 'GB')
    reserved_gb = round(torch.cuda.memory_reserved(0) / bytes_per_gb, 1)
    print('Cached:   ', reserved_gb, 'GB')

# Directory passed as `ts_dir` to file_rewriter and used as the copy target below.
ts_dir = Path('../geo_data/time_series')

Using device: cuda

NVIDIA GeForce RTX 3080 Ti
Memory Usage:
Allocated: 0.0 GB
Cached:    0.0 GB


### Run for different configs


#### ERA5 precipitation

##### train

In [6]:
# Number of NetCDF files matched in the nc_all_q folder.
discharge_files = glob.glob('../geo_data/great_db/nc_all_q/*.nc')
len(discharge_files)

1116

In [28]:
# Spot-check the first matched nc_all_q file: does it contain all three ERA5
# forcing variables? The file is globbed and opened once (not once per
# variable) and is closed again by the context manager.
sample_path = glob.glob('../geo_data/great_db/nc_all_q/*.nc')[0]
with xr.open_dataset(sample_path) as sample_ds:
    has_era5_forcings = all(var in sample_ds.variables
                            for var in ['t_max_e5', 't_min_e5', 'prcp_e5'])
has_era5_forcings

True

In [83]:
# Static attribute table, indexed by gauge id as a string so that ids can be
# compared with the NetCDF file stems below.
attrs = pd.read_csv('../geo_data/attributes/static_data.csv')
attrs['gauge_id'] = attrs['gauge_id'].astype(str)
attrs = attrs.set_index('gauge_id')

required_vars = ['t_max_e5', 't_min_e5', 'prcp_e5', 'q_mm_day']

# Print every time-series file that lacks at least one required variable AND
# whose gauge id has no row in the attribute table.
for file in glob.glob('../geo_data/time_series/*.nc'):
    # Path.stem strips the directory and the '.nc' suffix on any platform.
    gauge = Path(file).stem
    # Only the variable names are needed, so the dataset is not converted to a
    # DataFrame; the context manager closes the file after the check.
    with xr.open_dataset(file) as ds:
        has_required_vars = all(var in ds.variables for var in required_vars)
    if not has_required_vars and gauge not in attrs.index:
        print(file)

In [10]:
# Call the project helper file_rewriter for the ERA5 setup: target q_mm_day,
# ERA5 temperature and precipitation as meteorological predictors.
era5_predictors = ['t_max_e5', 't_min_e5', 'prcp_e5']
discharge_files = glob.glob('../geo_data/great_db/nc_all_q/*.nc')
file_rewriter(
    q_pathes=discharge_files,
    ts_dir=ts_dir,
    hydro_target='q_mm_day',
    meteo_predictors=era5_predictors,
)


In [103]:
# ws_area for every gauge whose attribute row has at least one negative value.
negative_rows = attrs[attrs < 0].dropna(how='all')
attrs.loc[negative_rows.index, 'ws_area']

gauge_id
9053      302690.417
9049      291855.088
10315      10101.991
10317      14385.914
10323      33516.921
10571      17197.546
10591      25452.117
10573      24370.237
9079     1406306.389
9092     1771132.992
9803     2452604.691
11412     121610.536
11414     132451.598
11524      65615.438
11526      68702.021
72269      13178.835
72281      22345.495
72818     283948.524
7041      283757.992
7047      362042.635
7051      441594.193
7053      446837.064
8013      573491.466
8059      739093.108
8084      868251.194
8091     1038511.104
8376      787576.264
1534         331.703
70091      34944.213
70098      49280.571
70085      15727.437
70031       5512.106
5013     1645999.716
5652         167.717
6295      228883.393
19049      17160.853
19050      22524.799
3102      149465.125
3106      193194.962
78801     380757.715
10328      58166.498
70842      55984.374
70002      12880.181
70011      42682.725
70624       6715.494
70619      29753.028
77801    1393612.896
1162

In [106]:
# Gauge id with the smallest ws_area among the rows that have a negative value.
negative_index = attrs[attrs < 0].dropna(how='all').index
negative_index[attrs.loc[negative_index, 'ws_area'].argmin()]

'5652'

In [107]:
# Keep only the listed static attribute columns (order preserved).
selected_columns = [
    'for_pc_sse', 'crp_pc_sse', 'pst_pc_sse', 'inu_pc_ult', 'ire_pc_sse',
    'lka_pc_use', 'lkv_mc_usu', 'rev_mc_usu', 'gwt_cm_sav', 'prm_pc_sse',
    'sgr_dk_sav', 'slp_dg_sav', 'cly_pc_sav', 'slt_pc_sav', 'snd_pc_sav',
    'kar_pc_sse', 'urb_pc_sse', 'ws_area', 'ele_mt_sav',
]
attrs = attrs[selected_columns]

# Show the rows containing at least one negative value; non-negative cells
# are masked to NaN and rows that are entirely NaN are dropped.
negative_mask = attrs < 0
attrs[negative_mask].dropna(how='all')

Unnamed: 0_level_0,for_pc_sse,crp_pc_sse,pst_pc_sse,inu_pc_ult,ire_pc_sse,lka_pc_use,lkv_mc_usu,rev_mc_usu,gwt_cm_sav,prm_pc_sse,sgr_dk_sav,slp_dg_sav,cly_pc_sav,slt_pc_sav,snd_pc_sav,kar_pc_sse,urb_pc_sse,ws_area,ele_mt_sav
gauge_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
9053,,,,,,,,,,,,,-13.082758,,,,,,
9049,,,,,,,,,,,,,-14.094808,,,,,,
10315,,,,,,,,,,,-85.31895,,,,,,,,
10317,,,,,,,,,,,-48.411137,,,,,,,,
10323,,,,,,,,,,,,,-20.282927,,-4.072448,,,,
10571,,,,,,,,,,,-32.565051,,,,,,,,
10591,,,,,,,,,,,-11.259746,,,,,,,,
10573,,,,,,,,,,,-12.547076,,,,,,,,
9079,,,,,,,,,,,,,-168.485144,-139.330761,-135.522901,,,,
9092,,,,,,,,,,,,,-130.913485,-100.640854,-99.324936,,,,


In [39]:
import pandas as pd

# Total number of missing cells in the static attribute CSV.
static_attrs = pd.read_csv('../geo_data/attributes/static_data.csv')
static_attrs.isna().sum().sum()


0

In [24]:
# Start the ERA5 run from its config file; skipped when CUDA is unavailable.
era5_config = Path("./configs/era5_qmm.yml")
if torch.cuda.is_available():
    start_run(config_file=era5_config)


2023-04-05 20:41:00,849: Logging to runs_q_mm/era5_prcp_0504_204100/output.log initialized.
2023-04-05 20:41:00,850: ### Folder structure created at runs_q_mm/era5_prcp_0504_204100
2023-04-05 20:41:00,850: ### Run configurations for era5_prcp
2023-04-05 20:41:00,850: experiment_name: era5_prcp
2023-04-05 20:41:00,851: run_dir: runs_q_mm/era5_prcp_0504_204100
2023-04-05 20:41:00,851: train_basin_file: openf_basins.txt
2023-04-05 20:41:00,851: validation_basin_file: openf_basins.txt
2023-04-05 20:41:00,852: test_basin_file: openf_basins.txt
2023-04-05 20:41:00,852: train_start_date: 2009-01-01 00:00:00
2023-04-05 20:41:00,852: train_end_date: 2016-12-31 00:00:00
2023-04-05 20:41:00,853: validation_start_date: 2017-01-01 00:00:00
2023-04-05 20:41:00,853: validation_end_date: 2018-12-31 00:00:00
2023-04-05 20:41:00,853: test_start_date: 2019-01-01 00:00:00
2023-04-05 20:41:00,854: test_end_date: 2020-12-31 00:00:00
2023-04-05 20:41:00,854: per_basin_train_periods_file: None
2023-04-05 20:4

ValueError: One or more of the specified variables cannot be found in this dataset

##### test

In [None]:
# Evaluate the run stored in run_dir on the "test" period.
# NOTE(review): this folder differs from the one created by the training log
# above (era5_prcp_0504_204100) — confirm it is the intended run.
run_dir = Path("./runs_q_mm/era5_prcp_0404_074943/")
eval_run(run_dir=run_dir, period="test")

#### ERA5-Land precipitation

##### train

In [None]:
# ERA5-Land setup: call file_rewriter with target q_cms_s and ERA5-Land
# forcings, then start the run (only when CUDA is available).
era5_land_predictors = ['t_max_e5l', 't_min_e5l', 'prcp_e5l']
discharge_files = glob.glob('../geo_data/great_db/nc_all_q/*.nc')
file_rewriter(
    q_pathes=discharge_files,
    ts_dir=ts_dir,
    hydro_target='q_cms_s',
    meteo_predictors=era5_land_predictors,
)

era5_land_config = Path("./configs/era5_land_prcp_qms.yml")
if torch.cuda.is_available():
    start_run(config_file=era5_land_config)

##### test

In [None]:
# Evaluate the stored ERA5-Land run in run_dir on the "test" period.
run_dir = Path("./runs_q_cms/era5_land_prcp_2803_125025")
eval_run(run_dir=run_dir, period="test")

#### GPCP precipitation

##### train

In [None]:
# GPCP setup: call file_rewriter with target q_cms_s, ERA5 temperatures and
# GPCP precipitation, then start the run (only when CUDA is available).
gpcp_predictors = ['t_max_e5', 't_min_e5', 'prcp_gpcp']
discharge_files = glob.glob('../geo_data/great_db/nc_all_q/*.nc')
file_rewriter(
    q_pathes=discharge_files,
    ts_dir=ts_dir,
    hydro_target='q_cms_s',
    meteo_predictors=gpcp_predictors,
)

gpcp_config = Path("./configs/gpcp_prcp_qms.yml")
if torch.cuda.is_available():
    start_run(config_file=gpcp_config)
# Run a garbage-collection pass; its return value is the cell output.
gc.collect()

##### test

In [6]:
# Evaluate the stored GPCP run in run_dir on the "test" period.
run_dir = Path("./runs_q_cms/gpcp_prcp_3003_063422")
eval_run(run_dir=run_dir, period="test")

# Evaluation: 100%|██████████| 1136/1136 [02:14<00:00,  8.43it/s]


#### IMERG precipitation

##### train

In [None]:
# IMERG setup: call file_rewriter with target q_cms_s, ERA5 temperatures and
# IMERG precipitation, then start the run (only when CUDA is available).
imerg_predictors = ['t_max_e5', 't_min_e5', 'prcp_imerg']
discharge_files = glob.glob('../geo_data/great_db/nc_all_q/*.nc')
file_rewriter(
    q_pathes=discharge_files,
    ts_dir=ts_dir,
    hydro_target='q_cms_s',
    meteo_predictors=imerg_predictors,
)

imerg_config = Path("./configs/imerg_prcp_qms.yml")
if torch.cuda.is_available():
    start_run(config_file=imerg_config)
# Run a garbage-collection pass; its return value is the cell output.
gc.collect()

##### test

In [None]:
# Evaluate the stored IMERG run in run_dir on the "test" period.
run_dir = Path("./runs_q_cms/imerg_prcp_3003_135405")
eval_run(run_dir=run_dir, period="test")

#### MSWEP precipitation

##### train

In [None]:
# MSWEP setup: call file_rewriter with target q_mm_day, ERA5-Land temperatures
# and MSWEP precipitation, then start the run (only when CUDA is available).
mswep_predictors = ['t_max_e5l', 't_min_e5l', 'prcp_mswep']
discharge_files = glob.glob('../geo_data/great_db/nc_all_q/*.nc')
file_rewriter(
    q_pathes=discharge_files,
    ts_dir=ts_dir,
    hydro_target='q_mm_day',
    meteo_predictors=mswep_predictors,
)

mswep_config = Path("./configs/mswep_prcp_qmm.yml")
if torch.cuda.is_available():
    start_run(config_file=mswep_config)
# Run a garbage-collection pass; its return value is the cell output.
gc.collect()

##### test

In [8]:
# Evaluate the stored MSWEP run in run_dir on the "test" period.
# NOTE(review): the MSWEP training cell uses hydro_target='q_mm_day' and
# mswep_prcp_qmm.yml, while this folder lives under runs_q_cms — confirm the
# evaluated run matches the intended target.
run_dir = Path("./runs_q_cms/mswep_prcp_3103_073833")
eval_run(run_dir=run_dir, period="test")

# Evaluation: 100%|██████████| 1136/1136 [02:14<00:00,  8.46it/s]


#### Level

In [None]:
# Level setup: call file_rewriter on the nc_all_h files with target lvl_sm and
# ERA5-Land forcings, then start the run (only when CUDA is available).
era5_land_predictors = ['t_max_e5l', 't_min_e5l', 'prcp_e5l']
level_files = glob.glob('../geo_data/great_db/nc_all_h/*.nc')
file_rewriter(
    q_pathes=level_files,
    ts_dir=ts_dir,
    hydro_target='lvl_sm',
    meteo_predictors=era5_land_predictors,
)

level_config = Path("./configs/e5l_sm.yml")
if torch.cuda.is_available():
    start_run(config_file=level_config)
# Run a garbage-collection pass; its return value is the cell output.
gc.collect()

##### test

### Blind forecast

In [None]:
import geopandas as gpd

# Gauges read from the gauges_partial_q GeoPackage.
partial_gauges = gpd.read_file(
    '../geo_data/great_db/geometry/gauges_partial_q.gpkg')

# Write the gauge ids, cast to int, one per line into basins_test.txt.
with open('./basins_test.txt', 'w') as the_file:
    the_file.writelines(
        f'{int(gauge_name)}\n' for gauge_name in partial_gauges['gauge_id'])

2023-04-05 06:22:00,239: PROJ: internal_proj_identify: /opt/conda/share/proj/proj.db contains DATABASE.LAYOUT.VERSION.MINOR = 0 whereas a number >= 2 is expected. It comes from another PROJ installation.


In [None]:
# Copy the nc_concat files of the gauges listed in partial_gauges into ts_dir,
# dropping their 'gauge_id' variable on the way.
q_pathes = glob.glob('../geo_data/great_db/nc_concat/*.nc')

# Build the id lookup once instead of re-creating a list for every file.
# NOTE(review): file stems are strings; if partial_gauges['gauge_id'] holds
# numbers, no file will ever match — confirm the column dtype.
partial_gauge_ids = set(partial_gauges['gauge_id'])

for file in q_pathes:
    source_path = Path(file)
    gauge_id = source_path.stem
    if gauge_id not in partial_gauge_ids:
        continue
    # The context manager closes the source file even when the copy fails.
    with xr.open_dataset(source_path) as ds:
        try:
            # drop_vars replaces the deprecated Dataset.drop for variables.
            ds.drop_vars('gauge_id').to_netcdf(ts_dir / source_path.name)
        except ValueError as err:
            # Still best effort (the file is skipped), but the skip is now
            # reported instead of being silently swallowed.
            print(f'Skipped {file}: {err}')
            continue

# Call file_rewriter with target q_mm_day and ERA5-Land forcings.
file_rewriter(q_pathes=glob.glob('../geo_data/great_db/nc_all_q/*.nc'),
              ts_dir=ts_dir,
              hydro_target='q_mm_day',
              meteo_predictors=['t_max_e5l', 't_min_e5l', 'prcp_e5l'])

In [None]:
# Start the run configured in e5l_mbs.yml; skipped when CUDA is unavailable.
blind_forecast_config = Path("./configs/e5l_mbs.yml")
if torch.cuda.is_available():
    start_run(config_file=blind_forecast_config)
# Run a garbage-collection pass; its return value is the cell output.
gc.collect()

### Bad regions