In [1]:
%load_ext autoreload
%autoreload 2
%load_ext tensorboard

In [2]:
import glob
import gc

import torch
import xarray as xr
from neuralhydrology.nh_run import start_run, eval_run
from scripts.file_manipulator import file_rewriter

from pathlib import Path
import geopandas as gpd

import random
random.seed(42)
# Prefer the GPU whenever CUDA is usable; otherwise run on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
print()

# Extra diagnostics that are only meaningful for a CUDA device (index 0).
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    for label, memory_query in (('Allocated:', torch.cuda.memory_allocated),
                                ('Cached:   ', torch.cuda.memory_reserved)):
        # Byte counts are reported in GiB, rounded to one decimal.
        print(label, round(memory_query(0)/1024**3,1), 'GB')
# Directory handed to file_rewriter as `ts_dir` by the cells below.
ts_dir = Path('../geo_data/time_series')

Using device: cuda

NVIDIA GeForce RTX 3080 Ti
Memory Usage:
Allocated: 0.0 GB
Cached:    0.0 GB


### Small and Medium

In [3]:
# Watershed geometries indexed by gauge id, restricted to rows whose
# `new_area` does not exceed 50000.
ws_file = (
    gpd.read_file('../geo_data/great_db/geometry/russia_ws.gpkg')
    .set_index('gauge_id')
    .loc[lambda ws: ws['new_area'] <= 50000]
)

### Run for different configs


#### ERA5 precipitation

##### train

In [6]:
# ERA5 forcing: call file_rewriter for the discharge target with the ERA5
# temperature / precipitation predictor names, restricted to the gauges
# present in `ws_file`.
era5_q_files = glob.glob('../geo_data/great_db/nc_all_q/*.nc')
era5_predictors = ['t_max_e5', 't_min_e5', 'prcp_e5']
file_rewriter(
    q_pathes=era5_q_files,
    ts_dir=ts_dir,
    hydro_target='q_mm_day',
    area_index=ws_file.index,
    meteo_predictors=era5_predictors,
)

# Training is launched only when a CUDA device is available.
era5_config = Path("./static_configs/era5_static_qmm.yml")
if torch.cuda.is_available():
    start_run(config_file=era5_config)


2023-05-12 11:24:33,525: Logging to runs_static/era5_SmallMedium_1205_112433/output.log initialized.
2023-05-12 11:24:33,526: ### Folder structure created at runs_static/era5_SmallMedium_1205_112433
2023-05-12 11:24:33,526: ### Run configurations for era5_SmallMedium
2023-05-12 11:24:33,526: experiment_name: era5_SmallMedium
2023-05-12 11:24:33,527: run_dir: runs_static/era5_SmallMedium_1205_112433
2023-05-12 11:24:33,527: train_basin_file: openf_basins.txt
2023-05-12 11:24:33,528: validation_basin_file: openf_basins.txt
2023-05-12 11:24:33,528: test_basin_file: openf_basins.txt
2023-05-12 11:24:33,529: train_start_date: 2009-01-01 00:00:00
2023-05-12 11:24:33,529: train_end_date: 2016-12-31 00:00:00
2023-05-12 11:24:33,529: validation_start_date: 2017-01-01 00:00:00
2023-05-12 11:24:33,529: validation_end_date: 2018-12-31 00:00:00
2023-05-12 11:24:33,530: test_start_date: 2019-01-01 00:00:00
2023-05-12 11:24:33,530: test_end_date: 2020-12-31 00:00:00
2023-05-12 11:24:33,531: per_basin

##### test

In [None]:
# Run eval_run on an existing run directory for the "test" period.
# NOTE(review): this path is under runs_q_mm/, while the ERA5 training cell
# above logged its run to runs_static/era5_SmallMedium_1205_112433 -- confirm
# that this is the run that should be evaluated.
run_dir = Path("./runs_q_mm/era5_prcp_0404_074943/")
eval_run(run_dir=run_dir, period="test")

#### ERA5-Land precipitation

##### train

In [21]:
# ERA5-Land forcing: same file_rewriter call shape as the other forcing
# sets, with the ERA5-Land predictor names.
era5l_rewrite_args = dict(
    q_pathes=glob.glob('../geo_data/great_db/nc_all_q/*.nc'),
    ts_dir=ts_dir,
    hydro_target='q_mm_day',
    area_index=ws_file.index,
    meteo_predictors=['t_max_e5l', 't_min_e5l', 'prcp_e5l'],
)
file_rewriter(**era5l_rewrite_args)


In [23]:
# Start the ERA5-Land run; skipped entirely when no CUDA device is available.
era5l_config = Path("./static_configs/era5L_static_qmm.yml")
if torch.cuda.is_available():
    start_run(config_file=era5l_config)

2023-05-02 09:50:21,770: Logging to runs_static/era5L_static_SmallMedium_0205_095021/output.log initialized.
2023-05-02 09:50:21,770: ### Folder structure created at runs_static/era5L_static_SmallMedium_0205_095021
2023-05-02 09:50:21,771: ### Run configurations for era5L_static_SmallMedium
2023-05-02 09:50:21,771: experiment_name: era5L_static_SmallMedium
2023-05-02 09:50:21,771: run_dir: runs_static/era5L_static_SmallMedium_0205_095021
2023-05-02 09:50:21,772: train_basin_file: openf_basins.txt
2023-05-02 09:50:21,772: validation_basin_file: openf_basins.txt
2023-05-02 09:50:21,773: test_basin_file: openf_basins.txt
2023-05-02 09:50:21,773: train_start_date: 2009-01-01 00:00:00
2023-05-02 09:50:21,774: train_end_date: 2016-12-31 00:00:00
2023-05-02 09:50:21,774: validation_start_date: 2017-01-01 00:00:00
2023-05-02 09:50:21,774: validation_end_date: 2018-12-31 00:00:00
2023-05-02 09:50:21,775: test_start_date: 2019-01-01 00:00:00
2023-05-02 09:50:21,775: test_end_date: 2020-12-31 00:

  per_basin_target_stds = torch.tensor([np.nanstd(obs, axis=1)], dtype=torch.float32)


100%|██████████| 914/914 [00:00<00:00, 1655.41it/s]
2023-05-02 09:50:41,443: Create lookup table and convert to pytorch tensor
100%|██████████| 914/914 [00:11<00:00, 76.18it/s] 
# Epoch 1: 100%|██████████| 10433/10433 [05:34<00:00, 31.21it/s, Loss: 0.0000]
2023-05-02 09:56:30,748: Epoch 1 average loss: 0.0014522365490350837
# Epoch 2: 100%|██████████| 10433/10433 [05:36<00:00, 31.05it/s, Loss: 0.0001]
2023-05-02 10:02:06,802: Epoch 2 average loss: 0.00032801439835223586
# Epoch 3: 100%|██████████| 10433/10433 [05:36<00:00, 30.99it/s, Loss: 0.0000]
2023-05-02 10:07:43,473: Epoch 3 average loss: 8.853547631314221e-05
# Validation: 100%|██████████| 914/914 [01:26<00:00, 10.54it/s]
2023-05-02 10:09:10,320: Stored results at runs_static/era5L_static_SmallMedium_0205_095021/validation/model_epoch003/validation_results.p
2023-05-02 10:09:10,326: Epoch 3 average validation loss: 0.00011 -- Median validation metrics: NSE: 0.10338, KGE: 0.03886
# Epoch 4: 100%|██████████| 10433/10433 [05:38<00:0

##### test

In [None]:
# Run eval_run on an existing run directory for the "test" period, using the
# weights of epoch 21.
# NOTE(review): the ERA5-Land training cell above logged its run to
# runs_static/era5L_static_SmallMedium_0205_095021, not to this directory --
# confirm the path and the epoch number.
run_dir = Path("./runs_static/era5L_static_prcp_0704_170903/")
eval_run(run_dir=run_dir, period="test", epoch=21)

#### GPCP precipitation

##### train

In [4]:
# GPCP forcing: ERA5-Land temperatures combined with GPCP precipitation.
gpcp_q_files = glob.glob('../geo_data/great_db/nc_all_q/*.nc')
gpcp_predictors = ['t_max_e5l', 't_min_e5l', 'prcp_gpcp']
file_rewriter(
    q_pathes=gpcp_q_files,
    ts_dir=ts_dir,
    hydro_target='q_mm_day',
    area_index=ws_file.index,
    meteo_predictors=gpcp_predictors,
)
# Training is launched only when a CUDA device is available.
gpcp_config = Path("./static_configs/gpcp_static_qmm.yml")
if torch.cuda.is_available():
    start_run(config_file=gpcp_config)
# Left as the last expression so the cell still displays the collector's count.
gc.collect()

2023-05-12 05:47:40,397: Logging to runs_static/gpcp_SmallMedium_1205_054740/output.log initialized.
2023-05-12 05:47:40,398: ### Folder structure created at runs_static/gpcp_SmallMedium_1205_054740
2023-05-12 05:47:40,398: ### Run configurations for gpcp_SmallMedium
2023-05-12 05:47:40,398: experiment_name: gpcp_SmallMedium
2023-05-12 05:47:40,399: run_dir: runs_static/gpcp_SmallMedium_1205_054740
2023-05-12 05:47:40,399: train_basin_file: openf_basins.txt
2023-05-12 05:47:40,400: validation_basin_file: openf_basins.txt
2023-05-12 05:47:40,400: test_basin_file: openf_basins.txt
2023-05-12 05:47:40,400: train_start_date: 2009-01-01 00:00:00
2023-05-12 05:47:40,401: train_end_date: 2016-12-31 00:00:00
2023-05-12 05:47:40,401: validation_start_date: 2017-01-01 00:00:00
2023-05-12 05:47:40,401: validation_end_date: 2018-12-31 00:00:00
2023-05-12 05:47:40,402: test_start_date: 2019-01-01 00:00:00
2023-05-12 05:47:40,402: test_end_date: 2020-12-31 00:00:00
2023-05-12 05:47:40,403: per_basin

  per_basin_target_stds = torch.tensor([np.nanstd(obs, axis=1)], dtype=torch.float32)


100%|██████████| 914/914 [00:00<00:00, 1769.52it/s]
2023-05-12 05:48:00,041: Create lookup table and convert to pytorch tensor
100%|██████████| 914/914 [00:11<00:00, 78.35it/s] 
# Epoch 1: 100%|██████████| 10433/10433 [05:42<00:00, 30.47it/s, Loss: 0.0000]
2023-05-12 05:53:56,068: Epoch 1 average loss: 0.0012091803943338223
# Epoch 2: 100%|██████████| 10433/10433 [06:47<00:00, 25.63it/s, Loss: 0.0000]
2023-05-12 06:00:43,196: Epoch 2 average loss: 6.222286897125228e-05
# Epoch 3: 100%|██████████| 10433/10433 [05:58<00:00, 29.09it/s, Loss: 0.0000]
2023-05-12 06:06:41,806: Epoch 3 average loss: 4.836768616450781e-05
# Validation: 100%|██████████| 914/914 [01:32<00:00,  9.83it/s]
2023-05-12 06:08:14,880: Stored results at runs_static/gpcp_SmallMedium_1205_054740/validation/model_epoch003/validation_results.p
2023-05-12 06:08:14,888: Epoch 3 average validation loss: 0.00005 -- Median validation metrics: NSE: 0.53910, KGE: 0.50302
# Epoch 4: 100%|██████████| 10433/10433 [06:00<00:00, 28.97i

0

##### test

In [None]:
# Run eval_run on an existing run directory for the "test" period.
# NOTE(review): this path is under runs_q_cms/, while the GPCP training cell
# above logged its run to runs_static/gpcp_SmallMedium_1205_054740 -- confirm
# that this is the run that should be evaluated.
run_dir = Path("./runs_q_cms/gpcp_prcp_3003_063422")
eval_run(run_dir=run_dir, period="test")

#### IMERG precipitation

##### train

In [5]:
# IMERG forcing: ERA5-Land temperatures combined with IMERG precipitation.
imerg_q_files = glob.glob('../geo_data/great_db/nc_all_q/*.nc')
imerg_predictors = ['t_max_e5l', 't_min_e5l', 'prcp_imerg']
file_rewriter(
    q_pathes=imerg_q_files,
    ts_dir=ts_dir,
    hydro_target='q_mm_day',
    area_index=ws_file.index,
    meteo_predictors=imerg_predictors,
)
# Training is launched only when a CUDA device is available.
imerg_config = Path("./static_configs/imerg_static_qmm.yml")
if torch.cuda.is_available():
    start_run(config_file=imerg_config)
# Left as the last expression so the cell still displays the collector's count.
gc.collect()


2023-05-12 08:56:02,522: Logging to runs_static/imerg_SmallMedium_1205_085602/output.log initialized.
2023-05-12 08:56:02,522: ### Folder structure created at runs_static/imerg_SmallMedium_1205_085602
2023-05-12 08:56:02,523: ### Run configurations for imerg_SmallMedium
2023-05-12 08:56:02,523: experiment_name: imerg_SmallMedium
2023-05-12 08:56:02,523: run_dir: runs_static/imerg_SmallMedium_1205_085602
2023-05-12 08:56:02,524: train_basin_file: openf_basins.txt
2023-05-12 08:56:02,524: validation_basin_file: openf_basins.txt
2023-05-12 08:56:02,525: test_basin_file: openf_basins.txt
2023-05-12 08:56:02,525: train_start_date: 2009-01-01 00:00:00
2023-05-12 08:56:02,525: train_end_date: 2016-12-31 00:00:00
2023-05-12 08:56:02,526: validation_start_date: 2017-01-01 00:00:00
2023-05-12 08:56:02,526: validation_end_date: 2018-12-31 00:00:00
2023-05-12 08:56:02,527: test_start_date: 2019-01-01 00:00:00
2023-05-12 08:56:02,527: test_end_date: 2020-12-31 00:00:00
2023-05-12 08:56:02,527: per_

0

##### test

In [None]:
# Run eval_run on an existing run directory for the "test" period.
# NOTE(review): this path is under runs_q_cms/, while the IMERG training cell
# above logged its run to runs_static/imerg_SmallMedium_1205_085602 -- confirm
# that this is the run that should be evaluated.
run_dir = Path("./runs_q_cms/imerg_prcp_3003_135405")
eval_run(run_dir=run_dir, period="test")

#### MSWEP precipitation

##### train

In [24]:
# MSWEP forcing, configuration taken from the no_static_configs folder.
mswep_q_files = glob.glob('../geo_data/great_db/nc_all_q/*.nc')
mswep_predictors = ['t_max_e5l', 't_min_e5l', 'prcp_mswep']
file_rewriter(
    q_pathes=mswep_q_files,
    ts_dir=ts_dir,
    hydro_target='q_mm_day',
    area_index=ws_file.index,
    meteo_predictors=mswep_predictors,
)
# Training is launched only when a CUDA device is available.
mswep_no_static_config = Path("./no_static_configs/mswep_qmm.yml")
if torch.cuda.is_available():
    start_run(config_file=mswep_no_static_config)
# Left as the last expression so the cell still displays the collector's count.
gc.collect()

2023-05-02 12:48:37,863: Logging to runs_no_static/mswep_qmm_SmallMedium_0205_124837/output.log initialized.
2023-05-02 12:48:37,864: ### Folder structure created at runs_no_static/mswep_qmm_SmallMedium_0205_124837
2023-05-02 12:48:37,864: ### Run configurations for mswep_qmm_SmallMedium
2023-05-02 12:48:37,865: experiment_name: mswep_qmm_SmallMedium
2023-05-02 12:48:37,865: run_dir: runs_no_static/mswep_qmm_SmallMedium_0205_124837
2023-05-02 12:48:37,866: train_basin_file: openf_basins.txt
2023-05-02 12:48:37,866: validation_basin_file: openf_basins.txt
2023-05-02 12:48:37,867: test_basin_file: openf_basins.txt
2023-05-02 12:48:37,867: train_start_date: 2009-01-01 00:00:00
2023-05-02 12:48:37,867: train_end_date: 2016-12-31 00:00:00
2023-05-02 12:48:37,868: validation_start_date: 2017-01-01 00:00:00
2023-05-02 12:48:37,868: validation_end_date: 2018-12-31 00:00:00
2023-05-02 12:48:37,869: test_start_date: 2019-01-01 00:00:00
2023-05-02 12:48:37,869: test_end_date: 2020-12-31 00:00:00


0

In [25]:
# MSWEP forcing, configuration taken from the static_configs folder.
mswep_static_rewrite_args = dict(
    q_pathes=glob.glob('../geo_data/great_db/nc_all_q/*.nc'),
    ts_dir=ts_dir,
    hydro_target='q_mm_day',
    area_index=ws_file.index,
    meteo_predictors=['t_max_e5l', 't_min_e5l', 'prcp_mswep'],
)
file_rewriter(**mswep_static_rewrite_args)
# Training is launched only when a CUDA device is available.
mswep_static_config = Path("./static_configs/mswep_static_qmm.yml")
if torch.cuda.is_available():
    start_run(config_file=mswep_static_config)
# Left as the last expression so the cell still displays the collector's count.
gc.collect()

2023-05-02 15:32:44,342: Logging to runs_static/mswep_static_SmallMedium_0205_153244/output.log initialized.
2023-05-02 15:32:44,342: ### Folder structure created at runs_static/mswep_static_SmallMedium_0205_153244
2023-05-02 15:32:44,343: ### Run configurations for mswep_static_SmallMedium
2023-05-02 15:32:44,343: experiment_name: mswep_static_SmallMedium
2023-05-02 15:32:44,343: run_dir: runs_static/mswep_static_SmallMedium_0205_153244
2023-05-02 15:32:44,343: train_basin_file: openf_basins.txt
2023-05-02 15:32:44,344: validation_basin_file: openf_basins.txt
2023-05-02 15:32:44,344: test_basin_file: openf_basins.txt
2023-05-02 15:32:44,345: train_start_date: 2009-01-01 00:00:00
2023-05-02 15:32:44,345: train_end_date: 2016-12-31 00:00:00
2023-05-02 15:32:44,345: validation_start_date: 2017-01-01 00:00:00
2023-05-02 15:32:44,346: validation_end_date: 2018-12-31 00:00:00
2023-05-02 15:32:44,346: test_start_date: 2019-01-01 00:00:00
2023-05-02 15:32:44,346: test_end_date: 2020-12-31 00:

0

##### test

In [None]:
# Run eval_run on an existing run directory for the "test" period.
# NOTE(review): this path is under runs_q_cms/, while the two MSWEP training
# cells above logged their runs to runs_no_static/mswep_qmm_SmallMedium_0205_124837
# and runs_static/mswep_static_SmallMedium_0205_153244 -- confirm which run
# should be evaluated.
run_dir = Path("./runs_q_cms/mswep_prcp_3103_073833")
eval_run(run_dir=run_dir, period="test")

#### Level

In [14]:
# Level target 'lvl_mbs': the inputs come from the nc_all_h folder and no
# area_index is passed, unlike the discharge cells above.
level_mbs_files = glob.glob('../geo_data/great_db/nc_all_h/*.nc')
level_mbs_predictors = ['t_max_e5l', 't_min_e5l', 'prcp_e5l']
file_rewriter(
    q_pathes=level_mbs_files,
    ts_dir=ts_dir,
    hydro_target='lvl_mbs',
    meteo_predictors=level_mbs_predictors,
)
# Training is launched only when a CUDA device is available.
level_mbs_config = Path("./static_configs/era5L_static_level_mbs.yml")
if torch.cuda.is_available():
    start_run(config_file=level_mbs_config)
# Left as the last expression so the cell still displays the collector's count.
gc.collect()

2023-04-10 06:37:06,279: Logging to runs_static/era5L_static_mbs_60_epoch_1004_063706/output.log initialized.
2023-04-10 06:37:06,279: ### Folder structure created at runs_static/era5L_static_mbs_60_epoch_1004_063706
2023-04-10 06:37:06,280: ### Run configurations for era5L_static_mbs_60_epoch
2023-04-10 06:37:06,280: experiment_name: era5L_static_mbs_60_epoch
2023-04-10 06:37:06,281: run_dir: runs_static/era5L_static_mbs_60_epoch_1004_063706
2023-04-10 06:37:06,281: train_basin_file: openf_basins.txt
2023-04-10 06:37:06,281: validation_basin_file: openf_basins.txt
2023-04-10 06:37:06,282: test_basin_file: openf_basins.txt
2023-04-10 06:37:06,282: train_start_date: 2009-01-01 00:00:00
2023-04-10 06:37:06,283: train_end_date: 2016-12-31 00:00:00
2023-04-10 06:37:06,283: validation_start_date: 2017-01-01 00:00:00
2023-04-10 06:37:06,283: validation_end_date: 2018-12-31 00:00:00
2023-04-10 06:37:06,283: test_start_date: 2019-01-01 00:00:00
2023-04-10 06:37:06,284: test_end_date: 2020-12-3

0

In [None]:
# Level target 'lvl_sm': same inputs as the 'lvl_mbs' cell, different target
# column and configuration file.
level_sm_files = glob.glob('../geo_data/great_db/nc_all_h/*.nc')
level_sm_predictors = ['t_max_e5l', 't_min_e5l', 'prcp_e5l']
file_rewriter(
    q_pathes=level_sm_files,
    ts_dir=ts_dir,
    hydro_target='lvl_sm',
    meteo_predictors=level_sm_predictors,
)
# Training is launched only when a CUDA device is available.
level_sm_config = Path("./static_configs/era5L_static_level_lvl_relative.yml")
if torch.cuda.is_available():
    start_run(config_file=level_sm_config)
# Left as the last expression so the cell still displays the collector's count.
gc.collect()

### Blind forecast

In [None]:
# Gauge layer used by the blind-forecast cells below (read via its
# 'gauge_id' column).
partial_gauges_file = '../geo_data/great_db/geometry/gauges_partial_q.gpkg'
partial_gauges = gpd.read_file(partial_gauges_file)

In [None]:
# Write the test-basin list: one integer gauge id per line.
with open('./basins_test.txt', 'w') as the_file:
    for gauge_name in partial_gauges['gauge_id']:
        the_file.write(f'{int(gauge_name)}\n')

# Training side: call file_rewriter on the full discharge archive.
file_rewriter(q_pathes=glob.glob('../geo_data/great_db/nc_all_q/*.nc'),
              ts_dir=ts_dir,
              hydro_target='q_mm_day',
              meteo_predictors=['t_max_e5l', 't_min_e5l', 'prcp_e5l'])

# Test side: copy the nc_concat files of the partial gauges into ts_dir,
# without their 'gauge_id' variable.
# The id collection is built once instead of once per file.
partial_gauge_ids = set(partial_gauges['gauge_id'])
q_pathes = glob.glob('../geo_data/great_db/nc_concat/*.nc')
for file in q_pathes:
    nc_path = Path(file)
    # Path.stem / Path.name replace the manual split('/')[-1][:-3] slicing,
    # which depended on the path separator and on a three-character suffix.
    if nc_path.stem not in partial_gauge_ids:
        continue
    # The context manager closes the source file even when the write fails.
    with xr.open_dataset(file) as ds:
        try:
            # drop_vars is the non-deprecated spelling of Dataset.drop for
            # variable names; it raises ValueError when the name is missing.
            ds.drop_vars('gauge_id').to_netcdf(Path(ts_dir) / nc_path.name)
        except ValueError:
            # Best effort, as before: a file that cannot be processed is skipped.
            continue

# Training is launched only when a CUDA device is available.
if torch.cuda.is_available():
    start_run(config_file=Path("./static_configs/era5L_static_blind.yml"))
gc.collect()

In [None]:
# Test side: copy the nc_concat files of the partial gauges into ts_dir,
# dropping 'gauge_id' and renaming 'index' to 'date'.
# The id collection is built once instead of once per file.
partial_gauge_ids = set(partial_gauges['gauge_id'])
q_pathes = glob.glob('../geo_data/great_db/nc_concat/*.nc')
for file in q_pathes:
    nc_path = Path(file)
    # Path.stem / Path.name replace the manual split('/')[-1][:-3] slicing,
    # which depended on the path separator and on a three-character suffix.
    if nc_path.stem not in partial_gauge_ids:
        continue
    # The context manager closes the source file even when the write fails.
    with xr.open_dataset(file) as ds:
        try:
            # drop_vars is the non-deprecated spelling of Dataset.drop for
            # variable names; both steps raise ValueError on a missing name.
            renamed = ds.drop_vars('gauge_id').rename({'index': 'date'})
            renamed.to_netcdf(Path(ts_dir) / nc_path.name)
        except ValueError:
            # Best effort, as before: a file that cannot be processed is skipped.
            continue
# for gauge in partial_gauges['gauge_id']:
    
#     ds = xr.open_dataset(f'../geo_data/time_series/{gauge}.nc')
    
#     ds.to_netcdf(f'../geo_data/time_series/{gauge}.nc')
    
    

In [None]:
# Run eval_run on an existing run directory for the "test" period, using the
# weights of epoch 24.
# NOTE(review): the blind-forecast training cell above has no recorded
# output, so this run directory cannot be cross-checked here -- confirm the
# path and the epoch number.
run_dir = Path("./runs_static/era5L_static_prcp_0904_152029/")
eval_run(run_dir=run_dir, period="test", epoch=24)

### Bad regions

#### Kavkaz

In [11]:
# Gauges belonging to the Kavkaz region.
gauges = gpd.read_file(
    '../paper_visualisation/poor_gauges/kavkaz.gpkg')

# Keep only the discharge files whose file stem is one of the region's
# gauge ids. The id collection is built once (the original rebuilt a list
# for every file) and Path.stem replaces the manual split('/')[-1][:-3].
q_pathes = glob.glob('../geo_data/great_db/nc_all_q/*.nc')
region_gauge_ids = set(gauges['gauge_id'])
gauge_list = [file for file in q_pathes
              if Path(file).stem in region_gauge_ids]

# Call file_rewriter on the regional subset only.
file_rewriter(q_pathes=gauge_list,
              ts_dir=ts_dir,
              hydro_target='q_mm_day',
              meteo_predictors=['t_max_e5l', 't_min_e5l', 'prcp_e5l'])

# Training is launched only when a CUDA device is available.
if torch.cuda.is_available():
    start_run(config_file=Path("./poor_perfom_configs/era5L_kavkaz_qmm.yml"))
gc.collect()

2023-04-10 06:04:54,170: Logging to runs_bad_regions/era5L_kavkaz_1004_060454/output.log initialized.
2023-04-10 06:04:54,171: ### Folder structure created at runs_bad_regions/era5L_kavkaz_1004_060454
2023-04-10 06:04:54,171: ### Run configurations for era5L_kavkaz
2023-04-10 06:04:54,171: experiment_name: era5L_kavkaz
2023-04-10 06:04:54,172: run_dir: runs_bad_regions/era5L_kavkaz_1004_060454
2023-04-10 06:04:54,172: train_basin_file: openf_basins.txt
2023-04-10 06:04:54,173: validation_basin_file: openf_basins.txt
2023-04-10 06:04:54,173: test_basin_file: openf_basins.txt
2023-04-10 06:04:54,173: train_start_date: 2009-01-01 00:00:00
2023-04-10 06:04:54,174: train_end_date: 2016-12-31 00:00:00
2023-04-10 06:04:54,174: validation_start_date: 2017-01-01 00:00:00
2023-04-10 06:04:54,174: validation_end_date: 2018-12-31 00:00:00
2023-04-10 06:04:54,175: test_start_date: 2019-01-01 00:00:00
2023-04-10 06:04:54,175: test_end_date: 2020-12-31 00:00:00
2023-04-10 06:04:54,175: per_basin_trai

  per_basin_target_stds = torch.tensor([np.nanstd(obs, axis=1)], dtype=torch.float32)


100%|██████████| 24/24 [00:01<00:00, 17.17it/s]
# Epoch 1: 100%|██████████| 274/274 [00:09<00:00, 28.06it/s, Loss: 0.0443]
2023-04-10 06:05:08,684: Epoch 1 average loss: 0.14049608622045412
# Epoch 2: 100%|██████████| 274/274 [00:09<00:00, 29.20it/s, Loss: 0.0867]
2023-04-10 06:05:18,069: Epoch 2 average loss: 0.05810246545903004
# Epoch 3: 100%|██████████| 274/274 [00:08<00:00, 30.46it/s, Loss: 0.0509]
2023-04-10 06:05:27,068: Epoch 3 average loss: 0.054747352838842536
# Validation: 100%|██████████| 24/24 [00:02<00:00,  9.32it/s]
2023-04-10 06:05:29,662: Stored results at runs_bad_regions/era5L_kavkaz_1004_060454/validation/model_epoch003/validation_results.p
2023-04-10 06:05:29,663: Epoch 3 average validation loss: 0.21241 -- Median validation metrics: NSE: -0.20049, KGE: -0.16853
# Epoch 4: 100%|██████████| 274/274 [00:09<00:00, 30.22it/s, Loss: 0.0405]
2023-04-10 06:05:38,733: Epoch 4 average loss: 0.05252450480921208
# Epoch 5: 100%|██████████| 274/274 [00:09<00:00, 28.86it/s, Los

28

In [17]:
# Gauges belonging to the Volgo-Don region (this cell only calls
# file_rewriter; it does not start a training run).
gauges = gpd.read_file(
    '../paper_visualisation/poor_gauges/volgo_don.gpkg')

# Keep only the discharge files whose file stem is one of the region's
# gauge ids. The id collection is built once (the original rebuilt a list
# for every file) and Path.stem replaces the manual split('/')[-1][:-3].
q_pathes = glob.glob('../geo_data/great_db/nc_all_q/*.nc')
region_gauge_ids = set(gauges['gauge_id'])
gauge_list = [file for file in q_pathes
              if Path(file).stem in region_gauge_ids]

# Call file_rewriter on the regional subset only.
file_rewriter(q_pathes=gauge_list,
              ts_dir=ts_dir,
              hydro_target='q_mm_day',
              meteo_predictors=['t_max_e5l', 't_min_e5l', 'prcp_e5l'])

### Bad gauge filter

In [3]:
# Call file_rewriter with the ERA5 predictor names, then plot the
# 'q_mm_day' series of every file found in ts_dir for visual inspection.
file_rewriter(q_pathes=glob.glob('../geo_data/great_db/nc_all_q/*.nc'),
              ts_dir=ts_dir,
              hydro_target='q_mm_day',
              meteo_predictors=['t_max_e5', 't_min_e5', 'prcp_e5'])

for file in glob.glob(f'{ts_dir}/*.nc'):
    # Close each dataset as soon as it has been converted; the original left
    # one open file handle per plotted gauge.
    with xr.open_dataset(file) as ds:
        df = ds.to_dataframe()
    # NOTE(review): one figure per file stays open. The recorded output shows
    # a warning raised from a plt.figure(...) call, presumably matplotlib's
    # open-figure limit -- consider closing figures after display.
    my_plot = df[['q_mm_day']].plot()
    my_plot.set_title(file)

  fig = self.plt.figure(figsize=self.figsize)


#### Ural

In [12]:
# Gauges belonging to the Ural region.
gauges = gpd.read_file(
    '../paper_visualisation/poor_gauges/ural.gpkg')

# Keep only the discharge files whose file stem is one of the region's
# gauge ids. The id collection is built once (the original rebuilt a list
# for every file) and Path.stem replaces the manual split('/')[-1][:-3].
q_pathes = glob.glob('../geo_data/great_db/nc_all_q/*.nc')
region_gauge_ids = set(gauges['gauge_id'])
gauge_list = [file for file in q_pathes
              if Path(file).stem in region_gauge_ids]

# Call file_rewriter on the regional subset only.
file_rewriter(q_pathes=gauge_list,
              ts_dir=ts_dir,
              hydro_target='q_mm_day',
              meteo_predictors=['t_max_e5l', 't_min_e5l', 'prcp_e5l'])

# Training is launched only when a CUDA device is available.
if torch.cuda.is_available():
    start_run(config_file=Path("./poor_perfom_configs/era5L_ural_qmm.yml"))
gc.collect()

2023-04-10 06:09:49,184: PROJ: internal_proj_identify: /opt/conda/share/proj/proj.db contains DATABASE.LAYOUT.VERSION.MINOR = 0 whereas a number >= 2 is expected. It comes from another PROJ installation.
2023-04-10 06:09:50,795: Logging to runs_bad_regions/era5L_ural_1004_060950/output.log initialized.
2023-04-10 06:09:50,796: ### Folder structure created at runs_bad_regions/era5L_ural_1004_060950
2023-04-10 06:09:50,796: ### Run configurations for era5L_ural
2023-04-10 06:09:50,796: experiment_name: era5L_ural
2023-04-10 06:09:50,796: run_dir: runs_bad_regions/era5L_ural_1004_060950
2023-04-10 06:09:50,797: train_basin_file: openf_basins.txt
2023-04-10 06:09:50,797: validation_basin_file: openf_basins.txt
2023-04-10 06:09:50,798: test_basin_file: openf_basins.txt
2023-04-10 06:09:50,798: train_start_date: 2009-01-01 00:00:00
2023-04-10 06:09:50,799: train_end_date: 2016-12-31 00:00:00
2023-04-10 06:09:50,799: validation_start_date: 2017-01-01 00:00:00
2023-04-10 06:09:50,800: validati

28

#### VolgoDon

In [13]:
# Gauges belonging to the Volgo-Don region.
gauges = gpd.read_file(
    '../paper_visualisation/poor_gauges/volgo_don.gpkg')

# Keep only the discharge files whose file stem is one of the region's
# gauge ids. The id collection is built once (the original rebuilt a list
# for every file) and Path.stem replaces the manual split('/')[-1][:-3].
q_pathes = glob.glob('../geo_data/great_db/nc_all_q/*.nc')
region_gauge_ids = set(gauges['gauge_id'])
gauge_list = [file for file in q_pathes
              if Path(file).stem in region_gauge_ids]

# Call file_rewriter on the regional subset only.
file_rewriter(q_pathes=gauge_list,
              ts_dir=ts_dir,
              hydro_target='q_mm_day',
              meteo_predictors=['t_max_e5l', 't_min_e5l', 'prcp_e5l'])

# Training is launched only when a CUDA device is available.
if torch.cuda.is_available():
    start_run(config_file=Path("./poor_perfom_configs/era5L_volgoDon_qmm.yml"))
gc.collect()

2023-04-10 06:18:15,646: PROJ: internal_proj_identify: /opt/conda/share/proj/proj.db contains DATABASE.LAYOUT.VERSION.MINOR = 0 whereas a number >= 2 is expected. It comes from another PROJ installation.
2023-04-10 06:18:19,207: Logging to runs_bad_regions/era5L_volgoDon_1004_061819/output.log initialized.
2023-04-10 06:18:19,208: ### Folder structure created at runs_bad_regions/era5L_volgoDon_1004_061819
2023-04-10 06:18:19,208: ### Run configurations for era5L_volgoDon
2023-04-10 06:18:19,208: experiment_name: era5L_volgoDon
2023-04-10 06:18:19,209: run_dir: runs_bad_regions/era5L_volgoDon_1004_061819
2023-04-10 06:18:19,209: train_basin_file: openf_basins.txt
2023-04-10 06:18:19,209: validation_basin_file: openf_basins.txt
2023-04-10 06:18:19,210: test_basin_file: openf_basins.txt
2023-04-10 06:18:19,210: train_start_date: 2009-01-01 00:00:00
2023-04-10 06:18:19,210: train_end_date: 2016-12-31 00:00:00
2023-04-10 06:18:19,211: validation_start_date: 2017-01-01 00:00:00
2023-04-10 06

0