In [1]:
# Load IPython's autoreload extension so edited project modules are picked up
# without restarting the kernel.
%load_ext autoreload
# Mode 2: reload imported modules before each cell execution.
%autoreload 2
# Load the TensorBoard notebook extension.
%load_ext tensorboard

In [11]:
import pandas as pd
import glob
import xarray as xr
import glob

import torch
import xarray as xr
from neuralhydrology.nh_run import start_run, eval_run
from scripts.file_manipulator import file_rewriter

from pathlib import Path

import random
# Seed Python's built-in RNG so the later basin shuffle is repeatable.
random.seed(42)

# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Using device:', device)
print()

# Extra diagnostics that are only printed for a CUDA device.
if device.type == 'cuda':
    bytes_per_gib = 1024**3
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    allocated_gb = round(torch.cuda.memory_allocated(0) / bytes_per_gib, 1)
    print('Allocated:', allocated_gb, 'GB')
    reserved_gb = round(torch.cuda.memory_reserved(0) / bytes_per_gib, 1)
    print('Cached:   ', reserved_gb, 'GB')


Using device: cuda

NVIDIA GeForce RTX 3080 Ti
Memory Usage:
Allocated: 0.0 GB
Cached:    0.0 GB


In [9]:
# Directory handed to file_rewriter as ts_dir; the basin-list cell globs *.nc
# files from this same path, so presumably file_rewriter writes its per-gauge
# output here - TODO confirm against scripts.file_manipulator.
ts_dir = Path('../geo_data/time_series')

# Initial pass using the GPCP precipitation predictor set.
discharge_files = glob.glob('../geo_data/great_db/nc_all_q/*.nc')
gpcp_predictors = ['t_max_e5', 't_min_e5', 'prcp_gpcp']
file_rewriter(q_pathes=discharge_files,
              ts_dir=ts_dir,
              hydro_target='q_cms_s',
              meteo_predictors=gpcp_predictors)

### Basin file definition
Select all gauge IDs for which 100% of discharge observations exist.

In [10]:
# Gauge ids are the time-series file names without the ".nc" extension.
# Path(...).stem does not depend on the path separator (splitting on '/' does),
# and sorting fixes the order because glob.glob returns matches in arbitrary,
# filesystem-dependent order - which would otherwise make any seeded shuffle of
# this list non-reproducible across machines.
basins = sorted(Path(nc_file).stem
                for nc_file in glob.glob('../geo_data/time_series/*.nc'))

# NOTE(review): ids are written through int() here (dropping any leading zeros
# and failing on non-numeric names), while the train/val/test lists write the
# raw id string - confirm which form the consumers of these files expect.
with open('./openf_basins.txt', 'w') as the_file:
    for gauge_name in basins:
        the_file.write(f'{int(gauge_name)}\n')

In [6]:
def _write_basin_list(list_path, gauge_ids):
    """Write one gauge id per line to ``list_path``, replacing any existing file."""
    with open(list_path, 'w') as the_file:
        the_file.writelines(f'{gauge_name}\n' for gauge_name in gauge_ids)


# Shuffle a sorted copy with a dedicated, seeded generator instead of calling
# random.shuffle(basins): the in-place shuffle on the global RNG produced a
# different split every time this cell was re-run (and depended on the order
# in which cells had been executed). This version is idempotent and leaves
# `basins` untouched.
shuffled_basins = sorted(basins)
random.Random(42).shuffle(shuffled_basins)

# 80 % train / 10 % validation / remainder test; the boundaries are computed
# once so the three slices cannot drift apart.
gauge_size = len(shuffled_basins)
train_end = int(gauge_size * 0.8)
val_end = train_end + int(gauge_size * 0.1)

train_gauges = shuffled_basins[:train_end]
val_gauges = shuffled_basins[train_end:val_end]
test_gauges = shuffled_basins[val_end:]

_write_basin_list('./basins_train.txt', train_gauges)
_write_basin_list('./basins_val.txt', val_gauges)
_write_basin_list('./basins_test.txt', test_gauges)


### Run for different configs


#### ERA5 precipitation

In [None]:
# Re-run file_rewriter with the ERA5 temperature and precipitation predictors
# before launching the matching configuration.
file_rewriter(q_pathes=glob.glob('../geo_data/great_db/nc_all_q/*.nc'),
              ts_dir=ts_dir,
              hydro_target='q_cms_s',
              meteo_predictors=['t_max_e5', 't_min_e5', 'prcp_e5'])
# The run is only started on a CUDA machine; report the skip explicitly so a
# CPU-only execution of this cell is not mistaken for a completed run.
if torch.cuda.is_available():
    start_run(config_file=Path("./configs/era5_prcp_qms.yml"))
else:
    print('CUDA is not available - skipped start_run for ./configs/era5_prcp_qms.yml')

#### ERA5-Land precipitation

In [None]:
# Re-run file_rewriter with the ERA5-Land temperature and precipitation
# predictors before launching the matching configuration.
file_rewriter(q_pathes=glob.glob('../geo_data/great_db/nc_all_q/*.nc'),
              ts_dir=ts_dir,
              hydro_target='q_cms_s',
              meteo_predictors=['t_max_e5l', 't_min_e5l', 'prcp_e5l'])
# The run is only started on a CUDA machine; report the skip explicitly so a
# CPU-only execution of this cell is not mistaken for a completed run.
if torch.cuda.is_available():
    start_run(config_file=Path("./configs/era5_land_prcp_qms.yml"))
else:
    print('CUDA is not available - skipped start_run for ./configs/era5_land_prcp_qms.yml')

#### GPCP precipitation

In [None]:
# gpcp
file_rewriter(q_pathes=glob.glob('../geo_data/great_db/nc_all_q/*.nc'),
              ts_dir=ts_dir,
              hydro_target='q_cms_s',
              meteo_predictors=['t_max_e5', 't_min_e5', 'prcp_gpcp'])
if torch.cuda.is_available():
    start_run(config_file=Path("./configs/gpcp_prcp_qms.yml"))

#### IMERG precipitation

In [None]:
# Re-run file_rewriter with ERA5 temperatures and IMERG precipitation before
# launching the matching configuration.
file_rewriter(q_pathes=glob.glob('../geo_data/great_db/nc_all_q/*.nc'),
              ts_dir=ts_dir,
              hydro_target='q_cms_s',
              meteo_predictors=['t_max_e5', 't_min_e5', 'prcp_imerg'])
# The run is only started on a CUDA machine; report the skip explicitly so a
# CPU-only execution of this cell is not mistaken for a completed run.
if torch.cuda.is_available():
    start_run(config_file=Path("./configs/imerg_prcp_qms.yml"))
else:
    print('CUDA is not available - skipped start_run for ./configs/imerg_prcp_qms.yml')

#### MSWEP precipitation

In [None]:
# Re-run file_rewriter with ERA5 temperatures and MSWEP precipitation before
# launching the matching configuration.
file_rewriter(q_pathes=glob.glob('../geo_data/great_db/nc_all_q/*.nc'),
              ts_dir=ts_dir,
              hydro_target='q_cms_s',
              meteo_predictors=['t_max_e5', 't_min_e5', 'prcp_mswep'])
# The run is only started on a CUDA machine; report the skip explicitly so a
# CPU-only execution of this cell is not mistaken for a completed run.
if torch.cuda.is_available():
    start_run(config_file=Path("./configs/mswep_prcp_qms.yml"))
else:
    print('CUDA is not available - skipped start_run for ./configs/mswep_prcp_qms.yml')