In [1]:
import pickle
from pathlib import Path
from tqdm.notebook import tqdm
import pandas as pd

import torch
from neuralhydrology.evaluation import metrics
from neuralhydrology.nh_run import start_run, eval_run
from neuralhydrology.utils.config import Config
from neuralhydrology.training.basetrainer import BaseTrainer
from neuralhydrology.nh_run import start_run, start_training, start_evaluation

import shutil

In [2]:
def _create_folder_structure(self):
    """Set up the run directory and its sub-folders on ``self.cfg``.

    Intended to be bound as a trainer method (``self`` must expose ``cfg`` and,
    when continuing training, ``_epoch``).

    * Continue-training mode: the new run directory is a
      ``continue_training_from_epoch<NNN>`` sub-folder of the current run
      directory, and the current run directory is remembered as
      ``cfg.base_run_dir``.
    * Otherwise: the run folder is named after ``cfg.idx`` and placed inside
      ``cfg.run_dir``, or inside ``<cwd>/runs`` when ``cfg.run_dir`` is None.

    Afterwards ``cfg.train_dir`` (``train_data``) is created, and, if
    ``cfg.log_n_figures`` is not None, ``cfg.img_log_dir`` (``img_log``) too.

    Raises:
        RuntimeError: if the resolved run directory already exists.
    """
    cfg = self.cfg

    if not cfg.is_continue_training:
        # fresh run: the folder carries the value of cfg.idx as its name
        subfolder = f'{cfg.idx}'
        # without a configured run directory, fall back to 'runs' in the current working dir
        parent = Path().cwd() / "runs" if cfg.run_dir is None else cfg.run_dir
        cfg.run_dir = parent / subfolder
    else:
        # nested inside the base run; keep the base dir around for weight loading
        subfolder = f"continue_training_from_epoch{self._epoch:03d}"
        cfg.base_run_dir = cfg.run_dir
        cfg.run_dir = cfg.run_dir / subfolder

    # never write into a run folder that is already there
    if cfg.run_dir.is_dir():
        raise RuntimeError(f"There is already a folder at {cfg.run_dir}")

    cfg.train_dir = cfg.run_dir / "train_data"
    cfg.train_dir.mkdir(parents=True, exist_ok=True)

    # the image log folder is only needed when figure logging is configured
    if cfg.log_n_figures is not None:
        cfg.img_log_dir = cfg.run_dir / "img_log"
        cfg.img_log_dir.mkdir(parents=True, exist_ok=True)


In [3]:
# NOTE(review): unpickling can execute arbitrary code; only load 'data_details.pkl' from a trusted source.
df = pd.read_pickle('data_details.pkl')

# Keep the index labels of rows where 'lack of data' and 'other dams' are False
# and 'network exist' and 'gauge available' are True (assumes boolean columns).
idxs = df.loc[
    ~df['lack of data']
    & df['network exist']
    & ~df['other dams']
    & df['gauge available']
].index

# Swap in the notebook's folder-creation routine on the trainer class.
BaseTrainer._create_folder_structure = _create_folder_structure

# train and validate LSTM

In [None]:
# %%capture
for idx in tqdm(idxs):
    # Start from a clean slate: remove output left by a previous run for this index.
    run_dir = Path(f"runs/{idx}")
    if run_dir.exists():
        shutil.rmtree(run_dir)

    # set config: CPU device, and one shared basin list file for train / validation / test
    basin_file = Path(f'./CAMELSJP/dam_idxs/{idx}.txt')
    cfg = Config(Path('basin.yml'))
    cfg.device = "cpu"
    cfg.idx = idx
    cfg._cfg['train_basin_file'] = basin_file
    cfg._cfg['validation_basin_file'] = basin_file
    cfg._cfg['test_basin_file'] = basin_file

    # train
    start_training(cfg)

    # evaluate on the test period, reading the run from runs/<idx>
    eval_run(run_dir=run_dir, period="test")