In [11]:
# standard imports
from pathlib import Path
from ruamel.yaml import YAML
from os import path
import pickle as pkl
import pandas as pd
from itertools import chain
import numpy as np
import spotpy
# import multiprocessing
# from joblib import Parallel, delayed

ModuleNotFoundError: No module named 'joblib'

In [2]:
# CAMELS imports
import model.camels_utilities as camels
from optimizer.optimizer import spotpy_setup

In [3]:
# run directory
run_dir = Path('/home/gsnearing/projects/lstm_based_hydrology/extreme_year_runs/')
out_dir = Path('./results/')

In [4]:
# load config files
config_files = list(run_dir.glob('**/config.yml'))
print(f'There are {len(config_files)} experiments.')

There are 192 experiments.


In [7]:
# optimizer hypers
max_model_runs = 1e3 # 1e5
dds_trials = 1e2

In [10]:
# loop over experiments
for f, config_file in enumerate(config_files):
    
    # read config file
    with config_file.open('r') as fp:
        yaml = YAML(typ="safe")
        yaml.allow_duplicate_keys = True
        cfg = yaml.load(fp)  
    
    # extract training dates
    with open(cfg['train_dates_file'], 'rb') as f:
        train_dates = pkl.load(f)

    # list all basins in this experiment    
    basins = list(train_dates['start_dates'].keys())
    assert len(basins) == 531

    # loop over basins
    for basin in basins:

        # training dates for this basin
        sd = train_dates['start_dates'][basin]
        ed = train_dates['end_dates'][basin]
        obj_fun_dates = pd.DataFrame(list(chain.from_iterable(pd.date_range(sdi, edi) for sdi, edi in zip(sd, ed))), columns = ('train_dates',))

        # load data
        mask_dates = obj_fun_dates['train_dates']
        attributes = camels.load_basin_attributes(basin)
        forcings, area = camels.load_forcings(basin)
        observations = camels.load_usgs(basin, area)

        # set up optimizer
        optimizer = spotpy_setup(forcings=forcings,
                                 observations=observations['QObs'],
                                 latitude=attributes['gauge_lat'],
                                 elevation=attributes['elev_mean'],
                                 mask_dates=mask_dates)

        # configure optimizer hyperparameters
        sampler=spotpy.algorithms.sceua(optimizer, 
                                        dbname='SCE', 
                                        dbformat='ram',
                                        parallel='seq',
                                        save_sim=False)

#          # configure optimizer hyperparameters
#         sampler=spotpy.algorithms.dds(optimizer, 
#                                       dbname='DDS', 
#                                       dbformat='ram',
#                                       parallel='seq',
#                                       save_sim=False)

        # run it
#         sampler.sample(repetitions=int(max_model_runs), ngs=len(optimizer.optimized_parameter_names))
        sampler.sample(repetitions=int(max_model_runs), trials=int(dds_trials))

        # get best parameters
        results = sampler.getdata()
        best_parameters = spotpy.analyser.get_best_parameterset(results,maximize=False)
        best_parameters_df = pd.DataFrame(best_parameters)
        for key in best_parameters_df.keys():
            new_key = key.split('par')[-1]
            best_parameters_df = best_parameters_df.rename(columns={key: new_key})
        best_parameters_series = best_parameters_df.transpose()[0]

        # get simulation with best parameters
        parm_vector = best_parameters_series.loc[optimizer.optimized_parameter_names].values
        sim = optimizer.simulation(parm_vector)

        # save output
        outfile = out_dir / f"{str(config_file).split('/')[-2][:-10]}_{basin}.pkl"
        with open(outfile, 'wb') as f:
            pkl.dump([best_parameters_series, sim], f)


Initializing the  Dynamically Dimensioned Search (DDS) algorithm  with  1000  repetitions
The objective function will be minimized
Starting the DDS algotrithm with 1000 repetitions...
Finding best starting point for trial 1 using 5 random samples.
Initialize database...
['csv', 'hdf5', 'ram', 'sql', 'custom', 'noData']
6 of 1000, maximal objective function=3.54478, time remaining: 00:05:14
12 of 1000, maximal objective function=3.54478, time remaining: 00:05:32
18 of 1000, maximal objective function=3.54478, time remaining: 00:05:39
24 of 1000, maximal objective function=3.54478, time remaining: 00:05:40
30 of 1000, maximal objective function=3.54478, time remaining: 00:05:40
36 of 1000, maximal objective function=3.54478, time remaining: 00:05:39
42 of 1000, maximal objective function=3.54478, time remaining: 00:05:38
48 of 1000, maximal objective function=3.54478, time remaining: 00:05:37
54 of 1000, maximal objective function=3.54478, time remaining: 00:05:35
60 of 1000, maximal obj

KeyboardInterrupt: 