In [1]:
# standard imports
from pathlib import Path
from ruamel.yaml import YAML
from os import path
import pickle as pkl
import pandas as pd
from itertools import chain
import numpy as np
import spotpy

In [2]:
# CAMELS imports
import model.camels_utilities as camels
from optimizer.optimizer import spotpy_setup

In [3]:
# Locate the config.yml of every experiment stored below the run directory
run_dir = Path('/home/gsnearing/projects/lstm_based_hydrology/extreme_year_runs/')
# rglob('config.yml') performs the same recursive search as glob('**/config.yml')
config_files = [config_path for config_path in run_dir.rglob('config.yml')]
n_config_files = len(config_files)
print(f'There are {n_config_files} experiments.')

There are 192 experiments.


In [4]:
# Load the configuration of every experiment found in `config_files`.
# A single safe-mode parser is shared by all files; duplicate keys inside a
# config are tolerated instead of raising an error.
yaml = YAML(typ="safe")
yaml.allow_duplicate_keys = True

# No filtering is applied here: every config file that parses is kept.
configs = []
for file in config_files:
    with file.open('r') as fp:
        cfg = yaml.load(fp)
    configs.append(cfg)
print(f'There are {len(configs)} experiments.')

There are 192 experiments.


In [5]:
# Build, for every basin, a one-column table ('train_dates') holding each day
# covered by that basin's (start, end) date pairs.
cfg = configs[0]  # the dates file is taken from the first experiment's config

# NOTE(review): unpickling can execute arbitrary code; only open trusted files.
with open(cfg['train_dates_file'], 'rb') as f:
    train_dates = pkl.load(f)

basins = list(train_dates['start_dates'].keys())

obj_fun_dates = {}
for basin in basins:
    period_starts = train_dates['start_dates'][basin]
    period_ends = train_dates['end_dates'][basin]

    # Expand each (start, end) pair into its daily timestamps and concatenate
    days = []
    for period_start, period_end in zip(period_starts, period_ends):
        days.extend(pd.date_range(period_start, period_end))

    obj_fun_dates[basin] = pd.DataFrame(days, columns=('train_dates',))
    

In [6]:
# Loop through basins
basin = '01054200'

# Load data
mask_dates = obj_fun_dates[basin]['train_dates']
attributes = camels.load_basin_attributes(basin)
forcings, area = camels.load_forcings(basin)
observations = camels.load_usgs(basin, area)

In [7]:
# Collect the inputs of the spotpy setup object, then instantiate it
optimizer_inputs = dict(
    forcings=forcings,
    observations=observations['QObs'],
    latitude=attributes['gauge_lat'],
    elevation=attributes['elev_mean'],
    mask_dates=mask_dates,
)
optimizer = spotpy_setup(**optimizer_inputs)

In [8]:
# Create the SCE-UA sampler. Results are kept in memory ('ram') and the
# simulations are stored with them (save_sim=True; False was the noted alternative).
sampler_options = {'dbname': 'SCE', 'dbformat': 'ram', 'save_sim': True}
sampler = spotpy.algorithms.sceua(optimizer, **sampler_options)

# Small budget used for this run; the inline notes of the original cell
# mention 1e5 model runs and ngs=20 as the alternative settings.
max_model_runs = 13
sampler.sample(max_model_runs, ngs=2)

Initializing the  Shuffled Complex Evolution (SCE-UA) algorithm  with  13  repetitions
The objective function will be minimized
Starting burn-in sampling...
Initialize database...
['csv', 'hdf5', 'ram', 'sql', 'custom', 'noData']
4 of 13, minimal objective function=4.36836, time remaining: 00:00:03
10 of 13, minimal objective function=4.36836, time remaining: 00:00:01
Stopping samplig. Maximum number of repetitions reached already during burn-in
*** OPTIMIZATION SEARCH TERMINATED BECAUSE THE LIMIT
ON THE MAXIMUM NUMBER OF TRIALS 
13
HAS BEEN EXCEEDED.  SEARCH WAS STOPPED AT TRIAL NUMBER:
13
OF THE INITIAL LOOP!
Burn-in sampling completed...
Starting Complex Evolution...
SEARCH WAS STOPPED AT TRIAL NUMBER: 13
NUMBER OF DISCARDED TRIALS: 0
NORMALIZED GEOMETRIC RANGE = 0.986165
THE BEST POINT HAS IMPROVED IN LAST 100 LOOPS BY 100000.000000 PERCENT

*** Final SPOTPY summary ***
Total Duration: 5.23 seconds
Total Repetitions: 13
Minimal objective value: 4.36836
Corresponding parameter setti

In [15]:
# Find best simulation
# Fetch the sampler output in this cell so it does not depend on `results`
# having been created by a cell that appears further down the notebook.
results = sampler.getdata()

likes = results['like1']
print(likes)

# The objective function is minimized, so the best run is the one with the
# smallest value; nanmin skips NaN entries.
best = np.nanmin(likes)
index = np.where(likes == best)[0]  # positions of all runs that reach the best value
index[0]

[4.36835586 5.34913189 4.62027221 6.39251344 5.12697876 5.34913189
 5.34913189 5.34913189 5.34913189 5.34913189 5.34913189 5.34913189
 5.58957517]


0

In [22]:
# Select the parameter columns (names starting with 'par') of the sampler output.
# `results` is fetched here so the cell runs on a fresh kernel, without relying
# on a cell that appears later in the notebook.
results = sampler.getdata()
fields = [word for word in results.dtype.names if word.startswith('par')]
# NOTE: despite its name, best_parms holds the parameters of every stored run,
# not only those of the best one.
best_parms = results[fields]
best_parms

array([(1.16022359, 2.7031638 , 0.45475788, 0.06787376,  366.66624571, -0.99259693, 0.18732558, 0.17935106, 0.07122309, 0.06602367, 0.18037517, 0.23402135, 0.57095016, 0.61169763, 0.76781638, 0.61986712, 0.88341877, 0.70695202, 0.94820381,  99.97988983, 775.22891475, 633.79934864, 681.27171281, 804.03315759, 0.53861369, 0.0092441 , 0.11671909, 113.78156994, 4.92675969, 0.73539117, 1.48495111,  12.26837703),
       (4.90021502, 1.0421147 , 0.18824527, 0.39435067,  582.11933951,  1.18881644, 0.16264794, 0.18828192, 0.23197404, 0.25714655, 0.13387491, 0.4288371 , 0.51736227, 0.63289542, 0.74999124, 0.74133429, 0.85624904, 0.71906137, 0.94173451,  70.87543186, 389.753748  ,  64.56914166, 223.52508308, 437.96875444, 0.17590331, 0.00054281, 0.11982938, 166.06872175, 5.79146281, 0.70562961, 2.94853476, 144.82229658),
       (1.1754891 , 2.85612718, 0.07667825, 0.15045075, 2443.51321011,  0.0051255 , 0.15281245, 0.12271157, 0.08320361, 0.00211879, 0.14905018, 0.40561795, 0.52549614, 0.65443225

In [23]:
# Get parameters and simulation from optimized model
results = sampler.getdata()
# maximize=False: the objective function is minimized
best_parameters = spotpy.analyser.get_best_parameterset(results, maximize=False)

# Drop only the leading 'par' prefix of each column name, in a single rename.
# Splitting on 'par' would also cut names that contain 'par' further inside.
best_parameters_df = pd.DataFrame(best_parameters).rename(
    columns=lambda key: key[len('par'):] if key.startswith('par') else key
)

# Row 0 of the frame as a Series indexed by parameter name
best_parameters_series = best_parameters_df.transpose()[0]

Best parameter set:
scf=1.160223591074816, mfmax=2.703163800880625, mfmin=0.45475788043588267, uadj=0.06787376295383167, si=366.66624570642574, pxtemp=-0.9925969292480485, nmf=0.18732557773974004, tipm=0.17935105619552677, plwhc=0.07122308970936511, daygm=0.06602366895908524, adc2=0.1803751734056389, adc3=0.23402134917417977, adc4=0.5709501581770325, adc5=0.6116976333048607, adc6=0.7678163800080602, adc7=0.6198671244724203, adc8=0.8834187674189942, adc9=0.7069520164393733, adc10=0.948203809019917, uztwm=99.97988983301963, uzfwm=775.228914750272, lztwm=633.7993486411198, lzfpm=681.2717128070358, lzfsm=804.0331575867449, uzk=0.5386136929528124, lzpk=0.009244100774687278, lzsk=0.1167190926539842, zperc=113.78156994360594, rexp=4.926759687358452, pfree=0.7353911733087057, unit_shape=1.4849511141297809, unit_scale=12.268377030191376


In [24]:
# Pull the stored simulation of the best run out of the sampler output
# Simulation columns are the fields whose name begins with 'sim'
fields = [name for name in results.dtype.names if name[:3] == 'sim']
sim_results = results[fields]
sim_results_df = pd.DataFrame(sim_results)

# Row of the best run (first position found in the earlier best-run search)
best_row = index[0]
sim_results_series_values = sim_results_df.iloc[best_row].values

# Re-attach the index of the forcings to the simulated values
sim_results_series = pd.Series(sim_results_series_values, index=forcings.index)
sim_results_series

Date
1980-01-01    2.339503
1980-01-02    4.100363
1980-01-03    5.534122
1980-01-04    6.798855
1980-01-05    7.905566
                ...   
2014-12-27    3.850482
2014-12-28    3.905621
2014-12-29    3.949840
2014-12-30    3.977303
2014-12-31    3.992227
Length: 12784, dtype: float64

In [25]:
# Re-run the model with the best parameter set
# (alternative parameter source, currently unused: camels.load_sacsma_parameters(basin))
parms = best_parameters_series

# Pick the values in the order given by optimizer.optimized_parameter_names
ordered_names = optimizer.optimized_parameter_names
parm_vector = parms.loc[ordered_names].values

sim = optimizer.simulation(parm_vector)
sim

Date
1980-01-01    2.339503
1980-01-02    4.100363
1980-01-03    5.534122
1980-01-04    6.798855
1980-01-05    7.905566
                ...   
2014-12-27    3.850482
2014-12-28    3.905621
2014-12-29    3.949840
2014-12-30    3.977303
2014-12-31    3.992227
Length: 12784, dtype: float32

In [26]:
# Objective value of the simulation stored by the sampler for the best run
evaluation = optimizer.evaluation()
rmse = optimizer.objectivefunction(sim_results_series, evaluation)
rmse

4.368355861975946

In [27]:
# Objective value of the simulation re-run from the best parameter set
evaluation = optimizer.evaluation()
rmse = optimizer.objectivefunction(sim, evaluation)
rmse

4.368355861975946