# Imports & Paths

In [1]:
import glob
import numpy as np
import os
import pathlib
import re
import shutil
from typing import Any, Mapping, Sequence

In [2]:
# Path to working directory.
working_dir = pathlib.Path('.')

# Paths to run directories.
run_dir = working_dir / 'runs/'
hypertuning_dir = run_dir / 'pub' / 'hyper_tuning'

# Start every sweep from an empty directory.
# WARNING: this deletes everything already stored under `hypertuning_dir`.
if hypertuning_dir.is_dir():
    shutil.rmtree(hypertuning_dir)
# `parents=True` also creates `runs/` and `runs/pub/` when they are missing,
# so the cell works on a fresh checkout instead of raising FileNotFoundError.
hypertuning_dir.mkdir(parents=True)

# Experiment Parameters

In [7]:
# Hyperparameter sweep definition. There is no automatic configuration:
# edit the candidate values below by hand to change the sweep.
assimilation_windows = [
    1, 3, 5, 20,
]
epochs = [
    5, 10, 100, 1000,
]
histories = [
    1, 5, 20,
]
learning_rates = [
    0.0001, 0.001, 0.01, 0.05, 0.1,
]
learning_rate_drop_factors = [
    0.1, 0.5, 0.9,
]

# Each entry is one list of assimilation targets to try. Only the first
# entry is enabled; uncomment further entries to add them to the sweep.
asssimilation_targets_lists = [
    ['c_n'],
    # ['h_n'],
    # ['x_d'],
    # ['x_s'],
    # ['c_n', 'h_n'],
    # ['c_n', 'x_d'],
    # ['x_d', 'x_s'],
    # ['c_n', 'h_n', 'x_d', 'x_s']
]

In [8]:
# Default data-assimilation (DA) hyperparameters.
# Not every entry is consumed: `create_assim_dir` only reads
# `learning_rate_epoch_drop`, `loss`, `model_dropout`, `optimizer`,
# `predict_last_n`, `regularization` and `seq_length` from this mapping.
# The remaining entries are either supplied per run by the sweep or
# hard-coded when the run's config file is written.
assimilation_config = {
    'assimilation_lead_time': 0,
    'assimilation_targets': ['c_n'],
    'assimilation_window': 5,
    'epochs': 1000,
    'history': 20,
    'learning_rate': 0.05,
    'learning_rate_drop_factor': 0.1,
    'learning_rate_epoch_drop': 1001,
    'loss': 'MSE',
    'model_dropout': 0.,
    'optimizer': 'Adam',
    'predict_last_n': 1,
    'regularization': [],
    'seq_length': 365,
    'target_variables': ['QObs(mm/d)'],
}

## Create Assimilation Test Directories

In [9]:
def create_assim_dir(
    sim_dir: pathlib.Path,
    assim_dir: pathlib.Path,
    assimilation_config: Mapping[str, Any],
    epoch: int,
    history: int,
    drop_factor: float,
    learning_rate: float,
    window: int,
    targets: Sequence[str],
) -> None:
    """Copy a simulation run directory and append an assimilation section to its config.

    `assim_dir` is deleted first if it already exists, then recreated as a full
    copy of `sim_dir`. An `assimilation_config:` block is then appended to the
    copied `config.yml`.

    Args:
        sim_dir: Directory to copy from. It must contain a `config.yml` file.
        assim_dir: Destination directory. Any existing directory at this path
            is removed before copying.
        assimilation_config: Mapping that must provide the keys
            `learning_rate_epoch_drop`, `loss`, `model_dropout`, `optimizer`,
            `predict_last_n`, `regularization` and `seq_length`. No other key
            is read.
        epoch: Value written as `epochs`.
        history: Value written as `history`.
        drop_factor: Value written as `learning_rate_drop_factor`.
        learning_rate: Value written as `learning_rate`.
        window: Value written as `assimilation_window`.
        targets: Entries written as the `assimilation_targets` list.

    Raises:
        KeyError: If `assimilation_config` lacks one of the keys listed above.
        FileNotFoundError: If the copied directory has no `config.yml`.
    """
    # Always start from a fresh copy: an existing directory is replaced.
    if os.path.isdir(assim_dir):
        shutil.rmtree(assim_dir)
    shutil.copytree(sim_dir, assim_dir)
    print(f'Finished copying {assim_dir}')

    # Read the copied config file so the assimilation section can be appended.
    config_file = os.path.join(assim_dir, 'config.yml')
    with open(config_file, "r") as file:
        yaml_file_data = file.read()

    # Without a trailing newline the new top-level key would be glued onto the
    # last existing line and produce invalid YAML.
    if yaml_file_data and not yaml_file_data.endswith('\n'):
        yaml_file_data += '\n'

    section_lines = [
        'assimilation_config:',
        # NOTE(review): the lead time is hard-coded to 1 and is not taken from
        # `assimilation_config` -- confirm this is intended.
        '  assimilation_lead_time: 1',
        '  assimilation_targets:',
        *(f'  - {target}' for target in targets),
        f'  assimilation_window: {window}',
        f'  epochs: {epoch}',
        f'  history: {history}',
        f'  learning_rate: {learning_rate}',
        f'  learning_rate_drop_factor: {drop_factor}',
        f'  learning_rate_epoch_drop: {assimilation_config["learning_rate_epoch_drop"]}',
        f'  loss: {assimilation_config["loss"]}',
        f'  model_dropout: {assimilation_config["model_dropout"]}',
        f'  optimizer: {assimilation_config["optimizer"]}',
        f'  predict_last_n: {assimilation_config["predict_last_n"]}',
        f'  regularization: {assimilation_config["regularization"]}',
        f'  seq_length: {assimilation_config["seq_length"]}',
        # NOTE(review): the target variable is also hard-coded rather than
        # read from `assimilation_config` -- confirm this is intended.
        '  target_variables:',
        '  - QObs(mm/d)',
    ]
    yaml_file_data += '\n'.join(section_lines) + '\n'

    # Save the modified config file.
    with open(config_file, "w") as file:
        file.write(yaml_file_data)

In [13]:
# Build one assimilation run directory per hyperparameter combination, each
# one starting as a copy of the trained simulation run below.
specific_sim_dir = './runs/pub/simulation/pub_simulation_kfold_0_seed_0_1304_022120'

count = 0
for epoch, history, drop, window, rate in (
    (e, h, d, w, r)
    for e in epochs
    for h in histories
    for d in learning_rate_drop_factors
    for w in assimilation_windows
    for r in learning_rates
):
    for t, targets in enumerate(asssimilation_targets_lists):
        # Combinations whose window * history exceeds 300 are left out of the sweep.
        if window * history <= 300:
            count = count + 1
            specific_assim_dir = hypertuning_dir / (
                f'window_{window}_epoch_{epoch}_history_{history}_drop_{drop}_rate_{rate}_targets_{t}'
            )
            create_assim_dir(
                sim_dir=specific_sim_dir,
                assim_dir=specific_assim_dir,
                assimilation_config=assimilation_config,
                epoch=epoch,
                history=history,
                drop_factor=drop,
                learning_rate=rate,
                window=window,
                targets=targets,
            )

print(f'There are {count} runs in the hyper sweep.')

There are 304 runs in the hyper sweep.
This will cost 38.0 GPU cyles and take approximately 12.666666666666666 hours.


In [None]:
# Quick & very conservative estimate of runtime.
# On my machine (Tesla V100), it takes about 20 minutes to run one experiment with 1000 epochs.
# Fewer epochs will make experiments faster.
n_gpus = 8
# Hours needed for a single experiment on one GPU (20 minutes).
hours_per_gpu = 1/3
# Runs are assumed to be spread evenly over the GPUs; the hour figure is
# rounded to one decimal because it is only a rough estimate.
print(f'This will cost {count/n_gpus} GPU cycles and take approximately {count*hours_per_gpu/n_gpus:.1f} hours.')