In [1]:
import os
import pandas as pd
import pickle as pkl
import pathlib
import shutil

# Directory Structure

In [2]:
# Path to working directory.
working_dir = pathlib.Path('.')

# Other useful directories.
config_dir = working_dir / 'configs'

# Template config file. This must be in working directory to avoid being deleted.
template_config = working_dir / 'template_config.yml'

# Basin list files.
basin_list_dir = working_dir / 'basin_lists'
full_basin_list_file = basin_list_dir / 'caravan_basins.txt'

# Train/test split files.
train_periods_file = working_dir / 'basin_lists' / 'train_periods.pkl'
test_periods_file = working_dir / 'basin_lists' / 'test_periods.pkl'

In [3]:
# Ensure that the  config directory exists and starts empty.
if os.path.isdir(config_dir):
    shutil.rmtree(config_dir)
os.mkdir(config_dir)

# Global Experiment Parameters

In [4]:
# Create per-basin train/test dates files.
with open(full_basin_list_file, 'rt') as fp:
    basins = [basin.strip() for basin in fp.readlines()]
print(f'There are {len(basins)} basins.')

FileNotFoundError: [Errno 2] No such file or directory: 'basin_lists/caravan_basins.txt'

In [None]:
# Number of ensemble members. Each ensemble is initialized with an explicit seed.
n_ensemble = 1
seeds = list(range(n_ensemble))

# Number of PUB k-fold splits (from create_basin_list.ipynb notebook).
pub_n_kfold = 7

In [None]:
# List of countries in Caravan.
countries = list(set([basin.split('_')[0] for basin in basins]))
countries

['camelscl', 'camelsbr', 'lamah', 'camelsgb', 'camelsaus', 'camels']

In [None]:
train_periods = {basin: {
    'start_dates': [pd.Timestamp('1980-01-01'), pd.Timestamp('2000-01-01')],
    'end_dates': [pd.Timestamp('1989-01-01'), pd.Timestamp('2009-01-01')],
    } for basin in basins}

test_periods = {basin: {
    'start_dates': [pd.Timestamp('1990-01-01'), pd.Timestamp('2010-01-01')],
    'end_dates': [pd.Timestamp('1999-01-01'), pd.Timestamp('2020-12-31')],
    } for basin in basins}

with open(train_periods_file, 'wb') as fp:
    pkl.dump(train_periods, fp)

with open(test_periods_file, 'wb') as fp:
    pkl.dump(test_periods, fp)

# Full Model Run

In [None]:
count = 0
for ens in range(n_ensemble):

    # Count the number of files created.
    count += 1
    
    # Read the master config for simulation models.
    with open(template_config, 'r') as f:
        filedata = f.read()

    # Change the run directory.
    filedata = filedata.replace('run_dir: runs/dev', 'run_dir: runs/full_runs')
    
    # Change the random seed and experiment name.
    filedata = filedata.replace('seed:', f'seed: {seeds[ens]}')
    filedata = filedata.replace('experiment_name: template', f'experiment_name: {seeds[ens]}')
    
    # Save the resulting config file.
    new_config = config_dir / f'full_run_{seeds[ens]}.yml'
    with open(new_config, 'w') as f:
        f.write(filedata)

print(f'{count} config files were created.')

1 config files were created.


# Country-Specific Runs

In [None]:
count = 0
for ens in range(n_ensemble):
    for country in countries:

        # Count the number of files created.
        count += 1
        
        # Read the master config for simulation models.
        with open(template_config, 'r') as f:
            filedata = f.read()

        # Change the run directory.
        filedata = filedata.replace('run_dir: runs/dev', 'run_dir: runs/country_runs')
        
        # Change the random seed and experiment name.
        filedata = filedata.replace('seed:', f'seed: {seeds[ens]}')
        filedata = filedata.replace('experiment_name: template', f'experiment_name: {country}_{seeds[ens]}')
        
        # Change the random seed and experiment name.
        filedata = filedata.replace('train_basin_file: basin_lists/caravan_basins.txt', 
                                    f'train_basin_file: basin_lists/countries/caravan_{country}_basins.txt')
        
        # Save the resulting config file.
        new_config = config_dir / f'{country}_{seeds[ens]}.yml'
        with open(new_config, 'w') as f:
            f.write(filedata)

print(f'{count} config files were created.')

7 config files were created.


In [None]:
count = 0
for ens in range(n_ensemble):
    for country in countries:

        # Count the number of files created.
        count += 1
        
        # Read the master config for simulation models.
        with open(template_config, 'r') as f:
            filedata = f.read()

        # Change the run directory.
        filedata = filedata.replace('run_dir: runs/dev', 'run_dir: runs/country_runs')
        
        # Change the random seed and experiment name.
        filedata = filedata.replace('seed:', f'seed: {seeds[ens]}')
        filedata = filedata.replace('experiment_name: template', f'experiment_name: except_{country}_{seeds[ens]}')
        
        # Change the random seed and experiment name.
        filedata = filedata.replace('train_basin_file: basin_lists/caravan_basins.txt', 
                                    f'train_basin_file: basin_lists/countries/caravan_except_{country}_basins.txt')
        
        # Save the resulting config file.
        new_config = config_dir / f'except_{country}_{seeds[ens]}.yml'
        with open(new_config, 'w') as f:
            f.write(filedata)

print(f'{count} config files were created.')

7 config files were created.


# PUB Runs

In [None]:
count = 0
for ens in range(n_ensemble):
    for kfold in range(pub_n_kfold):

        # Count the number of files created.
        count += 1
        
        # Read the master config for simulation models.
        with open(template_config, 'r') as f:
            filedata = f.read()

        # Change the run directory.
        filedata = filedata.replace('run_dir: runs/dev', 'run_dir: runs/random_runs')
        
        # Change the random seed and experiment name.
        filedata = filedata.replace('seed:', f'seed: {seeds[ens]}')
        filedata = filedata.replace('experiment_name: template', f'experiment_name: pub_{kfold}_{seeds[ens]}')
        
        # Change the random seed and experiment name.
        filedata = filedata.replace('train_basin_file: basin_lists/caravan_basins.txt', 
                                    f'train_basin_file: basin_lists/pub/train_{kfold}_{ens}.txt')
                                    
        # Save the resulting config file.
        new_config = config_dir / f'pub_{kfold}_{seeds[ens]}.yml'

        with open(new_config, 'w') as f:
            f.write(filedata)

print(f'{count} config files were created.')

7 config files were created.
