# Papermill prepare_forcing_wflow_sbm.ipynb

In [1]:
from glob import glob
from pathlib import Path

import numpy as np
import papermill
from concurrent.futures import ThreadPoolExecutor

# Set Paths

In [2]:
# Snellius paths: every working directory is derived from the project ROOT.
ROOT = '/gpfs/work1/0/wtrcycle/users/jaerts/camels_uk/'
AUXDATA = Path(ROOT) / 'aux_data'
FORCING = Path(ROOT) / 'forcing'
MODELS = Path(ROOT) / 'wflow' / 'data'
# Directory the executed (output) notebooks are written to
NOTEBOOKS = Path(ROOT) / 'papermill_notebooks'

# Config

In [3]:
# Basin IDs for wflow_sbm: the last path component of every entry found
# directly under MODELS, in sorted order.
basin_dirs = glob(f'{MODELS}/*')
basin_ids = sorted(entry.rsplit('/', 1)[-1] for entry in basin_dirs)

# Number of available cores (limits how many notebooks run at the same time)
cores_available = 10

# Papermill

In [104]:
# When True, tasks are only collected here and executed later in parallel;
# when False, each notebook is executed right away, one after the other.
run_in_parallel = True
tasklist = []

# The same template notebook is executed for every basin.
input_notebook = 'prepare_forcing_wflow_sbm.ipynb'

for basin_id in basin_ids:
    # One output notebook per basin; the basin ID is injected as a
    # papermill parameter.
    output_notebook = f'{NOTEBOOKS}/preprocess_wflow_{basin_id}.ipynb'
    parameters = dict(basin_id=basin_id)

    if not run_in_parallel:
        papermill.execute_notebook(input_notebook, output_notebook, parameters)
        continue

    tasklist.append([input_notebook, output_notebook, parameters])



In [99]:
if run_in_parallel:
    # Parallel execution works by submitting one function call per task
    # to a thread pool.
    def execute_task(args):
        """Run papermill for a single task.

        Parameters
        ----------
        args : sequence
            ``[input_notebook, output_notebook, parameters]``, i.e. one
            entry of ``tasklist``.
        """
        input_notebook, output_notebook, parameters = args
        papermill.execute_notebook(input_notebook, output_notebook, parameters)

    # (output notebook, exception) for every task that raised.
    failed_tasks = []

    # A single pool capped at `cores_available` workers runs every task in
    # `tasklist` (including the first ones) while never executing more than
    # `cores_available` notebooks at once. A free worker immediately picks
    # up the next task instead of waiting for a whole batch to finish.
    with ThreadPoolExecutor(max_workers=cores_available) as executor:
        futures = [(executor.submit(execute_task, task), task) for task in tasklist]

        for future, task in futures:
            # exception() blocks until the task is done and hands back the
            # error (if any), so a failing notebook is recorded instead of
            # being silently dropped, and the remaining tasks keep running.
            error = future.exception()
            if error is not None:
                failed_tasks.append((task[1], error))

    # Make failures visible in the cell output.
    for failed_notebook, error in failed_tasks:
        print(f'FAILED {failed_notebook}: {error!r}')
    print(f'{len(tasklist) - len(failed_tasks)}/{len(tasklist)} notebooks executed successfully')