# Evaluation run of the calibrated wflow_sbm model

In [None]:
import os
import subprocess
import pandas as pd
import xarray as xr

from glob import glob
from pathlib import Path
from pathos.threading import ThreadPool as Pool

# Set Paths

In [None]:
# Base directory of the project and the folders derived from it
ROOT = Path('/gpfs/work1/0/wtrcycle/users/jaerts/camels_uk/')
MODELS = ROOT / 'wflow' / 'data'
RESULTS = ROOT / 'results'

# Julia executable used to launch the model runs
julia_path = '/gpfs/home6/jaerts/julia-1.7.3/bin/julia'

# Config

In [None]:
# Basin IDs listed in the wflow_sbm calibration overview (index column
# 'basin_id' of the CSV)
calibration_file = f"{RESULTS}/wflow_sbm/calibration_overview_wflow.csv"
df = pd.read_csv(calibration_file, index_col='basin_id')
basin_ids = df.index.tolist()

# Number of cores to use for the parallel model runs
cores_available = 120

# Sort basins by size

In [None]:
# Sort by basin size
def sort_basin_ids_by_size(basin_ids):
    """Return the basin IDs ordered by the size of their staticmaps file.

    Args:
        basin_ids: Basin identifiers; for each one the file
            ``{MODELS}/{basin_id}/staticmaps.nc`` is inspected.

    Returns:
        list: The basin IDs sorted by ascending file size in bytes.

    Raises:
        OSError: Propagated from ``os.path.getsize`` when a file is
            missing or inaccessible.
    """
    # File size (bytes) of each basin's static maps
    file_sizes = [
        os.path.getsize(f'{MODELS}/{basin_id}/staticmaps.nc')
        for basin_id in basin_ids
    ]

    # Let pandas do the ordering, smallest file first
    size_table = pd.DataFrame({'basin_id': basin_ids, 'size': file_sizes})
    ordered = size_table.sort_values('size')

    return ordered['basin_id'].to_list()

# Order the basins by ascending staticmaps.nc file size
basin_ids_sorted = sort_basin_ids_by_size(basin_ids)

# Model Run Functions

In [None]:
def wflow_runner_evaluation(julia_path, basin_id):
    """Run the wflow_sbm evaluation simulation for a single basin.

    Launches Julia as a child process with the basin's
    ``wflow_sbm_evaluation.toml`` and blocks until it exits. The child's
    stdout and stderr are discarded.

    Args:
        julia_path: Path to the Julia executable.
        basin_id: Basin identifier; selects the ``{MODELS}/{basin_id}/``
            folder that holds the config file.

    Returns:
        None. Start, success and failure are reported with ``print``.
    """
    print(f'Starting: {basin_id}')

    # Set config_file
    config_file = f'{MODELS}/{basin_id}/wflow_sbm_evaluation.toml'

    # Call wflow julia command line. The arguments are passed as a list
    # (no shell), so paths containing spaces or shell metacharacters are
    # handed to Julia unchanged.
    try:
        exit_code = subprocess.call(
            [str(julia_path), '-e', 'using Wflow; Wflow.run()', config_file],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.STDOUT,
        )
    except OSError as err:
        # E.g. the Julia executable does not exist; report it and carry on
        # so one bad launch does not abort the other runs.
        print(f'Failed: {basin_id} ({err})')
        return None

    # The model output is discarded, so the exit code is the only failure
    # signal available here.
    if exit_code != 0:
        print(f'Failed: {basin_id} (exit code {exit_code})')
        return None

    print(f'Finished: {basin_id}')
    return None
    
def parallel_run(julia_path, basin_ids, threads=cores_available):
    """Run the evaluation model for several basins in a thread pool.

    Args:
        julia_path: Either a single Julia executable path (used for every
            basin) or a sequence of paths, one per entry of ``basin_ids``.
        basin_ids: Basin identifiers to run.
        threads: Number of pool nodes; ``None`` uses the pool's default.

    Returns:
        None.
    """
    basin_ids = list(basin_ids)

    # Use the argument that was passed in (the original read a module-level
    # ``julia_paths`` instead). A single path is repeated for every basin.
    if isinstance(julia_path, (str, os.PathLike)):
        julia_paths = [julia_path] * len(basin_ids)
    else:
        julia_paths = list(julia_path)

    # Set number of threads (cores) used for parallel run
    pool = Pool() if threads is None else Pool(nodes=threads)

    # Run parallel models: one (julia path, basin id) pair per call
    pool.map(wflow_runner_evaluation, julia_paths, basin_ids)
    return

# Check if output exists

In [None]:
# Determine for every basin whether a complete evaluation output exists and
# keep only the basins that still have to be run.

# An output.csv with fewer rows than this is treated as incomplete
min_output_rows = 2000

basins = list(basin_ids_sorted)
exists = []

for basin_id in basins:
    sim_file = Path(f'{MODELS}/{basin_id}/evaluation/output.csv')

    # No output file at all: the run has not completed
    if not sim_file.is_file():
        exists.append(False)
        continue

    # Check if csv contains output; a zero-byte file (which makes pandas
    # raise EmptyDataError) counts as incomplete as well
    try:
        n_rows = len(pd.read_csv(sim_file))
    except pd.errors.EmptyDataError:
        n_rows = 0

    exists.append(n_rows >= min_output_rows)

df = pd.DataFrame({'basin_id': basins, 'completed': exists})
df = df.reset_index()

# Keep the basins WITHOUT complete output. The original kept the completed
# ones, which selects nothing on a first run and never retries failed runs.
df = df[~df['completed'].astype(bool)]

basin_ids_sorted = df.basin_id.to_list()

# Create lists and run function

In [None]:
# One Julia executable path per basin, matching the basin list element-wise
julia_paths = [julia_path] * len(basin_ids_sorted)

# Run the basins kept by the output check. The original passed
# `basins_redo`, a name that is never defined and raised a NameError.
parallel_run(julia_paths, basin_ids_sorted, threads=cores_available)