## Post-process PCR-GLOBWB output to match wflow_sbm grid

# ADJUST USING NEW DATA

In [1]:
import warnings
# Keep the notebook output readable: hide user-level and deprecation warnings
warnings.simplefilter("ignore", category=UserWarning)
warnings.simplefilter("ignore", category=DeprecationWarning)

In [4]:
from glob import glob
from pathlib import Path

import os
import iris
import xarray as xr
import pandas as pd
from esmvalcore.preprocessor import regrid
from pathos.threading import ThreadPool as Pool
from dask.diagnostics import ProgressBar

## Set Paths

In [5]:
# Set Paths
# All locations hang off ROOT; children are joined with the pathlib "/"
# operator instead of formatting a Path into a string and re-parsing it.
ROOT = Path('/gpfs/work1/0/wtrcycle/users/jaerts/model_refinement_pub/')
# Model directories (wflow_sbm basins, pcr-globwb output)
MODELS = ROOT / 'model_parameters'
# Auxiliary input data
AUXDIR = ROOT / 'aux_data'
# Location of the wflow_sbm calibration overview table
RESULTS = ROOT / 'results' / 'streamflow_evaluation' / 'wflow_sbm_calibration'
# Root directory for the files written by this notebook
OUTPUT = ROOT / 'results'

## Config

In [6]:
# Number of worker threads; used as the default for the parallel run functions
cores_available = 1

# Basin IDs are taken from the index of the wflow_sbm calibration overview table
calibration_file = f"{RESULTS}/wflow_calibration_objective_function_overview.csv"
df = pd.read_csv(calibration_file, index_col='basin_id')
basin_ids = df.index.tolist()

## Convert saturation degree to volumetric soil moisture

In [7]:
# Volumetric soil moisture = saturation degree * saturated volumetric water content
out_layer  = f'{MODELS}/pcr-globwb/uk_spinup_traveltime/netcdf/satDegUpp_dailyTot_output.nc'
soil_layer = f'{AUXDIR}/pcr-globwb/VMC/vmcSat_average_1_europe_30sec_v2.nc'

# Saturated water content layer; coordinates renamed to lat/lon
# (presumably to line up with the model output grid - confirm)
ds_soil = xr.open_dataset(soil_layer)['satVolWC1']
ds_soil = ds_soil.rename(latitude='lat', longitude='lon')

# Simulated saturation degree of the upper soil layer
ds_out = xr.open_dataset(out_layer)['upper_soil_saturation_degree']

# Store the product as variable 'soil_moisture' next to the original model output
ds_vmc = xr.Dataset({'soil_moisture': ds_soil * ds_out})
ds_vmc.to_netcdf(f'{MODELS}/pcr-globwb/uk_spinup_traveltime/netcdf/satDegUpp_VMC_dailyTot_output.nc')

## Post-process pcr-globwb soil moisture output (satDegUpp)

In [8]:
def post_sm_output(basin_id):
    """Regrid PCR-GLOBWB soil moisture to a basin's wflow_sbm grid and save the basin mean.

    The ``soil_moisture`` variable is regridded (area weighted) onto the grid
    of the basin's ``staticmaps.nc``, masked with the basin ``mask`` variable,
    averaged over latitude/longitude and written to netCDF for 2015-2017.

    Parameters
    ----------
    basin_id : str or int
        Basin identifier; names the directory under ``MODELS/wflow_sbm`` and
        prefixes the output file name.

    Returns
    -------
    None
        The time series is written to disk and a completion message is printed.
    """
    # Set basin directory
    BASINDIR = f'{MODELS}/wflow_sbm/{basin_id}/'
    static_file = f'{BASINDIR}/staticmaps.nc'

    # Target grid: a cube from the basin's static maps.
    # NOTE(review): index 1 depends on the order in which iris returns the
    # variables of staticmaps.nc - confirm it is always a y/x field.
    cube_example = iris.load(static_file)[1]

    # Area-weighted regridding needs cell bounds on both horizontal axes
    cube_example.coord('y').guess_bounds()
    cube_example.coord('x').guess_bounds()

    # Give the target grid the same coordinate names/units as the source cube
    cube_example.coord('y').rename('latitude')
    cube_example.coord('x').rename('longitude')
    cube_example.coord('latitude').units = 'degrees'
    cube_example.coord('longitude').units = 'degrees'

    # Load simulated volumetric soil moisture
    soil_file = f'{MODELS}/pcr-globwb/uk_spinup_traveltime/netcdf/satDegUpp_VMC_dailyTot_output.nc'
    cube_sim = iris.load(soil_file,'soil_moisture')[0]
    cube_sim.coord('latitude').guess_bounds()
    cube_sim.coord('longitude').guess_bounds()

    # Regrid simulation onto the basin grid
    cube_out = regrid(cube_sim, cube_example, scheme='area_weighted')
    da = xr.DataArray.from_iris(cube_out)

    # Read the basin mask into memory so the file handle is released right
    # away (this function is called once per basin, possibly from many threads)
    with xr.open_dataset(static_file) as ds_static:
        mask = ds_static['mask'].rename({'y':'latitude','x':'longitude'}).load()

    # Keep cells inside the basin only
    da = da.where(mask>0)

    # Basin-average time series
    da = da.mean(['latitude','longitude'])
    da = da.chunk(chunks='auto')
    # drop_vars replaces the deprecated DataArray.drop
    da = da.drop_vars('spatial_ref')

    # Select time series that matches ref obs
    da = da.sel(time=slice('2015','2017'))

    # Output filename
    output_fname = f'{OUTPUT}/soil_moisture_evaluation/regridded_pcr-globwb_sm/{basin_id}_pcr-globwb_sm_2015_2017.nc'
    # Fail-safe: make sure the target directory exists before the write starts
    Path(output_fname).parent.mkdir(parents=True, exist_ok=True)

    # Save to netcdf (delayed write so the progress bar can track it)
    write_job = da.to_netcdf(output_fname, compute=False)
    with ProgressBar():
        write_job.compute()

    print(f'{basin_id} finished: {output_fname}')

## Parallel run function

In [9]:
def parallel_run_sm(
    basin_ids,
    threads=cores_available,
    ):
    """Run ``post_sm_output`` for every basin on a pathos thread pool.

    Parameters
    ----------
    basin_ids : iterable
        Basin identifiers; each one is passed to ``post_sm_output``.
    threads : int or None, optional
        Number of worker threads. ``None`` creates the pool without an explicit
        size. Defaults to the value of ``cores_available`` at definition time.

    Returns
    -------
    None
    """
    # Set number of threads (cores) used for parallel run
    pool = Pool() if threads is None else Pool(nodes=threads)
    try:
        # Blocks until every basin has been processed
        pool.map(post_sm_output, basin_ids)
    finally:
        # Release the worker threads even when a basin fails; clear() also
        # removes the pool from the pathos cache so a later call gets a fresh one
        pool.close()
        pool.join()
        pool.clear()
    return

## Sort basins by size

In [10]:
# Sort by basin size
def sort_basin_ids_by_size(basin_ids, models_dir=None):
    """Return the basin IDs ordered by ascending ``staticmaps.nc`` file size.

    Parameters
    ----------
    basin_ids : iterable
        Basin identifiers; each must have a file
        ``<models_dir>/wflow_sbm/<basin_id>/staticmaps.nc``.
    models_dir : str or path-like, optional
        Root directory holding the ``wflow_sbm`` folder. Defaults to the
        module-level ``MODELS``.

    Returns
    -------
    list
        The input IDs, smallest file first. Basins with equal file size keep
        their input order (the sort is stable).

    Raises
    ------
    OSError
        If a ``staticmaps.nc`` file is missing or inaccessible.
    """
    root = MODELS if models_dir is None else models_dir

    def staticmaps_size(basin_id):
        # File size in bytes of the basin's static maps
        return os.path.getsize(f'{root}/wflow_sbm/{basin_id}/staticmaps.nc')

    return sorted(basin_ids, key=staticmaps_size)

# Basin IDs ordered by ascending staticmaps.nc file size
basin_ids_sorted = sort_basin_ids_by_size(basin_ids)

## Run parallel function

In [11]:
# Run function
# Alternative call that processes the basins in size-sorted order:
# parallel_run_sm(basin_ids_sorted)
# Active call: basins are processed in the order of basin_ids
parallel_run_sm(basin_ids)

[########################################] | 100% Completed |  4.7s
10003 finished: /gpfs/work1/0/wtrcycle/users/jaerts/model_refinement_pub/results/soil_moisture_evaluation/regridded_pcr-globwb_sm/10003_pcr-globwb_sm_2015_2017.nc
[########################################] | 100% Completed |  3.3s
1001 finished: /gpfs/work1/0/wtrcycle/users/jaerts/model_refinement_pub/results/soil_moisture_evaluation/regridded_pcr-globwb_sm/1001_pcr-globwb_sm_2015_2017.nc
[########################################] | 100% Completed |  3.4s
101002 finished: /gpfs/work1/0/wtrcycle/users/jaerts/model_refinement_pub/results/soil_moisture_evaluation/regridded_pcr-globwb_sm/101002_pcr-globwb_sm_2015_2017.nc
[########################################] | 100% Completed |  2.9s
101005 finished: /gpfs/work1/0/wtrcycle/users/jaerts/model_refinement_pub/results/soil_moisture_evaluation/regridded_pcr-globwb_sm/101005_pcr-globwb_sm_2015_2017.nc
[########################################] | 100% Completed |  2.9s
102001

## Post-process pcr-globwb evaporation output (totalEvaporation)

In [12]:
def post_et_output(basin_id):
    """Regrid PCR-GLOBWB total evaporation to a basin's wflow_sbm grid and save 8-daily basin means.

    The first cube of the evaporation output is regridded (area weighted) onto
    the grid of the basin's ``staticmaps.nc``, masked with the basin ``mask``
    variable, averaged over latitude/longitude, multiplied by 1000 (m to mm),
    limited to 2008-2015 and averaged over 8-day windows that restart every
    calendar year. The result is written to netCDF.

    Parameters
    ----------
    basin_id : str or int
        Basin identifier; names the directory under ``MODELS/wflow_sbm`` and
        prefixes the output file name.

    Returns
    -------
    None
        The time series is written to disk and a completion message is printed.
    """
    # Set basin directory
    BASINDIR = f'{MODELS}/wflow_sbm/{basin_id}/'
    static_file = f'{BASINDIR}/staticmaps.nc'

    # Target grid: a cube from the basin's static maps.
    # NOTE(review): index 1 depends on the order in which iris returns the
    # variables of staticmaps.nc - confirm it is always a y/x field.
    cube_example = iris.load(static_file)[1]

    # Area-weighted regridding needs cell bounds on both horizontal axes
    cube_example.coord('y').guess_bounds()
    cube_example.coord('x').guess_bounds()

    # Give the target grid the same coordinate names/units as the source cube
    cube_example.coord('y').rename('latitude')
    cube_example.coord('x').rename('longitude')
    cube_example.coord('latitude').units = 'degrees'
    cube_example.coord('longitude').units = 'degrees'

    # Load simulated evaporation
    evap_file = f'{MODELS}/pcr-globwb/uk_spinup_traveltime/netcdf/totalEvaporation_dailyTot_output.nc'
    cube_sim = iris.load(evap_file)[0]
    cube_sim.coord('latitude').guess_bounds()
    cube_sim.coord('longitude').guess_bounds()

    # Regrid simulation onto the basin grid
    cube_out = regrid(cube_sim, cube_example, scheme='area_weighted')
    da = xr.DataArray.from_iris(cube_out)

    # Read the basin mask into memory so the file handle is released right
    # away (this function is called once per basin, possibly from many threads)
    with xr.open_dataset(static_file) as ds_static:
        mask = ds_static['mask'].rename({'y':'latitude','x':'longitude'}).load()

    # Keep cells inside the basin only
    da = da.where(mask>0)

    # Basin-average time series
    da = da.mean(['latitude','longitude'])
    da = da.chunk(chunks='auto')
    # drop_vars replaces the deprecated DataArray.drop
    da = da.drop_vars('spatial_ref')

    # Convert m to mm
    da = da * 1000

    # Select time series that matches ref obs
    da = da.sel(time=slice('2008','2015'))

    # Resample to 8 daily interval per year. The years are read straight from
    # the time coordinate; no yearly resample is needed just to enumerate them.
    years = sorted(set(da.time.dt.year.values.tolist()))
    sim_years = [
        da.sel(time=str(year)).resample(time='8D').mean()
        for year in years
    ]
    da = xr.concat(sim_years, dim='time')

    # Output filename
    output_fname = f'{OUTPUT}/evaporation_evaluation/regridded_pcr-globwb_evaporation/{basin_id}_pcr-globwb_et_2008_2015.nc'
    # Fail-safe: make sure the target directory exists before the write starts
    Path(output_fname).parent.mkdir(parents=True, exist_ok=True)

    # Save to netcdf (delayed write so the progress bar can track it)
    write_job = da.to_netcdf(output_fname, compute=False)
    with ProgressBar():
        write_job.compute()

    print(f'{basin_id} finished: {output_fname}')

## Parallel run function

In [13]:
def parallel_run_et(
    basin_ids,
    threads=cores_available,
    ):
    """Run ``post_et_output`` for every basin on a pathos thread pool.

    Parameters
    ----------
    basin_ids : iterable
        Basin identifiers; each one is passed to ``post_et_output``.
    threads : int or None, optional
        Number of worker threads. ``None`` creates the pool without an explicit
        size. Defaults to the value of ``cores_available`` at definition time.

    Returns
    -------
    None
    """
    # Set number of threads (cores) used for parallel run
    pool = Pool() if threads is None else Pool(nodes=threads)
    try:
        # Blocks until every basin has been processed
        pool.map(post_et_output, basin_ids)
    finally:
        # Release the worker threads even when a basin fails; clear() also
        # removes the pool from the pathos cache so a later call gets a fresh one
        pool.close()
        pool.join()
        pool.clear()
    return

## Sort basins by size

In [14]:
# Sort by basin size
# NOTE: a function with the same name is also defined earlier in this notebook;
# it is defined again here so this section can be run on its own.
def sort_basin_ids_by_size(basin_ids, models_dir=None):
    """Return the basin IDs ordered by ascending ``staticmaps.nc`` file size.

    Parameters
    ----------
    basin_ids : iterable
        Basin identifiers; each must have a file
        ``<models_dir>/wflow_sbm/<basin_id>/staticmaps.nc``.
    models_dir : str or path-like, optional
        Root directory holding the ``wflow_sbm`` folder. Defaults to the
        module-level ``MODELS``.

    Returns
    -------
    list
        The input IDs, smallest file first. Basins with equal file size keep
        their input order (the sort is stable).

    Raises
    ------
    OSError
        If a ``staticmaps.nc`` file is missing or inaccessible.
    """
    root = MODELS if models_dir is None else models_dir

    def staticmaps_size(basin_id):
        # File size in bytes of the basin's static maps
        return os.path.getsize(f'{root}/wflow_sbm/{basin_id}/staticmaps.nc')

    return sorted(basin_ids, key=staticmaps_size)

# Basin IDs ordered by ascending staticmaps.nc file size
basin_ids_sorted = sort_basin_ids_by_size(basin_ids)

## Run parallel function

In [None]:
# Run function
# Alternative call that processes the basins in size-sorted order:
# parallel_run_et(basin_ids_sorted)
# Active call: basins are processed in the order of basin_ids
parallel_run_et(basin_ids)

[########################################] | 100% Completed |  5.3s
10003 finished: /gpfs/work1/0/wtrcycle/users/jaerts/model_refinement_pub/results/evaporation_evaluation/regridded_pcr-globwb_evaporation/10003_pcr-globwb_et_2008_2015.nc
[########################################] | 100% Completed |  4.3s
1001 finished: /gpfs/work1/0/wtrcycle/users/jaerts/model_refinement_pub/results/evaporation_evaluation/regridded_pcr-globwb_evaporation/1001_pcr-globwb_et_2008_2015.nc
[########################################] | 100% Completed |  4.3s
101002 finished: /gpfs/work1/0/wtrcycle/users/jaerts/model_refinement_pub/results/evaporation_evaluation/regridded_pcr-globwb_evaporation/101002_pcr-globwb_et_2008_2015.nc
[########################################] | 100% Completed |  4.2s
101005 finished: /gpfs/work1/0/wtrcycle/users/jaerts/model_refinement_pub/results/evaporation_evaluation/regridded_pcr-globwb_evaporation/101005_pcr-globwb_et_2008_2015.nc
[########################################] | 1