## Preprocess soil moisture reference observations

In [7]:
import warnings

# Suppress these two warning categories for everything that runs after this cell
for _ignored_category in (UserWarning, DeprecationWarning):
    warnings.filterwarnings("ignore", category=_ignored_category)

In [8]:
from glob import glob
from pathlib import Path

import os
import iris
import xarray as xr
import pandas as pd
from esmvalcore.preprocessor import regrid
from pathos.threading import ThreadPool as Pool
from dask.diagnostics import ProgressBar

## Set Paths

In [9]:
# Set Paths
# Every location used by the notebook is derived from the project root below.
ROOT = Path('/gpfs/work1/0/wtrcycle/users/jaerts/model_refinement_pub/')
AUXDIR = ROOT / 'aux_data'
OBSDIR = ROOT / 'observations'
MODELS = ROOT / 'model_parameters' / 'wflow_sbm'
# Destination of the per-basin files written by the preprocessing step
OUTPUT = OBSDIR / 'soil_moisture' / 'regridded_HydroJULES'

## Config

In [10]:
# Get available basin IDs wflow_sbm
# Only directories are basins: a stray file matched by the glob would otherwise
# be treated as a basin ID and fail later when its staticmaps.nc is looked up.
basin_dirs = [d for d in glob(f'{MODELS}/*') if os.path.isdir(d)]
# basename() extracts the last path component using the OS's own separator rules
basin_ids = sorted(os.path.basename(d) for d in basin_dirs)

# Number of threads used by default for the parallel run
cores_available = 5

## Preprocess function

In [11]:
def prep_observations(basin_id):
    """Regrid the soil moisture reference product to a basin grid and save its basin mean.

    The observation cube is regridded (area weighted) onto the grid of the
    basin's ``staticmaps.nc``, cells where the model ``mask`` is not positive
    are discarded, and the mean over latitude and longitude is written to
    ``{OUTPUT}/{basin_id}_soil_moisture_ref_2015_2017.nc``.

    NOTE(review): the output captured from a threaded run of this function
    shows HDF5 "unable to lock file" errors while creating files -- confirm
    that the netCDF/HDF5 stack in use tolerates concurrent calls.

    Parameters
    ----------
    basin_id : str
        Name of the basin directory inside ``MODELS``; also used as the prefix
        of the output file name.

    Returns
    -------
    None
        The result is written to disk and a completion message is printed.
    """
    # Set basin directory
    basin_dir = f'{MODELS}/{basin_id}/'
    staticmaps_file = f'{basin_dir}/staticmaps.nc'

    # Open netCDF file as an example grid from the model directory
    # NOTE(review): picking cube [1] depends on the order in which iris returns
    # the variables of staticmaps.nc -- confirm it is the intended one.
    cube_example = iris.load(staticmaps_file)[1]

    # Guess bounds (area-weighted regridding needs cell bounds)
    cube_example.coord('y').guess_bounds()
    cube_example.coord('x').guess_bounds()

    # Rename coords and set units so the target grid uses the same
    # latitude/longitude naming as the observation cube below
    cube_example.coord('y').rename('latitude')
    cube_example.coord('x').rename('longitude')

    cube_example.coord('latitude').units = 'degrees'
    cube_example.coord('longitude').units = 'degrees'

    # Load observation netCDF files
    # NOTE(review): [0] assumes the first cube in the file is the soil moisture field -- confirm.
    file = f'{OBSDIR}/soil_moisture/HydroJULES_soil_moisture/merge_1km_tc_ref_smap.nc'
    cube_obs = iris.load(file)[0]

    # Guess bounds
    cube_obs.coord('latitude').guess_bounds()
    cube_obs.coord('longitude').guess_bounds()

    # Regrid observation cube
    cube_out = regrid(cube_obs, cube_example, scheme='area_weighted')

    # Create obs dataset
    da = xr.DataArray.from_iris(cube_out)

    # Create mask dataset. The mask is loaded into memory inside the context
    # manager so the file handle is closed instead of being left open.
    with xr.open_dataset(staticmaps_file) as static_ds:
        mask = static_ds['mask'].rename({'y':'latitude','x':'longitude'}).load()

    # Apply mask (cells where mask <= 0 or missing become NaN)
    da = da.where(mask>0)

    # Calculate time series: average over both horizontal dimensions
    da = da.mean(['latitude','longitude'])
    da = da.chunk(chunks='auto')
    # drop_vars replaces the deprecated DataArray.drop; a missing coordinate is
    # tolerated so the finished computation is not thrown away over it
    da = da.drop_vars('spatial_ref', errors='ignore')

    # Output filename (make sure the target directory exists before writing)
    Path(OUTPUT).mkdir(parents=True, exist_ok=True)
    output_fname = f'{OUTPUT}/{basin_id}_soil_moisture_ref_2015_2017.nc'

    # Save to netcdf; the write is deferred so its progress can be displayed
    write_job = da.to_netcdf(output_fname, compute=False)
    with ProgressBar():
        write_job.compute()

    print(f'{basin_id} finished: {output_fname}')

## Parallel Run Function

In [12]:
def parallel_run(
    basin_ids,
    threads=cores_available,
    ):
    """Run ``prep_observations`` for every basin ID on a thread pool.

    Parameters
    ----------
    basin_ids : iterable
        Basin IDs; each one is passed to ``prep_observations``.
    threads : int or None, optional
        Number of pool nodes. ``None`` lets the pool choose its own default.
        The default value is the value ``cores_available`` had when this
        function was defined.

    Returns
    -------
    None

    Notes
    -----
    The pool is always shut down before returning. On an error or a keyboard
    interrupt the tasks that have not started yet are discarded and the call
    waits for the tasks already running, so no worker keeps writing files in
    the background after the cell has stopped.
    """
    # Set number of threads (cores) used for parallel run and map threads
    if threads is None:
        pool = Pool()
    else:
        pool = Pool(nodes=threads)

    try:
        # Preprocess all basins in parallel
        pool.map(
            prep_observations,
            basin_ids,
            )
    except BaseException:
        # Includes KeyboardInterrupt: stop handing out queued work
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        # Wait for the workers to exit, then drop the cached pool so the next
        # call starts from a fresh one instead of a closed/terminated instance
        pool.join()
        pool.clear()
    return

## Sort basins by size

In [13]:
# Sort by basin size
def sort_basin_ids_by_size(basin_ids):
    """Return the basin IDs ordered by ascending size of their ``staticmaps.nc`` file.

    Parameters
    ----------
    basin_ids : list
        Basin IDs; each must have ``{MODELS}/{basin_id}/staticmaps.nc`` on disk.

    Returns
    -------
    list
        The same IDs, smallest file first.
    """
    # File size in bytes for every basin, in input order
    file_sizes = [
        os.path.getsize(f'{MODELS}/{basin_id}/staticmaps.nc')
        for basin_id in basin_ids
    ]

    # Pair IDs with sizes and let pandas do the ordering
    size_table = pd.DataFrame({'basin_id': basin_ids, 'size': file_sizes})
    ordered = size_table.sort_values('size')

    return ordered['basin_id'].to_list()

# Order the basins from smallest to largest staticmaps.nc file; this is the
# order in which they are handed to the parallel run
basin_ids_sorted = sort_basin_ids_by_size(basin_ids)

## Run parallel function

In [14]:
# Preprocess every basin in parallel, using the default number of threads
# (cores_available) since no `threads` argument is passed
parallel_run(basin_ids_sorted)

[                                        ] | 0% Completed |  0.0s7s
41016 finished: /gpfs/work1/0/wtrcycle/users/jaerts/model_refinement_pub/observations/soil_moisture/regridded_HydroJULES/41016_soil_moisture_ref_2015_2017.nc
[########################################] | 100% Completed | 59.9s
[#                                       ] | 2% Completed | 30.6s80005 finished: /gpfs/work1/0/wtrcycle/users/jaerts/model_refinement_pub/observations/soil_moisture/regridded_HydroJULES/80005_soil_moisture_ref_2015_2017.nc
[######################                  ] | 56% Completed | 40.9ss
41027 finished: /gpfs/work1/0/wtrcycle/users/jaerts/model_refinement_pub/observations/soil_moisture/regridded_HydroJULES/41027_soil_moisture_ref_2015_2017.nc
[########################################] | 100% Completed | 41.8s
40033 finished: /gpfs/work1/0/wtrcycle/users/jaerts/model_refinement_pub/observations/soil_moisture/regridded_HydroJULES/40033_soil_moisture_ref_2015_2017.nc
[##############################

HDF5-DIAG: Error detected in HDF5 (1.12.2) thread 1:
  #000: H5F.c line 532 in H5Fcreate(): unable to create file
    major: File accessibility
    minor: Unable to open file
  #001: H5VLcallback.c line 3282 in H5VL_file_create(): file create failed
    major: Virtual Object Layer
    minor: Unable to create file
  #002: H5VLcallback.c line 3248 in H5VL__file_create(): file create failed
    major: Virtual Object Layer
    minor: Unable to create file
  #003: H5VLnative_file.c line 63 in H5VL__native_file_create(): unable to create file
    major: File accessibility
    minor: Unable to open file
  #004: H5Fint.c line 1898 in H5F_open(): unable to lock the file
    major: File accessibility
    minor: Unable to lock file
  #005: H5FD.c line 1625 in H5FD_lock(): driver lock request failed
    major: Virtual File Layer
    minor: Unable to lock file
  #006: H5FDsec2.c line 1002 in H5FD__sec2_lock(): unable to lock file, errno = 11, error message = 'Resource temporarily unavailable'
    m

[########################################] | 100% Completed | 14.9s
33023 finished: /gpfs/work1/0/wtrcycle/users/jaerts/model_refinement_pub/observations/soil_moisture/regridded_HydroJULES/33023_soil_moisture_ref_2015_2017.nc
[########################################] | 100% Completed | 14.5s
25006 finished: /gpfs/work1/0/wtrcycle/users/jaerts/model_refinement_pub/observations/soil_moisture/regridded_HydroJULES/25006_soil_moisture_ref_2015_2017.nc
[                                        ] | 0% Completed |  0.8s5s
24003 finished: /gpfs/work1/0/wtrcycle/users/jaerts/model_refinement_pub/observations/soil_moisture/regridded_HydroJULES/24003_soil_moisture_ref_2015_2017.nc
[########################################] | 100% Completed | 14.7s
96004 finished: /gpfs/work1/0/wtrcycle/users/jaerts/model_refinement_pub/observations/soil_moisture/regridded_HydroJULES/96004_soil_moisture_ref_2015_2017.nc
[########################################] | 100% Completed | 14.6s
28091 finished: /gpfs/work1/

Exception ignored in: <function WeakSet.__init__.<locals>._remove at 0x150bc46755a0>
Traceback (most recent call last):
  File "/home/jaerts/miniconda3/envs/wflow_state_flux/lib/python3.10/_weakrefset.py", line 39, in _remove
    def _remove(item, selfref=ref(self)):
KeyboardInterrupt: 

KeyboardInterrupt



[                                        ] | 0% Completed |  0.0ss