# Pre-process PCR-GLOBWB forcing for CAMELS-GB in parallel
## CEH-GEAR: pr, CHESS-PE: pet, CHESS-met: tas

In [None]:
import warnings

# Silence warning categories that clutter the notebook output.
for _category in (UserWarning, DeprecationWarning):
    warnings.filterwarnings("ignore", category=_category)
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
from glob import glob
from pathlib import Path

import os
import iris
import xarray as xr
import pandas as pd

import rasterio
import rioxarray

from esmvalcore.preprocessor import regrid
from pathos.threading import ThreadPool as Pool
from dask.diagnostics import ProgressBar

In [None]:
# Directory layout on Snellius: everything lives below ROOT.
ROOT = Path('/gpfs/work1/0/wtrcycle/users/jaerts/camels_uk/')
AUXDATA = ROOT / 'aux_data'    # auxiliary data
FORCING = ROOT / 'forcing'     # forcing input files
MODELS = ROOT / 'pcr-globwb'   # one sub-directory per model/basin

# Config

In [None]:
# Time period
start_year = "2000"
end_year = "2017"

# Get available basin IDs: every entry directly below MODELS, except the
# 'uk' entry, which is handled separately from the individual basins.
# Path(...).name is separator-agnostic, and filtering (instead of
# list.remove) does not raise when no 'uk' entry exists.
basin_dirs = glob(f'{MODELS}/*')
basin_ids = sorted(
    name for name in (Path(s).name for s in basin_dirs) if name != 'uk'
)

# Number of available cores
cores_available = 20

# Preprocess forcing

In [None]:
# Identifier of the basin/model directory processed by the cells below.
basin_id = "uk"

In [None]:
# Set basin directory (kept as a string so existing f-string usage still works)
BASINDIR = f'{MODELS}/{basin_id}/'

# Climatology period: used for the time selection and for the output name
clim_start, clim_end = '2000', '2007'

# Variables scaled by 0.001 before writing (presumably mm/day -> m/day;
# confirm against the units of the source forcing files). 'tas' is left as is.
MM_TO_M = 0.001
variables_to_scale = ('pet', 'pr')

# Open the model clone map (.map raster) as the example/target grid.
# NOTE(review): xr.open_rasterio is deprecated and has been removed from
# recent xarray releases; rioxarray.open_rasterio is the replacement. Kept
# here unchanged to avoid altering the grid object without verification.
ds = xr.open_rasterio(str(Path(BASINDIR) / f'{basin_id}_30sec_clone.map'))
cube_example = ds.squeeze('band').drop_vars('band').to_iris()

# Give the raster axes the coordinate names used by the forcing cubes
cube_example.coord('y').rename('latitude')
cube_example.coord('x').rename('longitude')

# Guess bounds (cell bounds are required for area-weighted regridding)
cube_example.coord('latitude').guess_bounds()
cube_example.coord('longitude').guess_bounds()

cube_example.coord('latitude').units = 'degrees'
cube_example.coord('longitude').units = 'degrees'

# Loop forcing variables
for variable in ['pet', 'tas', 'pr']:
    print(variable)

    # Load forcing file; sort the matches so the choice is deterministic and
    # fail with a clear message instead of an IndexError when nothing matches
    forcing_files = sorted(glob(f'{FORCING}/*{variable}*'))
    if not forcing_files:
        raise FileNotFoundError(
            f'No forcing file matching *{variable}* found in {FORCING}'
        )
    da_clim = xr.open_dataset(forcing_files[0])[variable]

    # Create climatology: mean per day-of-year over the selected period on a
    # 365-day calendar, then relabel the 365 days with dates of 2007
    # (a non-leap year, so the lengths match)
    da_clim = da_clim.sel(time=slice(clim_start, clim_end))
    da_clim = da_clim.convert_calendar('365_day')
    da_clim = da_clim.groupby("time.dayofyear").mean('time')
    da_clim = da_clim.assign_coords(
        dayofyear=xr.date_range('2007-01-01', '2007-12-31')
    )
    da_clim = da_clim.rename({'dayofyear': 'time'})

    # Convert to cube
    cube_forcing = da_clim.to_iris()

    # Guess bounds
    cube_forcing.coord('latitude').guess_bounds()
    cube_forcing.coord('longitude').guess_bounds()

    # Regrid forcing file to example grid using conservative method
    print('Regridding...')
    cube_out = regrid(cube_forcing, cube_example, scheme='area_weighted')

    # Rename coords
    cube_out.coord('latitude').rename('lat')
    cube_out.coord('longitude').rename('lon')

    # Convert to xarray
    da_clim = xr.DataArray.from_iris(cube_out)

    # Drop the references to the cubes so their memory can be reclaimed
    cube_forcing = None
    cube_out = None

    # Set attributes
    da_clim.lon.attrs = {'long_name': 'longitude',
                         'standard_name': 'longitude',
                         'units': 'degrees'}
    da_clim.lat.attrs = {'long_name': 'latitude',
                         'standard_name': 'latitude',
                         'units': 'degrees'}
    da_clim.time.attrs = {'standard_name': 'time',
                          'long_name': 'time'}

    # Convert to dataset (the name da_clim is kept; it now holds a Dataset)
    da_clim = da_clim.to_dataset()

    # Convert to m/day: scale 'pet' and 'pr' exactly once each. Previously
    # 'pr' was scaled twice and 'pet' was never scaled.
    if variable in variables_to_scale:
        da_clim = da_clim * MM_TO_M

    # Output filename
    OUTPUT = Path(BASINDIR) / (
        f'ceh-gear_chess_camels-gb_{basin_id}_{variable}'
        f'_clim{clim_start}-{clim_end}.nc'
    )
    output_fname = str(OUTPUT)

    # Remove existing file, if any
    OUTPUT.unlink(missing_ok=True)

    # Save to netcdf
    write_job = da_clim.to_netcdf(
        output_fname,
        encoding={variable: {'_FillValue': -9999, 'missing_value': -9999}},
        compute=False,
    )
    with ProgressBar():
        write_job.compute()

# Combine variables into single NetCDF

In [None]:
basin_id = 'uk'

# Directory that holds the per-variable NetCDF files of this basin
basin_dir = Path(MODELS) / basin_id

for variable in ['pet', 'pr', 'tas']:
    print(variable)

    # Output filename
    OUTPUT = basin_dir / (
        f'ceh-gear_chess_camels-gb_{basin_id}_{variable}_clim2000-2007_2017.nc'
    )
    output_fname = str(OUTPUT)

    # Collect the input files. `glob` is the function imported with
    # `from glob import glob`, so it is called directly (glob.glob would
    # raise AttributeError). The output file matches the same pattern, so a
    # result left over from an earlier run is excluded from the inputs.
    pattern = f'{basin_dir}/ceh-gear_chess_camels-gb_{basin_id}_{variable}_*.nc'
    files = sorted(f for f in glob(pattern) if Path(f) != OUTPUT)
    if not files:
        raise FileNotFoundError(f'No input files matching {pattern}')

    # Remove existing file, if any
    OUTPUT.unlink(missing_ok=True)

    # Open lazily, write to netcdf, and close the inputs afterwards
    with xr.open_mfdataset(files, chunks={'time': 1}) as ds:
        write_job = ds.to_netcdf(
            output_fname,
            encoding={variable: {'_FillValue': -9999, 'missing_value': -9999}},
            compute=False,
        )
        with ProgressBar():
            write_job.compute()