# Prepare raw forcing data for CAMELS-GB
## Reproject to WGS84 and concatenate the yearly files
## Sources: CEH-GEAR (pr), CHESS-PE (pet), CHESS-met (tas)

In [3]:
# Silence UserWarning messages so they do not clutter the cell outputs below
import warnings

warnings.filterwarnings(action="ignore", category=UserWarning)

In [4]:
import xarray as xr

from glob import glob
from pathlib import Path

import dask
import numpy as np
from dask.diagnostics import ProgressBar

# Set Paths

In [None]:
# Snellius paths
# ROOT already ends with a slash; strip it before joining so the derived
# directories do not contain a doubled separator ("camels_uk//forcing").
# The derived paths keep their trailing slash, as before.
ROOT = '/gpfs/work1/0/wtrcycle/users/jaerts/camels_uk/'
_ROOT_DIR = ROOT.rstrip('/')
FORCING_RAW = f'{_ROOT_DIR}/forcing_raw/'
FORCING = f'{_ROOT_DIR}/forcing/'

# Config

In [7]:
# Period (both bounds inclusive)
start_year = "2000"
end_year = "2017"

# Derive the list of years from the bounds above so the two definitions
# cannot drift apart when the period is changed.
years = list(range(int(start_year), int(end_year) + 1))

# Prepare tas

In [16]:
# Build one reprojected tas dataset per year and collect them in `datasets`.
# Each year is read from FORCING_RAW/tas, tagged as EPSG:27700, reprojected to
# EPSG:4326, converted from kelvin to degrees Celsius and renamed to lon/lat.
# NOTE(review): `.rio` is presumably the rioxarray accessor, which is only
# registered after `import rioxarray` has run in the kernel; that import is not
# part of the imports cell of this notebook - confirm before a fresh run.
datasets = []
for year in years:
    print(year)

    # Read forcing files
    forcing_files = glob(f'{FORCING_RAW}/tas/*_{year}*-{year}*')
    ds = xr.open_mfdataset(forcing_files)

    # Drop redundant variables
    ds = ds.drop_vars(['lat', 'lon', 'time_bnds', 'x_bnds', 'y_bnds', 'crsOSGB'])

    # Source grid is EPSG:27700
    epsg = '27700'

    # Write CRS
    ds = ds.rio.write_crs(f"epsg:{epsg}", inplace=True)

    # Reproject
    ds = ds.rio.reproject("EPSG:4326")

    # Convert kelvin to Celsius (0 degC == 273.15 K)
    ds = ds - 273.15

    # Rename coords
    ds = ds.rename({'x': 'lon', 'y': 'lat'})
    # NOTE(review): this sets a dataset-level encoding entry and the `where`
    # below returns a new object; xarray normally takes `_FillValue` from each
    # variable's encoding when writing - confirm that -999 reaches the file.
    ds.encoding['_FillValue'] = -999
    # Mask implausibly large values (presumably fill values left by the
    # reprojection) as NaN
    ds = ds.where(ds < 10000)
    # 'crsOSGB' is present again at this point (presumably re-created by
    # write_crs), so drop it a second time
    ds = ds.drop_vars(['crsOSGB'])

    datasets.append(ds)

2000
crsOSGB
time
2001
crsOSGB
time
2002


KeyboardInterrupt: 

In [13]:
# Join the yearly tas datasets along time and write them to a single NetCDF
# file; the write is built lazily so dask can report progress while it runs.
tas_output_file = f'{FORCING}/ceh-gear_tas_gb_1km_daily_2000_2017.nc'

ds = xr.concat(datasets, dim='time')
write_job = ds.to_netcdf(tas_output_file, compute=False)

with ProgressBar():
    write_job.compute()

[########################################] | 100% Completed | 101.76 ms


# Prepare pet

In [23]:
# Build one reprojected pet dataset per year and collect them in `datasets`.
# Each year is read from FORCING_RAW/pet, tagged as EPSG:27700, reprojected to
# EPSG:4326, rechunked, and given the time coordinate of the tas output file.
# This cell therefore requires the tas file written by the "Prepare tas"
# section to exist already.
# NOTE(review): `.rio` is presumably the rioxarray accessor, which is only
# registered after `import rioxarray` has run in the kernel; that import is not
# part of the imports cell of this notebook - confirm before a fresh run.

# Read the reference time axis once (instead of re-opening the file for every
# year) and close the file again; `.load()` keeps the values available after
# the file is closed.
with xr.open_dataset(f'{FORCING}/ceh-gear_tas_gb_1km_daily_2000_2017.nc') as tas_ds:
    tas_time = tas_ds.time.load()

datasets = []

for year in years:
    print(year)
    # Read forcing files
    forcing_files = glob(f'{FORCING_RAW}/pet/*_{year}*-{year}*')
    ds = xr.open_mfdataset(forcing_files)

    # Drop redundant variables
    ds = ds.drop_vars(['lat', 'lon', 'time_bnds', 'x_bnds', 'y_bnds', 'crsOSGB'])

    # Source grid is EPSG:27700
    epsg = '27700'

    # Write CRS
    ds = ds.rio.write_crs(f"epsg:{epsg}", inplace=True)

    # Reproject
    ds = ds.rio.reproject("EPSG:4326")

    # Rechunk dataset
    ds = ds.chunk(chunks='auto')
    ds = ds.unify_chunks()

    # Remove the grid_mapping attribute from every data variable; `pop` with a
    # default does not fail for variables that do not carry the attribute
    for var in list(ds.data_vars):
        ds[var].attrs.pop('grid_mapping', None)

    # Replace the time coordinate with the tas time stamps of the same year
    ds['time'] = tas_time.sel(time=str(year))
    # 'crsOSGB' is present again at this point (presumably re-created by
    # write_crs), so drop it a second time
    ds = ds.drop_vars(['crsOSGB'])
    ds = ds.rename({'x': 'lon', 'y': 'lat'})
    # NOTE(review): this sets a dataset-level encoding entry and the `where`
    # below returns a new object; xarray normally takes `_FillValue` from each
    # variable's encoding when writing - confirm that -999 reaches the file.
    ds.encoding['_FillValue'] = -999
    # Mask implausibly large values (presumably fill values left by the
    # reprojection) as NaN
    ds = ds.where(ds < 10000)
    datasets.append(ds)

2000
crsOSGB
time
2001
crsOSGB
time
2002
crsOSGB
time
2003
crsOSGB
time
2004
crsOSGB
time
2005
crsOSGB
time
2006
crsOSGB
time
2007
crsOSGB
time
2008
crsOSGB
time
2009
crsOSGB
time
2010
crsOSGB
time
2011
crsOSGB
time
2012
crsOSGB
time
2013
crsOSGB
time
2014
crsOSGB
time
2015
crsOSGB
time
2016
crsOSGB
time
2017
crsOSGB
time


In [8]:
# Write the pet series in its original unit (mm, per the cell title):
# join the yearly datasets along time, then run the lazy write under a
# progress bar.
pet_mm_file = f'{FORCING}/ceh-gear_pet_gb_1km_daily_2000_2017.nc'

ds = xr.concat(datasets, dim='time')
write_job = ds.to_netcdf(pet_mm_file, compute=False)

with ProgressBar():
    write_job.compute()

[########################################] | 100% Completed | 196.62 s


In [27]:
# Write the pet series divided by 1000 (the "meter" variant of the file):
# join the yearly datasets along time, rescale, then run the lazy write under
# a progress bar.
pet_meter_file = f'{FORCING}/ceh-gear_pet_gb_1km_daily_2000_2017_meter.nc'

ds = xr.concat(datasets, dim='time') / 1000
write_job = ds.to_netcdf(pet_meter_file, compute=False)

with ProgressBar():
    write_job.compute()

[########################################] | 100% Completed | 264.50 s


# Prepare pr

In [5]:
# Build one reprojected pr dataset per year and collect them in `datasets`.
# Each year is read from FORCING_RAW/pr, reduced to a float32 `pr` variable,
# tagged as EPSG:27700, reprojected to EPSG:4326 and renamed to lon/lat.
# NOTE(review): the glob pattern matches the year anywhere in the file name -
# confirm it cannot pick up files of another year.
datasets = []

for year in years:
    print(year)

    # Open all raw files of this year as one dataset
    pr_raw = xr.open_mfdataset(glob(f'{FORCING_RAW}/pr/*{year}*'))

    # Keep only the rainfall variable, renamed to `pr` and stored as float32
    pr_native = (
        pr_raw
        .drop_vars(['lat', 'lon', 'min_dist', 'crs'])
        .rename({'rainfall_amount': 'pr'})
        .pr
        .astype(dtype='float32')
        .to_dataset()
    )

    # Tag the source grid (EPSG:27700), then reproject to EPSG:4326
    pr_native = pr_native.rio.write_crs("epsg:27700", inplace=True)
    pr_wgs84 = pr_native.rio.reproject("EPSG:4326")

    # Drop the CRS helper variable and switch to lon/lat names
    ds = pr_wgs84.drop_vars(['spatial_ref']).rename({'x': 'lon', 'y': 'lat'})

    # NOTE(review): dataset-level encoding entry, set before `where` returns a
    # new object - confirm that this fill value actually reaches the file.
    ds.encoding['_FillValue'] = -999

    # Mask implausibly large values (presumably reprojection fill values)
    ds = ds.where(ds < 10000)

    datasets.append(ds)

2000
time
2001
time
2002
time
2003
time
2004
time
2005
time
2006
time
2007
time
2008
time
2009
time
2010
time
2011
time
2012
time
2013
time
2014
time
2015
time
2016
time
2017
time


In [7]:
# Write the pr series in its original unit (mm, per the cell title):
# join the yearly datasets along time, then run the lazy write under a
# progress bar.
pr_mm_file = f'{FORCING}/ceh-gear_pr_gb_1km_daily_2000_2017.nc'

ds = xr.concat(datasets, dim='time')
write_job = ds.to_netcdf(pr_mm_file, compute=False)

with ProgressBar():
    write_job.compute()

[########################################] | 100% Completed | 105.60 ms


In [6]:
# Write the pr series divided by 1000 (the "meter" variant of the file):
# join the yearly datasets along time, rescale, then run the lazy write under
# a progress bar.
pr_meter_file = f'{FORCING}/ceh-gear_pr_gb_1km_daily_2000_2017_meter.nc'

ds = xr.concat(datasets, dim='time') / 1000
write_job = ds.to_netcdf(pr_meter_file, compute=False)

with ProgressBar():
    write_job.compute()

[########################################] | 100% Completed | 101.88 ms
