# Subset surface climate data for eastern Australia

In [1]:
import os
import dask.config
from dask.distributed import Client,LocalCluster
from dask_jobqueue import PBSCluster

In [4]:
# A Gadi node has 48 cores, so fill one whole node with a single job before
# scaling out to additional nodes (jobs).
# NOTE(review): the saved output of this cell is a PermissionError on
# '/g/data/xv83', which is not in the storage list below - confirm where that
# path comes from (presumably the dask / dask_jobqueue configuration).

cores = 48
walltime = '00:10:00'
memory = f'{4 * cores}GB'  # 4 GB for each core

cluster = PBSCluster(
    cores=cores,
    processes=cores,  # as many worker processes as cores
    memory=memory,
    walltime=walltime,
    local_directory='$TMPDIR',
    header_skip=["select"],
    # Extra PBS directives: queue, project, resources and storage mounts.
    job_extra=[
        '-q normal',
        '-P w42',
        f'-l ncpus={cores}',
        f'-l mem={memory}',
        '-l storage=gdata/w42+gdata/rt52',
    ],
)
# Disabled option, kept for reference: python=os.environ["DASK_PYTHON"]



PermissionError: [Errno 13] Permission denied: '/g/data/xv83'

In [None]:
# Ask the cluster for a single job, then attach a distributed client to it.
cluster.scale(jobs=1)
client = Client(cluster)

In [None]:
# Show the client as this cell's output.
client

In [6]:
# Load IPython's autoreload extension in mode 2, so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [4]:
# `functions` is imported under the alias `fn`; `rez_boundary` is passed to
# `fn.open_era_data` as `subset_region` further down.
import functions as fn
rez_boundary = fn.get_REZ_boundary()

# ERA5

In [1]:
# Years to process: 1959 through 2020 (the end of `range` is exclusive).
years = range(1959, 2021)

In [3]:
# Root directory of the ERA5 single-levels reanalysis data under /g/data/rt52.
root_path = '/g/data/rt52/era5/single-levels/reanalysis/'

# Solar radiation

Ideally we would use surface solar radiation downwards, `ssrd` [J m^-2]. However, NCI does not store this variable.

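Instead, (I think) we can use the mean surface downward short-wave radiation flux, `msdwswrf` [W m^-2]. This is the same quantity as `ssrd`, but expressed as a mean rate over the accumulation period rather than as an accumulated total (so, for hourly data, `ssrd` ≈ `msdwswrf` × 3600 s).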

Links:
https://confluence.ecmwf.int/display/CKB/ERA5%3A+data+documentation#ERA5:datadocumentation-Table4

https://apps.ecmwf.int/codes/grib/param-db?id=169

https://apps.ecmwf.int/codes/grib/param-db?id=235035

In [8]:
# Open the `msdwswrf` data under `root_path` for the selected years, passing
# the REZ boundary as the region to subset to.
mssrd = fn.open_era_data(root_path, 'msdwswrf', years, subset_region=rez_boundary)

In [9]:
# Size of `mssrd` in GiB (bytes divided by 1024**3).
mssrd.nbytes / (1024 ** 3)

25.411483719944954

In [12]:
# Mean of the `msdwswrf` variable over the 'time' dimension.
mean_mssrd = mssrd['msdwswrf'].mean('time')