# Subset surface climate data for eastern Australia

In [1]:
from dask.distributed import Client,LocalCluster
from dask_jobqueue import PBSCluster

In [2]:
# A Gadi node has 48 cores, so try to fill a single node before scaling out
# to several nodes (i.e. several PBS jobs).

walltime = '00:20:00'
cores = 6 #24
memory = f'{4 * cores}GB'  # 4 GB per requested core

cluster = PBSCluster(
    walltime=walltime,
    cores=cores,
    memory=memory,
    processes=cores,
    # Extra PBS directives added to every submitted job.
    job_extra_directives=[
        '-q normal',
        '-P w42',
        f'-l ncpus={cores}',
        f'-l mem={memory}',
        '-l storage=gdata/w42+gdata/rt52',
    ],
    local_directory='$TMPDIR',
    job_directives_skip=["select"],
    # python=os.environ["DASK_PYTHON"],
)

In [3]:
# Request a single PBS job from the cluster, then attach a client to it.
cluster.scale(jobs=1)
client = Client(cluster)

In [4]:
# Display the client summary (connection, dashboard link, worker count).
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: http://10.6.51.22:8787/status,

0,1
Dashboard: http://10.6.51.22:8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.6.51.22:46715,Workers: 0
Dashboard: http://10.6.51.22:8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [5]:
# Uncomment to shut down the client and the PBS cluster when finished.
# client.close()
# cluster.close()

In [6]:
# Uncomment while developing to reload edited modules without restarting the kernel.
# %load_ext autoreload
# %autoreload 2

In [5]:
import xarray as xr
import numpy as np

In [6]:
# Project-local helper module.
import functions as fn

# Region passed as `subset_region` to every `fn.open_era_data` call below.
# NOTE(review): presumably the bounding region of the REZs in eastern Australia - confirm in `functions`.
rez_boundary = fn.get_REZ_boundary()

# ERA5

In [7]:
# Analysis period, inclusive at both ends.
first_year, last_year = 1959, 2020
years = range(first_year, last_year + 1)

In [8]:
# Root directory of the ERA5 single-levels data (under /g/data/rt52).
root_path = '/g/data/rt52/era5/single-levels/'

# Solar radiation

Ideally we would have `ssrd` [Joules m^-2]. However, NCI does not store this variable.

Instead, (I think) we can use the mean surface downward short-wave radiation flux, `msdwswrf` [Watts m^-2]. This is the same quantity as `ssrd`, but expressed as a temporal average: in this case, the average radiation per second over the hour.

Links:

- https://confluence.ecmwf.int/display/CKB/ERA5%3A+data+documentation#ERA5:datadocumentation-Table4
- https://apps.ecmwf.int/codes/grib/param-db?id=169
- https://apps.ecmwf.int/codes/grib/param-db?id=235035
- https://www.ecmwf.int/sites/default/files/elibrary/2015/18490-radiation-quantities-ecmwf-model-and-mars.pdf

In [11]:
# Mean surface downward short-wave radiation flux from the ERA5 reanalysis,
# subset to the REZ boundary region.
mssrd = fn.open_era_data(
    root_path=f'{root_path}reanalysis/',
    variable='msdwswrf',
    years=years,
    subset_region=rez_boundary,
    rename_lon_lat=['lon', 'lat'],
)

Check size of dataset (in GB)

In [12]:
# Dataset size in units of 2**30 bytes.
mssrd.nbytes / 2**30

25.411483719944954

Re-chunk to ~100 MB chunks.

In [15]:
# 24 * 90 time steps per chunk; -1 keeps each spatial dimension in one chunk.
mssrd = mssrd.chunk(dict(time=24 * 90, lon=-1, lat=-1))

Save to zarr

In [31]:
# On-disk encoding used when writing the radiation data to zarr.
mssrd_encoding = {'msdwswrf': {'dtype': 'float32'}}

In [18]:
# Write the subset to a zarr store, overwriting any existing store.
mssrd.to_zarr(
    f'/g/data/w42/dr6273/work/data/era5/msdwswrf/msdwswrf_era5_reanalysis_sfc_{years[0]}-{years[-1]}_REZ_region.zarr',
    mode='w',
    consolidated=True,
    encoding=mssrd_encoding,
)

<xarray.backends.zarr.ZarrStore at 0x145d9a54cba0>

### Compute daily averages

Do this using a 24-hour rolling mean, then select all 1400 UTC times (equivalent to 0000 eastern state time, i.e. UTC+10).

In [11]:
# Re-open the subset written to zarr above.
mssrd = xr.open_zarr(
    f'/g/data/w42/dr6273/work/data/era5/msdwswrf/msdwswrf_era5_reanalysis_sfc_{years[0]}-{years[-1]}_REZ_region.zarr',
    consolidated=True,
)

In [14]:
# Mean over a 24-step window along time.
mssrd_24hr = mssrd.rolling({'time': 24}).mean()

In [18]:
# Keep only the time steps whose hour-of-day is 14.
mssrd_1400 = mssrd_24hr.isel(time=(mssrd_24hr['time'].dt.hour == 14))

In [29]:
# 6 * 365 time steps per chunk.
mssrd_1400 = mssrd_1400.chunk(dict(time=365 * 6))

In [32]:
# Write the daily (1400 UTC) means to their own zarr store, overwriting any existing store.
mssrd_1400.to_zarr(
    f'/g/data/w42/dr6273/work/data/era5/msdwswrf/msdwswrf_era5_daily_1400UTC_sfc_{years[0]}-{years[-1]}_REZ_region.zarr',
    mode='w',
    consolidated=True,
    encoding=mssrd_encoding,
)

<xarray.backends.zarr.ZarrStore at 0x15424dd7f450>

# Wind speed 100m

Calculate using $w = \sqrt{u^2 + v^2}$

In [9]:
# u component of the 100 m wind from the ERA5 reanalysis.
u = fn.open_era_data(
    root_path=f'{root_path}reanalysis/',
    variable='100u',
    years=years,
    subset_region=rez_boundary,
    rename_lon_lat=['lon', 'lat'],
)

In [10]:
# v component of the 100 m wind from the ERA5 reanalysis.
v = fn.open_era_data(
    root_path=f'{root_path}reanalysis/',
    variable='100v',
    years=years,
    subset_region=rez_boundary,
    rename_lon_lat=['lon', 'lat'],
)

In [11]:
# Wind speed from the two components. Both are renamed to the common
# variable name 'w100' so the Dataset arithmetic pairs them up.
w = np.sqrt(
    u.rename(u100='w100') ** 2
    + v.rename(v100='w100') ** 2
)

Check size of dataset (in GB)

In [12]:
# Dataset size in units of 2**30 bytes.
w.nbytes / 2**30

25.412138305604458

Re-chunk to ~100 MB chunks.

In [15]:
# 24 * 90 time steps per chunk; -1 keeps each spatial dimension in one chunk.
w = w.chunk(dict(time=24 * 90, lon=-1, lat=-1))

Save to zarr

In [37]:
# On-disk encoding used when writing the wind speed data to zarr.
w_encoding = {'w100': {'dtype': 'float32'}}

In [19]:
# Write the wind speed to a zarr store, overwriting any existing store.
w.to_zarr(
    f'/g/data/w42/dr6273/work/data/era5/100w/100w_era5_reanalysis_sfc_{years[0]}-{years[-1]}_REZ_region.zarr',
    mode='w',
    consolidated=True,
    encoding=w_encoding,
)

<xarray.backends.zarr.ZarrStore at 0x14731bb6eab0>

### Compute daily averages

Do this using a 24-hour rolling mean, then select all 1400 UTC times (equivalent to 0000 eastern state time, i.e. UTC+10).

In [33]:
# Re-open the wind speed written to zarr above.
w = xr.open_zarr(
    f'/g/data/w42/dr6273/work/data/era5/100w/100w_era5_reanalysis_sfc_{years[0]}-{years[-1]}_REZ_region.zarr',
    consolidated=True,
)

In [34]:
# Mean over a 24-step window along time.
w_24hr = w.rolling({'time': 24}).mean()

In [35]:
# Keep only the time steps whose hour-of-day is 14.
w_1400 = w_24hr.isel(time=(w_24hr['time'].dt.hour == 14))

In [36]:
# 6 * 365 time steps per chunk.
w_1400 = w_1400.chunk(dict(time=365 * 6))

In [38]:
# Write the daily (1400 UTC) means to their own zarr store, overwriting any existing store.
w_1400.to_zarr(
    f'/g/data/w42/dr6273/work/data/era5/100w/100w_era5_daily_1400UTC_sfc_{years[0]}-{years[-1]}_REZ_region.zarr',
    mode='w',
    consolidated=True,
    encoding=w_encoding,
)

<xarray.backends.zarr.ZarrStore at 0x15424b144b30>

# Temperature 2m

In [21]:
# 2 m temperature from the ERA5 reanalysis, subset to the REZ boundary region.
t = fn.open_era_data(
    root_path=f'{root_path}reanalysis/',
    variable='2t',
    years=years,
    subset_region=rez_boundary,
    rename_lon_lat=['lon', 'lat'],
)

Check size of dataset (in GB)

In [22]:
# Dataset size in units of 2**30 bytes.
t.nbytes / 2**30

25.412138305604458

Re-chunk to ~100 MB chunks.

In [28]:
# 24 * 90 time steps per chunk; -1 keeps each spatial dimension in one chunk.
t = t.chunk(dict(time=24 * 90, lon=-1, lat=-1))

Save to zarr

In [29]:
# On-disk encoding used when writing the temperature data to zarr.
t_encoding = {'t2m': {'dtype': 'float32'}}

In [33]:
# Write the temperature subset to a zarr store, overwriting any existing store.
t.to_zarr(
    f'/g/data/w42/dr6273/work/data/era5/2t/2t_era5_reanalysis_sfc_{years[0]}-{years[-1]}_REZ_region.zarr',
    mode='w',
    consolidated=True,
    encoding=t_encoding,
)

<xarray.backends.zarr.ZarrStore at 0x147310823b50>

# Monthly runoff

In [9]:
# Runoff from the ERA5 monthly-averaged data, subset to the REZ boundary region.
ro = fn.open_era_data(
    root_path=f'{root_path}monthly-averaged/',
    variable='ro',
    years=years,
    subset_region=rez_boundary,
    rename_lon_lat=['lon', 'lat'],
)

Check size of dataset (in GB)

In [10]:
# Dataset size in units of 2**30 bytes.
ro.nbytes / 2**30

0.034787409007549286

Re-chunk to single chunk.

In [15]:
# -1 on every dimension puts the whole dataset in a single chunk.
ro = ro.chunk(dict(time=-1, lon=-1, lat=-1))

Save to zarr

In [17]:
# On-disk encoding used when writing the runoff data to zarr.
ro_encoding = {'ro': {'dtype': 'float32'}}

In [18]:
# Write the monthly runoff subset to a zarr store, overwriting any existing store.
ro.to_zarr(
    f'/g/data/w42/dr6273/work/data/era5/ro/ro_era5_monthly-averaged_sfc_{years[0]}-{years[-1]}_REZ_region.zarr',
    mode='w',
    consolidated=True,
    encoding=ro_encoding,
)

<xarray.backends.zarr.ZarrStore at 0x15123c4765e0>