# Create ancillary datasets

To support the production of AusEFlux, we need some ancillary static datasets such as:
- land-sea mask
- urban area and water mask
- NDVI of bare soil
- Vegetation height

All of these datasets need to be at various resolutions to support higher and lower resolution runs of the models. Thus, we need a script that creates these datasets at each of the target resolutions. These are 5km, 1km, and 500m.


In [None]:
import os
import pickle
import odc.geo
import numpy as np
import xarray as xr
import rioxarray as rxr
import matplotlib.pyplot as plt
from odc.geo.xr import assign_crs

import sys
sys.path.append('/g/data/os22/chad_tmp/AusEFlux/src/')
from _utils import round_coords

import warnings
warnings.filterwarnings("ignore")
%matplotlib inline

In [None]:
# Start a local Dask client for the chunked reprojection steps below.
# NOTE(review): mem_safety_margin='2Gb' presumably keeps that much memory free
# from the workers - confirm against the datacube documentation.
from datacube.utils.dask import start_local_dask
client = start_local_dask(mem_safety_margin='2Gb')
# Bare expression so the notebook renders the client summary.
client

## Open target grid

In [None]:
# Label of the grid every ancillary layer is regridded to; it is interpolated
# into the geobox path and into each output file name further down.
target_grid = '500m'

# Chunk sizes applied to latitude/longitude rasters before reprojection.
dask_chunks = {'latitude': 2000, 'longitude': 2000}

In [None]:
# Load the common grid (geobox) that all datasets are reprojected to.
# NOTE: pickle.load executes arbitrary code from the file, so only point this
# at a trusted, project-owned path.
gbox_path = f'/g/data/xc0/project/AusEFlux/data/grid_{target_grid}'
with open(gbox_path, mode='rb') as handle:
    gbox = pickle.load(handle)

## Make an urban and water mask

Urban is the GUF urban fraction layer

Water class comes from NVIS version 6

In [None]:
# Open the GUF urban-fraction layer and tag it as EPSG:4326 so odc-geo can
# reproject it.
urban = assign_crs(
    xr.open_dataarray('/g/data/ub8/au/LandCover/OzWALD_LC/GUF_FractionUrban_Australia.nc'),
    crs='EPSG:4326',
)

# Chunk, put the dims in (latitude, longitude) order, then regrid onto the
# target geobox and load the result into memory.
urban = urban.chunk(dask_chunks).transpose('latitude', 'longitude')
urban = urban.odc.reproject(how=gbox, resampling='bilinear').compute()

# Binary mask: 1 where the urban fraction is greater than 10 (i.e. >10 %), else 0.
urban = xr.where(urban > 10, 1, 0).rename('urban_mask')
urban = round_coords(urban)

In [None]:
# Open the NVIS v6 major vegetation group raster as a chunked 2-D array.
ds = (
    rxr.open_rasterio('/g/data/xc0/project/AusEFlux/data/aus6_0e_mvg.tif')
    .chunk({'x': 250, 'y': 250})
    .squeeze()
    .drop_vars('band')
)

# Values of 99 and above become NaN (presumably nodata/unclassified - TODO
# confirm against the NVIS class table); float32 is needed to hold the NaNs.
ds = assign_crs(ds.where(ds < 99).astype('float32'), crs='EPSG:3577')

# 'mode' resampling keeps the most common class in each target pixel, which
# suits categorical data.
ds = ds.odc.reproject(how=gbox, resampling='mode').rename('NVISv6').compute()

# Class 24 is used as the water class: 1 where water, 0 elsewhere.
water = round_coords(xr.where(ds == 24, 1, 0))

### Combine urban and water masks

In [None]:
# The two masks must sit on exactly the same grid: `urban | water` aligns on
# coordinates with an inner join, so any mismatch would silently drop pixels.
# Check both dimensions, comparing the raw coordinate values so that a partial
# overlap cannot pass.
for dim in ('latitude', 'longitude'):
    if not np.array_equal(urban[dim].values, water[dim].values):
        raise ValueError(
            f'urban and water masks have different {dim} coordinates; '
            'they must be on the same grid before being combined'
        )

# A pixel is masked (1) if it is urban OR water.
mask = (urban | water).astype(np.int16)
mask = mask.rename('urban_water_mask')

#export
mask.to_netcdf(f'/g/data/xc0/project/AusEFlux/data/urban_water_mask_{target_grid}.nc')

In [None]:
# Interactive map of the combined mask over Esri satellite tiles, for a visual
# sanity check. Left as the last expression so the notebook renders the map.
mask.odc.explore(
    tiles='https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}',
    attr='Esri',
    name='Esri Satellite',
)

## Land-sea mask

Use the current OzWALD GPP for this: https://thredds.nci.org.au/thredds/catalog/ub8/au/OzWALD/8day/GPP/catalog.html. Pixels with a valid GPP value are treated as land; pixels where GPP is NaN are treated as sea.

In [None]:
# Open the OzWALD annual-mean GPP stack lazily.
ls = xr.open_dataarray(
    '/g/data/ub8/au/OzWALD/annual/OzWALD.GPP.AnnualMeans.nc',
    chunks={'latitude': 2000, 'longitude': 2000},
)

# Take a single time step (index 12), order the dims as (latitude, longitude),
# load it into memory and tag it as EPSG:4326.
ls = ls.isel(time=12).transpose('latitude', 'longitude').compute()
ls = assign_crs(ls, crs='EPSG:4326')

In [None]:
# Drop the leftover scalar time coordinate and declare NaN as nodata so the
# reprojection treats sea pixels as missing.
ls_data = ls.squeeze().drop_vars('time').assign_attrs(nodata=np.nan)

# Regrid onto the common target grid like every other ancillary layer; without
# this the mask stays on the native OzWALD grid even though its name and file
# name carry `target_grid`. Nearest-neighbour is used so that valid/NaN pixels
# are never blended across the coastline.
ls_data = ls_data.odc.reproject(how=gbox, resampling='nearest')
ls_data = round_coords(ls_data)

# True over land (valid GPP), False over sea (NaN GPP).
ls_mask = ~np.isnan(ls_data)
ls_mask = ls_mask.rename(f'landsea_mask_{target_grid}')
# Clear the attributes inherited from the GPP layer; they do not describe a mask.
ls_mask.attrs = {}

In [None]:
# Write the land-sea mask to NetCDF; the file name carries the target grid label.
ls_mask.to_netcdf(f'/g/data/xc0/project/AusEFlux/data/land_sea_mask_{target_grid}.nc')

In [None]:
# ls_mask.odc.explore(
#             tiles = 'https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}',
#             attr = 'Esri',
#             name = 'Esri Satellite')

## Vegetation height

https://thredds.nci.org.au/thredds/dodsC/ub8/au/LandCover/OzWALD_LC/VegH_2007-2010_mosaic_AustAlb_25m.nc

Don't use dask here as it seems to leave some line artefacts

In [None]:
%%time
# Open the 25 m vegetation-height mosaic eagerly. No `chunks=` argument is
# passed on purpose: running this step through dask left line artefacts
# (previously tried with chunks=dict(x=1000,y=1000)).
vegh = xr.open_dataset(
    '/g/data/ub8/au/LandCover/OzWALD_LC/VegH_2007-2010_mosaic_AustAlb_25m.nc'
)['VegH'].astype('float32')

# Tag as EPSG:3577 and order the dims as (y, x) for reprojection.
vegh = assign_crs(vegh, crs='EPSG:3577').transpose('y', 'x')
print('finished transpose')

# Declare NaN as nodata before regridding onto the target geobox.
vegh.attrs['nodata'] = np.nan
vegh = vegh.odc.reproject(how=gbox, resampling='bilinear').compute()
vegh = round_coords(vegh.rename('VegH'))

In [None]:
# Write the regridded vegetation height to NetCDF, labelled with the target grid.
vegh.to_netcdf(f'/g/data/xc0/project/AusEFlux/data/VegH_{target_grid}_2007_2010.nc')

In [None]:
# (np.isnan(vegh)).plot.imshow(size=12)

In [None]:
# vegh.odc.explore(robust=True,
#             tiles = 'https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}',
#             attr = 'Esri',
#             name = 'Esri Satellite')

## NDVI of bare soil

In [None]:
# Open the bare-soil NDVI GeoTIFF lazily.
ndvi = xr.open_dataarray(
    '/g/data/xc0/project/AusEFlux/data/NDVI_BG_SLGA_MLR_MODcor_final.tif',
    chunks={'x': 2000, 'y': 2000},
)

# Drop the band dimension/coordinate to get a 2-D array and tag it as EPSG:4326.
ndvi = assign_crs(ndvi.squeeze().drop_vars('band'), crs='EPSG:4326')

# Declare NaN as nodata, then regrid onto the target geobox and load into memory.
ndvi.attrs['nodata'] = np.nan
ndvi = ndvi.odc.reproject(how=gbox, resampling='bilinear').compute().rename('NDVI')

# The CRS is re-assigned after the coordinates have been rounded.
ndvi = assign_crs(round_coords(ndvi), crs='EPSG:4326')

In [None]:
# Display the latitude coordinate to eyeball the rounded grid values.
ndvi.latitude

In [None]:
# Write the regridded bare-soil NDVI to NetCDF, labelled with the target grid.
ndvi.to_netcdf(f'/g/data/xc0/project/AusEFlux/data/ndvi_of_baresoil_{target_grid}.nc')

In [None]:
# ndvi.odc.explore(robust=True,
#             tiles = 'https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}',
#             attr = 'Esri',
#             name = 'Esri Satellite')

## Mask the predictions

This is necessary because I had failed to mask water during the production run. It will be redundant on any future run because the urban and water mask is now applied in the production code.

In [None]:
base = '/g/data/os22/chad_tmp/AusEFlux/'


def _open_flux(flux):
    """Open all NetCDF files of one flux as a single dataset.

    Parameters
    ----------
    flux : str
        Name of the sub-folder under ``results/AusEFlux/`` (e.g. ``'NEE'``).

    Returns
    -------
    xarray.Dataset
        The files in that folder ending in ``.nc``, opened in sorted
        file-name order with ``xr.open_mfdataset``, tagged as EPSG:4326 and
        with the ``nodata`` attribute set to NaN.
    """
    folder = f'{base}results/AusEFlux/{flux}/'
    files = sorted(
        os.path.join(folder, name)
        for name in os.listdir(folder)
        if name.endswith('.nc')
    )
    flux_ds = assign_crs(xr.open_mfdataset(files), crs='EPSG:4326')
    flux_ds.attrs['nodata'] = np.nan
    return flux_ds


# One dataset per flux; all four are opened in exactly the same way.
nee = _open_flux('NEE')
gpp = _open_flux('GPP')
er = _open_flux('ER')
et = _open_flux('ET')

### Mask

In [None]:
# The predictions use latitude/longitude dimension names. A mask built by the
# reprojection cells in this notebook already has those names, and renaming
# non-existent 'y'/'x' dims raises a ValueError, so only rename what is present
# (e.g. when the mask was loaded from a raster with y/x dims).
dim_renames = {
    old: new
    for old, new in {'y': 'latitude', 'x': 'longitude'}.items()
    if old in mask.dims
}
if dim_renames:
    mask = mask.rename(dim_renames)

# NOTE(review): `where` aligns on coordinates, so the mask must be on the same
# grid as the predictions - confirm the mask resolution matches before running.
# Pixels flagged 1 (urban or water) become NaN; everything else is kept.
keep = mask != 1
nee = nee.where(keep).astype(np.float32)
gpp = gpp.where(keep).astype(np.float32)
er = er.where(keep).astype(np.float32)
et = et.where(keep).astype(np.float32)

### export

In [None]:
 #list of years and export
# Years to export (2003-2023 inclusive), as strings so that `.sel(time=year)`
# selects every time step within that year.
years = [str(i) for i in range(2003, 2024)]
fluxes = ['GPP', 'ER', 'NEE', 'ET']
dss = [gpp, er, nee, et]

for flux, ds in zip(fluxes, dss):
    # The masked copies go to a separate '<flux>_new' folder. Create it up
    # front, because to_netcdf does not create missing directories.
    out_dir = f'/g/data/os22/chad_tmp/AusEFlux/results/AusEFlux/{flux}_new'
    os.makedirs(out_dir, exist_ok=True)

    # One output file per flux per year.
    for year in years:
        xx = ds.sel(time=year)
        print(flux, year)
        xx.to_netcdf(f'{out_dir}/AusEFlux_{flux}_1km_quantiles_{year}_v1.2.nc')