# Shifting AusEFlux results to OpenDAP production folder

This notebook will copy AusEFlux datasets from `xc0` into `ub8`, prepare them for publishing through OpenDAP, and calculate annual summaries.

In the `Analysis Parameters` section below, set the version number, the last year in the time series, and the spatial resolution of the datasets; the code will do the rest.

The datasets are published here: https://thredds.nci.org.au/thredds/catalog/ub8/au/AusEFlux/catalog.html

In [None]:
import os
import xarray as xr
import numpy as np
from odc.geo.xr import assign_crs

## Analysis Parameters

In [None]:
# Version of the datasets being processed
version = 'v2.0'

# Last year in the time series; update this each year you run the 'annual_update' scripts
year_end = 2024

# Spatial resolution of the datasets
target_grid = '500m'

## Move to production and make annual summaries

In [None]:
# Flux short names mapped to the long names written into each dataset's attributes
fluxes = {
    'GPP': 'Gross Primary Productivity',
    'NEE': 'Net Ecosystem Exchange',
    'ER': 'Ecosystem Respiration',
    'ET': 'Evapotranspiration',
}

# Decide ONCE whether this is the v1.1 run, so that the input location, the
# export folder and the output file names can never disagree with each other.
# v1.1 is read from a single 2003-2022 1km file; every other version is read
# from the per-variable results folder on xc0.
is_legacy = version == 'v1.1'

# The production folder is named after the major version only:
# 'v1.1' -> '.../v1/', 'v2.0' -> '.../v2/'
major_version = version.split('.')[0]
export = f'/g/data/ub8/au/AusEFlux/{major_version}/'

# v1.1 files are labelled '1km' and exported up to 2021; all other versions
# use the values from the Analysis Parameters cell.
if is_legacy:
    grid_label, last_year = '1km', 2021
else:
    grid_label, last_year = target_grid, year_end

years = [str(y) for y in range(2003, last_year + 1)]

for var, long_name in fluxes.items():
    print(var)

    # ---- open the source data --------------------------------------------
    if is_legacy:
        path = f'/g/data/os22/chad_tmp/NEE_modelling/results/predictions/AusEFlux_{var}_2003_2022_1km_quantiles_v1.1.nc'
        source = xr.open_dataset(path)
    else:
        folder = f'/g/data/xc0/project/AusEFlux/results/AusEFlux/{var}/'
        files = sorted(f'{folder}/{i}' for i in os.listdir(folder) if i.endswith(".nc"))

        # combine the sorted .nc files in the folder into one dataset
        source = xr.open_mfdataset(files)

    ds = assign_crs(source, crs='EPSG:4326')

    # ---- dataset-level attributes ----------------------------------------
    # Monthly and annual units get separate names so the monthly value is
    # not overwritten while looping over years.
    if var == 'ET':
        monthly_units, annual_units = 'mm/month', 'mm/year'
    else:
        monthly_units = 'gC/m\N{SUPERSCRIPT TWO}/month'
        annual_units = 'gC/m\N{SUPERSCRIPT TWO}/year'

    description = f'AusEFlux {var} is created by empirically upscaling the OzFlux eddy covariance network using machine learning methods coupled with climate and remote sensing datasets. The estimates provided within this dataset were extracted from an ensemble of predictions and represent the median and uncertainty range.'

    ds.attrs = {
        'nodata': np.nan,
        'crs': 'EPSG:4326',
        'short_name': var,
        'long_name': long_name,
        'units': monthly_units,
        'version': version,
        'description': description,
    }

    # Remove the 'grid_mapping' attribute from every variable. pop() with a
    # default is used so a variable without the attribute does not raise
    # KeyError and abort the whole export.
    for v in ds.data_vars:
        ds[v].attrs.pop('grid_mapping', None)

    # ---- output folders --------------------------------------------------
    monthly_dir = os.path.join(export, 'monthly', var)
    mean_dir = os.path.join(export, 'annual', 'AnnualMean', var)
    sum_dir = os.path.join(export, 'annual', 'AnnualSum', var)

    # to_netcdf does not create missing folders, so make sure they exist
    for out_dir in (monthly_dir, mean_dir, sum_dir):
        os.makedirs(out_dir, exist_ok=True)

    # ---- one monthly file + two annual summaries per year ----------------
    for year in years:
        print('', year)
        xx = ds.sel(time=year)

        # annual summaries
        xx_mean = xx.resample(time='YE').mean()
        xx_sum = xx.resample(time='YE').sum()

        # Re-apply the NaN footprint of the annual-mean median layer to the sums
        mask = ~np.isnan(xx_mean[var + '_median'])
        xx_sum = xx_sum.where(mask).astype(np.float32)

        # annual sums are per-year quantities
        # NOTE(review): only 'units' is set explicitly on the annual sum and
        # nothing is set on the annual mean; whether the dataset-level attrs
        # survive resample() depends on xarray's keep_attrs default - confirm.
        xx_sum.attrs['units'] = annual_units

        # hack to make time dim work with OpenDAP which doesn't like datetime64:
        # store time as int32 days elapsed since the first timestamp of the year
        days = np.array(xx.time, dtype='datetime64[D]')
        start_time = days[0]
        xx['time'] = (days - start_time).astype('int32')
        xx.time.attrs = {'units': f'days since {start_time}'}  # make sure attrs explain int32 time

        # The annual files hold a single time step, stored as zero days since
        # the timestamp produced by the 'YE' resample.
        annual_time = xx_mean.time.values[0].astype('datetime64[D]')
        zero_days = np.array([0], dtype='timedelta64[D]').astype('int32')
        xx_mean['time'] = zero_days
        xx_sum['time'] = zero_days
        xx_mean.time.attrs = {'units': f'days since {annual_time}'}
        xx_sum.time.attrs = {'units': f'days since {annual_time}'}

        xx.to_netcdf(os.path.join(monthly_dir, f'AusEFlux_{var}_{grid_label}_quantiles_{year}_{version}.nc'))
        xx_mean.to_netcdf(os.path.join(mean_dir, f'AusEFlux_{var}_{grid_label}_AnnualMean_{year}_{version}.nc'))
        xx_sum.to_netcdf(os.path.join(sum_dir, f'AusEFlux_{var}_{grid_label}_AnnualSum_{year}_{version}.nc'))

    # All files for this variable are written; release the input file handles
    # before moving on to the next variable.
    source.close()

## Open and plot a dataset through OpenDAP

Open one of the exported files through OpenDAP and plot it to check that it's working as intended.

In [None]:
# Read back the final year's annual ET sum through the THREDDS OpenDAP endpoint.
# The file name is built from the Analysis Parameters (target_grid, year_end,
# version) rather than hard-coded values, so this check keeps pointing at a
# file written by the current run when the parameters are updated.
# NOTE(review): the base URL is fixed to the 'v2' folder - adjust if checking
# a dataset published under a different major version.
url_base = 'https://thredds.nci.org.au/thredds/dodsC/ub8/au/AusEFlux/v2/'
path = f'{url_base}annual/AnnualSum/ET/AusEFlux_ET_{target_grid}_AnnualSum_{year_end}_{version}.nc'
ds = xr.open_dataset(path)
ds

In [None]:
# Drop the length-1 dimensions from the median ET layer, then draw it as an image.
# The trailing semicolon suppresses the text repr of the returned plot object.
et_median = ds['ET_median'].squeeze()
et_median.plot.imshow(robust=True, size=6);