# Combine all the predictions, and map uncertainty

We ran an ensemble of predictions; now we need to compute the ensemble median and the uncertainty range.

- ~12.5 GiB per prediction × 30 predictions ≈ 375 GiB
- Use hugemem queue

In [None]:
import os
import sys
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from odc.geo.xr import assign_crs
import odc.geo.xr
import warnings
# Suppress all warnings from here on (note: this also hides deprecation notices).
warnings.filterwarnings("ignore")

# Extend the import search path so the `_utils` and `_percentile` modules below
# can be found (presumably they live in this directory -- confirm).
sys.path.append('/g/data/os22/chad_tmp/AusEFlux/src/')
from _utils import start_local_dask
from _percentile import xr_quantile


In [None]:
# Create the Dask client through the project helper `start_local_dask`
# (defined in `_utils`, not shown here); the bare name displays it as cell output.
client = start_local_dask(mem_safety_margin='2Gb')
client

## Analysis Parameters

In [None]:
# Target variable: selects the ensemble sub-folder and is used in output names.
model_var = "NEE"
# Root of the project directory tree (keep the trailing slash).
base = "/g/data/os22/chad_tmp/AusEFlux/"

# Folder holding the ensemble predictions for `model_var`.
predictions_folder = base + "results/predictions/ensemble/" + model_var + "/"

# Metadata for export
full_name = "Net Ecosystem Exchange"
version = "v1.2"
units = "gC/m2/month"

### Predictions paths

In [None]:
# os.listdir returns entries in arbitrary order, so sort them to keep the
# list of ensemble members reproducible between runs and machines.
files = sorted(os.listdir(predictions_folder))
# Keep only the NetCDF files and build their full paths.
pred_filepaths = [os.path.join(predictions_folder, fname)
                  for fname in files if fname.endswith('.nc')]
# Show how many prediction files were found.
len(pred_filepaths)

## Quickly plot the results to inspect

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(15, 5))

n_preds = len(pred_filepaths)
for i, pred in enumerate(pred_filepaths, start=1):
    # Progress counter; the carriage return makes it overwrite itself in place.
    print("  {:02}/{:02}\r".format(i, n_preds), end="")
    # Open with one time step per chunk and close the file as soon as the
    # spatial sum is computed, so file handles do not accumulate across members.
    with xr.open_dataarray(pred, chunks=dict(x=1050, y=680, time=1)) as da:
        total = da.sum(['x', 'y']).compute()
    # One faint line per ensemble member: the sum over x and y for each time step.
    total.plot(ax=ax, alpha=0.25, c='lightskyblue')

plt.title('Ensembles');

## Find the median and the 5th–95th percentile range

In [None]:
# Open every prediction lazily and name it after the five characters that
# precede the '.nc' extension, so each member becomes its own variable on merge.
arrs = []
for pred in pred_filepaths:
    ds = xr.open_dataarray(pred, chunks=dict(x=1000, y=1000, time=1))  # 680, longitude=1050, time=1
    arrs.append(ds.rename(pred[-8:-3]))

# Merge the members into one Dataset, then stack them along a new 'variable' dimension.
ds = xr.merge(arrs)
ds = ds.to_array()

# 5th, 50th and 95th percentiles via the project helper `xr_quantile`
# (presumably taken across the stacked members -- confirm against `_percentile`).
ds = xr_quantile(ds, quantiles=[0.05, 0.50, 0.95], nodata=np.nan)
# `drop_vars` replaces the deprecated `DataArray.drop`; it removes the leftover
# scalar 'variable' coordinate once the length-1 dimension has been squeezed out.
ds = ds.rename({'band': model_var + '_quantiles'}).to_array().squeeze().drop_vars('variable')
ds.attrs['nodata'] = np.nan
ds

In [None]:
%%time
# Run the lazily-defined computation and replace `ds` with the computed result.
ds = ds.compute()

### Plot the results

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(15, 5))
# Legend labels must match the quantiles actually selected (0.05 / 0.50 / 0.95).
ds.sel(quantile=0.05).sum(['x', 'y']).plot(ax=ax, alpha=0.5, c='lightskyblue', label='0.05')
ds.sel(quantile=0.50).sum(['x', 'y']).plot(ax=ax, alpha=0.5, c='darkgreen', label='median')
ds.sel(quantile=0.95).sum(['x', 'y']).plot(ax=ax, alpha=0.5, c='lightskyblue', label='0.95')

ax.legend()
plt.title('Uncertainty modelling, quantiles of ensembles')

## Save to netcdf

Add attributes and apply the naming convention, ready for production.

In [None]:
# Name the array after the variable's full name and record its units.
ds = ds.rename(full_name)
ds.attrs.update(units=units)

In [None]:
# Split the 'quantile' dimension into one data variable per quantile level.
ds = ds.to_dataset(dim='quantile')
# Give the quantile variables and the spatial dimensions their export names.
ds = ds.rename({
    0.05: f'{model_var}_5th_percentile',
    0.5: f'{model_var}_median',
    0.95: f'{model_var}_95th_percentile',
    'x': 'longitude',
    'y': 'latitude',
})
# Attach the geographic CRS to the dataset.
ds = assign_crs(ds, crs='EPSG:4326')
ds

In [None]:
# Write the quantile dataset to NetCDF; the file name carries the variable and product version.
ds.to_netcdf(f'{base}results/predictions/AusEFlux_{model_var}_2003_2022_5km_quantiles_{version}.nc')