# Preprocess GPP datasets

In [None]:
import os
import xarray as xr
import numpy as np
import pandas as pd
from odc.geo.xr import assign_crs

import sys
sys.path.append('/g/data/os22/chad_tmp/AusEFlux/src/')
from _utils import round_coords

## DIFFUSE

In [None]:
# Open the DIFFUSE GPP variable, tag it with EPSG:4326 and call it 'GPP'.
ds_path = '/g/data/os22/chad_tmp/Aus_CO2_fertilisation/data/DIFFUSE_GPP_1km_2003_2021.nc'
ds = assign_crs(xr.open_dataset(ds_path), crs='EPSG:4326')['DIFFUSE_GPP'].rename('GPP')

# Keep only pixels whose maximum over time is strictly positive; every other
# pixel (never above zero, or NaN throughout) is set to NaN at all time steps.
# Note: `mean` holds the temporal *maximum*, despite its name.
mean = ds.max('time')
mask = mean > 0
ds = ds.where(mask)

# Multiply each time step by the number of days in its month
# (presumably converts a per-day flux into a monthly total -- confirm units).
ds = ds * ds.time.dt.daysinmonth
ds.attrs['nodata'] = np.nan

In [None]:
# Target grid: the geobox of the monthly rainfall product.
gbox = xr.open_dataset('/g/data/os22/chad_tmp/AusENDVI/data/5km/rain_5km_monthly_1981_2022.nc')['rain'].odc.geobox

# Reproject onto the target grid using 'average' resampling.
ds = assign_crs(ds, crs='epsg:4326').odc.reproject(gbox, resampling='average')

# round_coords is a project helper (see _utils); the CRS is attached again
# after it runs, and the array is named 'GPP'.
ds = assign_crs(round_coords(ds), crs='epsg:4326').rename('GPP')

In [None]:
# ds.mean('time').plot.imshow(robust=True)

In [None]:
# Write the current `ds` to NetCDF.
out_path = '/g/data/os22/chad_tmp/Aus_CO2_fertilisation/data/DIFFUSE_GPP_5km_2003_2021.nc'
ds.to_netcdf(out_path)

## MODIS

In [None]:
# Open the MODIS 'GPP' variable, tag it with EPSG:4326 and scale it by 1000
# (presumably a unit conversion, e.g. kg -> g -- TODO confirm).
ds_path = '/g/data/xc0/project/AusEFlux/data/comparison_datasets/MODIS_GPP_1km_monthly_2002_2021.nc'
ds = assign_crs(xr.open_dataset(ds_path), crs='EPSG:4326')['GPP'] * 1000

# Keep only pixels whose maximum over time is strictly positive; every other
# pixel (never above zero, or NaN throughout) is set to NaN at all time steps.
# Note: `mean` holds the temporal *maximum*, despite its name.
mean = ds.max('time')
mask = mean > 0
ds = ds.where(mask)
ds.attrs['nodata'] = np.nan

In [None]:
# Target grid: the geobox of the monthly rainfall product.
gbox = xr.open_dataset('/g/data/os22/chad_tmp/AusENDVI/data/5km/rain_5km_monthly_1981_2022.nc')['rain'].odc.geobox

# Reproject onto the target grid using 'average' resampling.
ds = assign_crs(ds, crs='epsg:4326').odc.reproject(gbox, resampling='average')

# round_coords is a project helper (see _utils); the CRS is attached again
# after it runs, and the array is named 'GPP'.
ds = assign_crs(round_coords(ds), crs='epsg:4326').rename('GPP')

In [None]:
# Write the current `ds` to NetCDF.
out_path = '/g/data/os22/chad_tmp/Aus_CO2_fertilisation/data/MODIS_GPP_5km_2002_2021.nc'
ds.to_netcdf(out_path)

## AusEFlux version 2

In [None]:
# Make the AusEFlux helper module importable, then start a local dask
# cluster via the project helper: 13 single-threaded workers with a
# '60GiB' memory limit.
import sys
sys.path.append('/g/data/os22/chad_tmp/AusEFlux/src/')
from _utils import start_local_dask

start_local_dask(n_workers=13, threads_per_worker=1, memory_limit='60GiB')

In [None]:
ds_path = '/g/data/ub8/au/AusEFlux/v2/monthly/GPP/'

# Every NetCDF file in the directory, in lexicographic order.
files = sorted(f'{ds_path}/{fname}' for fname in os.listdir(ds_path) if fname.endswith(".nc"))

# Open all files as one dataset, keep 2003-2023 and select 'GPP_median'.
ds = xr.open_mfdataset(files).sel(time=slice('2003', '2023'))['GPP_median']
ds = assign_crs(ds, crs='EPSG:4326')
ds.attrs['nodata'] = np.nan

# Target grid: the geobox of the monthly rainfall product.
gbox = xr.open_dataset('/g/data/os22/chad_tmp/AusENDVI/data/5km/rain_5km_monthly_1981_2022.nc')['rain'].odc.geobox

# Reproject onto the target grid using 'average' resampling, apply the
# project's round_coords helper and name the array 'GPP'.
ds = assign_crs(ds, crs='epsg:4326').odc.reproject(gbox, resampling='average')
ds = round_coords(ds).rename('GPP')
ds

In [None]:
%%time
# Evaluate `ds` and rebind it to the computed result (presumably `ds` is
# still lazy/dask-backed here because it came from open_mfdataset -- confirm).
ds = ds.compute()

In [None]:
# ds.mean('time').plot.imshow(robust=True)

In [None]:
# Write the current `ds` to NetCDF.
out_path = '/g/data/os22/chad_tmp/Aus_CO2_fertilisation/data/AusEFlux_GPP_5km_2003_2023.nc'
ds.to_netcdf(out_path)

## CEDAR-GPP

In [None]:
ds_path = '/g/data/os22/chad_tmp/Aus_CO2_fertilisation/data/LT_CFE-Hybrid_NT'

# Every NetCDF file in the directory, in lexicographic order.
files = sorted(f'{ds_path}/{fname}' for fname in os.listdir(ds_path) if fname.endswith(".nc"))

# Open all files as one dataset, keep 1982-2023 and select 'GPP_mean'.
ds = xr.open_mfdataset(files).sel(time=slice('1982', '2023'))['GPP_mean']

# Spatial subset (presumably a bounding box around Australia -- confirm).
ds = ds.sel(y=slice(-10, -45), x=slice(111, 155))

# Values <= -9999 become NaN (presumably -9999 is the fill value), then the
# scale factor is applied.
ds = ds.where(ds > -9999) * 0.01

# Convert to monthly flux from daily flux.
ds = ds * ds.time.dt.daysinmonth
ds = assign_crs(ds, crs='EPSG:4326')
ds.attrs['nodata'] = np.nan

# Target grid: the geobox of the monthly rainfall product.
gbox = xr.open_dataset('/g/data/os22/chad_tmp/AusENDVI/data/5km/rain_5km_monthly_1981_2022.nc')['rain'].odc.geobox

# Reproject with 'average' resampling and evaluate the result, then apply
# the project's round_coords helper, re-attach the CRS and name the array.
ds = ds.odc.reproject(gbox, resampling='average').compute()
ds = assign_crs(round_coords(ds), crs='epsg:4326').rename('GPP')

# Shift every timestamp forward by 14 days so it sits mid-month
# (assumes the source timestamps mark the start of each month -- confirm).
ds['time'] = ds['time'] + pd.Timedelta(14, 'd')

In [None]:
# ds.resample(time='YS').mean().mean(['latitude', 'longitude']).plot();

In [None]:
# Write the current `ds` to NetCDF.
# NOTE(review): the filename says 1982_2020 while the time selection earlier
# in this section spans 1982-2023 -- confirm which range the data covers.
out_path = '/g/data/os22/chad_tmp/Aus_CO2_fertilisation/data/CEDAR_GPP_CFE_5km_1982_2020.nc'
ds.to_netcdf(out_path)