# Collecting various gridded datasets to use as predictors in a NEE model

- ANU climate data
- MODIS LAI
- Soil moisture from GRAFS

Results are saved to `/g/data/os22/chad_tmp/NEE_modelling/results/input_data/input_data_<YYYY>.nc`

In [None]:
# !pip install xarray
# !pip install datacube
# !pip install --extra-index-url="https://packages.dea.ga.gov.au" \
#   odc-algo

# !pip install rioxarray
# !pip install odc-geo
# !pip install dea_tools
# !pip install joblib
# !pip install tqdm
# !pip install geopandas
# !pip install -U scikit-learn
# !pip install dask-ml

In [None]:
from datacube.utils.dask import start_local_dask
import sys
sys.path.append('/g/data/os22/chad_tmp/NEE_modelling/')
from collect_gridded_data import collect_gridded_data

In [None]:
# Start a local Dask client for this notebook session; the memory safety
# margin is handed straight to datacube's `start_local_dask` helper.
client = start_local_dask(mem_safety_margin='2Gb')
# Echo the client so its address and resources show up in the cell output.
print(client)

<Client: 'tcp://127.0.0.1:43781' processes=1 threads=16, memory=44.92 GiB>


## Analysis Parameters

In [None]:
# Period to collect, passed on to `collect_gridded_data`. This is a plain
# string: parentheses around a single value do not create a tuple.
time = '2019'

In [None]:
%%time
# Collect the gridded predictors for the requested period, chunked into
# 1000 x 1000 (lat, lon) blocks.
data = collect_gridded_data(
    time=time,
    chunks={'lat': 1000, 'lon': 1000},
)

---
---
## Try to stitch together SIF

In [None]:
import xarray as xr
import rioxarray
from odc.geo.xr import assign_crs
import pandas as pd
import numpy as np
import os

In [None]:
# Stitch the monthly GOSIF GeoTIFFs into a single SIF time series covering
# Australia and save it as one NetCDF file.
base='/g/data/os22/chad_tmp/NEE_modelling/data/SIF/GOSIF/'

# Keep only the GeoTIFFs so the progress counter and its total agree, and
# sort them so the processing order is reproducible (os.listdir order is
# arbitrary).
files = os.listdir(base)
tifs = sorted(f for f in files if f.endswith('.tif'))
if not tifs:
    # Fail with a clear message instead of an opaque error from xr.concat.
    raise FileNotFoundError('No .tif files found in ' + base)

arrs = []
for i, f in enumerate(tifs, start=1):
    print(" Tif {:03}/{:03}\r".format(i, len(tifs)), end="")
    # Year and month are sliced from fixed positions at the end of the name.
    y = f[-12:-8]
    m = f[-6:-4]
    sif = rioxarray.open_rasterio(base + f).squeeze().drop_vars('band')
    sif = assign_crs(sif, crs='epsg:4326')
    # Subset first so masking and casting only touch the pixels that are kept.
    sif = sif.sel(x=slice(110, 155)).sel(y=slice(-8, -45))  # Australia only
    # Values >= 32766 become NaN (presumably fill/no-data codes -- confirm
    # against the GOSIF documentation).
    sif = sif.where(sif < 32766).astype('float32')
    # Time stamp is the first of the month plus 14 days. It is deliberately
    # not called `time`, so the notebook-level `time` analysis parameter is
    # not overwritten by running this cell.
    timestamp = pd.date_range(np.datetime64(y + '-' + m), periods=1, freq="MS") + pd.Timedelta(14, 'd')
    sif = sif.expand_dims(time=timestamp)
    arrs.append(sif)

sif = xr.concat(arrs, dim='time').sortby('time')
sif = xr.where(sif < 0, 0, sif)  # replace negative values with 0 (NaN stays NaN)
sif.name = 'SIF'
sif.to_netcdf('/g/data/os22/chad_tmp/NEE_modelling/data/SIF/GOSIF_2000_2020.nc')

## MODIS Land surface temp

In [5]:
# List the MODIS MYD11A1 mosaic files, leaving out any whose name
# contains 'QC'.
base = '/g/data/ub8/au/MODIS/mosaic/MYD11A1.006/'
files = os.listdir(base)
paths = [base + name for name in files if 'QC' not in name]

In [6]:
# Write one monthly-mean daytime LST file per yearly MODIS mosaic.
for p in paths:
    # Only NetCDF files carrying the `LST_Day_1km` variable can be processed
    # below; skip anything else that happens to be in the directory.
    fname = os.path.basename(p)
    if not (fname.endswith('.nc') and 'LST_Day_1km' in fname):
        continue
    y = p[-7:-3]  # year, taken from the '<YYYY>.nc' suffix
    # Open the file for THIS iteration. Previously the 2020 file was
    # hard-coded, so every LST_<year>.nc output held 2020 data.
    lst = xr.open_dataset(p, chunks=dict(latitude=1000, longitude=1000))
    lst = assign_crs(lst, crs='epsg:4326')
    lst = lst.resample(time='MS').mean().compute()
    # Shift the month-start labels by 14 days. This replaces the
    # `loffset` argument of `resample`, which newer xarray no longer accepts.
    lst['time'] = lst.get_index('time') + pd.Timedelta(14, 'd')
    lst = lst.LST_Day_1km.rename('LST')
    lst.to_netcdf('/g/data/os22/chad_tmp/NEE_modelling/data/LST/LST_'+y+'.nc')
    

## MODIS fPAR
The data here only extends to 2017-03: /g/data/u39/public/data/modis/lpdaac-mosaics-cmar/v2-nc4/aust/MOD15A2.005

## fPAR from GEE

https://github.com/aazuspan/wxee/blob/main/docs/examples/image_collection_to_xarray.ipynb

https://github.com/aazuspan/wxee/blob/main/docs/examples/modis.ipynb

In [None]:
!pip install earthengine-api
!pip install wxee

In [None]:
import ee
import wxee

In [None]:
# Trigger the Earth Engine authentication flow (run this before Initialize).
ee.Authenticate()

# Initialize the Earth Engine library so the `ee`/`wxee` calls in the
# following cells can be made.
ee.Initialize()

In [None]:
# MODIS MCD15A3H time series restricted to 2020-03 .. 2020-09, keeping
# only the "Fpar" band.
ts = (
    wxee.TimeSeries("MODIS/061/MCD15A3H")
    .filterDate("2020-03", "2020-09")
    .select(["Fpar"])
)

# Last expression in the cell, so the summary is displayed as cell output.
ts.describe()

In [None]:
# Country outlines from the FAO GAUL level-0 collection, narrowed to the
# feature(s) whose ADM0_NAME is "Australia".
countries = ee.FeatureCollection("FAO/GAUL_SIMPLIFIED_500m/2015/level0")
# `filter(ee.Filter.eq(...))` is the recommended replacement for the
# deprecated `filterMetadata(name, "equals", value)` shortcut.
aus = countries.filter(ee.Filter.eq("ADM0_NAME", "Australia"))

In [None]:
# Download the time series as an xarray object over the bounding box of
# Australia, in EPSG:4326 at a scale of 0.01.
aus_bounds = aus.geometry().bounds()
ds = ts.wx.to_xarray(
    region=aus_bounds,
    scale=0.01,
    crs="EPSG:4326",
)

## AWRA

In [None]:
# var='rain_day'
# flux = xr.open_dataset('/g/data/fj8/public/BoM/AWRA/Outputs/DailyScheduledRun/processed/values/month/'+var+'.nc')
# flux.isel(time=2)[var].plot.imshow()