# Generate training and prediction data from insitu and gridded datasets

Additional flux-tower datasets are available in the TERN OzFlux THREDDS catalogue: https://dap.tern.org.au/thredds/catalog/ecosystem_process/ozflux/catalog.html



## Load modules

In [None]:
from datacube.utils.dask import start_local_dask

import sys
sys.path.append('/g/data/os22/chad_tmp/NEE_modelling/')
from collect_training_data import extract_ec_gridded_data

In [None]:
# Start a local Dask cluster for the data extraction done further down.
# NOTE(review): '2Gb' is presumably memory held back from the workers for the
# rest of the system -- confirm against the datacube `start_local_dask` docs.
client = start_local_dask(
    mem_safety_margin='2Gb',
)
# Bare last expression so the notebook renders the client summary.
client

## Collect training data from EC towers, and gridded data at pixel location of EC tower

This data is used for training the models.


In [None]:
# Monthly L6 flux-tower files used for training, one entry per site.
# Each entry is handed unchanged to `extract_ec_gridded_data` in the loop below.
# NOTE(review): these look like paths relative to the OzFlux THREDDS catalogue;
# confirm the base location inside `collect_training_data`.
#
# Every active entry ends with a comma on purpose: without it, uncommenting a
# neighbouring entry would make Python silently join two adjacent string
# literals into a single (invalid) path.
suffixes = [
    'Emerald/2022_v1/L6/default/Emerald_L6_20110610_20131231_Monthly.nc',
    'GreatWesternWoodlands/2022_v1/L6/default/GreatWesternWoodlands_L6_20130101_20220101_Monthly.nc',
    'CowBay/2022_v1/L6/default/CowBay_L6_20090101_20211231_Monthly.nc',
    'SamfordEcologicalResearchFacility/2022_v1/L6/default/Samford_L6_20100602_20171231_Monthly.nc',
    'CumberlandPlain/2022_v1/L6/default/CumberlandPlain_L6_20140101_20220101_Monthly.nc',
    'Tumbarumba/2021_v1/L6/default/Tumbarumba_L6_20020107_20191231_Monthly.nc',
    'Whroo/2021_v1/L6/default/Whroo_L6_20111201_20210724_Monthly.nc',
    'WombatStateForest/2022_v1/L6/default/WombatStateForest_L6_20100120_20210529_Monthly.nc',
    'WallabyCreek/2022_v1/L6/default/WallabyCreek_L6_20050825_20130409_Monthly.nc',
    'RobsonCreek/2022_v1/L6/default/RobsonCreek_L6_20130801_20211218_Monthly.nc',
    'CapeTribulation/2022_v1/L6/default/CapeTribulation_L6_20100101_20181102_Monthly.nc',
    'AliceSpringsMulga/2022_v1/L6/default/AliceSpringsMulga_L6_20100903_20211231_Monthly.nc',
    'CalperumChowilla/2022_v1/L6/default/Calperum_L6_20100730_20220216_Monthly.nc',
    'DryRiver/2022_v1/L6/default/DryRiver_L6_20091025_20220218_Monthly.nc',
    'Litchfield/2021_v1/L6/default/Litchfield_L6_20150623_20210725_Monthly.nc',
    'TiTreeEast/2022_v1/L6/default/TiTreeEast_L6_20120718_20220117_Monthly.nc',
    'SturtPlains/2021_v1/L6/default/SturtPlains_L6_20080828_20210724_Monthly.nc',
    'RiggsCreek/2022_v1/L6/default/RiggsCreek_L6_20110101_20170712_Monthly.nc',  # dryland cropping
    'DalyPasture/2022_v1/L6/default/DalyPasture_L6_20080101_20130908_Monthly.nc',
    'Otway/2021_v1/L6/default/Otway_L6_20070811_20110101_Monthly.nc',  # pasture site
    'Yanco/2022_v1/L6/default/Yanco_L6_20130101_20220218_Monthly.nc',  # soil site

    # TODO: check the locations of the sites below, as they are new additions.
    'Gingin/2022_v1/L6/default/Gingin_L6_20111013_20201231_Monthly.nc',  # native Banksia woodland
    'LongreachMitchellGrassRangeland/2022_v1/L6/default/Longreach_L6_20181009_20210909_Monthly.nc',
    'Ridgefield/2022_v1/L6/default/Ridgefield_L6_20160101_20220218_Monthly.nc',  # dryland agriculture
    'SilverPlains/2022_v1/L6/default/SilverPlains_L6_20200101_20211231_Monthly.nc',  # Australian Mountain Research
    'DalyUncleared/2022_v1/L6/default/DalyUncleared_L6_20080101_20220217_Monthly.nc',  # woodland savanna
    'Collie/2022_v1/L6/default/Collie_L6_20170804_20191111_Monthly.nc',  # wandoo woodland
    'Boyagin/2022_v1/L6/default/Boyagin_L6_20171020_20211231_Monthly.nc',  # wandoo woodland
    'AdelaideRiver/2022_v1/L6/default/AdelaideRiver_L6_20071017_20090524_Monthly.nc',  # savanna dominated by Eucalyptus

    # Sites deliberately left out (kept here with the reason for exclusion):
    # 'FoggDam/2021_v1/L6/default/FoggDam_L6_20060207_20081031_Monthly.nc',  # wetland that floods seasonally
    # 'Loxton/2022_v1/L6/default/Loxton_L6_20080819_20090609_Monthly.nc',  # almond trees, 1 year of data
    # 'RedDirtMelonFarm/2021_v1/L6/default/RedDirtMelonFarm_L6_20110923_20130721_Monthly.nc',  # savanna converted to melons
    #     Red Dirt is removed because it's a small farm in the middle of savanna
    #     woodland, so LC is wrong at LC scale (5km resolution).
]

# Run the extraction once per site file and collect the results in `sites`,
# in the same order as `suffixes`.
sites = []
for i, suffix in enumerate(suffixes, start=1):
    # "\r" together with end="" rewrites the same output line, so the
    # "current/total" progress counter stays on a single line.
    print(" {:02}/{:02}\r".format(i, len(suffixes)), end="")
    sites.append(extract_ec_gridded_data(suffix))
    

## Collate gridded data for predictions

This gridded data is what the models predict on. The collated output is stored at `/g/data/os22/chad_tmp/NEE_modelling/results/predicton_data/prediction_data_YYYY.nc`

In [1]:
import sys
sys.path.append('/g/data/os22/chad_tmp/NEE_modelling/')
from collect_prediction_data import collect_prediction_data

In [2]:
# Time bounds handed to `collect_prediction_data` as `time_start` / `time_end`.
t1 = '2003'
t2 = '2021'

In [3]:
# Collate the gridded predictor data between `t1` and `t2`.
# verbose=True is requested so that progress messages are printed while it runs.
# NOTE(review): the accompanying notebook text says the result is also written
# to disk as netCDF -- confirm in `collect_prediction_data`.
ds = collect_prediction_data(
    time_start=t1,
    time_end=t2,
    verbose=True,
)
# Bare last expression so the notebook renders the returned object.
ds

   Extracting MODIS LAI
   Extracting MODIS EVI
   Extracting MODIS LST
   Extracting MODIS fPAR
   Extracting dT
   Extracting Moisture Index
   Extracting Climate
   Cumulative rainfall
   Adding Vegetation fractions
   Merge and create valid data mask
   Exporting netcdf
