# Downloading products from GEE into netcdfs

Using package `wxee`: https://github.com/aazuspan/wxee

The default example loads `MODIS Terra thermal anomalies and Fire Daily`

> Note: You will likely need to install the two packages listed in the cell below. Uncomment and run the cell, then restart the kernel.

## Load packages

In [1]:
import os
import ee
import wxee
import geemap as gmap
import xarray as xr
import pandas as pd
import numpy as np

import rioxarray
from odc.geo.xr import assign_crs
from odc.algo import xr_reproject

## Authenticate GEE    

You need to sign up for Earth Engine access first: https://earthengine.google.com/new_signup/

Then run the cell below and follow the link and prompts.

In [2]:
# Trigger the authentication flow. As the cell output below shows, this
# prompts for a verification code and saves an authorization token.
ee.Authenticate()

# Initialize the Earth Engine library; run this after authenticating.
ee.Initialize()

Enter verification code:  <redacted>



Successfully saved authorization token.


## Analysis parameters

Enter the product name and other parameters below. You can find some of this information on the product's data page, e.g. https://developers.google.com/earth-engine/datasets/catalog/MODIS_006_MOD14A1?hl=en

> Note: For the analysis extent try using http://geojson.io/ to draw a polygon, then copy and paste the coordinates into the cell

In [3]:
# Earth Engine collection ID; passed to wxee.TimeSeries() in the download loop.
# Its last four characters are also used as the monthly output filename prefix.
product = "MODIS/MCD43A4_006_NDWI"
# Band(s) selected from the collection before download.
bands =  ['NDWI']
# Analysis period; parsed by pd.date_range() in the download loop.
start_date = '1/1/2002'
end_date = '12/31/2021'
# Directory the monthly netCDF files are written to.
output_path = '/g/data/os22/chad_tmp/NEE_modelling/data/NDWI/'
# CRS requested for the download and assigned to the downloaded data.
output_crs = "EPSG:3577"
# Passed as `scale=` to the download call.
# Presumably the pixel size in units of output_crs (metres) - TODO confirm.
resolution = 1300
# NOTE(review): only referenced by a commented-out line in the download loop,
# so this value is currently unused.
scale = 0.0001

# Approximate Australian extent as a closed polygon ring (first point repeated
# at the end); presumably [longitude, latitude] pairs - TODO confirm.
# Author's note: kept slightly smaller than the full extent to trick Google
# into giving 1 km resolution.
region = ee.Geometry.Polygon([[
            [114,-43.0],
            [153.0,-43.0],
            [153.0,-10.0],
            [114,-10.0],
            [114,-43.0]]])


## Select region

In [None]:
# Map = gmap.Map(basemap=gmap.basemaps['SATELLITE'], center=[-35.2041, 149.2721], zoom=8)
# Map

In [None]:
# Optional: replace the default `region` with a polygon drawn on the
# interactive map. The map cell above is commented out by default, so `Map`
# may not exist; only override when a map with a drawn ROI is available,
# otherwise keep the `region` defined in the analysis parameters.
if 'Map' in globals() and getattr(Map, 'user_roi', None) is not None:
    region = Map.user_roi.getInfo()
region

## Loop through months and load MODIS data

The data are downloaded and processed one month at a time to keep memory usage down.

In [None]:
#use start and end dates to loop through months and load GEE data
start = pd.date_range(start=start_date, end=end_date, freq='MS') 
end = pd.date_range(start=start_date, end=end_date, freq='M')

i = 0
for s, e in zip(start,end):
    print(" {:03}/{:03}\r".format(i+1, len(start)), end="")

    # use this to check if file already exists
    ss = s+pd.Timedelta(14, 'd')

    if os.path.isfile(output_path+product[-4:]+'_'+ss.strftime('%Y-%m')+'.nc'):
        pass
    
    else:
        try:
            s = s.strftime('%Y-%m-%d')
            e = e.strftime('%Y-%m-%d')
            
            #download data from GEE
            ts = wxee.TimeSeries(product).filterDate(s,e)
            ts = ts.select(bands)
            #print(ts)
            ds = ts.wx.to_xarray(region=region, scale=resolution, crs=output_crs, progress=False)
            #ds = ds[bands] * scale # scale values 
            attrs=ds.attrs #extract attributes so we don't loose them
            ds.attrs['nodata'] = np.nan
            ds = assign_crs(ds, crs=output_crs) #add geobox
            ds = ds.resample(time='MS', loffset=pd.Timedelta(14, 'd')).mean() #resample to monthly

            evi = xr.open_dataarray('/g/data/os22/chad_tmp/NEE_modelling/data/1km/EVI_1km_monthly_2002_2021.nc').isel(time=0) #use this to reproject too
            ds = xr_reproject(ds, geobox=evi.geobox, resampling='bilinear') #reproject
            ds['latitude'] = ds.latitude.astype('float32')
            ds['longitude'] = ds.longitude.astype('float32')
            ds = ds.where(~np.isnan(evi)).astype('float32')

            #ds = ds.where(ds!=0) # remove spurious zeros from reprojection
            ds = ds.assign_attrs(attrs) #add back attrs
            ds.to_netcdf(output_path+product[-4:]+'_'+np.datetime_as_string(ds.time.values[0], unit='M')+'.nc')
    
        except:
            print('fail:', s,e)
            pass
        
    i+=1


 024/240

## Optional: Stitch together monthly files into annual netcdfs

Set the three parameters below

In [None]:
# First and last year to stitch; both are included
# (the loop below iterates over range(year_start, year_end+1)).
year_start = 2002
year_end = 2021
# Directory the annual netCDF files are written to.
# NOTE(review): this path says MODIS_GPP while `product` above is an NDWI
# collection - confirm this is the intended destination.
annual_output_path = '/g/data/os22/chad_tmp/NEE_modelling/data/MODIS_GPP/'

In [None]:
# Combine the monthly netCDF files in `output_path` into one file per year,
# written to `annual_output_path`.
years = range(year_start, year_end + 1)
for i, y in enumerate(years, start=1):
    print(" {:02}/{:02}\r".format(i, len(years)), end="")

    year = str(y)
    # Only netCDF files whose name contains the year; sorted so the order
    # does not depend on the directory listing.
    files = sorted(
        output_path + f
        for f in os.listdir(output_path)
        if year in f and f.endswith('.nc')
    )
    if not files:
        # xr.concat() raises on an empty list; skip years with no monthly files.
        print('no monthly files found for', year)
        continue

    dss = []
    try:
        for f in files:
            dss.append(xr.open_dataset(f))

        data = xr.concat(dss, dim='time').sortby('time')
        data = data.astype('float32')
        data['latitude'] = data.latitude.astype('float32')
        data['longitude'] = data.longitude.astype('float32')

        # NOTE(review): the monthly files are prefixed with product[-4:] but
        # this uses product[-8:] - confirm the intended annual filename.
        data.to_netcdf(annual_output_path + product[-8:] + '_' + year + '.nc')
    finally:
        # Release the file handles whether or not the write succeeded.
        for ds in dss:
            ds.close()

In [None]:
import os
import xarray as xr

In [None]:
# Delete the Jupyter checkpoint folder from the annual output directory.
# shutil.rmtree(..., ignore_errors=True) is the pure-Python equivalent of
# `rm -r -f`: it removes the tree and stays silent if the folder is missing,
# and it does not depend on IPython's automagic shell commands.
import shutil

shutil.rmtree(
    '/g/data/os22/chad_tmp/NEE_modelling/data/MODIS_GPP/.ipynb_checkpoints',
    ignore_errors=True,
)

In [None]:
# Open every file in the GOSIF GPP directory as a single dataset.
base = '/g/data/os22/chad_tmp/NEE_modelling/data/GOSIF_GPP/'
# Skip hidden entries and sub-directories (e.g. `.ipynb_checkpoints`), which
# open_mfdataset cannot read; sort so the order does not depend on the
# directory listing.
files = sorted(
    f for f in os.listdir(base)
    if not f.startswith('.') and os.path.isfile(base + f)
)
ds = xr.open_mfdataset([base + f for f in files])

In [None]:
# Rename 'Gpp' to 'GPP' in the dataset opened above.
ds = ds.rename({'Gpp':'GPP'})

In [None]:
# Write the combined dataset to a single netCDF file.
# NOTE(review): the data were read from the GOSIF_GPP directory but the output
# file is named MODIS_GPP - confirm the filename is intended.
ds.to_netcdf('/g/data/os22/chad_tmp/NEE_modelling/data/1km/MODIS_GPP_1km_monthly_2002_2021.nc')