# Downloading products from GEE into netcdfs

This notebook uses the following packages:

* `wxee`: https://github.com/aazuspan/wxee
* `eemont`: https://github.com/davemlz/eemont

> Note: You will likely need to install the two packages listed in the cell below. Uncomment the lines, run the cell, then restart the kernel.

In [None]:
# !pip install wxee
# !pip install eemont

## Load packages

In [None]:
import os
import ee
import wxee
import eemont
import geemap as gmap
import xarray as xr
import pandas as pd
import numpy as np
import geopandas as gpd
import rioxarray
from odc.geo.xr import assign_crs
from odc.algo import xr_reproject

## Authenticate GEE    

You need a Google Earth Engine account. Sign up here: https://earthengine.google.com/new_signup/

Then run the cell below and follow the link and prompts.

In [None]:
# Trigger the authentication flow.
# NOTE(review): presumably this only needs to be completed once per environment - confirm
# against the installed earthengine-api version.
ee.Authenticate()

# Initialize the library. Run this before the cells below, which build
# ee objects (ee.ImageCollection, geemap layers).
ee.Initialize()

## Analysis parameters

Enter the product name and other parameters below. You can find some of this information on the product's data page.

In [None]:
# GEE collection ID passed to ee.ImageCollection() in the download loop
product = "MODIS/006/MOD17A2H"
# band names selected from the collection; the first entry is also used as the output filename prefix
bands =  ['Gpp']
# first and last dates of the download period; these are split into monthly windows by pd.date_range
start_date = '2000/03/01'
end_date = '2022/08/31'
# folder the monthly netcdf files are written to (keep the trailing slash, the filename is appended to it)
output_path = 'location/to/save/the/data/GPP/'
# CRS passed to wxee's to_xarray(crs=...) and attached to the dataset with assign_crs
output_crs = "EPSG:3577"
# passed to wxee's to_xarray(scale=...); presumably in the units of output_crs (metres) - TODO confirm
resolution = 500

# geojson defining analysis region
# (read into an ee object; its .geometry() is used as the download region)
region = gmap.geojson_to_ee('/locationtothe/NSW_boundary.geojson')



## Show region

In [None]:
# Build an interactive geemap map on the satellite basemap, centred on the given [lat, lon]
Map = gmap.Map(basemap=gmap.basemaps['SATELLITE'], center=[-35.2041, 149.2721], zoom=5)
# Overlay the analysis region (default styling) under the layer name 'Tiles'
Map.addLayer(region, {}, 'Tiles')
# Bare expression as the last line of the cell so the notebook renders the map
Map

## Loop through months and load MODIS data

The data is downloaded one month at a time to keep memory usage down and to stay within GEE's limits on download size.

In [None]:
# Split the period start_date..end_date into calendar-month windows and
# download each month from GEE into its own netcdf file.
#
# Each window is [first day of month, first day of next month). GEE's
# filterDate() treats its end date as exclusive, so using the last day of the
# month as the end would silently drop any image dated on that day.
# The window ends are derived from the month starts (rather than from a second,
# independent date_range) so the pairs stay aligned even if start_date is not
# the first day of a month; in that case the download begins at the next
# month start.
start = pd.date_range(start=start_date, end=end_date, freq='MS')
end = start + pd.offsets.MonthBegin(1)

# never request data beyond end_date (inclusive), even in the final window
stop = pd.Timestamp(end_date) + pd.Timedelta(days=1)

# make sure the output folder exists before the (slow) downloads begin
os.makedirs(output_path, exist_ok=True)

for i, (s, e) in enumerate(zip(start, end), start=1):
    # progress counter, rewritten in place on a single line
    print(" {:03}/{:03}\r".format(i, len(start)), end="")

    s = s.strftime('%Y-%m-%d')
    e = min(e, stop).strftime('%Y-%m-%d')

    #download data from GEE, convert to xarray
    ds = ee.ImageCollection(product) \
        .filterDate(s, e) \
        .scaleAndOffset() \
        .select(bands) \
        .wx.to_time_series() \
        .wx.to_xarray(region=region.geometry(), scale=resolution, crs=output_crs, progress=False)

    ds.attrs['nodata'] = np.nan
    ds = assign_crs(ds, crs=output_crs) #add gbox

    # one file per month, named <first band>_<YYYY-MM>.nc after the first time step
    month_label = np.datetime_as_string(ds.time.values[0], unit='M')
    ds.to_netcdf(os.path.join(output_path, bands[0] + '_' + month_label + '.nc'))


## Optional: Stitch together monthly files into one large netcdf


In [None]:
#where are the files:
# (this should be the folder the monthly netcdf files were written to)
base = '/g/data/os22/chad_tmp/NEE_modelling/data/GPP/'

# Create a list of the netcdf file names (names only, not full paths).
# os.listdir() returns entries in arbitrary order, so sort them to get a
# deterministic list; the monthly files are named <band>_<YYYY-MM>.nc, so
# lexical order is also chronological order.
files = sorted(name for name in os.listdir(base) if name.endswith(".nc"))


In [None]:
#Use open_mfdataset to open all the files as one dataset, concatenated along the time dim.
# os.path.join is used so the paths are valid whether or not `base` ends with a separator.
ds = xr.open_mfdataset([os.path.join(base, name) for name in files])

### Consider rasterizing your NSW polygon and clipping the netcdf to that extent before exporting