# Downloading products from GEE into netcdfs

Using package `wxee`: https://github.com/aazuspan/wxee

The default example exports monthly-mean MODIS NDWI (`MODIS/MOD09GA_006_NDWI`)

> Note: You will likely need to install the two packages listed in the cell below. Uncomment the lines, run the cell, then restart the kernel.

In [None]:
# !pip install wxee
# !pip install eemont

## Load packages

In [2]:
import os
import ee
import wxee
import eemont
import geemap as gmap
import xarray as xr
import pandas as pd
import numpy as np
import geopandas as gpd
import rioxarray
from odc.geo.xr import assign_crs
# from odc.algo import xr_reproject

## Authenticate GEE    

You need to sign up for Earth Engine first: https://earthengine.google.com/new_signup/

Then run the cell and follow the link/prompts

In [3]:
# Trigger the interactive authentication flow; it prompts for a verification
# code and saves the resulting authorization token (see the cell output).
ee.Authenticate()

# Initialize the library so the ee.* calls in the following cells can be used.
ee.Initialize()

Enter verification code:  <redacted>



Successfully saved authorization token.


## Analysis parameters

Enter the product name and other parameters below. You can find some of this information on the product's data page, e.g. https://developers.google.com/earth-engine/datasets/catalog/MODIS_006_MOD14A1?hl=en

> Note: For the analysis extent try using http://geojson.io/ to draw a polygon, then copy and paste the coordinates into the cell

In [4]:
# Earth Engine collection to export. Alternatives that have been used here:
#   "MODIS/006/MCD43A4"  (NBAR)
#   "MODIS/061/MOD11A2"  (LST)
product = "MODIS/MOD09GA_006_NDWI"

# Band name; used to label the export tasks and the Drive folder.
bands = 'NDWI'

# Period to process, split into calendar months by the export loop.
start_date = '1/1/2002'
end_date = '06/30/2022'

# Projection and pixel size handed to the export as `crs` and `scale`.
output_crs = "EPSG:3577"
resolution = 1000

# Settings from the earlier NetCDF workflow, currently disabled:
# output_path = '/g/data/os22/chad_tmp/NEE_modelling/data/kNDVI/'
# scale = 0.0001

# Approximate Australian extent. The box is deliberately a little smaller
# than the full continent so that the export is delivered at 1 km resolution.
lon_west, lon_east = 114, 153.0
lat_south, lat_north = -43.0, -10.0
region = ee.Geometry.Polygon([[
    [lon_west, lat_south],
    [lon_east, lat_south],
    [lon_east, lat_north],
    [lon_west, lat_north],
    [lon_west, lat_south],
]])


## Select region

In [None]:
# Map = gmap.Map(basemap=gmap.basemaps['SATELLITE'], center=[-35.2041, 149.2721], zoom=8)
# # region = gmap.geopandas_to_ee(gdf, geodesic=False)
# Map.addLayer(region, {}, 'Tiles')
# Map

In [None]:
# region = Map.user_roi.getInfo()
# region

## Loop through months and load MODIS data

This is done one month at a time to keep memory usage down.

In [5]:
# Export one monthly-mean image per calendar month to Google Drive.
#
# Reads `start_date`, `end_date`, `product`, `bands`, `region`, `resolution`
# and `output_crs` from the analysis-parameters cell. Every month is submitted
# as its own Earth Engine batch task.

# First day of every month inside the requested period.
month_starts = pd.date_range(start=start_date, end=end_date, freq='MS')
last_requested_day = pd.Timestamp(end_date)

# Pair each month start with the first day of the FOLLOWING month.
# filterDate treats its end argument as exclusive, so passing the last day of
# the month would leave that day's images out of the monthly mean.
windows = []
for month_start in month_starts:
    next_month_start = month_start + pd.offsets.MonthBegin(1)
    # Only keep months that lie completely inside the requested period.
    if next_month_start - pd.Timedelta(1, 'd') <= last_requested_day:
        windows.append((month_start, next_month_start))

# Keep a handle on every submitted task so each one can be polled later.
# `export` still refers to the most recently submitted task after the loop.
tasks = []
for i, (s, e) in enumerate(windows):
    print(" {:03}/{:03}\r".format(i+1, len(windows)), end="")

    # Each export is labelled with a mid-month date (month start + 14 days).
    ss = s + pd.Timedelta(14, 'd')

    s = s.strftime('%Y-%m-%d')
    e = e.strftime('%Y-%m-%d')

    # Monthly mean of the selected band. Recipes used for other products:
    #   spectral indices: .scaleAndOffset().spectralIndices(bands).select(bands)
    #   LST:              .scaleAndOffset().select('LST_Day_1km')
    ds = (
        ee.ImageCollection(product)
        .filterDate(s, e)
        .select(bands)
        .mean()
    )

    export = ee.batch.Export.image.toDrive(
        ds,
        description=bands+'_'+ss.strftime('%Y-%m-%d'),
        folder=bands,
        region=region,
        scale=resolution,
        crs=output_crs,
        maxPixels=1e13,
        fileFormat='GeoTIFF',
        formatOptions={
            'cloudOptimized': True,
        },
    )
    export.start()
    tasks.append(export)


 246/246

In [6]:
# Status of the most recently submitted export task; the loop above rebinds
# `export` on every iteration, so earlier tasks are not covered by this call.
export.status()

{'state': 'READY',
 'description': 'NDWI_2022-06-15',
 'creation_timestamp_ms': 1677040103605,
 'update_timestamp_ms': 1677040103605,
 'start_timestamp_ms': 0,
 'task_type': 'EXPORT_IMAGE',
 'id': 'VZKE65ZRQGC6322E2UOGQBQA',
 'name': 'projects/earthengine-legacy/operations/VZKE65ZRQGC6322E2UOGQBQA'}

In [None]:
# Map = gmap.Map(center=[-35.2041, 149.2721], zoom=10)

# modisVisParams = {'bands': ["NDWI"],
#               'max': 1,
#               'min': -1}

# Map.addLayer(ds, modisVisParams, 'modis')
# Map.addLayerControl()
# Map

## Optional: Stitch together monthly files into annual netcdfs

Set the three parameters below

In [None]:
# First and last year to stitch; the loop in the next cell includes both.
year_start = 2002
year_end = 2021
# Directory that receives one NetCDF per year. The file name is appended by
# plain string concatenation, so keep the trailing slash.
annual_output_path = '/g/data/os22/chad_tmp/NEE_modelling/data/NDWI_annual/'
# NOTE(review): the stitching cell also reads `output_path` (the directory
# holding the monthly files), which is commented out in the analysis
# parameters cell -- define it before running the next cell.

In [None]:
# Combine the monthly NetCDF files found in `output_path` into one file per
# year, written to `annual_output_path`.
#
# NOTE(review): `output_path` is commented out in the analysis-parameters
# cell and has to be defined before this cell can run.
years = range(year_start, year_end+1)
for i, y in enumerate(years):
    print(" {:02}/{:02}\r".format(i + 1, len(years)), end="")

    year = str(y)
    # A file belongs to a year when the year appears anywhere in its name.
    files = sorted(output_path+f for f in os.listdir(output_path) if year in f)

    if not files:
        # xr.concat cannot combine an empty list, so report and move on.
        print('no files found for', year)
        continue

    dss = []
    for f in files:
        # Read the data into memory, then release the file handle right away
        # instead of leaving every monthly file open.
        with xr.open_dataset(f) as ds:
            dss.append(ds.load())

    data = xr.concat(dss, dim='time').sortby('time')
    data = data.astype('float32')
    data['latitude'] = data.latitude.astype('float32')
    data['longitude'] = data.longitude.astype('float32')

    # NOTE(review): `product[-8:]` is the last eight characters of the product
    # ID ('006_NDWI' for the default product) -- confirm that this is the
    # intended file-name prefix.
    data.to_netcdf(annual_output_path+product[-8:]+'_'+year+'.nc')

In [None]:
# Rename the 'Gpp' variable to 'GPP' when it is present. This step is
# presumably left over from the MODIS GPP version of the notebook; the guard
# makes it a no-op for datasets without a 'Gpp' variable (such as the NDWI
# default) instead of raising.
# Assumes `ds` is an xarray Dataset -- TODO confirm.
if 'Gpp' in ds:
    ds = ds.rename({'Gpp':'GPP'})

In [None]:
# Monthly climatology: the mean of each calendar month across all time steps.
# The data is chunked spatially first and the chunked object is the one that
# gets reduced, so the chunking actually applies to the computation.
chunked = ds.chunk(dict(latitude=750, longitude=750))
clim = chunked.groupby("time.month").mean("time").compute()

# Fill NaNs with the climatological value of the matching calendar month,
# then drop the helper 'month' coordinate that the groupby leaves behind.
ds = ds.groupby("time.month").fillna(clim).drop_vars('month')

In [None]:
# Save the gap-filled monthly dataset to a single NetCDF file.
ds.to_netcdf('/g/data/os22/chad_tmp/NEE_modelling/data/1km/NDWI_1km_monthly_2002_2021.nc')

In [None]:
# Reproject `ds` onto the grid of an existing 5 km EVI file.
# NOTE(review): `xr_reproject` is only imported in a commented-out line at the
# top of the notebook (`from odc.algo import xr_reproject`); that import has
# to be enabled before this cell can run.
evi = xr.open_dataarray('/g/data/os22/chad_tmp/NEE_modelling/data/5km/EVI_5km_monthly_2002_2021.nc').isel(time=0) # first time step only; used as the target grid
ds_rp = xr_reproject(ds, geobox=evi.geobox, resampling='average', dst_nodata=np.nan).compute() # reproject with 'average' resampling
# Store the coordinates as float32.
ds_rp['latitude'] = ds_rp.latitude.astype('float32')
ds_rp['longitude'] = ds_rp.longitude.astype('float32')
ds_rp

In [None]:
# Quick-look map of the reprojected NDWI at time index 110, colour range -0.2 to 0.2.
ds_rp.NDWI.isel(time=110).plot.imshow(size=6,vmax=0.2, vmin=-0.2, cmap='RdBu');

In [None]:
# Same time index and colour range for `ds` (before reprojection), for comparison.
ds.NDWI.isel(time=110).plot.imshow(size=6,vmax=0.2, vmin=-0.2, cmap='RdBu');

In [None]:
# Mean seasonal cycle: average per calendar month, then over all pixels.
ds.NDWI.groupby('time.month').mean().mean(['latitude','longitude']).plot(figsize=(11,5))

In [None]:
# Save the reprojected dataset to a single NetCDF file.
ds_rp.to_netcdf('/g/data/os22/chad_tmp/NEE_modelling/data/5km/NDWI_5km_monthly_2002_2021.nc')