# Downloading products from GEE into netcdfs


## Load packages

In [None]:
import os
import ee
import wxee
import math
import warnings
import xarray as xr
import pandas as pd
import numpy as np
from odc.geo.xr import assign_crs

## Authenticate GEE    

In [None]:
# Trigger the interactive authentication flow. Left commented out; uncomment
# it if Earth Engine credentials have not been set up for this environment.
# ee.Authenticate()

# Initialize the Earth Engine client library before any of the ee.* calls
# made in the cells below.
ee.Initialize()

## Analysis parameters


In [None]:
# Earth Engine collection to export. The commented-out ID is the
# surface-reflectance (SR) alternative.
# product = 'NOAA/CDR/AVHRR/SR/V5'
product = 'NOAA/CDR/AVHRR/NDVI/V5'
# Band(s) kept from each image via .select(bands) in the export loop.
bands =  ['NDVI']
# First and last day of the export period, written month/day/year; these feed
# pd.date_range() to generate one export per month.
start_date = '1/1/1982'
end_date = '12/31/2013'
# CRS and pixel size passed to the Drive export as `crs` and `scale`.
# Presumably metres, since EPSG:3577 is a projected CRS — TODO confirm.
output_crs = "EPSG:3577"
resolution = 5000
# Multiplier applied to band values with image.multiply(scale); pick the line
# matching the selected product.
scale = 0.0001 #NDVI
# scale = 0.01 #SR

# Approximate bounding polygon for Australia as [lon, lat] vertices, used both
# to clip images and as the export region.
# NOTE(review): the original remark said the box was made "slightly less to
# trick google into giving me 1km res", but `resolution` above is 5000 — the
# remark may be stale.
# NOTE(review): the first vertex uses longitude 114 while the last uses 113,
# so the ring does not end on its starting point — confirm which western
# edge is intended.
region = ee.Geometry.Polygon([[
            [114,-43.0],
            [154.0,-43.0],
            [154.0,-10.0],
            [113,-10.0],
            [113,-43.0]]])


In [None]:
def qualityFlag(image):
    """Mask out pixels flagged as cloud or cloud shadow in the 'QA' band.

    Parameters
    ----------
    image : ee.Image
        Image that carries a 'QA' bitmask band alongside its data bands.

    Returns
    -------
    ee.Image
        ``image`` with its mask updated so that only pixels whose QA bit 1
        (cloud) and QA bit 2 (cloud shadow) are both unset remain valid.
    """
    qa = image.select('QA')

    # Bit masks are built with integer shifts: bitwise operations need
    # integer operands, and math.pow() would hand Earth Engine floats.
    cloud_bit_mask = 1 << 1
    shadow_bit_mask = 1 << 2

    # A pixel is kept only when neither flag is raised.
    clear = (
        qa.bitwiseAnd(cloud_bit_mask).eq(0)
        .And(qa.bitwiseAnd(shadow_bit_mask).eq(0))
    )

    return image.updateMask(clear)

In [None]:
# Queue one Earth Engine export task per calendar month between start_date and
# end_date. Each task writes the per-pixel reduction of the quality-masked
# collection for that month to Google Drive as a cloud-optimised GeoTIFF.
start = pd.date_range(start=start_date, end=end_date, freq='MS')

# filterDate() treats its end argument as exclusive, so each monthly window
# has to close on the first day of the FOLLOWING month. Closing on the last
# calendar day of the month would silently drop that day's image.
end = start + pd.DateOffset(months=1)

for i, (s, e) in enumerate(zip(start, end)):
    print(" {:03}/{:03}\r".format(i+1, len(start)), end="")

    # Mid-month date (month start + 14 days) used to label the export.
    ss = s + pd.Timedelta(14, 'd')

    s = s.strftime('%Y-%m-%d')
    e = e.strftime('%Y-%m-%d')

    ds = (
        ee.ImageCollection(product)
        .filterDate(s, e)
        .map(lambda image: image.clip(region))
        .map(qualityFlag)
        .select(bands)
        .map(lambda image: image.multiply(scale))
        # Per-pixel number of unmasked observations in the month. Swap in
        # ee.Reducer.stdDev() or .median() to export the other statistics.
        .reduce(ee.Reducer.count())
    )

    export = ee.batch.Export.image.toDrive(
        ds,
        description='AVHRR_NDVI_count'+'_'+ss.strftime('%Y-%m-%d'),
        folder='AVHRR_NDVI_count',
        region=region,
        scale=resolution,
        crs=output_crs,
        maxPixels=1e13,
        fileFormat='GeoTIFF',
        formatOptions={
            'cloudOptimized': True
        },
    )
    # start() only submits the task; it runs asynchronously on Earth Engine.
    export.start()

## Stitch together geotiffs

In [None]:
import os
import xarray as xr
import numpy as np
import pandas as pd
from odc.geo.xr import assign_crs

In [None]:
# Directory that is scanned for the exported '.tif' files.
base='/g/data/os22/chad_tmp/climate-carbon-interactions/data/AVHRR_NDVI_count/'
# Name given to the stitched variable and embedded in the output filename.
# The value 'SR_median' switches the stitching loop to its two-band branch.
name= 'n_obs'
# First and last day of the period; one timestamp per month is generated
# between them and paired with the sorted files.
dates_start = '1982-01-01'
dates_end = '2013-12-31'


In [None]:
# Full paths of every GeoTIFF found in `base`, in sorted filename order.
files = sorted(base + fname for fname in os.listdir(base) if fname.endswith('.tif'))

# One timestamp per month: the first of the month shifted forward 14 days.
month_starts = pd.date_range(dates_start, dates_end, freq='MS')
time = list((month_starts + pd.DateOffset(days=14)).to_numpy())

In [None]:
# Open every exported GeoTIFF, stamp it with its month, and collect the
# results in `arrs` for concatenation along time.

# Files are matched to timestamps purely by position, so a missing or extra
# file would silently shift every later month onto the wrong date.
if len(files) != len(time):
    raise ValueError(
        "Found {} GeoTIFFs but expected {} monthly time steps; files are "
        "matched to dates by position, so the counts must agree.".format(
            len(files), len(time)))

arrs = []
for i, (f, t) in enumerate(zip(files, time)):
    print(" {:02}/{:02}\r".format(i + 1, len(files)), end="")

    if name == 'SR_median':
        # Two-band file: split the bands into separately named variables.
        raster = xr.open_dataarray(f).astype('float32')
        tod = raster.isel(band=0).squeeze().drop_vars('band').rename('TIMEOFDAY_median')
        sza = raster.isel(band=1).squeeze().drop_vars('band').rename('SZEN_median')
        ds = tod.to_dataset()
        ds['SZEN_median'] = sza
    else:
        # Single-band file: one variable called `name`.
        ds = xr.open_dataarray(f).squeeze().drop_vars('band').astype('float32')
        ds = ds.rename(name)
        # pop() tolerates files that lack these attributes.
        ds.attrs.pop('long_name', None)
        ds.attrs.pop('AREA_OR_POINT', None)

    # Both branches need the time coordinate: the stitched result is later
    # concatenated along and sorted by 'time'.
    ds = ds.expand_dims(time=[t])

    ds.attrs['nodata'] = np.nan
    # Replace the coordinate read from the file with a freshly assigned CRS.
    ds = ds.drop_vars('spatial_ref')
    ds = assign_crs(ds, crs='epsg:3577')
    arrs.append(ds)
    

In [None]:
# Join the per-month pieces along 'time', then put them in chronological order.
ds = xr.concat(arrs, dim='time')
ds = ds.sortby('time')

In [None]:
# Write the stitched product to a NetCDF file whose name embeds `name`.
out_path = '/g/data/os22/chad_tmp/climate-carbon-interactions/data/AVHRR_'+name+'_5km_monthly_1982_2013.nc'
ds.to_netcdf(out_path)

### Combine netcdfs

In [None]:
def _open_with_crs(path):
    """Open the NetCDF at ``path`` and assign EPSG:3577 as its CRS."""
    return assign_crs(xr.open_dataset(path), crs='epsg:3577')


# The four monthly products that get merged into a single dataset.
nobs = _open_with_crs('/g/data/os22/chad_tmp/climate-carbon-interactions/data/AVHRR_n_obs_5km_monthly_1982_2013.nc')
ndvi_med = _open_with_crs('/g/data/os22/chad_tmp/climate-carbon-interactions/data/AVHRR_NDVI_median_5km_monthly_1982_2013.nc')
ndvi_std = _open_with_crs('/g/data/os22/chad_tmp/climate-carbon-interactions/data/AVHRR_NDVI_stddev_5km_monthly_1982_2013.nc')
ndvi_sr = _open_with_crs('/g/data/os22/chad_tmp/climate-carbon-interactions/data/AVHRR_SR_median_5km_monthly_1982_2013.nc')

In [None]:
# Combine the variables of all four products into one dataset.
products = [nobs, ndvi_med, ndvi_sr, ndvi_std]
ds = xr.merge(products)

In [None]:
# Remove the 'grid_mapping' attribute from every data variable before the
# dataset is written out. pop() with a default tolerates variables that never
# carried the attribute, where `del` would raise KeyError.
# NOTE(review): presumably needed because the attribute clashes with the
# 'grid_mapping' encoding when calling to_netcdf() — confirm.
for var_name in ds.data_vars:
    ds[var_name].attrs.pop('grid_mapping', None)

In [None]:
# Write the merged dataset to a single NetCDF file.
merged_path = '/g/data/os22/chad_tmp/climate-carbon-interactions/data/AVHRR_5km_monthly_1982_2013.nc'
ds.to_netcdf(merged_path)

In [1]:
import xarray as xr

In [2]:
# Re-open the merged file as a quick check. The result is deliberately left
# as the cell's trailing expression (not assigned) so the notebook shows it.
xr.open_dataset('/g/data/os22/chad_tmp/climate-carbon-interactions/data/AVHRR_5km_monthly_1982_2013.nc')

In [None]:
# #use start and end dates to loop through months and load GEE data
# start = pd.date_range(start=start_date, end=end_date, freq='MS') 
# end = pd.date_range(start=start_date, end=end_date, freq='M')

# i = 0
# for s, e in zip(start,end):
#     print(" {:03}/{:03}\r".format(i+1, len(start)), end="")

#     ss = s+pd.Timedelta(14, 'd')

#     if os.path.isfile(output_path+'AVHRR_SR_'+ss.strftime('%Y-%m')+'.nc'):
#         i+=1
#         pass

#     else:
#         s = s.strftime('%Y-%m-%d')
#         e = e.strftime('%Y-%m-%d')

#         ds = ee.ImageCollection(product) \
#           .filterDate(s,e) \
#           .map(lambda image: image.clip(region)) \
#           .map(qualityFlag)

#         ds = ds.select(bands)

#         ts = wxee.TimeSeries(ds)
#         ds = ts.wx.to_xarray(region=region, scale=resolution, crs=output_crs, progress=False, num_cores=4)

#         ds[bands] = ds[bands] * scale
        
#         warnings.filterwarnings("ignore")
#         ds = ds.astype(np.float32)
#         ds_mean = ds.resample(time='MS', loffset=pd.Timedelta(14, 'd')).mean().compute()
#         # ds_std = ds.resample(time='MS', loffset=pd.Timedelta(14, 'd')).std().compute()
#         # count = (~np.isnan(ds))
#         # count =  count.resample(time='MS', loffset=pd.Timedelta(14, 'd')).sum().compute()
        
#         ds_mean = ds_mean.rename({'TIMEOFDAY':'TIMEOFDAY_mean', 'SZEN':'SZEN_mean'})

#         ds_mean.attrs['nodata'] = np.nan
#         # ds_std = ds_std.rename('NDVI_std').to_dataset()
#         # ds_std.attrs['nodata'] = np.nan
#         # count = count.rename('n_obs').to_dataset()
#         # count.attrs['nodata'] = 0
        
#         # ds_out = xr.merge([ds_mean, ds_std, count])
#         # ds_out.attrs['nodata'] = np.nan
#         ds_out = assign_crs(ds_mean, crs='epsg:3577')
        
#         ds_out.to_netcdf(output_path+'AVHRR_SR_'+ss.strftime('%Y-%m')+'.nc')

#         i+=1
