# Create input datasets for the GAM

These datasets form the input to the R scripts: `Harmonize_avhrr_modis_<var>_CLIM.R`

In [None]:
import xarray as xr
import numpy as np
import pandas as pd
from scipy import stats
import geopandas as gpd
import contextily as ctx
from odc.geo.xr import assign_crs
import matplotlib.pyplot as plt

import sys
sys.path.append('/g/data/os22/chad_tmp/AusEFlux/src/')
from _collect_prediction_data import round_coords

import warnings
warnings.filterwarnings("ignore")

## Parameters

In [None]:
# Variable to harmonise: 'NDVI' or 'LST'.
model_var = 'NDVI'

# Region subset used for masking and in output file names:
# 'trees', 'nontrees' or 'AUS'.
name = 'nontrees'

## Open AVHRR datasets and filter

In [None]:
# Open the monthly 5 km AVHRR file for the selected variable and tag it
# with the EPSG:4326 coordinate reference system.
avhrr_path = f'/g/data/os22/chad_tmp/climate-carbon-interactions/data/{model_var}_harmonization/AVHRR_{model_var}_5km_monthly_1982_2013.nc'
ds = assign_crs(xr.open_dataset(avhrr_path), crs='epsg:4326')

In [None]:
# Per-pixel fraction of time steps holding a valid (non-NaN) AVHRR value,
# computed before any filtering. The variable name follows `model_var` so
# the cell also works for LST, matching the "after" calculation further down.
before_fraction_avail = (~np.isnan(ds[model_var+'_avhrr'])).sum('time')/len(ds.time)

In [None]:
# Optional filter on the number of observations per month (disabled):
# ds = ds.where(ds['n_obs']>=2)

avhrr_var = f'{model_var}_avhrr'
cv_var = f'{model_var}_cv'

# Mask out very low NDVI values (only applies to NDVI)
if model_var == 'NDVI':
    ds = ds.where(ds[avhrr_var] >= 0.01)

# Coefficient of variation for each month (std dev / value);
# keep only time steps/pixels where it is below 0.5
ds[cv_var] = ds[f'{model_var}_stddev'] / ds[avhrr_var]
ds = ds.where(ds[cv_var] < 0.5)

#filter by large std dev anomalies
def stand_anomalies(ds, clim_mean, clim_std):
    """Return standardized anomalies of `ds` relative to a monthly climatology.

    `ds` is loaded with `.compute()` and grouped by calendar month
    ("time.month"); for every group the anomaly is (value - mean) / std.

    Parameters
    ----------
    ds : xarray object with a `time` coordinate
        Data to standardize.
    clim_mean, clim_std : xarray objects
        Monthly mean and standard deviation; the caller passes the results
        of `ds.groupby('time.month').mean()` and `.std()`.

    Returns
    -------
    The standardized anomalies as returned by `xr.apply_ufunc`.
    """
    def _standardize(values, mean, std):
        return (values - mean) / std

    monthly_groups = ds.compute().groupby("time.month")
    return xr.apply_ufunc(_standardize, monthly_groups, clim_mean, clim_std)

# Monthly climatology (std dev and mean) and the standardized anomalies
monthly = ds.groupby('time.month')
clim_std = monthly.std()
clim = monthly.mean()
std_anom = stand_anomalies(ds, clim, clim_std)


def _within_4_std(anom):
    """Return 1 where -4 < anom < 4 and 0 elsewhere."""
    return xr.where((anom > -4) & (anom < 4), 1, 0)


# 1/0 masks flagging values that lie within +/- 4 standard deviations
anom_mask = _within_4_std(std_anom[model_var+'_avhrr'])
sza_anom_mask = _within_4_std(std_anom['SZEN_median'])
tod_anom_mask = _within_4_std(std_anom['TIMEOFDAY_median'])

# Apply the three masks one after another
ds = ds.where(anom_mask).where(sza_anom_mask).where(tod_anom_mask)

### Blank out April-December 1994 due to poor data (stripes)
if model_var == 'NDVI':
    bad_period = slice('1994-04', '1994-12')
    times_to_drop = ds.time.sel(time=bad_period)

    # Copy of the bad period with values replaced by NaN: the condition
    # `NDVI_avhrr < -1` is not expected to hold anywhere, so `where`
    # swaps in NaN while the time steps themselves are retained.
    _1994 = ds.sel(time=bad_period)
    _1994 = _1994.where(_1994['NDVI_avhrr'] < -1, np.nan)

    # Replace the original time steps with the blanked copy and
    # restore chronological order.
    ds = xr.concat([ds.drop_sel(time=times_to_drop), _1994], dim='time').sortby('time')

### export filtered mask

In [None]:
# Boolean mask: True where the filtered AVHRR value is present (not NaN).
filtered_mask = ~np.isnan(ds[model_var+'_avhrr'])
# Drop the auxiliary 'month' coordinate; `drop_vars` replaces the
# deprecated `drop`.
filtered_mask = filtered_mask.drop_vars('month')
# filtered_mask.to_netcdf('/g/data/os22/chad_tmp/climate-carbon-interactions/data/'+model_var+'_harmonization/AVHRR_'+model_var+'_filtered_mask_1982_2013.nc')

### Plot available fraction of data before/after filtering

In [None]:
# Per-pixel fraction of time steps with valid data after filtering
n_times = len(ds.time)
valid_after = ~np.isnan(ds[f'{model_var}_avhrr'])
after_fraction_avail = valid_after.sum('time') / n_times

# Restrict both maps to pixels that had some valid data before filtering
fract_avail_mask = xr.where(before_fraction_avail > 0, 1, 0)
before_fraction_avail = before_fraction_avail.where(fract_avail_mask)
after_fraction_avail = after_fraction_avail.where(fract_avail_mask)

fig, ax = plt.subplots(1, 2, figsize=(8, 5), sharey=True, layout='constrained')

panels = [('before', before_fraction_avail), ('after', after_fraction_avail)]

# Draw each map with a basemap underneath; `im` ends up referring to the
# last panel and feeds the shared colorbar below.
for axis, (_, fraction) in zip(ax, panels):
    im = fraction.rename('').plot(vmin=0.1, vmax=0.95, ax=axis, cmap='magma', add_labels=False, add_colorbar=False)
    ctx.add_basemap(axis, source=ctx.providers.CartoDB.Voyager, crs='EPSG:4326', attribution='', attribution_size=1)

# Titles report the spatial mean of each fraction map
for axis, (stage, fraction) in zip(ax, panels):
    axis.set_title(f'Mean Fraction ({stage} filter): {round(fraction.mean().values.item(), 3)}')

# Hide tick labels on both panels
for axis in ax:
    axis.set_yticklabels([])
    axis.set_xticklabels([])

cb = fig.colorbar(im, ax=ax, shrink=0.5, orientation='horizontal', label='Fraction of Available '+model_var+' Data')
cb.ax.xaxis.set_label_position('top')

# fig.savefig("/g/data/os22/chad_tmp/climate-carbon-interactions/results/figs/AVHRR_"+model_var+"_fraction_available.png",
#             bbox_inches='tight', dpi=300)

### Add lagged NDVI features

In [None]:
# ds['NDVI_avhrr_1f'] = ds['NDVI_avhrr'].shift(time=1)
# ds['NDVI_avhrr_1b'] = ds['NDVI_avhrr'].shift(time=-1)

In [None]:
# Remove the auxiliary 'month' coordinate (presumably picked up from the
# time.month groupby used for the anomaly masks - confirm). `drop_vars`
# replaces the deprecated `drop`.
ds = ds.drop_vars('month')

### Open covariables

In [None]:
base = '/g/data/os22/chad_tmp/climate-carbon-interactions/data/'

# Files used for every variable: the MODIS product for `model_var`
# (must stay first) followed by srad.
_common_datasets = [
    model_var+'_harmonization/MODIS_'+model_var+'_5km_monthly_200003_202212.nc',
    '5km/srad_5km_monthly_1982_2022.nc',
]

# Additional covariates that depend on the variable being harmonised.
_extra_datasets = {
    'NDVI': [
        '5km/rain_cml3_5km_monthly_1982_2022.nc',
        '5km/vpd_5km_monthly_1982_2022.nc',
    ],
    'LST': [
        '5km/tavg_5km_monthly_1982_2022.nc',
    ],
}

# Fail loudly on an unsupported variable instead of leaving `datasets`
# undefined (or stale from an earlier run of the notebook).
if model_var not in _extra_datasets:
    raise ValueError(
        f"Unsupported model_var {model_var!r}; expected one of {sorted(_extra_datasets)}"
    )

datasets = _common_datasets + _extra_datasets[model_var]

In [None]:
dss = []
names = []
for d in datasets:
    # Open the covariate, tag the CRS and restrict it to 1982-2013
    xx = assign_crs(xr.open_dataset(base+d), crs='epsg:4326').sel(time=slice('1982', '2013'))
    # Project helper; presumably snaps coordinates so the grids line up
    # when merged - confirm against _collect_prediction_data.round_coords
    xx = round_coords(xx)
    # `drop_vars` replaces the deprecated `drop`
    xx = xx.drop_vars('spatial_ref')
    names.append(list(xx.data_vars)[0])
    dss.append(xx.transpose('time', 'latitude', 'longitude'))

covars = xr.merge(dss)
covars = assign_crs(covars, crs='epsg:4326')
covars = covars.rename({model_var+'_median': model_var+'_modis'})

# Merge the AVHRR data with the covariables
ds = xr.merge([ds, covars])

### Add some MODIS summary stats

In [None]:
# Variable names follow `model_var` (matching the `<var>_modis` rename done
# when the covariables were merged) so this cell also works for LST; for
# NDVI the resulting names are unchanged.
modis_var = model_var+'_modis'

# Per-pixel temporal median, repeated along time.
# (`mean_modis` keeps its original name but holds the median.)
mean_modis = ds[modis_var].median('time')
mean_modis = mean_modis.expand_dims(time=ds.time)
ds[modis_var+'_median'] = mean_modis

# Per-pixel 5th percentile over time, used as the "min"
min_modis = ds[modis_var].quantile(0.05, dim='time').drop_vars('quantile')
min_modis = min_modis.expand_dims(time=ds.time)
ds[modis_var+'_min'] = min_modis

# Per-pixel 95th percentile over time, used as the "max"
max_modis = ds[modis_var].quantile(0.95, dim='time').drop_vars('quantile')
max_modis = max_modis.expand_dims(time=ds.time)
ds[modis_var+'_max'] = max_modis

# Remove unneeded variables (`drop_vars` replaces the deprecated `drop`)
ds = ds.drop_vars([model_var+'_stddev', 'n_obs', model_var+'_cv', modis_var])

### Mask with Trees/nontrees

In [None]:
# Mean of the WCF layer over 2001-2018, used to split tree / non-tree pixels
trees = xr.open_dataset('/g/data/os22/chad_tmp/climate-carbon-interactions/data/5km/WCF_5km_monthly_1982_2022.nc')['WCF']
trees = assign_crs(trees, crs='epsg:4326')
trees = trees.sel(time=slice('2001', '2018')).mean('time')

# Build a 1/0 mask for the requested variable/region combination.
if model_var == 'NDVI' and name == 'trees':
    mask = xr.where(trees > 0.25, 1, 0)
elif model_var == 'NDVI' and name == 'nontrees':
    mask = xr.where(trees <= 0.25, 1, 0)
elif model_var == 'LST' and name == 'AUS':
    # Valid wherever WCF is >= 0 (pixels where WCF is NaN get 0)
    mask = xr.where(trees >= 0, 1, 0)
else:
    # Fail loudly instead of leaving `mask` undefined, or silently reusing
    # one left over from an earlier run of the notebook.
    raise ValueError(
        f"No mask defined for model_var={model_var!r} with name={name!r}; "
        "supported combinations: NDVI with 'trees'/'nontrees', LST with 'AUS'"
    )

In [None]:
# Keep values only where the region mask is 1; all other pixels become NaN.
ds = ds.where(mask)

In [None]:
# ds.NDVI_modis_median.isel(time=1).plot.imshow()

### Export

In [None]:
# Remove the 'grid_mapping' attribute from every data variable before export.
# `pop` with a default skips variables that lack the attribute without the
# bare `except`, which would also have hidden unrelated errors.
for var_name in ds.data_vars:
    ds[var_name].attrs.pop('grid_mapping', None)

In [None]:
# Write the GAM input file for the selected region.
# NOTE(review): the path is hard-coded to NDVI regardless of `model_var` -
# confirm whether LST runs are meant to use this cell.
gam_input_path = f'/g/data/os22/chad_tmp/climate-carbon-interactions/data/NDVI_harmonization/GAM/{name}_AVHRR_NDVI_5km_monthly_1982_2013_GAMinput.nc'
ds.to_netcdf(gam_input_path)

### MODIS clipped to regions

In [None]:
# Open the monthly 5 km MODIS NDVI file and tag it with EPSG:4326
mod_path = '/g/data/os22/chad_tmp/climate-carbon-interactions/data/NDVI_harmonization/MODIS_NDVI_5km_monthly_200003_202212.nc'
mod = assign_crs(xr.open_dataset(mod_path), crs='epsg:4326')

In [None]:
# Apply the same region mask to MODIS: values outside the mask become NaN.
mod = mod.where(mask)

In [None]:
# Write the region-masked MODIS NDVI file
mod_region_path = f'/g/data/os22/chad_tmp/climate-carbon-interactions/data/NDVI_harmonization/regions/{name}_MODIS_NDVI_5km_monthly_200003_202212.nc'
mod.to_netcdf(mod_region_path)

## Post-process GAM results

In [None]:
import xarray as xr
from odc.geo.xr import assign_crs

In [None]:
# Variable and region whose GAM output is post-processed
model_var = 'NDVI'
name = 'nontrees'

# Directory holding the GAM input/output files
base = '/g/data/os22/chad_tmp/climate-carbon-interactions/data/NDVI_harmonization/GAM/'

In [None]:
# Build the path of the harmonised GAM output and open it.
# NOTE(review): `feat` is not defined in any visible cell, so this line
# raises NameError as written, and `type` resolves to the Python builtin
# (it would render as "<class 'type'>" in the path). Both presumably need
# to be set to identifiers of the GAM run - TODO confirm and define them
# alongside `name` / `model_var`.
path = f'{base}{name}_AVHRR_MODIS_{model_var}_{type}_harmonized_{feat}_1982_2013.nc'
ds = xr.open_dataset(path)


In [None]:
# Tag the CRS, then keep only the GAM prediction and rename it to 'NDVI'
georeferenced = assign_crs(ds, crs='epsg:4326')
ds = georeferenced['ndvi_mcd_pred'].rename('NDVI')

In [None]:
# ds.sel(time='2001').plot.imshow(col='time', col_wrap=4, robust=True)

In [None]:
# Write the harmonised NDVI for this region
harmonized_path = f'/g/data/os22/chad_tmp/climate-carbon-interactions/data/NDVI_harmonization/NDVI_{name}_GAM_harmonize_5km_monthly_1982_2013.nc'
ds.to_netcdf(harmonized_path)