# Export a series of smoothed NDVI products

We can use these to see how NDVI trends differ between products

In [None]:
%matplotlib inline

import sys
import warnings
import scipy.signal
import xarray as xr
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
from odc.geo.xr import assign_crs
from odc.geo.geom import Geometry

sys.path.append('/g/data/os22/chad_tmp/AusEFlux/src/')
from _utils import round_coords

## Analysis Parameters


In [None]:
# Input NDVI product to gap-fill and smooth. Keep exactly one `ds_path` assignment
# active; the alternatives are left commented out so the notebook can be re-run
# once per product.
# NOTE(review): the files use different variable names / CRS, so switching the path
# presumably also requires enabling the matching loader in the "Open data" cell — confirm.
# ds_path = '/g/data/os22/chad_tmp/AusENDVI/results/publication/AusENDVI-clim_MCD43A4_gapfilled_1982_2022.nc'
ds_path = '/g/data/os22/chad_tmp/AusENDVI/results/publication/AusENDVI-clim_1982_2013.nc'
# ds_path = '/g/data/os22/chad_tmp/AusENDVI/data/NDVI_harmonization/AVHRR_GIMMS3g_v1.2_1982_2022.nc4'
# ds_path = '/g/data/os22/chad_tmp/AusENDVI/data/NDVI_harmonization/AVHRR_GIMMS-PKU-MODIS_1982_2022.nc'
# ds_path = '/g/data/os22/chad_tmp/AusENDVI/data/NDVI_harmonization/AVHRR_GIMMS3g_v1.1_1982_2013.nc'
# ds_path ='/g/data/os22/chad_tmp/AusENDVI/data/NDVI_harmonization/Landsat_NDVI_5km_monthly_1988_2012.nc'

# Synthetic NDVI file; it is loaded as `syn` and used later in the notebook as the
# last-resort source when filling gaps in the observed NDVI.
syn_path = '/g/data/os22/chad_tmp/AusENDVI/data/synthetic/NDVI/NDVI_CLIM_synthetic_5km_monthly_1982_2022.nc'


## Open data

In [None]:
# ds.sel(time='2000').plot.imshow(col='time', col_wrap=6, vmin=0)

In [None]:
# Load the NDVI product selected by `ds_path` into `ds` (a DataArray named 'NDVI')
# and the synthetic NDVI into `syn`. Only one product loader should be active at a
# time; the others are kept commented out for re-running with a different input.

# --- AusENDVI merged with MCD43A4 ---
# ds = assign_crs(xr.open_dataset(ds_path), crs='EPSG:4326')
# ds = ds.rename({'AusENDVI_clim_MCD43A4':'NDVI'})
# ds = ds['NDVI']

# --- AusENDVI-clim (active) ---
# Open the file, tag it as EPSG:4326, then rename the data variable and pull it
# out as a DataArray.
ds = xr.open_dataset(ds_path)
ds = assign_crs(ds, crs='EPSG:4326')
ds = ds.rename({'AusENDVI_clim': 'NDVI'})['NDVI']

# --- PKU GIMMS ---
# ds = assign_crs(xr.open_dataset(ds_path), crs='EPSG:4326')
# ds = ds['__xarray_dataarray_variable__']
# ds = ds.rename('NDVI')
# ds = ds.sel(time=slice('1982','2013'))

# --- GIMMS v1.2 - can't figure out the flags ---
# ds = assign_crs(xr.open_dataset(ds_path), crs='EPSG:4326')
# ds = ds['ndvi']
# ds = ds/10000
# ds = ds.where(ds>0)
# ds = ds.sel(lat=slice(-10,-45), lon=slice(110,154))
# ds = ds.rename({'lat':'latitude','lon':'longitude'})
# ds = assign_crs(ds, crs='EPSG:4326')

# --- GIMMS v1.1 ---
# ds = assign_crs(xr.open_dataset(ds_path), crs='EPSG:3577')
# ds = ds['NDVI']
# ds = ds.odc.reproject(ds.odc.geobox.to_crs('EPSG:4326'), resampling='nearest')

# --- Synthetic NDVI (always loaded) ---
# Tag with EPSG:4326, declare NaN as the nodata value, then keep only the time
# steps that are present in `ds`.
syn = assign_crs(xr.open_dataset(syn_path)['NDVI'], crs='EPSG:4326')
syn.attrs['nodata'] = np.nan
syn = syn.rename('NDVI').sel(time=ds.time)
# syn = syn.odc.reproject(ds.odc.geobox, resampling='average')
# ds = round_coords(ds)
# syn = round_coords(syn)

# --- Landsat ---
# ds = xr.open_dataset(ds_path)['NDVI']
# ds = assign_crs(ds, crs='EPSG:4326')
# ds.attrs['nodata'] = np.nan

In [None]:
# ds.sel(time='1994').plot.imshow(col='time', col_wrap=6, vmin=0)

In [None]:
# Gap-fill the NDVI time series held in `ds`. One variant is active at a time;
# the alternatives for other products are kept commented out.

# gapfill landsat
# obs_monthly = ds.groupby('time.month').mean()
# obs_anom = ds.groupby('time.month') - obs_monthly
# obs_anom = obs_anom.interpolate_na(dim='time', method='linear', limit=2)
# ds = obs_anom.groupby('time.month') + obs_monthly
# ds = ds.drop_vars('month')
# ds = ds.groupby("time.month").fillna(obs_monthly)
# #mask landsat with syn to remove ocean +ve values
# m = syn.mean('time')
# m = ~np.isnan(m)
# ds = ds.where(m)

# gapfill ause and pku (active)
#   1. compute the mean for each calendar month and subtract it to get anomalies
#   2. linearly interpolate the anomalies along time, filling at most one
#      consecutive missing step (limit=1)
#   3. add the calendar-month means back
#   4. fill whatever is still missing with the synthetic NDVI
obs_monthly = ds.groupby('time.month').mean()
obs_anom = ds.groupby('time.month') - obs_monthly
obs_anom = obs_anom.interpolate_na(dim='time', method='linear', limit=1)
ds = obs_anom.groupby('time.month') + obs_monthly
# The groupby arithmetic leaves a 'month' coordinate behind; remove it.
# `drop_vars` is the supported replacement for the deprecated `DataArray.drop`.
ds = ds.drop_vars('month')
# NOTE(review): fillna aligns `syn` to `ds` on coordinate labels, so cells are only
# filled where time/latitude/longitude match exactly — confirm the two grids agree.
ds = ds.fillna(syn)

# gapfill gimms3g
# obs_monthly = ds.groupby('time.month').mean()
# obs_anom = ds.groupby('time.month') - obs_monthly
# obs_anom = obs_anom.interpolate_na(dim='time', method='linear', limit=1)
# ds = obs_anom.groupby('time.month') + obs_monthly
# ds = ds.drop_vars('month')
# ds = ds.groupby("time.month").fillna(obs_monthly)

## Smoothing filters

In [None]:
# Resample to a regular 2-week time step (linear interpolation between the
# existing samples) before smoothing.
ds = ds.resample(time='2W').interpolate('linear')

# Savitzky-Golay smoothing, applied along `time` independently for every pixel:
# a cubic polynomial fitted over an 11-sample window.
# apply_ufunc moves the core dimension (`time`) to the last axis, which is the axis
# savgol_filter works on by default, and returns it as the last dimension, so the
# result is transposed back to (time, latitude, longitude).
ds_smooth = xr.apply_ufunc(
    scipy.signal.savgol_filter,
    ds,
    input_core_dims=[['time']],
    output_core_dims=[['time']],
    kwargs={
        'window_length': 11,
        'polyorder': 3,
        'deriv': 0,
        'mode': 'interp',
    },
    dask='parallelized',
).transpose('time', 'latitude', 'longitude')

In [None]:
# Quick visual check: spatial-mean time series of `ds` and of its smoothed
# counterpart `ds_smooth`, drawn on the same axes.
with plt.style.context('ggplot'):
    fig, ax = plt.subplots(figsize=(12, 4))
    ds.mean(dim=['latitude', 'longitude']).plot(ax=ax, label='NDVI')
    ds_smooth.mean(dim=['latitude', 'longitude']).plot(
        ax=ax, label='Interpolated & smoothed NDVI'
    )
    ax.set_title('Smoothed NDVI')
    ax.legend(loc='upper left')

## Save smoothed data to file

This makes parallelisation with dask.delayed efficient: each processor reads the dataset from disk itself, rather than the dataset being transferred between processors.

In [None]:
# ds_smooth = ds_smooth.sel(time=slice('1982','2000'))

In [None]:
# Write the smoothed NDVI DataArray to NetCDF so it can be read back from disk later.
# NOTE(review): the filename ends in "_2000", but the time slice in the cell above is
# commented out, so the file will contain the full time range of `ds_smooth` —
# confirm the output name matches the data being written.
ds_smooth.to_netcdf('/g/data/os22/chad_tmp/Aus_phenology/data/NDVI/NDVI_smooth_AusENDVI-clim_2000.nc')

In [None]:
# dss = xr.open_dataarray('/g/data/os22/chad_tmp/Aus_phenology/data/NDVI/NDVI_smooth_GIMMSv1.2.nc')

# with plt.style.context('ggplot'):
#     fig,ax=plt.subplots(1,1, figsize=(12,4))
#     ds.mean(['latitude','longitude']).plot(ax=ax, label='GIMSSSv1.1')
#     dss.mean(['latitude','longitude']).plot(ax=ax, label='GIMMSv1.2')
#     ax.legend(loc='upper left')
#     # ax.set_title('Smoothed NDVI');