# How does the monthly resolution impact phenology extraction?

Compare with 8-day NDVI

In [None]:
%matplotlib inline

import sys
import pickle
import warnings
import numpy as np
import xarray as xr
import pandas as pd
import scipy.signal
import geopandas as gpd
import matplotlib.pyplot as plt
from odc.geo.geom import Geometry
from odc.geo.xr import assign_crs

sys.path.append('/g/data/os22/chad_tmp/Aus_phenology/src')
from phenology_pixel_circular import _extract_peaks_troughs, xr_phenometrics

import sys
sys.path.append('/g/data/xc0/project/AusEFlux/src/')
from _utils import round_coords, start_local_dask

# %load_ext autoreload
# %autoreload 2

In [None]:
# client = start_local_dask(mem_safety_margin='2Gb')
# client

## Analysis Parameters


In [None]:
# NDVI product identifier; it is embedded in the pickle file names below.
product = 'AusENDVI-clim_MCD43A4'

# Pickle of NDVI time series for `product` (loaded later and indexed by region name).
timeseries_file = f'/g/data/os22/chad_tmp/Aus_phenology/data/pickle/IBRA_subregions_NDVI_{product}.pkl'

# Pickle of phenometrics for `product`.
save_file = f'/g/data/os22/chad_tmp/Aus_phenology/data/pickle/IBRA_subregions_{product}_phenometrics_new.pkl'

# Vector file with the region polygons.
ecoregions_file = '/g/data/os22/chad_tmp/Aus_phenology/data/vectors/IBRAv7_subregions_modified.geojson'

# Attribute column in `ecoregions_file` that holds the region names.
var = 'SUB_NAME_7'

# NOTE(review): `region_type` and `years` are not read by any cell visible in
# this notebook; presumably kept for parity with sibling notebooks -- confirm.
region_type = 'IBRA_subregions'
years = '1982-2022'

## Select region

Pick one of several strongly water-limited, highly variable regions to test whether the monthly upscaled data misses rapid growth and senescence pulses.

In [None]:
# Name of the region to analyse; it is matched against the region-name column
# of the ecoregions file and used as the key into the pickled results.
# Uncomment exactly one of the candidates.
# k = 'Birdum'
k = 'Atarting'
# k = 'Murrumbidgee'
# k = 'Carnarvon Ranges'


## Open datasets

In [None]:
# Region polygons; the geometry for region `k` is looked up from this table.
gdf = gpd.read_file(ecoregions_file)

# Phenometrics for the monthly product. The path is exactly `save_file` from
# the analysis parameters, so reuse it instead of repeating the literal: a
# hard-coded copy would silently keep pointing at the old product's file if
# `product` were changed.
phenometrics_file = save_file
# NOTE: pickle.load can execute arbitrary code embedded in the file -- only
# load pickles that were produced by this project.
with open(phenometrics_file, 'rb') as f:
    eco_regions_phenometrics = pickle.load(f)

# NDVI timeseries processed earlier to daily
with open(timeseries_file, 'rb') as f:
    results = pickle.load(f)

# 8-day NDVI (2014-2020 according to the file name). A CRS is attached so the
# `.odc` accessor can be used on the array (it is used to mask by polygon later).
ds_8day = xr.open_dataset('/g/data/os22/chad_tmp/Aus_phenology/data/NDVI_mcd43a4_8day_2014_2020.nc')['NDVI']
ds_8day = assign_crs(ds_8day, 'epsg:4326')

### Process 8-day data to daily

Match the smoothing and interpolation used in the manuscript.

In [None]:
# Rows of the ecoregions table whose name column (`var`, set in the analysis
# parameters) equals the selected region `k`.
region_rows = gdf[gdf[var] == k]
if region_rows.empty:
    # Fail with a readable message instead of the opaque `KeyError: 0` that
    # positional lookup on an empty selection would give.
    raise ValueError(f"No region named {k!r} found in column {var!r} of {ecoregions_file}")

# Use the first matching polygon, in the CRS of the vector file.
geom = Geometry(geom=region_rows.geometry.iloc[0], crs=gdf.crs)

# Mask pixels outside the polygon, then trim rows/columns that are entirely
# NaN so only the region's bounding area is carried forward.
ds_8day_clip = ds_8day.odc.mask(poly=geom)
ds_8day_clip = ds_8day_clip.dropna(dim='latitude', how='all').dropna(dim='longitude', how='all')

In [None]:
# ds_8day_clip.sel(time='2016').plot(col='time', col_wrap=10);

In [None]:
# Pixels that have no valid value at any time step (e.g. those masked out by
# the region polygon). They get a placeholder value so that the gap-filling and
# smoothing below operate on complete series, and are masked out again afterwards.
nan_mask = ds_8day_clip.isnull().all('time')
placeholder_filled = xr.where(nan_mask, -99, ds_8day_clip)

# Fill the remaining NaNs along time (cubic, extrapolating past the ends).
gap_filled = placeholder_filled.interpolate_na(
    dim='time',
    method='cubic',
    fill_value="extrapolate",
)

# Savitzky-Golay smoothing along the time axis.
savgol_kwargs = dict(
    window_length=15,  # higher time freq than monthly so higher window length to ~match
    polyorder=3,
    deriv=0,
    mode='interp',
)
smoothed = xr.apply_ufunc(
    scipy.signal.savgol_filter,
    gap_filled,
    input_core_dims=[['time']],
    output_core_dims=[['time']],
    kwargs=savgol_kwargs,
    dask='parallelized',
)

# Interpolate the smoothed series to a daily time step.
daily = smoothed.resample(time='1D').interpolate(kind='quadratic').astype('float32')

# Drop the placeholder pixels again, then average over space to obtain a
# single time series for the region.
ds_8day_clip = daily.where(~nan_mask).mean(['latitude', 'longitude'])

## Plot monthly alongside 8-day upscaled NDVI

In [None]:
# Monthly-derived NDVI series and phenometrics for the selected region.
ds = results[k]
pheno = eco_regions_phenometrics[k]

# (metric, marker colour) pairs. For a metric X the phenometrics object exposes
# `X` (day offset within the year), `X_year` (the year) and `vX` (the value
# plotted on the y-axis).
metric_colours = [
    ('SOS', 'tab:green'),
    ('EOS', 'tab:purple'),
    ('POS', 'black'),
    ('TOS', 'tab:orange'),
]

fig, ax = plt.subplots(1, 1, figsize=(6, 3))
ds.plot(ax=ax, color='tab:blue', linestyle='--', linewidth=1.0, label='Monthly upscaled NDVI') #.sel(time=slice('1997','2016'))

for metric, colour in metric_colours:
    days = getattr(pheno, metric).values
    yrs = getattr(pheno, f'{metric}_year').values
    # Convert (day, year) to a timestamp: (day - 1) days after 1 January of
    # that year -- presumably the day is a 1-based day-of-year; confirm.
    dates = [
        pd.to_datetime(d - 1, unit='D', origin=str(int(y)))
        for d, y in zip(days, yrs)
    ]
    ax.scatter(x=dates, y=getattr(pheno, f'v{metric}'), c=colour, zorder=10)

ds_8day_clip.plot(ax=ax, color='tab:red', linestyle='--', linewidth=1.0, label='8-day upscaled NDVI')

# Restrict the x-axis to late 2014 through the end of January 2020.
ax.set_xlim(pd.to_datetime(330, unit='D', origin='2014'), pd.to_datetime(30, unit='D', origin='2020'))
ax.set_xlabel(None)
ax.set_ylabel('NDVI')
ax.set_title(k, fontsize=12)
ax.legend()
ax.grid(which='both')
# ax.set_ylim(0.5,0.9)

# fig.savefig(f'/g/data/os22/chad_tmp/Aus_phenology/results/figs/compare_temporal_periods_{k}.png',
#             bbox_inches='tight', dpi=300)

In [None]:
# gdf.explore(column='SUB_NAME_7',
#             tiles = 'https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}',
#             attr = 'Esri',
#             name = 'Esri Satellite',
#             control = True,
#             legend=False
#            )

## Process 8-day NDVI

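We'll use this to compare against the monthly NDVI. The cells below are kept commented out; the first one writes the `NDVI_mcd43a4_8day_2014_2020.nc` file that is opened in the "Open datasets" section.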

In [None]:
# # Grab a common grid to reproject all datasets to
# gbox_path = f'/g/data/xc0/project/AusEFlux/data/grid_5km'
# with open(gbox_path, 'rb') as f:
#     gbox = pickle.load(f)

In [None]:
# base = '/g/data/ub8/au/'
# years=['2014','2015', '2016', '2017', '2018', '2019', '2020']

# arrs=[]
# for year in years:
#     print(year)
#     modis_sr_inputs = {
#         'SR_B1': 'MODIS/mosaic/MCD43A4.006/MCD43A4.006.b01.500m_0620_0670nm_nbar.'+year+'.nc',
#         'SR_B2': 'MODIS/mosaic/MCD43A4.006/MCD43A4.006.b02.500m_0841_0876nm_nbar.'+year+'.nc',
#          }
    
#     d = {}
#     for k,i in modis_sr_inputs.items():
        
#         #open and do some prelim processing
#         ds = xr.open_dataset(base+i, chunks='auto')
#         ds = assign_crs(ds, crs='epsg:4326')
#         ds = ds.to_array()
#         ds = ds.squeeze().drop_vars('variable')
#         ds.attrs['nodata'] = np.nan
#         ds = ds.rename(k)        
#         d[k] = ds #add to dict
    
#     ds = (d['SR_B2'] - d['SR_B1']) / (d['SR_B2'] + d['SR_B1'])
    
#     ds.attrs['nodata'] = np.nan
    
#     # resample spatial
#     ds = ds.odc.reproject(gbox, resampling='average').compute()  # bring into memory
    
#     #tidy up
#     ds = round_coords(ds)
#     ds = ds.rename('NDVI')
#     ds = ds.where(ds>0)
#     arrs.append(ds)

# ds = xr.concat(arrs, dim='time').sortby('time')

# ds.to_netcdf('/g/data/os22/chad_tmp/Aus_phenology/data/NDVI_mcd43a4_8day_2014_2020.nc')

In [None]:
# # Grab a common grid to reproject all datasets to
# gbox_path = f'/g/data/xc0/project/AusEFlux/data/grid_5km'
# with open(gbox_path, 'rb') as f:
#     gbox = pickle.load(f)

# years=['2014','2015', '2016', '2017', '2018', '2019', '2020']

# data_path = '/g/data/ub8/au/OzWALD/8day/NDVI/'

# arrs=[]
# for y in years:
#     ds = xr.open_dataset(f'{data_path}OzWALD.NDVI.{y}.nc', chunks="auto")['NDVI']
#     ds = ds.transpose('time', 'latitude', 'longitude')
#     ds = assign_crs(ds, 'epsg:4326')
#     ds.attrs['nodata'] = np.nan
#     # resample spatial
#     ds = ds.odc.reproject(gbox, resampling='average').compute()
#     ds = round_coords(ds)
#     ds = ds.rename('NDVI')
#     arrs.append(ds)

# ds_8day = xr.concat(arrs, dim='time').sortby('time')

# ds_8day.to_netcdf('/g/data/os22/chad_tmp/Aus_phenology/data/NDVI_8day_2014_2020.nc')