# Validate phenology

Possibilities:
* Validate against
    * flux towers,
    * VODCAv2,
    * TERN phenology
    * PKU-GIMMS.
    * MODIS only
* Inspect how the transition between AVHRR and MODIS (and the gap-filling) impacts trends in the seasonal cycle for a sample of ecoregions.

In [None]:
%matplotlib inline
import os
import sys
import xarray as xr
import numpy as np
import pandas as pd
import scipy.signal
import matplotlib.pyplot as plt
from odc.geo.xr import assign_crs
import warnings
warnings.simplefilter(action='ignore')

sys.path.append('/g/data/os22/chad_tmp/Aus_phenology/src')
from phenology import phenometrics

sys.path.append('/g/data/os22/chad_tmp/AusEFlux/src/')
from _training import extract_ozflux

%load_ext autoreload
%autoreload 2

## Flux towers

### Get flux data
Flux tower data from OzFlux is already in the AusEFlux folder, so there is no need to re-download it.

Only load sites with >80% homogeneous land cover, following Yi's paper:
https://www.sciencedirect.com/science/article/pii/S0034425724001949

Also exclude sites with complex topography, as a 5 km pixel can't account for that.

In [None]:
# Options for the OzFlux extraction. `export_path` is the same directory the
# next cell lists and reads CSVs from (presumably one CSV per site is written
# there -- confirm against `_training.extract_ozflux`).
ozflux_options = dict(
    version='2023_v1',
    level='L6',
    type='default',
    timestep='Monthly',
    # rs_data_folder='/g/data/os22/chad_tmp/AusEFlux/data/5km/',
    # save_ec_data='/g/data/os22/chad_tmp/AusEFlux/data/ozflux_netcdf/',
    return_coords=True,
    verbose=False,
    export_path='/g/data/os22/chad_tmp/Aus_phenology/data/ozflux/',
)

# NOTE: `ds` is re-bound to the NDVI array further down, so the return value
# of this call is not used by the later cells of this notebook.
ds = extract_ozflux(**ozflux_options)

In [None]:
base = '/g/data/os22/chad_tmp/Aus_phenology/'

# os.listdir returns entries in arbitrary order; sorting keeps the order of
# `flux_ts` (and of anything built by iterating over it, e.g. the subplot
# layout) reproducible between runs.
sites = sorted(os.listdir(f'{base}data/ozflux/'))

# Sites retained for validation (spelling of the variable name kept as-is
# because other cells may reference it).
homogenous = ['AliceSpringsMulga','DalyUncleared','CowBay', #'CapeTribulation', 'RobsonCreek',
              'DryRiver','Gingin','RiggsCreek','TiTreeEast',
              'Tumbarumba','Whroo','WombatStateForest']

# Maps site name (file name without the '.csv' extension) -> 1-D GPP
# DataArray over time, carrying scalar latitude/longitude coordinates.
flux_ts = {}
for site in sites:
    # Test the real extension rather than a substring, so that e.g.
    # 'X.csv.bak' is not read and then mis-named by stripping 4 characters.
    site_name, extension = os.path.splitext(site)
    if extension != '.csv':
        continue

    # Only homogenous sites (substring match against the list above).
    if not any(h in site_name for h in homogenous):
        continue

    print(site_name)
    tower = pd.read_csv(f'{base}data/ozflux/{site}',
                        index_col='time', parse_dates=True)

    # The tower location is taken from the first row of the coordinate columns.
    lat = tower['y_coord'].iloc[0]
    lon = tower['x_coord'].iloc[0]

    # Convert the GPP column to xarray, attach the location as length-1
    # dimensions, then squeeze them away so they remain as scalar coordinates.
    gpp = tower[['GPP_SOLO_EC']].to_xarray()
    gpp = gpp.expand_dims(dim={'latitude':[lat], 'longitude':[lon]})
    flux_ts[site_name] = gpp['GPP_SOLO_EC'].squeeze()

### Extract phenology from flux towers and NDVI

In [None]:
def sg_smooth(ds, window, poly, deriv):
    """
    Apply a Savitzky-Golay filter along the 'time' dimension.

    Thin wrapper that passes `scipy.signal.savgol_filter` to
    `xr.apply_ufunc` with 'time' as the input and output core dimension.

    Parameters
    ----------
    ds : xarray object with a 'time' dimension
        The data to filter.
    window : int
        Forwarded to `savgol_filter` as `window_length`.
    poly : int
        Forwarded to `savgol_filter` as `polyorder`.
    deriv : int
        Forwarded to `savgol_filter` as `deriv`.

    Returns
    -------
    The result of `xr.apply_ufunc`, i.e. the filtered data.
    """
    # Keyword arguments handed straight through to scipy's filter.
    savgol_options = {
        'window_length': window,
        'polyorder': poly,
        'deriv': deriv,
        'mode': 'interp',
    }

    filtered = xr.apply_ufunc(
        scipy.signal.savgol_filter,
        ds,
        kwargs=savgol_options,
        input_core_dims=[['time']],
        output_core_dims=[['time']],
        dask='parallelized',
    )
    return filtered

#### Open NDVI data

In [None]:
# Dataset compared against the flux towers. Swap in one of the commented
# alternatives below to validate a different product.
path = '/g/data/os22/chad_tmp/AusENDVI/results/publication/AusENDVI-clim_MCD43A4_gapfilled_1982_2022.nc'
# path = '/g/data/os22/chad_tmp/AusENDVI/data/NDVI_harmonization/MODIS_NDVI_5km_monthly_200003_202212.nc'
# path = '/g/data/os22/chad_tmp/AusEFlux/data/5km/NDVI_5km.nc'
# path = '/g/data/os22/chad_tmp/AusENDVI/data/NDVI_harmonization/AVHRR_GIMMS3g_v1.1_1982_2013.nc'
# path = '/g/data/os22/chad_tmp/AusENDVI/data/NDVI_harmonization/AVHRR_GIMMS-PKU-MODIS_1982_2022.nc'
# path = '/g/data/os22/chad_tmp/NEE_modelling/data/5km/GOSIF_GPP_5km_monthly_2002_2021.nc'
# path = '/g/data/os22/chad_tmp/NEE_modelling/data/5km/EVI_5km_monthly_2002_2021.nc'
# path = '/g/data/os22/chad_tmp/NEE_modelling/data/5km/LAI_5km_monthly_2002_2021.nc'
# path = '/g/data/os22/chad_tmp/NEE_modelling/data/5km/FPAR_5km_monthly_2002_2021.nc'
# path = '/g/data/os22/chad_tmp/NEE_modelling/data/1km/kNDVI_1km_monthly_2002_2022.nc'
# path = '/g/data/os22/chad_tmp/NEE_modelling/data/1km/NDWI_1km_monthly_2002_2022.nc'
# path = '/g/data/os22/chad_tmp/NEE_modelling/data/1km/LST_Tair_1km_monthly_2002_2022.nc'
# path = '/g/data/os22/chad_tmp/NEE_modelling/data/1km/DIFFUSE_GPP_1km_monthly_2003_2021.nc'
# path = '/g/data/os22/chad_tmp/Aus_phenology/data/VODCAv2_1988_2021.nc'
# path = '/g/data/os22/chad_tmp/NEE_modelling/results/predictions/AusEFlux_GPP_2003_2022_1km_quantiles_v1.1.nc'

# Variant for files whose variable is already called 'NDVI':
# ds = xr.open_dataset(path)
# ds = ds['NDVI']

# The AusENDVI file stores its variable as 'AusENDVI_clim_MCD43A4'; pull that
# variable out and name the resulting DataArray 'NDVI'.
ds = xr.open_dataset(path)['AusENDVI_clim_MCD43A4'].rename('NDVI')

#### Process timeseries and calculate phenometrics

In [None]:
# Per-site outputs, keyed by site name.
flux_pheno, ndvi_pheno = {}, {}   # phenometrics results
flux_tss, ndvi_tss = {}, {}       # processed daily time series (for plotting)


def _smooth_and_densify(series):
    """
    Turn a series into a smoothed daily series.

    Steps, applied identically to the NDVI and flux series:
    fortnightly linear resampling -> Savitzky-Golay smoothing ->
    clip to 2000-2021 -> drop NaN time steps -> daily quadratic interpolation.
    """
    fortnightly = series.resample(time="2W").interpolate("linear")
    smoothed = sg_smooth(fortnightly, window=11, poly=3, deriv=0)
    # Clip so the flux series cannot extend beyond the NDVI record.
    clipped = smoothed.sel(time=slice('2000','2021'))
    valid = clipped.dropna(dim='time', how='all')
    return valid.resample(time='1D').interpolate(kind='quadratic')


for site_name, tower_gpp in flux_ts.items():

    # Sample NDVI at the tower location and at the tower's time steps
    # (nearest-neighbour in both space and time) so the series match up.
    pixel_ndvi = ds.sel(latitude=tower_gpp.latitude,
                        longitude=tower_gpp.longitude,
                        time=tower_gpp.time,
                        method='nearest')
    # Nearest-time matching can select the same NDVI step more than once.
    pixel_ndvi = pixel_ndvi.drop_duplicates(dim='time')
    tower_gpp = tower_gpp.sel(time=pixel_ndvi.time, method='nearest')

    # Smooth and interpolate both series to daily resolution.
    pixel_ndvi = _smooth_and_densify(pixel_ndvi)
    tower_gpp = _smooth_and_densify(tower_gpp)

    # Phenology: `phenometrics` is given a one-entry dict per call and its
    # result is indexed by the same site key.
    ndvi_result = phenometrics({site_name: pixel_ndvi}, verbose=False)
    flux_result = phenometrics({site_name: tower_gpp}, verbose=False)

    # Store results for plotting.
    flux_tss[site_name] = tower_gpp
    ndvi_tss[site_name] = pixel_ndvi
    flux_pheno[site_name] = flux_result[site_name]
    ndvi_pheno[site_name] = ndvi_result[site_name]



### Plot

In [None]:
# One panel per site: NDVI in red on the left axis, flux-tower GPP in blue on
# a twinned right axis. The 5x2 grid holds at most ten sites; `zip` stops at
# whichever of (panels, sites) runs out first.
with plt.style.context('ggplot'):
    fig, axes = plt.subplots(5, 2, figsize=(20, 15), layout='constrained')
    for ax, (k, v) in zip(axes.flat, flux_tss.items()):
        ax1 = ax.twinx()

        # Draw first: the axis labels and titles set by the plot calls are
        # overridden below.
        v.plot(ax=ax1, c='tab:blue')
        ndvi_tss[k].plot(ax=ax, c='tab:red')

        # Left (NDVI) axis
        ax.set_title(k)
        ax.set_xlabel(None)
        ax.set_ylim(0.10, 0.9)
        ax.grid(axis='y', which='both')

        # Right (GPP) axis
        ax1.set(title=None, xlabel=None, ylabel='GPP', ylim=(-1, 350))
        ax1.grid(None)

        # Optional overlays of the NDVI phenometrics (currently disabled):

        # ax.scatter(x=[pd.to_datetime(d-1, unit='D', origin=str(int(y))) for d,y in zip(ndvi_pheno[k].SOS.values, ndvi_pheno[k].SOS_year.values)],
        #            y=ndvi_pheno[k].vSOS,
        #           c='tab:green', label='SOS', zorder=10)
        
        # ax.scatter(x=[pd.to_datetime(d-1, unit='D', origin=str(int(y))) for d,y in zip(ndvi_pheno[k].EOS.values, ndvi_pheno[k].EOS_year.values)],
        #            y=ndvi_pheno[k].vEOS,
        #           c='tab:purple', label='EOS', zorder=10)
        
        # ax.scatter(x=[pd.to_datetime(d-1, unit='D', origin=str(int(y))) for d,y in zip(ndvi_pheno[k].POS.values, ndvi_pheno[k].POS_year.values)],
        #                y=ndvi_pheno[k].vPOS,
        #               c='black', label='POS', zorder=10)
            
        # ax.scatter(x=[pd.to_datetime(d-1, unit='D', origin=str(int(y))) for d,y in zip(ndvi_pheno[k].TOS.values, ndvi_pheno[k].TOS_year.values)],
        #            y=ndvi_pheno[k].vTOS,
        #           c='tab:orange', label='TOS', zorder=10)
# fig.savefig('/g/data/os22/chad_tmp/Aus_phenology/results/figs/flux_tower_validate.png',
#             bbox_inches='tight', dpi=300)

In [None]:
# v.plot(figsize=(14,5))

In [None]:
# with plt.style.context('ggplot'):
#     fig,axes = plt.subplots(5,2, figsize=(20,15), layout='constrained')
#     for ax, (k,v) in zip(axes.reshape(-1), flux_tss.items()):
#         v.plot(ax=ax, c='tab:blue', label='OzFlux')
#         ndvi_tss[k].plot(ax=ax, c='tab:red', label='AusEFlux')
#         ax.set_title(k)
#         ax.grid(axis='y', which='both')
#         ax.set_xlabel(None)
#         ax.set_ylabel('GPP gC/m2/month')
#         ax.legend()
#         # ax.set_ylim(0.10,0.9)
#         # ax1.set_ylim(-1,350)
    
        
#         # ax.scatter(x=[pd.to_datetime(d-1, unit='D', origin=str(int(y))) for d,y in zip(ndvi_pheno[k].SOS.values, ndvi_pheno[k].SOS_year.values)],
#         #            y=ndvi_pheno[k].vSOS,
#         #           c='tab:green', label='SOS', zorder=10)
        
#         # ax.scatter(x=[pd.to_datetime(d-1, unit='D', origin=str(int(y))) for d,y in zip(ndvi_pheno[k].EOS.values, ndvi_pheno[k].EOS_year.values)],
#         #            y=ndvi_pheno[k].vEOS,
#         #           c='tab:purple', label='EOS', zorder=10)
        
#         # ax.scatter(x=[pd.to_datetime(d-1, unit='D', origin=str(int(y))) for d,y in zip(ndvi_pheno[k].POS.values, ndvi_pheno[k].POS_year.values)],
#         #                y=ndvi_pheno[k].vPOS,
#         #               c='black', label='POS', zorder=10)
            
#         # ax.scatter(x=[pd.to_datetime(d-1, unit='D', origin=str(int(y))) for d,y in zip(ndvi_pheno[k].TOS.values, ndvi_pheno[k].TOS_year.values)],
#         #            y=ndvi_pheno[k].vTOS,
#         #           c='tab:orange', label='TOS', zorder=10)
# fig.savefig('/g/data/os22/chad_tmp/Aus_phenology/results/figs/flux_tower_validate_GPP.png',
#             bbox_inches='tight', dpi=300)

In [None]:
    # # Index NDVI at location and time so we have matching time series
    # lat,lon = v.latitude, v.longitude
    # ndvi = ds.sel(latitude=lat, longitude=lon, method='nearest')
    
    # #smooth
    # ndvi = ndvi.resample(time="2W").interpolate("linear")
    # v = v.sel(time=ndvi.time, method='nearest')
    # ndvi=sg_smooth(ndvi, window=11, poly=3, deriv=0)
    # v=sg_smooth(v, window=11, poly=3, deriv=0)

    # #interpolate
    # v = v.drop_duplicates(dim='time')
    # ndvi = ndvi.dropna(dim='time',
    #         how='all').resample(time='1D').interpolate(kind='quadratic')
    # v = v.dropna(dim='time',
    #         how='all').resample(time='1D').interpolate(kind='quadratic')

    # # same length of time for both ds
    # ndvi = ndvi.sel(time=v.time, method='nearest')
    # v = v.sel(time=ndvi.time, method='nearest')