# Extract phenometrics over IBRA subregions

This notebook is used both for testing and to produce estimates for example regions that highlight how the algorithm behaves.

In [None]:
%matplotlib inline

import sys
import pickle
import warnings
import numpy as np
import xarray as xr
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from odc.geo.geom import Geometry
from odc.geo.xr import assign_crs

sys.path.append('/g/data/os22/chad_tmp/Aus_phenology/src')
from phenology_pixel import _extract_peaks_troughs, xr_phenometrics

# %load_ext autoreload
# %autoreload 2

## Analysis Parameters


In [None]:
# Name of the NDVI product; it is embedded in the input and output file names.
product = 'AusENDVI-clim_MCD43A4'

# Input: pickled per-region NDVI time series.
timeseries_file = f'/g/data/os22/chad_tmp/Aus_phenology/data/pickle/IBRA_subregions_NDVI_{product}.pkl'
# Output: pickled per-region phenometrics.
save_file = f'/g/data/os22/chad_tmp/Aus_phenology/data/pickle/IBRA_subregions_{product}_phenometrics_new.pkl'
# Vector file holding the region polygons.
ecoregions_file = '/g/data/os22/chad_tmp/Aus_phenology/data/vectors/IBRAv7_subregions_modified.geojson'

# Attribute column that holds the region name.
var = 'SUB_NAME_7'
region_type = 'IBRA_subregions'
years = '1982-2022'

## Open data

In [None]:
# NDVI time series, processed to daily resolution in an earlier step.
# NOTE(review): pickle.load executes arbitrary code from the file; only load
# pickles produced by this project.
with open(timeseries_file, 'rb') as f:
    results = pickle.load(f)

# Region polygons.
gdf = gpd.read_file(ecoregions_file)

# Bare-soil NDVI grid, tagged with a geographic CRS (EPSG:4326).
ss_path = '/g/data/xc0/project/AusEFlux/data/ndvi_of_baresoil_5km.nc'
ss = xr.open_dataset(ss_path)['NDVI']
ss = assign_crs(ss, crs='epsg:4326')
ss.name = 'NDVI'

## Extract phenometrics 

<!-- import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit

def double_logistic_function(t, wNDVI, mNDVI, S, A, mS, mA):
    sigmoid1 = 1 / (1 + np.exp(-mS * (t - S)))
    sigmoid2 = 1 / (1 + np.exp(mA * (t - A)))
    seasonal_term = sigmoid1 + sigmoid2 - 1
    return wNDVI + (mNDVI - wNDVI) * seasonal_term

def weight_function(t, S, A, r):
    tr = 100 * (t - S) / (A - S)
    tr = np.clip(tr, 0, 100)
    return np.exp(-np.abs(r) / (1 + tr / 10))

def fit_curve(t, ndvi_observed):
    initial_guess = [np.min(ndvi_observed), np.max(ndvi_observed), np.mean(t), np.mean(t), 1, 1]
    params, _ = curve_fit(double_logistic_function, t, ndvi_observed, p0=initial_guess, maxfev=10000)
    residuals = ndvi_observed - double_logistic_function(t, *params)
    weights = weight_function(t, params[2], params[3], residuals)
    params, _ = curve_fit(double_logistic_function, t, ndvi_observed, p0=initial_guess, sigma=weights, maxfev=10000)
    return params

doys = ndvi_cycle.time.dt.dayofyear.values[2:]
doys_frac = doys/365
values = ndvi_cycle.values[2:]

##Fit the curve
parameters = fit_curve(doys_frac, values)

##Plot the observed NDVI values
plt.scatter(doys, values, label='Observed NDVI')

##Generate points for the fitted curve
t_fit = np.linspace(min(doys_frac), max(doys_frac), 365)
ndvi_fit = double_logistic_function(t_fit, *parameters)

##Plot the fitted curve
plt.plot(t_fit*365, ndvi_fit, label='Fitted Curve', color='red')

plt.xlabel('Day of the Year')
plt.ylabel('NDVI')
plt.legend()
plt.title('Double Logistic Curve Fitting for NDVI Observations')
plt.show() -->

In [None]:
# Run the phenology algorithm on every region's NDVI time series and collect
# the resulting tables in `pheno`, keyed by region name.
#
# NOTE(review): `var` (the region-name column) is re-bound to a phenometric
# name in a later cell; re-run the "Analysis Parameters" cell before
# re-running this one.

# Regions that are skipped entirely.
# NOTE(review): 'Coral Se' looks like a truncated 'Coral Sea' — confirm
# against the attribute table; as written it only matches that exact string.
skip_regions = ('Coral Se', 'Timor Sea Coral Islands')

pheno = {}
n_regions = len(gdf)
for i, (index, row) in enumerate(gdf.iterrows(), start=1):
    # Single-line progress counter (carriage return overwrites the line).
    print(" {:02}/{:02}\r".format(i, n_regions), end="")

    name = row[var]
    if name in skip_regions:
        continue

    ds = results[name]

    # Mean bare-soil NDVI inside the region polygon. The geometry is taken
    # from `row` directly: `gdf.iloc[index]` would treat the index *label* as
    # a position and pick the wrong polygon whenever the GeoDataFrame does not
    # carry a default 0..n-1 index.
    geom = Geometry(geom=row.geometry, crs=gdf.crs)
    soil = ss.odc.mask(poly=geom).mean().values.item()

    # Add dummy latitude/longitude dimensions of length one (the coordinate
    # values are placeholders) — presumably because xr_phenometrics expects
    # gridded input; confirm against its signature.
    ds = ds.expand_dims(latitude=[-33.0], longitude=[135.0])

    # Apply the phenology algorithm.
    p = xr_phenometrics(
        ds,
        rolling=90,
        distance=90,
        prominence='auto',
        plateau_size=10,
        amplitude=0.2,
        verbose=True,
        soil_signal=soil,
    ).compute()

    # Drop the dummy spatial dimensions again and store the result as a table.
    pheno[name] = p.squeeze().drop_vars(['latitude', 'longitude']).to_dataframe()

## Save results

In [None]:
# with open(save_file, 'wb') as f:
#     pickle.dump(pheno, f)

## Plot some regions

In [None]:
# Interactive map of the regions, coloured by region name, drawn over
# Esri World Imagery tiles. The legend is disabled.
gdf.explore(
    column='SUB_NAME_7',
    tiles='https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}',
    attr='Esri',
    name='Esri Satellite',
    control=True,
    legend=False,
)

In [None]:
# Load the previously saved per-region phenometrics.
# NOTE(review): pickle.load executes arbitrary code from the file; only load
# pickles produced by this project.
phenometrics_file = (
    '/g/data/os22/chad_tmp/Aus_phenology/data/pickle/'
    'IBRA_subregions_AusENDVI-clim_MCD43A4_phenometrics_new.pkl'
)
with open(phenometrics_file, 'rb') as f:
    eco_regions_phenometrics = pickle.load(f)

In [None]:
# Region to inspect in the cells below (key into `results` and
# `eco_regions_phenometrics`); the trailing comment lists other candidates.
k = 'Victorian Alps' #'Geraldton Hills rainfed crop' 'Weipa Plateau'

In [None]:
# Plot the daily NDVI series of region `k` and overlay the seasonal markers
# (SOS, EOS, POS, TOS) taken from the phenometrics table.
ds = results[k] #'Recherche rainfed crop'
pheno_k = eco_regions_phenometrics[k]

fig, ax = plt.subplots(1, 1, figsize=(15, 5))
ds.plot(ax=ax, color='tab:blue', linestyle='--', linewidth=1.0, label='Daily NDVI') #.sel(time=slice('1997','2016'))

# ax.set_ylim(0.15, 0.7)

# Each marker is stored as a day-of-year column (<metric>), the year it falls
# in (<metric>_year) and the NDVI value at that date (v<metric>).
marker_colours = (
    ('SOS', 'tab:green'),
    ('EOS', 'tab:purple'),
    ('POS', 'black'),
    ('TOS', 'tab:orange'),
)
for metric, colour in marker_colours:
    # Day-of-year 1 is 1 January, hence the `d - 1` offset from the year origin.
    dates = [
        pd.to_datetime(d - 1, unit='D', origin=str(int(y)))
        for d, y in zip(pheno_k[metric].values, pheno_k[f'{metric}_year'].values)
    ]
    ax.scatter(x=dates, y=pheno_k[f'v{metric}'], c=colour, label=metric, zorder=10)

ax.set_xlabel(None)
ax.set_ylabel(None)
ax.set_title(k, fontsize=12)
# The artists above all carry labels; without a legend they are never shown.
ax.legend()
ax.grid()
# plt.ylim(0.1,0.4)

In [None]:
# Compare the mean monthly NDVI cycle of region `k` between two periods.
# The legend label is built from the slice bounds so it always describes the
# data actually plotted (label-based time slices include both end years).
# NOTE(review): the labels used to read '1982-1999' and '2000-2021' while the
# slices were 1982-2000 and 2010-2021; the slices are kept as they were —
# adjust `periods` if the labelled ranges were the intended ones.
periods = (('1982', '2000'), ('2010', '2021'))
for start, end in periods:
    ds.sel(time=slice(start, end)).groupby('time.month').mean().plot(label=f'{start}-{end}')
plt.legend()
plt.title(k+' NDVI average annual cycle');

In [None]:
# Plot the SOS column of region `k` against the table's index.
eco_regions_phenometrics[k].SOS.plot()

In [None]:
# Plot the EOS column of region `k` against the table's index.
(eco_regions_phenometrics[k].EOS).plot()
# plt.ylim(100,400)

In [None]:
# sos = [pd.to_datetime(d-1, unit='D', origin=str(int(y))) for d,y in zip(eco_regions_phenometrics[k].SOS.values, eco_regions_phenometrics[k].SOS_year.values)]
# eos = [pd.to_datetime(d-1, unit='D', origin=str(int(y))) for d,y in zip(eco_regions_phenometrics[k].EOS.values, eco_regions_phenometrics[k].EOS_year.values)]
# sos = np.array(sos)
# eos = np.array(eos)

# var='EOS'
# identical_dayofyear = [pd.to_datetime(int(eco_regions_phenometrics[k][var].mean()), unit='D', origin=str(int(y))) for y in range(int(eco_regions_phenometrics[k][f'{var}_year'][0].item()), int(eco_regions_phenometrics[k][f'{var}_year'][-1:].item() +1))]

# plt.plot(eco_regions_phenometrics[k].index, identical_dayofyear, label='if EOS was identical every year');
# plt.plot(eos, label='Actual EOS datetimes')
# plt.legend();

## Circular linear regression

https://agupubs.onlinelibrary.wiley.com/doi/full/10.1029/2017WR021731

## ChatGPT response on computing trends in DOY

To resolve this issue, you can "wrap around" the day-of-year values to make events near the end and start of the year more comparable. This can be achieved by considering the year as circular rather than linear. A useful approach is to use trigonometric transformations to map day-of-year values onto a unit circle. This avoids the discontinuity between day 365 (or 366) and day 1.

### Steps to Resolve:
1. **Convert Day-of-Year to Circular Coordinates**:
   - Represent the day-of-year $d$ as an angle in radians:

     $$\theta = \frac{2\pi \cdot d}{\text{days\_in\_year}}$$

   - Compute the x and y coordinates on the unit circle:

     $$x = \cos(\theta), \quad y = \sin(\theta)$$

2. **Fit Trends Using Circular Coordinates**:
   - Use $x$ and $y$ values to calculate the trend. For example, you can apply a linear regression on these coordinates over time.

3. **Convert Back to Day-of-Year for Interpretation**:
   - After calculating a trend in $x$ and $y$, you can convert back to a day-of-year using:

     $$d = \frac{\arctan2(y, x) \cdot \text{days\_in\_year}}{2\pi}$$

   - Adjust for negative angles to ensure $d$ lies within [1, days_in_year].

This approach captures the circular nature of the calendar and ensures events near the year's end and beginning are treated as close together. The same approach can be used with more sophisticated models, such as Fourier transforms or circular statistics, if needed.

In [None]:
# Interactive map of the regions, coloured by region name, drawn over
# Esri World Imagery tiles. The legend is disabled.
gdf.explore(
    column='SUB_NAME_7',
    tiles='https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}',
    attr='Esri',
    name='Esri Satellite',
    control=True,
    legend=False,
)

In [None]:
from phenology_pixel import mk_with_slopes
from scipy.stats import circmean
import numpy as np
import pandas as pd
from scipy.stats import linregress

In [None]:
# Region and phenometric used by the trend experiments in the cells below.
k = 'Claraville Plains' #'Victorian Alps' #'Geraldton Hills rainfed crop' 'Weipa Plateau', Claraville Plains
df = eco_regions_phenometrics[k]
# NOTE(review): this re-binds `var`, which the "Analysis Parameters" cell sets
# to the region-name column and the extraction loop reads; re-run the
# parameters cell before re-running the extraction.
var='SOS'

In [None]:
# Compare a circular treatment of the `var` day-of-year series (angle on the
# yearly circle, unwrapped to remove the jump at the year boundary) with the
# plain linear treatment, for both the mean and the Mann-Kendall trend.

# Day-of-year events and the years they belong to.
data = pd.DataFrame({
    "year": df[f'{var}_year'].values,
    "day_of_year": df[var].values
})

# Number of days in each year (Gregorian leap-year rule).
data['days_in_year'] = data['year'].apply(lambda y: 366 if y % 4 == 0 and (y % 100 != 0 or y % 400 == 0) else 365)

# Day-of-year as an angle in radians, then unwrapped so that consecutive
# values never jump by more than pi.
data['theta'] = data['day_of_year']*((2*np.pi)/data['days_in_year'])
data['theta_unwrap'] = np.unwrap(data['theta'])

# Circular mean, converted back to a day-of-year on a 365-day circle.
circular_mean = circmean(data['theta'])
circular_mean_doy = circular_mean / (2 * np.pi) * 365
print(f"Circular mean {var} DOY: {circular_mean_doy}")
print(f"Linear mean {var} DOY: {df[var].mean()}")
print('\n')

# Trend test on the unwrapped angle. It was previously run twice with
# identical input; one call is enough.
p_value, slope, intercept = mk_with_slopes(data['theta_unwrap'])
slope_doy = slope * 365 / (2 * np.pi)

# Trend test on the raw day-of-year values, computed once and reused below.
linear_mk = mk_with_slopes(df[var])

print(f"{var} circular slope magnitude: {slope:.4f}")
print(f"{var} circular slope magnitude converted to DOY units: {slope_doy:.4f}")

print(f"{var} linear slope magnitude: {linear_mk.slope}")
print('\n')
print(f"{var} circular p-value: {p_value:.4f}")
print(f"{var} linear p-value: {linear_mk.p:.4f}")

In [None]:
# Three stacked panels sharing the x axis: raw day-of-year, angle (theta),
# and unwrapped angle.
fig, ax = plt.subplots(3, 1, figsize=(9, 10), sharex=True)

# Top panel: raw values with the linear trend in the title.
df[var].plot(ax=ax[0])
ax[0].set_title(f'{k}: {var} slope={mk_with_slopes(df[var]).slope:.3f} (days/yr), Mean={df[var].mean():.0f} DOY')

# Middle panel: angle on the yearly circle.
data['theta'].plot(ax=ax[1], label='theta')
ax[1].set_title('theta')

# Bottom panel: unwrapped angle with the circular trend in the title.
data['theta_unwrap'].plot(ax=ax[2], label=f'x, slope={slope:.4f}')
# data['day_of_year_unwrap'].plot(ax=ax[1,1],label='day_of_year_unwrap')
ax[2].set_title(f'theta unwrapped, slope={slope_doy:.3f} (days/yr), Mean={circular_mean_doy:.0f} DOY')

for axis in ax:
    axis.grid(alpha=0.75);

In [None]:
# Sample data: day-of-year and year
years = np.array([2000, 2001, 2002, 2003, 2004])

# days_of_year = np.array([360, 362, 364, 1, 3])  #later (positive) 
days_of_year = np.array([3, 1, 364, 362, 360])  #earlier (negative) 
# days_of_year = np.array([158, 156, 154, 152, 150])  # earlier (negative)
# days_of_year = np.array([150, 152, 154, 156, 158])  # later (postive)

# Step 1: Convert day-of-year to circular angles
theta = days_of_year / 365 * 2 * np.pi  # Convert to radians

# Step 2: Unwrap the angles to handle continuity
theta_unwrapped = np.unwrap(theta)

# Step 3: Fit linear regression on unwrapped angles
time = years - years[0]
reg = LinearRegression().fit(time.reshape(-1, 1), theta_unwrapped)
slope_angle_per_year = reg.coef_[0]  # Angular slope (radians/year)

# Step 4: Convert slope to days/year
slope_days_per_year = slope_angle_per_year * (365 / (2 * np.pi))

# Determine direction: Positive (later), Negative (earlier)
direction = "later (positive)" if slope_days_per_year > 0 else "earlier (negative)"

print(f'{slope_days_per_year:.3f} (days/year) and direction is {direction}')

In [None]:
# Plot the (wrapped) example angles against year.
plt.plot(years, theta)

In [None]:
# Alternative circular trend: run the trend test separately on the cosine and
# sine components of the angle and combine the two slopes into a magnitude
# and a direction.

def _year_length(yr):
    """Return 366 for Gregorian leap years and 365 otherwise."""
    if yr % 4 != 0:
        return 365
    if yr % 100 == 0 and yr % 400 != 0:
        return 365
    return 366


two_pi = 2 * np.pi

# Day-of-year events and the years they belong to.
data = pd.DataFrame({
    "year": df[f'{var}_year'].values,
    "day_of_year": df[var].values,
})
data['days_in_year'] = data['year'].apply(_year_length)

# Angle on the yearly circle, its unwrapped version, and the unit-circle
# coordinates.
data['theta'] = data['day_of_year'] * (two_pi / data['days_in_year'])
data['theta_unwrap'] = np.unwrap(data['theta'])
data['x'] = np.cos(data['theta'])
data['y'] = np.sin(data['theta'])

# Circular mean, converted back to a day-of-year on a 365-day circle.
circular_mean = circmean(data['theta'])
circular_mean_doy = circular_mean / two_pi * 365
print(f"Circular mean {var} DOY: {circular_mean_doy}")
print(f"Linear mean {var} DOY: {df[var].mean()}")
print('\n')

# Trend test on each component.
p_value_x, slope_x, intercept_x = mk_with_slopes(data['x'])
p_value_y, slope_y, intercept_y = mk_with_slopes(data['y'])

# Combine the component slopes: length and direction of the slope vector.
# The magnitude is unsigned.
slope_magnitude = np.sqrt(slope_x**2 + slope_y**2)
slope_angle = np.arctan2(slope_y, slope_x)  # radians
slope_magnitude_doy = slope_magnitude * 365 / two_pi

# The larger of the two component p-values is reported.
combined_p_value = max(p_value_x, p_value_y)

# Output the results
print(f"{var} slope angle: {slope_angle:.4f}")
print(f"{var} circular slope magnitude: {slope_magnitude:.4f}")
print(f"{var} circular slope magnitude converted to DOY units: {slope_magnitude_doy:.4f}")

print(f"{var} linear slope magnitude: {mk_with_slopes(df[var]).slope}")
print('\n')
print(f"{var} circular p-value: {combined_p_value:.4f}")
print(f"{var} linear p-value: {mk_with_slopes(df[var]).p:.4f}")

In [None]:
# Worked example: combine per-year slopes of the x (cos) and y (sin)
# components into a magnitude, a direction and a signed day-of-year rate.

# Example slopes for x and y
slope_x = -0.02  # change in x per year
slope_y = 0.01  # change in y per year

# Compute magnitude and direction of the slope
slope_magnitude = np.sqrt(slope_x**2 + slope_y**2)
slope_angle = np.arctan2(slope_y, slope_x)  # angle in radians

# Convert angle to change in day-of-year (DOY)
days_in_year = 365
doy_change = (slope_angle / (2 * np.pi)) * days_in_year

# `slope_signed` was printed below but only ever assigned in commented-out
# code, so this cell raised NameError. It is now the magnitude converted to
# days/year (matching the label on the print) and signed by `doy_change`.
# NOTE(review): the sign follows the direction of the slope vector in the
# x/y plane, as in the commented-out draft; confirm that this is the intended
# meaning of "earlier/later".
sign = -1 if doy_change < 0 else 1
slope_signed = sign * slope_magnitude * days_in_year / (2 * np.pi)

# Output results
print(f"Slope angle: {slope_angle:.4f}")
print(f"Slope magnitude: {slope_magnitude:.4f} (units/year)")
print(f"Slope in DOY units (with sign): {slope_signed:.4f} days/year")

In [None]:
# Trend of `var` computed on the raw (not unwrapped) angle, compared with the
# trend on the plain day-of-year values.

# Day-of-year events and the years they belong to.
data = pd.DataFrame({
    "year": df[f'{var}_year'].values,
    "day_of_year": df[var].values
})

# Number of days in each year (Gregorian leap-year rule).
data['days_in_year'] = data['year'].apply(lambda y: 366 if y % 4 == 0 and (y % 100 != 0 or y % 400 == 0) else 365)

# Day-of-year as an angle in radians.
data['theta'] = 2 * np.pi * data['day_of_year'] / data['days_in_year']

# Circular mean, converted back to a day-of-year on a 365-day circle.
circular_mean = circmean(data['theta'])
circular_mean_doy = circular_mean / (2 * np.pi) * 365
print(f"Circular mean {var} DOY: {circular_mean_doy}")
print(f"Linear mean {var} DOY: {df[var].mean()}")
print('\n')

# Trend test on the angle.
p_value, slope, intercept = mk_with_slopes(data['theta'])

# Convert the angular slope to day-of-year units. This cell used to build a
# magnitude from `slope_x`/`slope_y` and flip the sign according to
# `slope_angle`, none of which are assigned here: they were leftovers from
# whichever other cell ran last, so the reported sign depended on execution
# order. The slope from the trend test is used as returned instead.
slope_magnitude_doy = slope * 365 / (2 * np.pi)

# Trend test on the raw day-of-year values, computed once and reused below.
linear_mk = mk_with_slopes(df[var])

# Output the results
print(f"{var} circular slope magnitude: {slope:.4f}")
print(f"{var} circular slope magnitude converted to DOY units: {slope_magnitude_doy:.4f}")

print(f"{var} linear slope magnitude: {linear_mk.slope}")
print('\n')
print(f"{var} circular p-value: {p_value:.4f}")
print(f"{var} linear p-value: {linear_mk.p:.4f}")

In [None]:
# # Fit linear trend on circular coordinates
# from sklearn.linear_model import LinearRegression

# model_x = LinearRegression().fit(data[['year']], data['x'])
# model_y = LinearRegression().fit(data[['year']], data['y'])

# # Predict future circular coordinates
# future_years = np.array([[2024], [2025]])
# x_pred = model_x.predict(future_years)
# y_pred = model_y.predict(future_years)

# # Convert predicted circular coordinates back to day-of-year
# predicted_theta = np.arctan2(y_pred, x_pred)
# predicted_day_of_year = (predicted_theta * (365 if not any(future_years % 4 == 0) else 366) / (2 * np.pi)) % 365
# predicted_day_of_year = np.where(predicted_day_of_year < 1, predicted_day_of_year + 365, predicted_day_of_year)

# # Output predictions
# print(predicted_day_of_year)