# Sentinel-2 Vegetation Phenology
This notebook calculates vegetation phenology changes using Sentinel-2 data. To detect changes, the algorithm uses the Normalized Difference Vegetation Index (NDVI), which is a common proxy for vegetation growth and health. The outputs of this notebook can be used to assess differences between agricultural fields over time or space, and to assess growth stages such as planting and harvesting.

## Load Data Cube Configuration and Import Utilities

In [None]:
import xarray as xr
import numpy as np  
import matplotlib.pyplot as plt

import datacube

import sys, os
# This environment variable is set before dea_tools is imported below.
# NOTE(review): presumably it tells geopandas/shapely not to use PyGEOS — confirm.
os.environ['USE_PYGEOS'] = '0'

from dea_tools.plotting import display_map

### EASI tools
# Add ../scripts (relative to the working directory) to the import path.
# The path contains no '~', so expanduser returns it unchanged.
sys.path.append(os.path.expanduser('../scripts'))
# NOTE(review): the two landsat_* helpers are not referenced in the cells visible here.
from ceos_utils.data_cube_utilities.clean_mask import landsat_clean_mask_invalid, landsat_qa_clean_mask
from easi_tools import EasiDefaults
from easi_tools import notebook_utils
easi = EasiDefaults() # Get the default parameters for this system

In [None]:
# Initialise Dask with use_gateway=False and keep both returned handles.
cluster, client = notebook_utils.initialize_dask(use_gateway=False)

# Show the cluster when one was returned (truthy); otherwise show the client.
display(cluster or client)

# Print what localcluster_dashboard reports for this hub
# (presumably the dashboard address — confirm against easi_tools).
dashboard_info = notebook_utils.localcluster_dashboard(client, server=easi.hub)
print(dashboard_info)

In [None]:
from datacube.utils.aws import configure_s3_access

# Configure S3 access with aws_unsigned=False and requester_pays=True,
# passing the Dask client created earlier.
configure_s3_access(
    aws_unsigned=False,
    requester_pays=True,
    client=client,
)

In [None]:
# Product name used for the Data Cube query, and the matching platform label.
product, platform = "s2_l2a", "Sentinel-2A"

## <span id="define_extents">Define the Extents of the Analysis [&#9652;](#top)</span>

In [None]:
# Analysis region and time window.
#
# Candidate field locations from the NEW yield data from Vietnam (18-Nov-2022).
# To analyse a single field instead of the system default region, uncomment one
# lat_long line together with the box calculation below, and remove the
# easi-based assignment.

# lat_long = (10.443492, 105.281103) # 17, Chau Thanh, Yield High
# lat_long = (10.4172, 105.3635) # 28, Chau Thanh, Low High

# lat_long = (10.454342, 105.322838) #6, Chau Thanh, High Yield
# lat_long = (10.434116, 105.273150) #13, Chau Thanh, Low Yield
# lat_long = (10.392899, 105.188514) #37, Chau Thanh, High Yield
# lat_long = (10.394341, 105.126836) #47, Chau Thanh, Low Yield
# lat_long = (10.356519, 105.309450) #146, Chau Thanh, High Yield
# lat_long = (10.354744, 105.336739) #142, Chau Thanh, Low Yield

# box_size_deg = 0.0004 # Typically yields 5x5 pixel region

# Latitude/longitude bounds of a box centred on lat_long:
# latitude = (lat_long[0]-box_size_deg/2, lat_long[0]+box_size_deg/2)
# longitude = (lat_long[1]-box_size_deg/2, lat_long[1]+box_size_deg/2)

# Region bounds taken from the default parameters for this system.
latitude, longitude = easi.latitude, easi.longitude

# Time window; each date is formatted YYYY-MM-DD.
start_date, end_date = '2022-04-01', '2022-09-01'
time_extents = (start_date, end_date)

In [None]:
# The code below renders a map that can be used to view the region.
# Longitude is passed first, then latitude. The call is kept as the last
# expression in the cell so that the notebook shows its return value.
display_map(longitude,latitude)

## Load bands needed for NDVI and remove clouds and water

In [None]:
# Create the Datacube object (no arguments given) used by the load call below.
dc = datacube.Datacube()

In [None]:
# Query parameters: the two bands needed for NDVI (red, nir) plus the SCL
# classification band, grouped by solar day, output in EPSG:6933 with a
# resolution of (-10, 10) in CRS units, and one Dask chunk per time step.
load_query = {
    'latitude': latitude,
    'longitude': longitude,
    'time': time_extents,
    'product': product,
    'group_by': 'solar_day',
    'measurements': ['red', 'nir', 'SCL'],
    'output_crs': 'EPSG:6933',
    'resolution': (-10, 10),
    'dask_chunks': {'time': 1},
}

sentinel_dataset = dc.load(**load_query)

In [None]:
# Filter data using the SCL band classification.
#
# SCL classes removed from the analysis:
#   scl=0  > No Data
#   scl=1  > Saturated
#   scl=3  > Cloud Shadows
#   scl=8  > Cloud Medium Probability
#   scl=9  > Cloud High Probability
#   scl=10 > Thin Cirrus Cloud
#   scl=6  > Water (removed by the land mask only)
invalid_scl_classes = [0, 1, 3, 8, 9, 10]
water_scl_class = 6

scl = sentinel_dataset.SCL

# True where the pixel belongs to none of the invalid/cloud classes.
cloud_mask = ~scl.isin(invalid_scl_classes)

# True where the pixel is cloud-free and also not classified as water.
land_mask = (scl != water_scl_class) & cloud_mask

# Drop the SCL data as it is no longer needed.
# drop_vars is used because Dataset.drop is deprecated in xarray.
sentinel_dataset = sentinel_dataset.drop_vars('SCL')

# Apply land mask ... NO Clouds, NO Cloud Shadows and NO Water pixels
cleaned_dataset = sentinel_dataset.where(land_mask)

## Define NDVI and add it to the dataset

In [None]:
def NDVI(dataset):
    """Return the Normalized Difference Vegetation Index of *dataset*.

    Computed as ``(nir - red) / (nir + red)``.

    Args:
        dataset: Object exposing ``nir`` and ``red`` array attributes that
            support ``dtype``, ``astype`` and element-wise arithmetic.

    Returns:
        Floating-point array of the same shape as the input bands. Pixels
        where ``nir + red`` is zero yield NaN (0/0).
    """
    # Promote the bands to floating point before the arithmetic. On unsigned
    # integer bands, ``nir - red`` wraps around whenever red > nir and gives
    # a large positive value instead of a negative NDVI. result_type leaves
    # inputs that are already floating point at their own precision.
    nir = dataset.nir.astype(np.result_type(dataset.nir.dtype, np.float32))
    red = dataset.red.astype(np.result_type(dataset.red.dtype, np.float32))
    return (nir - red) / (nir + red)

In [None]:
# Add NDVI to the dataset that has NOT had the land mask applied.
sentinel_dataset['NDVI'] = NDVI(sentinel_dataset)

In [None]:
# Add NDVI to the land-masked dataset; this is the one plotted and exported below.
cleaned_dataset['NDVI'] = NDVI(cleaned_dataset)

In [None]:
# Bare expression: the notebook displays the dataset summary for inspection.
cleaned_dataset

In [None]:
# Show the time coordinate of every slice in the cleaned dataset as a table.
import pandas as pd

slice_times = cleaned_dataset.time.values
pd.DataFrame({'time': slice_times})

## Plot NDVI vs Time

In [None]:
ndvi = cleaned_dataset['NDVI']

# Keep only the time steps that have at least one finite NDVI pixel.
# The spatial dimensions are reduced by name rather than by axis position, so
# the result does not depend on the dimension order. The mask is computed
# explicitly so that a concrete boolean array is used for the indexing below.
nanmask = np.isfinite(ndvi).any(dim=['x', 'y']).compute()

plt.figure(figsize=(12, 6))
# Median NDVI over the region for each retained time step.
plt.plot(cleaned_dataset.time[nanmask],
         ndvi[nanmask].median(dim=['x', 'y']),
         color='red', marker='o')
# The x-axis shows the time coordinate, so it is labelled as such.
plt.xlabel("Time")
plt.ylabel("NDVI")
# The trailing semicolon suppresses the notebook's text output for this call.
plt.title("NDVI = Vegetation Index");

In [None]:
# Write the per-time-step median NDVI (median over y and x) to a CSV file.
filename = "output.csv"

img3 = cleaned_dataset.NDVI
img5 = img3.median(dim=['y', 'x'])

# Convert to a DataFrame first, then write it out.
median_table = img5.to_dataframe()
median_table.to_csv(filename)