# Testing Notebook


Assorted online Python phenology functions that might help:

- https://gist.github.com/YanCheng-go/d4e17831f294199443d0f7682558e608

- https://github.com/JavierLopatin/PhenoPY



In [None]:
# !pip install richdem
# !pip install xarray --upgrade

In [1]:
%matplotlib inline

import datacube
import matplotlib.pyplot as plt
from odc.algo import xr_reproject
import hdstats
import numpy as np
import pandas as pd
import sys
import xarray as xr
import datetime as dt
import os

sys.path.append('../Scripts')
from deafrica_datahandling import load_ard
from deafrica_bandindices import calculate_indices
from deafrica_plotting import display_map, rgb
from deafrica_temporal_statistics import xr_phenology, temporal_statistics
from datacube.utils.geometry import assign_crs
from deafrica_dask import create_local_dask_cluster

import warnings
warnings.filterwarnings("ignore", "Mean of empty slice")
warnings.simplefilter("ignore", FutureWarning)

%load_ext autoreload
%autoreload 2

In [2]:
# Start a local Dask cluster for the dask-backed (lazy) computations that are
# triggered with `.compute()` later in the notebook
create_local_dask_cluster()

0,1
Client  Scheduler: tcp://127.0.0.1:45807  Dashboard: /user/chad/proxy/8787/status,Cluster  Workers: 1  Cores: 2  Memory: 14.18 GB


### Connect to the datacube

In [3]:
# Connect to the datacube; `dc` is passed to `load_ard` below to find and load data
dc = datacube.Datacube(app='Vegetation_phenology')

### Analysis parameters


In [4]:
# Vegetation index used as the proxy for vegetation greenness
veg_proxy = 'NDVI'

# Centre point (latitude, longitude) and half-widths of the area of interest.
# Alternative values previously noted by the author:
#   lat -34.288, lon 20.012, lat_buffer 0.004, lon_buffer 0.0175
lat, lon = 22.817, 28.518
lat_buffer, lon_buffer = 0.03, 0.03

# Bounding box as (min, max) pairs built from the centre point and buffers
lat_range = (lat - lat_buffer, lat + lat_buffer)
lon_range = (lon - lon_buffer, lon + lon_buffer)

# Time window for the analysis as (start, end) year-month strings
years_range = ('2018-01', '2018-06')

## View the selected location

In [5]:
# display_map(x=lon_range, y=lat_range)

## Load cloud-masked Sentinel-2 data

The first step is to load Sentinel-2 data for the specified area of interest and time range. 
The `load_ard` function is used here to load data that has been masked for cloud, shadow and quality filters, making it ready for analysis.

In [6]:
# Create a reusable query: spatial extent, time window, bands, pixel size and
# output projection shared by both loads below
query = {
    'y': lat_range,
    'x': lon_range,
    'time': years_range,
    'measurements': ['blue', 'green', 'red', 'nir_1'],
    'resolution': (-20,20),
    'output_crs': 'epsg:6933'
}

# Load available Sentinel-2 data (product 's2_l2a') as dask arrays, chunked
# 100 x 100 in space with the whole time series kept in a single chunk
ds = load_ard(dc=dc,
              products=['s2_l2a'],
              dask_chunks={'x':100, 'y':100,'time':-1},
              **query
              )

# Load the same Sentinel-2 query a second time without `dask_chunks`, so later
# cells can run the same functions on dask-backed (ds) and non-dask (ds1) data
ds1 = load_ard(dc=dc,
              products=['s2_l2a'],
              **query
              )

print(ds)

Using pixel quality parameters for Sentinel 2
Finding datasets
    s2_l2a
Applying pixel quality/cloud mask
Returning 34 time steps as a dask array
Using pixel quality parameters for Sentinel 2
Finding datasets
    s2_l2a
Applying pixel quality/cloud mask
Loading 34 time steps
<xarray.Dataset>
Dimensions:      (time: 34, x: 290, y: 354)
Coordinates:
  * time         (time) datetime64[ns] 2018-01-02T08:53:30 ... 2018-06-26T08:...
    spatial_ref  int32 6933
  * y            (y) float64 2.84e+06 2.84e+06 2.84e+06 ... 2.833e+06 2.833e+06
  * x            (x) float64 2.749e+06 2.749e+06 ... 2.754e+06 2.754e+06
Data variables:
    blue         (time, y, x) float32 dask.array<chunksize=(34, 100, 100), meta=np.ndarray>
    green        (time, y, x) float32 dask.array<chunksize=(34, 100, 100), meta=np.ndarray>
    red          (time, y, x) float32 dask.array<chunksize=(34, 100, 100), meta=np.ndarray>
    nir_1        (time, y, x) float32 dask.array<chunksize=(34, 100, 100), meta=np.ndarray>
At

**Once the load is complete**, we can plot the data as a true-colour image using the `rgb` function.  

In [7]:
# rgb(ds, index=[0,5], col_wrap=1)

In [8]:
# Add the configured vegetation index (`veg_proxy`) to both datasets: the
# dask-backed one (ds) and the one loaded without dask chunks (ds1)
ds, ds1 = [
    calculate_indices(dataset, index=veg_proxy, collection='s2')
    for dataset in (ds, ds1)
]

In [9]:
# Names of the temporal statistics requested from `temporal_statistics` below
stats = [
    'discordance',
    'abs_change',
    'complexity',
    'f_mean',
    'central_diff',
]

In [10]:
# Temporal statistics on the dataset loaded without dask chunks (ds1).
# The band is selected through `veg_proxy` rather than a hard-coded `.NDVI`,
# so this cell keeps working if the index in the parameters cell is changed.
x = temporal_statistics(ds1[veg_proxy], stats=stats)
x

Completing...
   Statistics:
      discordance
      abs_change
      complexity
      f_mean
      central_diff


In [17]:
# Temporal statistics on the dask-backed dataset (ds); `.compute()` triggers
# the evaluation. The band is selected through `veg_proxy` rather than a
# hard-coded `.NDVI`, so the cell follows the index set in the parameters cell.
y = temporal_statistics(ds[veg_proxy], stats=stats).compute()
y

In [19]:
%%time
phen = xr_phenology(ds.NDVI,
                    method_sos='median',
                    method_eos='median',
                    complete='fast_complete',
                    smoothing='wiener').compute()
phen

CPU times: user 4.58 s, sys: 247 ms, total: 4.82 s
Wall time: 47.8 s


In [20]:
%%time
phen1 = xr_phenology(ds1.NDVI,
                    method_sos='median',
                    method_eos='median',
                    complete='fast_complete',
                    smoothing='wiener')
phen1

Completing using fast_complete...
   Smoothing with wiener filter...
      Phenology...
         POS
         EOS
         Trough
         vSOS
         vPOS
         vEOS
         LOS
         AOS
         ROG
         ROS
CPU times: user 1.04 s, sys: 95.9 ms, total: 1.14 s
Wall time: 1.12 s


In [21]:
# Inspect the GeoBox (grid size, affine transform and CRS) attached to the
# dask-computed phenology result
phen.geobox

GeoBox(290, 354, Affine(20.0, 0.0, 2748700.0,
       0.0, -20.0, 2839960.0), PROJCS["WGS 84 / NSIDC EASE-Grid 2.0 Global",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]],PROJECTION["Cylindrical_Equal_Area"],PARAMETER["standard_parallel_1",30],PARAMETER["central_meridian",0],PARAMETER["false_easting",0],PARAMETER["false_northing",0],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH],AUTHORITY["EPSG","6933"]])

In [None]:
# NOTE(review): this cell looks like a fragment pasted from inside a function
# body (every statement is indented one level) — presumably the dask branch of
# `temporal_statistics`; confirm against ../Scripts/deafrica_temporal_statistics.
# As written it cannot run as a cell: the leading indentation is a syntax error
# at cell level, and `dask`, `version`, `da` and `calc_temporal_stats` are not
# imported or defined anywhere in the visible notebook. It also assigns to `ds`,
# which would overwrite the dataset loaded earlier in the notebook.
#
# If the input is a dask collection, map the statistics function over its blocks
    if dask.is_dask_collection(da):
        # xarray older than 0.16.0 is rejected with a TypeError
        if version.parse(xr.__version__) < version.parse('0.16.0'):
            raise TypeError(
                "Dask arrays are not currently supported by this function, " +
                "run da.compute() before passing dataArray.")
        
        # Create a template that matches the final dataset's dims & vars:
        # the first time slice with the 'time' coordinate dropped
        arr = da.isel(time=0).drop('time')

        # Deal with the case where a fourier statistic is first in the list:
        # each fourier statistic contributes three variables (_n1, _n2, _n3)
        if stats[0] in ("f_std", "f_median", "f_mean"):
            template = xr.zeros_like(arr).to_dataset(name=stats[0]+"_n1")
            template[stats[0]+'_n2'] = xr.zeros_like(arr)
            template[stats[0]+'_n3'] = xr.zeros_like(arr)

            # Remaining statistics: three variables per fourier statistic, one otherwise
            for stat in stats[1:]:
                if stat in ("f_std", "f_median", "f_mean"):
                        template[stat+'_n1'] = xr.zeros_like(arr)
                        template[stat+'_n2'] = xr.zeros_like(arr)
                        template[stat+'_n3'] = xr.zeros_like(arr)
                else:
                    template[stat] = xr.zeros_like(arr)
        else:
            template = xr.zeros_like(arr).to_dataset(name=stats[0])

            # This loop covers all of `stats`, including stats[0], which simply
            # re-assigns the zero-filled variable created just above
            for stat in stats:
                if stat in ("f_std", "f_median", "f_mean"):
                    template[stat+'_n1'] = xr.zeros_like(arr)
                    template[stat+'_n2'] = xr.zeros_like(arr)
                    template[stat+'_n3'] = xr.zeros_like(arr)
                else:
                     template[stat] = xr.zeros_like(arr)
        
        # Ensure the time chunk is set to -1 (whole time axis in one chunk)
        da=da.chunk({'time':-1})
        
        # Apply the statistics function across chunks, using `template` to
        # describe the expected output
        ds = xr.map_blocks(
                calc_temporal_stats,
                da,
                kwargs={"stats": stats},
                template=template)

In [None]:
# NOTE(review): `z` is not assigned anywhere in the visible notebook, so this
# cell raises NameError on a fresh "Restart & Run All" — presumably left over
# from an earlier session; confirm, then define `z` or remove the cell.
z

In [None]:
# def poly_fit(time, data, degree):
    
#     pfit = np.polyfit(time, data, degree) 
    
#     return np.transpose(np.polyval(pfit,time))

# def poly_fit_smooth(time, data, degree, n_pts):
#         """
#         """
    
#         time_smooth_inds = np.linspace(0, len(time), n_pts)
#         time_smooth = np.interp(time_smooth_inds, np.arange(len(time)), time)

#         data_smooth = np.array([np.array([coef * (x_val ** current_degree) for
#                                 coef, current_degree in zip(np.polyfit(time, data, degree),
#                                 range(degree, -1, -1))]).sum() for x_val in time_smooth])

#         return data_smooth

# def xr_polyfit(doy,
#                da,
#                degree,
#                interp_multiplier=1):    
    
#     # Fit polynomial curve to observed data points
#     if interp_multiplier==1:
#         print('Fitting polynomial curve to existing observations')
#         pfit = xr.apply_ufunc(
#             poly_fit,
#             doy,
#             da, 
#             kwargs={'degree':degree},
#             input_core_dims=[["time"], ["time"]], 
#             output_core_dims=[['time']],
#             vectorize=True,  
#             dask="parallelized",
#             output_dtypes=[da.dtype],
#         )
    
#     if interp_multiplier > 1:
#         print("Fitting polynomial curve to "+str(len(doy)*interp_multiplier)+
#                                                       " interpolated points")
#         pfit = xr.apply_ufunc(
#             poly_fit_smooth,  # The function
#             doy,# time
#             da,#.chunk({'time': -1}), #the data
#             kwargs={'degree':degree, 'n_pts':len(doy)*interp_multiplier},
#             input_core_dims=[["time"], ["time"]], 
#             output_core_dims=[['new_time']], 
#             output_sizes = ({'new_time':len(doy)*interp_multiplier}),
#             exclude_dims=set(("time",)),
#             vectorize=True, 
#             dask="parallelized",
#             output_dtypes=[da.dtype],
#         ).rename({'new_time':'time'})
    
#         # Map 'dayofyear' onto interpolated time dim
#         time_smooth_inds = np.linspace(0, len(doy), len(doy)*interp_multiplier)
#         new_datetimes = np.interp(time_smooth_inds, np.arange(len(doy)), doy)
#         pfit = pfit.assign_coords({'time':new_datetimes})
    
#     return pfit

# # da=xr_polyfit(dayofyear=dayofyear, 
# #               da=da,
# #               degree=degree,
# #               interp_multiplier=interp_multiplier)

In [None]:
# #set up figure
# fig, ax = plt.subplots(nrows=5,ncols=2,figsize=(18,25), sharex=True, sharey=True)

# #start of season
# temp_stats.discordance.plot(ax=ax[0,0])
# ax[0,0].set_title('discordance')
# temp_stats.f_std.plot(ax=ax[0,1])
# ax[0,1].set_title('f_std')

# #peak of season
# temp_stats.f_mean.plot(ax=ax[1,0])
# ax[1,0].set_title('f_mean')
# phen.f_median.plot(ax=ax[1,1])
# ax[1,1].set_title('f_median')

# #end of season
# temp_stats.mean_change.plot(ax=ax[2,0])
# ax[2,0].set_title('mean_change')
# phen.med_change.plot(ax=ax[2,1])
# ax[2,1].set_title('med_change')

# #Length of Season
# temp_stats.abs_change.plot(ax=ax[3,0])
# ax[3,0].set_title('abs_change');

# #Amplitude
# temp_stats.complexity.plot(ax=ax[3,1])
# ax[3,1].set_title('complexity')

# #rate of growth
# temp_stats.central_diff.plot(ax=ax[4,0])
# ax[4,0].set_title('central_diff')

# #rate of Senescence
# temp_stats.num_peaks.plot(ax=ax[4,1])
# ax[4,1].set_title('num_peaks');

# plt.tight_layout();