# Extract Time Series Sentinel-2 data
## from Digital Earth Australia (DEA) via STAC

In [1]:
# Print the version of the `python` executable found on the shell PATH.
!python --version

Python 3.8.13


In [2]:
import os
import datetime
import numpy as np
import matplotlib.pyplot as plt
import folium
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon

import pystac_client
import odc.stac
# odc-stac library downloads DEA datasets stored in AWS
# when external to AWS (like outside DEA sandbox), AWS signed requests must be disabled
os.environ['AWS_NO_SIGN_REQUEST'] = 'YES'

from datacube.utils.geometry import CRS, Geometry, BoundingBox
from datacube.model import GridSpec

# My helper class
from helperfunctions import begin_timer, end_timer, saveDataset, loadDataset
from dea_helperfunctions import DEA_HelperFunctions

from dea_tools.plotting import rgb
from dea_tools.bandindices import calculate_indices

### Constants

In [7]:
# Configure helper functions and output file naming.
# Output files are written to basepath + filespecifier (+ optional suffix) + fileextn.
dea_fns = DEA_HelperFunctions()
basepath = "../datasets/Sentinel2withIndices_"
fileextn = ".pkl"

filespecifier = "PortGawlerBeachSA_2020to2022_Mar"

# Survey area origin point, given as [longitude, latitude] in degrees
survey_origin = [138.43196647747274, -34.62929501472954 ] # Port Gawler Beach, SA
# Time windows to extract, each given as [start date, end date] in YYYY-MM-DD format
timebands = [["2020-03-01","2020-03-05"], # Mar 2020
             ["2021-03-01","2021-03-05"], # Mar 2021
             ["2022-03-01","2022-03-05"]] # Mar 2022

# Coordinate Reference Systems (CRS)
# !! ENSURE YOU ARE USING A CONSISTENT CRS WHEN PLOTTING AREAS OR PERFORMING GEOMETRIC OPERATIONS.
epsg4326 = 'epsg:4326'            # EPSG:4326 | WGS84 latitude-longitude CRS | in degrees of latitude and longitude
epsg3577 = 'epsg:3577'            # EPSG:3577 | GDA94 / Australian Albers projection | in metres from CRS centroid

# Survey Area (Region of Interest)
pixel_resolution = (-10,10)       # Each pixel is of 10m x 10m resolution
survey_tilesize = (5120,5120)     # Size of each tile in metres
survey_tiles = (10,5)              # Number of horizontal and vertical tiles

# Satellite datasets
collections = ["s2a_ard_granule","s2b_ard_granule"] # Sentinel-2A and 2B MSI Definitive ARD - NBART and Pixel Quality
bands = ("nbart_blue", "nbart_green", "nbart_red", "nbart_nir_1") # Satellite bands to load

# The following band indices are added to the datasets:
#        'NDVI' (Normalised Difference Vegetation Index, Rouse 1973)
#        'NDWI' (Normalised Difference Water Index, McFeeters 1996)
#        'kNDVI' (Non-linear Normalised Difference Vegetation Index, Camps-Valls et al. 2021)
#         Note: kNDVI is more resistant to saturation, bias, and complex phenological cycles
#               and shows enhanced robustness to noise and stability across spatial and temporal scales.
additional_bands = ['NDVI','kNDVI','NDWI']

### Define and Display Survey Area Grid Tiles

In [8]:
# Subdivide the survey area into a grid of tile polygons
surveyarea_polygons = dea_fns.calc_surveyarea_polygons(
    origin=survey_origin,
    tileresolution=survey_tilesize,
    numtiles=survey_tiles,
)

# Build the GeoDataFrame in a single constructor call instead of enlarging an
# empty frame one row at a time with .loc: this gives a proper geometry column
# with the CRS attached and avoids relying on dtype inference during enlargement.
# Tiles are numbered from 1, and the tile number doubles as the row index.
tile_geometries = list(surveyarea_polygons)
tile_numbers = list(range(1, len(tile_geometries) + 1))
gdf_surveyarea = gpd.GeoDataFrame(
    {"tile": tile_numbers, "geometry": tile_geometries},
    index=tile_numbers,
    crs=epsg4326,
)

# Save the tile polygons to a pickle file
saveDataset(basepath + filespecifier + "_polygons" + fileextn, gdf_surveyarea)

# Display the first few tile areas
gdf_surveyarea.head(5)

Unnamed: 0,tile,geometry
1,1,"POLYGON ((138.43197 -34.62930, 138.43197 -34.5..."
2,2,"POLYGON ((138.48793 -34.62930, 138.48793 -34.5..."
3,3,"POLYGON ((138.54388 -34.62930, 138.54388 -34.5..."
4,4,"POLYGON ((138.59984 -34.62930, 138.59984 -34.5..."
5,5,"POLYGON ((138.65580 -34.62930, 138.65580 -34.5..."


In [9]:
map_zoom_level = 8

# The map location is given as [latitude, longitude], whereas survey_origin is
# stored as [longitude, latitude]. Take a reversed COPY: list.reverse() returns
# None and reverses the list in place, which would leave map_centroid unset and
# silently swap the coordinates of the shared survey_origin constant.
map_centroid = survey_origin[::-1]

m = gdf_surveyarea.explore(
     column="tile",             # colour each polygon by its tile number
     tooltip=["tile"],          # show the tile number on hover
     popup=True,                # show all values in popup (on click)
     tiles="OpenStreetMap",     # base map tiles
     cmap="Wistia",             # matplotlib colormap used for the tile colours
     legend=False,
     location=map_centroid,     # initial map centre
     zoom_start=map_zoom_level, # initial zoom level
     name='Survey Area Tiles')  # layer name shown in the layer control

folium.TileLayer('CartoDB positron', control=True).add_to(m)  # use folium to add alternative tiles
folium.LayerControl().add_to(m)  # use folium to add layer control

m  # show map

### Open Catalog & Extract images from Survey area

In [10]:
catalog = pystac_client.Client.open('https://explorer.sandbox.dea.ga.gov.au/stac')

stime = begin_timer()

# For every time band, visit every survey-area tile polygon, search the STAC
# catalog for matching scenes and load the requested bands for that tile.
# Each successful load is recorded as:
#   [time band, year, month, number of STAC items, tile number, polygon, dataset]

surveyarea_ds = []

for timeband in timebands:
    band_start, band_end = timeband[0], timeband[1]

    # Year and month of the start of the time band.
    # Named year/month (not y/m) so the folium map held in `m` is not overwritten.
    band_start_date = datetime.datetime.strptime(band_start, "%Y-%m-%d")
    year = band_start_date.year
    month = band_start_date.month

    # Tile numbers start at 1 and restart for every time band
    for tile_num, tile_polygon in enumerate(surveyarea_polygons, start=1):
        bb = dea_fns.convert_poly2bbox(tile_polygon)

        # Search the STAC catalog for scenes intersecting this tile in the time band
        query = catalog.search(bbox=bb, collections=collections, datetime=f"{band_start}/{band_end}")
        # NOTE(review): newer pystac-client releases deprecate get_items() in favour
        # of items() - confirm against the installed version before upgrading.
        items = list(query.get_items())

        print("-----------------------------------------")
        print(f"Time band: {band_start} to {band_end}")
        print(f"Tile #: {tile_num}")
        print(f"BoundingBox: {bb}")
        print(f"Found: {len(items):d} datasets")

        # Tiles with no matching scenes are skipped and not recorded
        if items:
            ds = odc.stac.load(items,
                               bands=bands,
                               crs=epsg3577,
                               resolution=pixel_resolution,
                               chunks={},
                               groupby="solar_day",
                               bbox=bb)

            # Add the band indices listed in additional_bands to the dataset in place
            calculate_indices(ds=ds, index=additional_bands, collection='ga_s2_1', inplace=True)
            surveyarea_ds.append([timeband, year, month, len(items), tile_num, tile_polygon, ds])

end_timer(stime)

# Save the collected datasets to a pickle file
saveDataset(basepath + filespecifier + fileextn, surveyarea_ds)

[ Begin timer at Sun May 15 13:58:06 2022 ]
-----------------------------------------
Time band: 2020-03-01 to 2020-03-05
Tile #: 1
BoundingBox: BoundingBox(left=138.43196647747274, bottom=-34.62929501472954, right=138.48792503199041, top=-34.583249748506496)
Found: 2 datasets
-----------------------------------------
Time band: 2020-03-01 to 2020-03-05
Tile #: 2
BoundingBox: BoundingBox(left=138.48792503199041, bottom=-34.62929501472954, right=138.54388358650806, top=-34.583249748506496)
Found: 2 datasets
-----------------------------------------
Time band: 2020-03-01 to 2020-03-05
Tile #: 3
BoundingBox: BoundingBox(left=138.54388358650806, bottom=-34.62929501472954, right=138.59984214102573, top=-34.583249748506496)
Found: 2 datasets
-----------------------------------------
Time band: 2020-03-01 to 2020-03-05
Tile #: 4
BoundingBox: BoundingBox(left=138.59984214102573, bottom=-34.62929501472954, right=138.6558006955434, top=-34.583249748506496)
Found: 2 datasets
---------------------

-----------------------------------------
Time band: 2020-03-01 to 2020-03-05
Tile #: 37
BoundingBox: BoundingBox(left=138.76771780457872, bottom=-34.491159216060424, right=138.8236763590964, top=-34.445113949837385)
Found: 4 datasets
-----------------------------------------
Time band: 2020-03-01 to 2020-03-05
Tile #: 38
BoundingBox: BoundingBox(left=138.8236763590964, bottom=-34.491159216060424, right=138.87963491361407, top=-34.445113949837385)
Found: 4 datasets
-----------------------------------------
Time band: 2020-03-01 to 2020-03-05
Tile #: 39
BoundingBox: BoundingBox(left=138.87963491361407, bottom=-34.491159216060424, right=138.93559346813174, top=-34.445113949837385)
Found: 4 datasets
-----------------------------------------
Time band: 2020-03-01 to 2020-03-05
Tile #: 40
BoundingBox: BoundingBox(left=138.93559346813174, bottom=-34.491159216060424, right=138.99155202264942, top=-34.445113949837385)
Found: 2 datasets
-----------------------------------------
Time band: 2020-

-----------------------------------------
Time band: 2021-03-01 to 2021-03-05
Tile #: 23
BoundingBox: BoundingBox(left=138.54388358650806, bottom=-34.537204482283464, right=138.59984214102573, top=-34.491159216060424)
Found: 2 datasets
-----------------------------------------
Time band: 2021-03-01 to 2021-03-05
Tile #: 24
BoundingBox: BoundingBox(left=138.59984214102573, bottom=-34.537204482283464, right=138.6558006955434, top=-34.491159216060424)
Found: 2 datasets
-----------------------------------------
Time band: 2021-03-01 to 2021-03-05
Tile #: 25
BoundingBox: BoundingBox(left=138.6558006955434, bottom=-34.537204482283464, right=138.71175925006108, top=-34.491159216060424)
Found: 2 datasets
-----------------------------------------
Time band: 2021-03-01 to 2021-03-05
Tile #: 26
BoundingBox: BoundingBox(left=138.71175925006108, bottom=-34.537204482283464, right=138.76771780457872, top=-34.491159216060424)
Found: 2 datasets
-----------------------------------------
Time band: 2021-

-----------------------------------------
Time band: 2022-03-01 to 2022-03-05
Tile #: 8
BoundingBox: BoundingBox(left=138.8236763590964, bottom=-34.62929501472954, right=138.87963491361407, top=-34.583249748506496)
Found: 4 datasets
-----------------------------------------
Time band: 2022-03-01 to 2022-03-05
Tile #: 9
BoundingBox: BoundingBox(left=138.87963491361407, bottom=-34.62929501472954, right=138.93559346813174, top=-34.583249748506496)
Found: 4 datasets
-----------------------------------------
Time band: 2022-03-01 to 2022-03-05
Tile #: 10
BoundingBox: BoundingBox(left=138.93559346813174, bottom=-34.62929501472954, right=138.99155202264942, top=-34.583249748506496)
Found: 2 datasets
-----------------------------------------
Time band: 2022-03-01 to 2022-03-05
Tile #: 11
BoundingBox: BoundingBox(left=138.43196647747274, bottom=-34.583249748506496, right=138.48792503199041, top=-34.537204482283464)
Found: 2 datasets
-----------------------------------------
Time band: 2022-03-0

-----------------------------------------
Time band: 2022-03-01 to 2022-03-05
Tile #: 44
BoundingBox: BoundingBox(left=138.59984214102573, bottom=-34.445113949837385, right=138.6558006955434, top=-34.399068683614345)
Found: 4 datasets
-----------------------------------------
Time band: 2022-03-01 to 2022-03-05
Tile #: 45
BoundingBox: BoundingBox(left=138.6558006955434, bottom=-34.445113949837385, right=138.71175925006108, top=-34.399068683614345)
Found: 4 datasets
-----------------------------------------
Time band: 2022-03-01 to 2022-03-05
Tile #: 46
BoundingBox: BoundingBox(left=138.71175925006108, bottom=-34.445113949837385, right=138.76771780457872, top=-34.399068683614345)
Found: 4 datasets
-----------------------------------------
Time band: 2022-03-01 to 2022-03-05
Tile #: 47
BoundingBox: BoundingBox(left=138.76771780457872, bottom=-34.445113949837385, right=138.8236763590964, top=-34.399068683614345)
Found: 6 datasets
-----------------------------------------
Time band: 2022-0