# Extract Time Series Sentinel-2 data
## from Digital Earth Australia (DEA) via STAC

In [9]:
!python --version

Python 3.8.13


In [10]:
import os
import datetime
import numpy as np
import matplotlib.pyplot as plt
import folium
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon

import pystac_client
import odc.stac
# odc-stac library downloads DEA datasets stored in AWS
# when external to AWS (like outside DEA sandbox), AWS signed requests must be disabled
os.environ['AWS_NO_SIGN_REQUEST'] = 'YES'

from datacube.utils.geometry import point, box, CRS, Geometry, Coordinate, BoundingBox, GeoBox
from datacube.model import GridSpec
from affine import Affine

# My helper class
from helperfunctions import begin_timer, end_timer, saveDataset, loadDataset
from dea_helperfunctions import DEA_HelperFunctions

from dea_tools.plotting import rgb
from dea_tools.bandindices import calculate_indices

# Set some configurations for displaying tables nicely
pd.set_option("display.max_colwidth", 200)
pd.set_option("display.max_rows", None)

### Constants

In [11]:
# configure helper functions
dea_fns = DEA_HelperFunctions()
basepath = "../datasets/Sentinel2withIndices_"
fileextn = ".pkl"

filespecifier = "PortGawlerBeachSA_2022_Mar_10x5_"

# Survey Area origin point
survey_origin = [138.43196647747274, -34.62929501472954 ] # PortGawlerBeach, SA
timebands = [["2022-03-01","2022-03-05"]] # Mar 2022

# Coordinate Reference Systems (CRS)
# !! ENSURE YOU USING CONSISTENT CRS WHEN PLOTTING AREAS OR PERFORMING GEOMETRIC OPERATIONS.
epsg4326 = 'epsg:4326'            # EPSG:4326 | WGS84 latitude-longitude CRS | in Degrees of Latitude and Longitude
epsg3577 = 'epsg:3577'            # EPSG:3577 | GDA94 / Australian Albers projection | in Metres from CRS Centroid

# Survey Area (Region of Interest)
resolution_int = 10 # where each pixel is of 10m x 10m resolution
resolution = (-resolution_int,resolution_int) # where each pixel is of 10m x 10m resolution
survey_tilesize = (5120,5120)     # size of each tile in metres
survey_tiles = (10,5)              # number of horizontal and vertical tiles change to 10,5 for final run

survey_tilesize_pixels = tuple(int(ti/resolution_int) for ti in survey_tilesize)
print(survey_tilesize_pixels)

# Satellite datasets
collections = ["s2a_ard_granule","s2b_ard_granule"] # Sentinel-2A and 2B MSI Definitive ARD - NBART and Pixel Quality
bands = ("nbart_blue", "nbart_green", "nbart_red", "nbart_nir_1") # Satellite Bands

# The following band indices are added to the datasets
#        'NDVI' (Normalised Difference Vegetation Index, Rouse 1973)
#        'NDWI' (Normalised Difference Water Index, McFeeters 1996)
#        'kNDVI' (Non-linear Normalised Difference Vegetation Index, Camps-Valls et al. 2021)
#         Note: kNDVI is more resistant to saturation, bias, and complex phenological cycles
#               and shows enhanced robustness to noise and stability across spatial and temporal scales.
additional_bands = ['NDVI','kNDVI','NDWI']

(512, 512)


### Define and Display Survey Area Grid Tiles

In [54]:
# Subdivide survey area into tiles
surveyarea_polygons = dea_fns.calc_surveyarea_polygons( origin=survey_origin,
                                                       tileresolution=survey_tilesize,
                                                       numtiles=survey_tiles)

gdf_surveyarea = gpd.GeoDataFrame(columns=['tile','geometry'],crs=epsg4326)
gdf_surveyarea['geometry'] = surveyarea_polygons
gdf_surveyarea['tile'] = range(1, len(gdf_surveyarea) + 1)

# Save data set to pickle file
saveDataset(basepath + filespecifier + "_tilepolygons" + fileextn, gdf_surveyarea)

#gdf_surveyarea.to_crs(epsg3577, inplace=True)

# display Tile areas
gdf_surveyarea.head(5)

Unnamed: 0,tile,geometry
0,1,"POLYGON ((-34.62930 138.43197, -34.62930 41.52199, -34.69084 41.52199, -34.69084 138.43197, -34.62930 138.43197))"
1,2,"POLYGON ((-34.69084 138.43197, -34.69084 41.52199, -34.75238 41.52199, -34.75238 138.43197, -34.69084 138.43197))"
2,3,"POLYGON ((-34.75238 138.43197, -34.75238 41.52199, -34.81393 41.52199, -34.81393 138.43197, -34.75238 138.43197))"
3,4,"POLYGON ((-34.81393 138.43197, -34.81393 41.52199, -34.87547 41.52199, -34.87547 138.43197, -34.81393 138.43197))"
4,5,"POLYGON ((-34.87547 138.43197, -34.87547 41.52199, -34.93701 41.52199, -34.93701 138.43197, -34.87547 138.43197))"


In [55]:
map_zoom_level = 9
map_centroid = survey_origin.reverse() # swap lat and long

m = gdf_surveyarea.explore(
     column="tile", # make choropleth based on Commodity description
     tooltip=["tile"], popup=True, # show all values in popup (on click)
     tiles="OpenStreetMap", # use "CartoDB positron" tiles
     cmap="Wistia", # use "Set1" matplotlib colormap
     legend=False,
     location=map_centroid,
     zoom_start = map_zoom_level,
     name='Survey Area Tiles') # use black outline

folium.TileLayer('CartoDB positron', control=True).add_to(m)  # use folium to add alternative tiles
folium.LayerControl().add_to(m)  # use folium to add layer control

m  # show map

### Open Catalog & Extract images from Survey area

In [6]:
catalog = pystac_client.Client.open('https://explorer.sandbox.dea.ga.gov.au/stac')

stime = begin_timer()

# Iterate through each GeoDataFrame polygon
# Extract bands for each polygon for each time period specified

surveyarea_ds = []

for t in timebands:
    i=0
    datem = datetime. datetime. strptime(t[0], "%Y-%m-%d")
    y = datem.year # year
    m = datem.month # month
    for p in surveyarea_polygons:
        i+=1
        bb_4326 = dea_fns.convert_poly2bbox(p)
        b_4326 = dea_fns.convert_poly2box(p)
        b = b_4326.to_crs(epsg3577)
        bb = b.boundingbox
        
        query = catalog.search( bbox=bb_4326, collections=collections, datetime=f"{t[0]}/{t[1]}" )
        items = list(query.get_items()) # Search the STAC catalog
        
        print(f"Time band: {t[0]} to {t[1]} | Tile #: {i} | Found: {len(items):d} datasets")
        print(bb_4326)
        print("---------------------------------------------------------------")
        
        if len(items) > 0:
            surveyarea_affine = Affine(resolution[1], 0.0, bb.left, 0.0, resolution[0], bb.bottom)
            ds = odc.stac.load( items,
                                bands=bands,
                                geobox=GeoBox(survey_tilesize_pixels[0], # width in pixels
                                              survey_tilesize_pixels[1], # height in pixels
                                              surveyarea_affine,
                                              epsg3577), # Output Coordinate Reference System (CRS)
                                groupby="solar_day")

            calculate_indices(ds=ds, index=additional_bands, collection='ga_s2_1', inplace=True)
            surveyarea_ds.append([t, y, m, len(items), i, bb, ds])      

end_timer(stime)

# Save data set to pickle file
saveDataset(basepath + filespecifier + fileextn, surveyarea_ds)

[ Begin timer at Thu May 19 10:25:36 2022 ]
Time band: 2022-03-01 to 2022-03-05 | Tile #: 1 | Found: 2 datasets
BoundingBox(left=138.43196647747274, bottom=-34.62929501472954, right=138.48792503199041, top=-34.583249748506496)
---------------------------------------------------------------
Time band: 2022-03-01 to 2022-03-05 | Tile #: 2 | Found: 2 datasets
BoundingBox(left=138.48792503199041, bottom=-34.62929501472954, right=138.54388358650806, top=-34.583249748506496)
---------------------------------------------------------------
Time band: 2022-03-01 to 2022-03-05 | Tile #: 3 | Found: 2 datasets
BoundingBox(left=138.54388358650806, bottom=-34.62929501472954, right=138.59984214102573, top=-34.583249748506496)
---------------------------------------------------------------
Time band: 2022-03-01 to 2022-03-05 | Tile #: 4 | Found: 2 datasets
BoundingBox(left=138.59984214102573, bottom=-34.62929501472954, right=138.6558006955434, top=-34.583249748506496)
---------------------------------

Time band: 2022-03-01 to 2022-03-05 | Tile #: 34 | Found: 2 datasets
BoundingBox(left=138.59984214102573, bottom=-34.491159216060424, right=138.6558006955434, top=-34.445113949837385)
---------------------------------------------------------------
Time band: 2022-03-01 to 2022-03-05 | Tile #: 35 | Found: 2 datasets
BoundingBox(left=138.6558006955434, bottom=-34.491159216060424, right=138.71175925006108, top=-34.445113949837385)
---------------------------------------------------------------
Time band: 2022-03-01 to 2022-03-05 | Tile #: 36 | Found: 2 datasets
BoundingBox(left=138.71175925006108, bottom=-34.491159216060424, right=138.76771780457872, top=-34.445113949837385)
---------------------------------------------------------------
Time band: 2022-03-01 to 2022-03-05 | Tile #: 37 | Found: 4 datasets
BoundingBox(left=138.76771780457872, bottom=-34.491159216060424, right=138.8236763590964, top=-34.445113949837385)
---------------------------------------------------------------
Time ba

In [7]:
print(ds)        # show data structure of last extract

<xarray.Dataset>
Dimensions:      (time: 2, y: 512, x: 512)
Coordinates:
  * time         (time) datetime64[ns] 2022-03-02T00:46:34.852402 2022-03-05T...
  * y            (y) float64 -3.778e+06 -3.778e+06 ... -3.783e+06 -3.783e+06
  * x            (x) float64 6.344e+05 6.344e+05 ... 6.395e+05 6.395e+05
    spatial_ref  int32 3577
Data variables:
    nbart_blue   (time, y, x) uint16 530 551 599 642 681 ... 5210 4928 4372 3860
    nbart_green  (time, y, x) uint16 695 727 762 800 826 ... 5155 4676 4080 3762
    nbart_red    (time, y, x) uint16 773 813 875 941 ... 5262 4691 4072 3821
    nbart_nir_1  (time, y, x) uint16 2258 2220 2214 2185 ... 6739 6616 6130 5772
    NDVI         (time, y, x) float64 0.4899 0.4639 0.4335 ... 0.2017 0.2034
    kNDVI        (time, y, x) float64 0.2355 0.2119 0.1857 ... 0.04067 0.04134
    NDWI         (time, y, x) float64 -0.5293 -0.5066 ... -0.2008 -0.2108
Attributes:
    crs:           epsg:3577
    grid_mapping:  spatial_ref
