# Extract Time Series Sentinel-2 data
## from Digital Earth Australia (DEA) via STAC

In [1]:
# Record the Python version used for this run.
# Note: `!python` runs whichever `python` the shell finds on PATH, which may not be the kernel's own interpreter.
!python --version

Python 3.8.13


In [2]:
import os
import sys
import datetime
import numpy as np
import matplotlib.pyplot as plt
import folium
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon

import pystac_client
import odc.stac
# The DEA datasets fetched through odc-stac are stored in AWS.
# When this notebook runs outside AWS (for example outside the DEA sandbox),
# AWS request signing has to be switched off before any data is read.
os.environ.update({'AWS_NO_SIGN_REQUEST': 'YES'})

from datacube.utils.geometry import point, box, CRS, Geometry, Coordinate, BoundingBox, GeoBox
from datacube.model import GridSpec
from affine import Affine

# My helper class
from helperfunctions import begin_timer, end_timer, saveDataset, loadDataset
from dea_helperfunctions import DEA_HelperFunctions

from dea_tools.plotting import rgb
from dea_tools.bandindices import calculate_indices

# Table display preferences: show up to 200 characters per column value
# and remove the cap on the number of rows rendered.
for _opt_name, _opt_value in (("display.max_colwidth", 200), ("display.max_rows", None)):
    pd.set_option(_opt_name, _opt_value)

### Constants

In [3]:
# Instantiate the project-local DEA helper object. Later cells use it to build
# the survey-area tile polygons and to convert each tile polygon into a
# bounding box / box geometry.
dea_fns = DEA_HelperFunctions()
# Output naming: files written by this notebook are named
#   basepath + filespecifier + <optional suffix> + extension
basepath = "../datasets/S2I_"
fileextn = ".pkl"

# Survey Area origin point, ordered [longitude, latitude] in degrees
survey_origin = [138.43196647747274, -34.62929501472954]  # PortGawlerBeach, SA

# Available extraction runs, keyed by the file specifier used in output names.
# Each value is a list of [start, end] dates ("YYYY-MM-DD"); every pair is
# passed to the STAC search as one "start/end" datetime interval.
# Keeping both in one mapping guarantees the output files are always labelled
# with the run that actually produced them.
run_timebands = {
    # Feb & Mar 2022 - First 2 days (Test)
    "PortGawlerBeachSA_Test_10x5": [
        ["2022-02-01", "2022-02-02"],
        ["2022-03-01", "2022-03-02"],
    ],
    # Jan to Dec 2016 (First 7 days)
    "PortGawlerBeachSA_2016First7daysEachMonth_10x5": [
        [f"2016-{month:02d}-01", f"2016-{month:02d}-07"] for month in range(1, 13)
    ],
    # Jan to Mar 2021
    "PortGawlerBeachSA_Jan2021toMar2021_10x5": [
        ["2021-01-01", "2021-01-31"],
        ["2021-02-01", "2021-02-28"],
        ["2021-03-01", "2021-03-31"],
    ],
}

# Select the run here: this single setting drives both the file names and the time bands.
filespecifier = "PortGawlerBeachSA_2016First7daysEachMonth_10x5"
timebands = run_timebands[filespecifier]
# Coordinate Reference Systems (CRS)
# !! ENSURE YOU ARE USING A CONSISTENT CRS WHEN PLOTTING AREAS OR PERFORMING GEOMETRIC OPERATIONS.
#   epsg4326 -> EPSG:4326 | WGS84 latitude-longitude CRS | in degrees of latitude and longitude
#   epsg3577 -> EPSG:3577 | GDA94 / Australian Albers projection | in metres
epsg4326, epsg3577 = 'epsg:4326', 'epsg:3577'

# Survey Area (Region of Interest)
# Pixel edge length in metres: each pixel covers 10m x 10m.
resolution_int = 10
# Signed pixel size as (y, x): index 0 is used as the y step and index 1 as the x step
# when the output grid transform is built.
resolution = (-resolution_int, resolution_int)
# Size of each tile in metres.
survey_tilesize = (5120, 5120)
# Number of horizontal and vertical tiles.
survey_tiles = (10, 5)

# Tile size expressed in pixels at the chosen resolution.
survey_tilesize_pixels = tuple(map(lambda side_m: int(side_m / resolution_int), survey_tilesize))
print(survey_tilesize_pixels)

# Satellite datasets
# Sentinel-2A and 2B MSI Definitive ARD - NBART and Pixel Quality
collections = [
    "s2a_ard_granule",
    "s2b_ard_granule",
]

# Satellite bands requested for every tile
bands = (
    "nbart_blue",
    "nbart_green",
    "nbart_red",
    "nbart_nir_1",
    "fmask",
)

# Band indices added to each loaded dataset:
#   'NDVI'  - Normalised Difference Vegetation Index (Rouse 1973)
#   'kNDVI' - Non-linear Normalised Difference Vegetation Index (Camps-Valls et al. 2021)
#             Note: kNDVI is more resistant to saturation, bias, and complex phenological
#             cycles, and shows enhanced robustness to noise and stability across
#             spatial and temporal scales.
#   'NDWI'  - Normalised Difference Water Index (McFeeters 1996)
additional_bands = [
    'NDVI',
    'kNDVI',
    'NDWI',
]

(512, 512)


### Define and Display Survey Area Grid Tiles

In [4]:
# Split the survey area into a grid of tile polygons
surveyarea_polygons = dea_fns.calc_surveyarea_polygons(
    origin=survey_origin,
    tileresolution=survey_tilesize,
    numtiles=survey_tiles,
)

# Collect the polygons in a GeoDataFrame declared as EPSG:4326 and number the tiles from 1
gdf_surveyarea = gpd.GeoDataFrame(columns=["tile", "geometry"], crs=epsg4326)
gdf_surveyarea["geometry"] = surveyarea_polygons
gdf_surveyarea["tile"] = range(1, len(gdf_surveyarea) + 1)

# Reproject the tile table to EPSG:3577 (reassigned rather than modified in place)
gdf_surveyarea = gdf_surveyarea.to_crs(epsg3577)

# Persist the tile polygons to a pickle file
tilepolygons_path = f"{basepath}{filespecifier}_tilepolygons{fileextn}"
saveDataset(tilepolygons_path, gdf_surveyarea)

# Show the last five tiles
gdf_surveyarea.tail(5)

Unnamed: 0,tile,geometry
45,46,"POLYGON ((613956.893 -3776478.615, 614226.250 -3771356.218, 619342.523 -3771626.377, 619070.922 -3776748.655, 613956.893 -3776478.615))"
46,47,"POLYGON ((619070.922 -3776748.655, 619342.523 -3771626.377, 624458.676 -3771898.777, 624184.832 -3777020.935, 619070.922 -3776748.655))"
47,48,"POLYGON ((624184.832 -3777020.935, 624458.676 -3771898.777, 629574.710 -3772173.418, 629298.622 -3777295.456, 624184.832 -3777020.935))"
48,49,"POLYGON ((629298.622 -3777295.456, 629574.710 -3772173.418, 634690.623 -3772450.300, 634412.292 -3777572.216, 629298.622 -3777295.456))"
49,50,"POLYGON ((634412.292 -3777572.216, 634690.623 -3772450.300, 639806.415 -3772729.422, 639525.840 -3777851.216, 634412.292 -3777572.216))"


In [5]:
map_zoom_level = 9
# The map location is given as [latitude, longitude], whereas survey_origin is
# [longitude, latitude]. Build a reversed *copy*: list.reverse() returns None
# (leaving the map without an explicit centre) and also flips survey_origin in
# place, which makes re-running this or earlier cells give different results.
map_centroid = survey_origin[::-1]

m = gdf_surveyarea.explore(
     column="tile",                 # colour each polygon by its tile number
     tooltip=["tile"], popup=True,  # tile number on hover; all values in popup (on click)
     tiles="OpenStreetMap",         # base map tiles
     cmap="Wistia",                 # matplotlib colormap used for the tile colours
     legend=False,
     location=map_centroid,         # centre the map on the survey origin
     zoom_start=map_zoom_level,
     name='Survey Area Tiles')      # layer name shown in the layer control

folium.TileLayer('CartoDB positron', control=True).add_to(m)  # use folium to add alternative tiles
folium.LayerControl().add_to(m)  # use folium to add layer control

m  # show map

### Open Catalog & Extract images from Survey area

In [6]:
catalog = pystac_client.Client.open('https://explorer.sandbox.dea.ga.gov.au/stac')

# For each time band, visit every survey tile polygon, search the STAC catalog
# for matching Sentinel-2 items and, when any are found, load the requested
# bands onto a fixed-size EPSG:3577 grid and add the band indices.
# Every loaded tile is appended to surveyarea_ds as:
#   [timeband, year, month, number of STAC items, tile number, bounding box (EPSG:3577), dataset]

surveyarea_ds = []
total_durn = 0
total_numdatasets = 0

for timeband in timebands:
    timeband_numdatasets = 0
    stime = begin_timer(info=False)
    # Year and month are taken from the start date of the time band.
    # (Named year/month so the folium map held in `m` is not overwritten.)
    band_start = datetime.datetime.strptime(timeband[0], "%Y-%m-%d")
    year = band_start.year
    month = band_start.month
    print(' ■', end='')
    for tile_num, tile_poly in enumerate(surveyarea_polygons, start=1):
        print('□', end='')
        # Search extent in EPSG:4326, and the same tile's bounding box in EPSG:3577
        bb_4326 = dea_fns.convert_poly2bbox(tile_poly)
        bb = dea_fns.convert_poly2box(tile_poly).to_crs(epsg3577).boundingbox

        # Search the STAC catalog
        # NOTE(review): newer pystac-client releases prefer `items()` over `get_items()` -
        # confirm against the installed version before switching.
        query = catalog.search(bbox=bb_4326, collections=collections, datetime=f"{timeband[0]}/{timeband[1]}")
        items = list(query.get_items())
        timeband_numdatasets += len(items)

        if not items:
            continue

        # Pixel (col, row) -> map (x, y) transform. The y pixel size (resolution[0]) is
        # negative, so rows run from north to south and the origin has to be the TOP-left
        # corner of the bounding box. Anchoring at bb.bottom would place the whole grid
        # south of the tile.
        # NOTE(review): the grid is a fixed survey_tilesize_pixels window; the reprojected
        # bounding box can be slightly larger than that - confirm the coverage is as intended.
        surveyarea_affine = Affine(resolution[1], 0.0, bb.left, 0.0, resolution[0], bb.top)
        ds = odc.stac.load( items,
                            bands=bands,
                            geobox=GeoBox(survey_tilesize_pixels[0], # width in pixels
                                          survey_tilesize_pixels[1], # height in pixels
                                          surveyarea_affine,
                                          epsg3577), # Output Coordinate Reference System (CRS)
                            groupby="solar_day")

        calculate_indices(ds=ds, index=additional_bands, collection='ga_s2_1', inplace=True)
        surveyarea_ds.append([timeband, year, month, len(items), tile_num, bb, ds])
    etime, durn = end_timer(stime, info=False)
    print(f' Duration(s): {durn:9.3f} | Total Dataset(s): {timeband_numdatasets:d}')
    total_durn += durn
    total_numdatasets += timeband_numdatasets

print(f'Total Duration(s): {total_durn:9.3f} | Total Dataset(s): {total_numdatasets:d}')

# Save statistics to txt file. Printing straight to the file handle avoids swapping
# sys.stdout, which would stay redirected if an error occurred part-way through.
with open(basepath + filespecifier + '_log.txt', 'w') as log_file:
    print(f'DEA_ExtractSentinel | {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}', file=log_file)
    print(f'Total Duration(s): {total_durn:9.3f} | Total Dataset(s): {total_numdatasets:d}', file=log_file)

# Save data set to pickle file
saveDataset(basepath + filespecifier + fileextn, surveyarea_ds)

 ■□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□ Duration(s):   627.943 | Total Dataset(s): 234
 ■□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□ Duration(s):   615.331 | Total Dataset(s): 234
 ■□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□ Duration(s):   606.101 | Total Dataset(s): 234
 ■□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□ Duration(s):   617.835 | Total Dataset(s): 234
 ■□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□ Duration(s):   632.330 | Total Dataset(s): 234
 ■□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□ Duration(s):   406.312 | Total Dataset(s): 156
 ■□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□ Duration(s):   419.234 | Total Dataset(s): 156
 ■□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□ Duration(s):   630.959 | Total Dataset(s): 234
 ■□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□ Duration(s):   586.436 | Total Dataset(s): 234
 ■□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□□ Duration(s):   351.624 | Total Dataset(s): 156


In [7]:
# `ds` is the variable left over from the extraction loop above, i.e. the
# dataset of the most recent tile for which STAC items were found.
ds        # show data structure of last extract