A notebook to query Sentinel-2 multispectral bands for pre-defined areas of interest (AOIs), calculate two-monthly geomedians and NDVI, and export the results as Cloud-Optimized GeoTIFF (COG) files. It can be useful for inspecting data locally and for preparing example areas for report writing.

In [1]:
%matplotlib inline
import os
import datacube
import warnings
import time
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from odc.algo import xr_geomedian
import xarray as xr
from joblib import load
from deafrica_tools.dask import create_local_dask_cluster
from deafrica_tools.datahandling import load_ard
from deafrica_tools.bandindices import calculate_indice
from datacube.utils.cog import write_cog

# --- Input file and coordinate reference systems ---
# GeoJSON holding the randomly selected small regions (AOIs)
lesotho_tiles_shp = 'Results/stratified_sampling_AOIs.geojson'
crs = 'epsg:4326'  # CRS of the query extents: WGS84
output_crs = 'epsg:32735'  # CRS requested for the loaded data: WGS84 / UTM Zone 35S

# --- AOI bounding boxes ---
# Read the AOI geometries, reproject them to the query CRS and take the
# per-feature bounds (minx, miny, maxx, maxy)
lesotho_tiles = gpd.read_file(lesotho_tiles_shp).to_crs(crs)
tile_bboxes = lesotho_tiles.bounds
print('tile boundaries for Lesotho: \n', tile_bboxes)

# --- Dask ---
# Start a local dask cluster with a single worker
create_local_dask_cluster(n_workers=1)

  _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)
  _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)
  _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)


ncpus = 62
tile boundaries for Lesotho: 
          minx       miny       maxx       maxy
0   27.635700 -29.477456  27.646070 -29.468382
1   28.847423 -29.636949  28.857913 -29.627784
2   28.130782 -28.910120  28.141136 -28.901010
3   28.102968 -28.898528  28.113318 -28.889419
4   27.557625 -29.382681  27.567979 -29.373612
..        ...        ...        ...        ...
85  29.460999 -29.928268  29.471572 -29.919058
86  29.482590 -30.455901  29.493226 -30.446687
87  28.046525 -30.684943  28.057061 -30.675836
88  29.462675 -29.582384  29.473209 -29.573175
89  27.846329 -30.699937  27.856849 -30.690845

[90 rows x 4 columns]
loaded random forest models:
 [RandomForestClassifier(max_samples=0.5, n_estimators=50, n_jobs=-1), RandomForestClassifier(max_samples=0.5, n_estimators=50, n_jobs=-1), RandomForestClassifier(max_samples=0.5, n_estimators=50, n_jobs=-1), RandomForestClassifier(max_samples=0.5, n_estimators=50, n_jobs=-1), RandomForestClassifier(max_samples=0.5, n_estimators=50, n_jobs=

0,1
Client  Scheduler: tcp://127.0.0.1:35007  Dashboard: /user/whusggliuqx@gmail.com/proxy/8787/status,Cluster  Workers: 1  Cores: 62  Memory: 512.40 GB


In [None]:
# define a function that builds the stacked geomedian feature layers for one query
def feature_layers(query):
    """Build stacked two-monthly Sentinel-2 geomedian layers for one query.

    Loads the ``s2_l2a`` product with ``load_ard``, appends an NDVI band,
    computes a geomedian for every two-month (``'2MS'``) resampling bin and
    then flattens the result so that every band / time-bin pair becomes its
    own variable named ``<band>_<k>``, where ``k`` is the zero-based position
    of the time bin.

    Parameters
    ----------
    query : dict
        Keyword arguments forwarded unchanged to ``load_ard`` (for example
        ``x``, ``y``, ``time``, ``measurements``, ``resolution``, ``crs``,
        ``output_crs`` and ``dask_chunks``).

    Returns
    -------
    xarray.Dataset
        One variable per band and time bin with the ``time`` dimension
        removed, ordered band by band (``blue_0, blue_1, ..., NDVI_0, ...``).
    """
    # Imported here because the notebook's import cell binds the name
    # ``calculate_indice``; without this the call below raises NameError.
    from deafrica_tools.bandindices import calculate_indices

    # connect to the datacube
    dc = datacube.Datacube(app='feature_layers')

    # query bands
    ds = load_ard(
        dc=dc,
        products=['s2_l2a'],
        group_by='solar_day',
        verbose=False,
        # mask_filters=[("opening", 2)],  # morphological opening by 2 pixels to remove small masked regions
        **query,
    )

    # append NDVI while keeping the original bands (drop=False)
    ds_index = calculate_indices(ds, index=['NDVI'], drop=False, satellite_mission='s2')
    del ds

    # calculate geomedians within each two-month interval
    ds_geomedian = ds_index.resample(time='2MS').map(xr_geomedian)
    del ds_index

    # stack multi-temporal measurements and rename them;
    # ``sizes`` is the mapping-style accessor (``dims[...]`` lookups are
    # deprecated in recent xarray releases)
    n_time = ds_geomedian.sizes['time']
    list_stack_measures = [
        ds_geomedian[band].isel(time=k).rename(f'{band}_{k}')
        for band in list(ds_geomedian.data_vars)
        for k in range(n_time)
    ]

    # Each slice still carries its own scalar ``time`` coordinate; 'override'
    # keeps the first one instead of raising on the mismatch.
    ds_stacked = xr.merge(list_stack_measures, compat='override')
    return ds_stacked

In [8]:
# Sentinel-2 band measurements requested in every query
measurements = [
    'blue', 'green', 'red',
    'red_edge_1', 'red_edge_2', 'red_edge_3',
    'nir_1', 'nir_2',
    'swir_1', 'swir_2',
]

# Process every AOI in turn: load, build the feature stack, export as COG
for i in range(len(tile_bboxes)):
    minx, miny, maxx, maxy = tile_bboxes.iloc[i]
    print('bounding box ', ': minx: ', minx, 'miny: ', miny, 'maxx: ', maxx, 'maxy: ', maxy)

    # Query for this AOI: extent given in `crs`, data returned in `output_crs`,
    # each time slice loaded as a single spatial chunk
    # (alternative chunking: {'x': 1100, 'y': 1100})
    query = dict(
        x=(minx, maxx),
        y=(miny, maxy),
        time=('2021-01', '2021-12'),
        measurements=measurements,
        resolution=(-10, 10),
        crs=crs,
        output_crs=output_crs,
        dask_chunks={'x': -1, 'y': -1},
    )

    # One variable per band and two-month time bin
    all_data = feature_layers(query)
    print('stacked Sentinel-2 dataset:\n', all_data)

    # Write all stacked variables into one multi-band file per AOI
    outname = f'Results/test_area_S2_geomedian_AOI_{i}.tif'
    write_cog(all_data.to_array().compute(), outname, overwrite=True)

bounding box  : minx:  27.258289524104995 miny:  -29.877177140474014 maxx:  27.335946830175136 maxy:  -29.80364323118636
stacked Sentinel-2 dataset:
 <xarray.Dataset>
Dimensions:       (y: 818, x: 753)
Coordinates:
    time          datetime64[ns] 2021-01-01
  * y             (y) float64 6.703e+06 6.703e+06 ... 6.695e+06 6.695e+06
  * x             (x) float64 5.249e+05 5.25e+05 ... 5.325e+05 5.325e+05
Data variables: (12/66)
    blue_0        (y, x) float32 dask.array<chunksize=(818, 753), meta=np.ndarray>
    blue_1        (y, x) float32 dask.array<chunksize=(818, 753), meta=np.ndarray>
    blue_2        (y, x) float32 dask.array<chunksize=(818, 753), meta=np.ndarray>
    blue_3        (y, x) float32 dask.array<chunksize=(818, 753), meta=np.ndarray>
    blue_4        (y, x) float32 dask.array<chunksize=(818, 753), meta=np.ndarray>
    blue_5        (y, x) float32 dask.array<chunksize=(818, 753), meta=np.ndarray>
    ...            ...
    NDVI_0        (y, x) float32 dask.array<chunk

PosixPath('Results/test_area_S2_geomedian.tif')

Traceback (most recent call last):
  File "/usr/lib/python3.8/multiprocessing/resource_tracker.py", line 201, in main
    cache[rtype].remove(name)
KeyError: '/loky-91-v43t2n3l'
Traceback (most recent call last):
  File "/usr/lib/python3.8/multiprocessing/resource_tracker.py", line 201, in main
    cache[rtype].remove(name)
KeyError: '/loky-91-ml2bb4hu'
Traceback (most recent call last):
  File "/usr/lib/python3.8/multiprocessing/resource_tracker.py", line 201, in main
    cache[rtype].remove(name)
KeyError: '/loky-91-z2t15yq0'
Traceback (most recent call last):
  File "/usr/lib/python3.8/multiprocessing/resource_tracker.py", line 201, in main
    cache[rtype].remove(name)
KeyError: '/loky-91-8i91lehp'
Traceback (most recent call last):
  File "/usr/lib/python3.8/multiprocessing/resource_tracker.py", line 201, in main
    cache[rtype].remove(name)
KeyError: '/loky-91-tfqtt2mz'
Traceback (most recent call last):
  File "/usr/lib/python3.8/multiprocessing/resource_tracker.py", line 201, 