This notebook extracts Sentinel-2 bimonthly (one per two-month period) geomedian features for a given set of training data.

### load packages

In [None]:
%matplotlib inline

import os
import warnings

import datacube
import geopandas as gpd
import numpy as np
import pandas as pd
import xarray as xr
from odc.algo import xr_geomedian
from odc.io.cgroups import get_cpu_quota

from deafrica_tools.bandindices import calculate_indices
from deafrica_tools.classification import collect_training_data
from deafrica_tools.datahandling import load_ard
from deafrica_tools.plotting import map_shapefile

### input files and parameters

In [None]:
# --- input file and attribute settings ---

# GeoJSON of training points extracted from the reference map
training_points_path = 'Results/Training_samples_Rwanda.geojson'

# name of the attribute holding the class label (integer format)
class_name = 'LC_Class_I'

# coordinate reference system for loaded data: WGS84 / UTM Zone 35S
output_crs = 'epsg:32735'

### load input data

In [None]:
# Read the training points into a GeoDataFrame, keeping only the class
# label column and the geometry
training_points = gpd.read_file(training_points_path)[[class_name, 'geometry']]

# Display the first five rows as a quick sanity check
training_points.head()

### define query and feature layer function

In [None]:
# Query settings shared by every training sample
time = '2021'  # a plain string (not a tuple)
measurements = [
    'blue', 'green', 'red',
    'red_edge_1', 'red_edge_2', 'red_edge_3',
    'nir_1', 'nir_2',
    'swir_1', 'swir_2',
]
resolution = (-10, 10)  # pixel resolution passed through to the data load

# Bundle the settings into the query dictionary handed to the feature function
query = dict(
    time=time,
    measurements=measurements,
    output_crs=output_crs,
    resolution=resolution,
)
# define the function that generates the feature layers
def feature_layers(query):
    """Build a stack of two-month Sentinel-2 geomedian feature layers.

    Loads Sentinel-2 data for ``query``, adds an NDVI band, reduces the time
    series to one geomedian per two-month bin, and flattens the time
    dimension so that every (measurement, time step) pair becomes its own
    variable.

    Parameters
    ----------
    query : dict
        Keyword arguments forwarded unchanged to ``load_ard`` (for example
        ``time``, ``measurements``, ``output_crs`` and ``resolution``).

    Returns
    -------
    xarray.Dataset
        One variable per measurement and time step, named
        ``<measurement>_<k>`` where ``k`` is the zero-based position of the
        time step after resampling.
    """
    # connect to the datacube so we can access DE Africa data
    dc = datacube.Datacube(app='feature_layers')

    # load Sentinel-2 analysis ready data
    ds = load_ard(dc=dc,
                  products=['s2_l2a'],
                  group_by='solar_day',
                  verbose=False,
                  **query)

    # calculate NDVI; drop=False keeps the original bands alongside it
    ds = calculate_indices(ds,
                           index=['NDVI'],
                           drop=False,
                           satellite_mission='s2')

    # calculate bi-monthly geomedian ('2MS' = bins starting every second month)
    ds = ds.resample(time='2MS').map(xr_geomedian)

    # Use .sizes rather than .dims[...]: mapping-style access on Dataset.dims
    # is deprecated in recent xarray releases.
    # NOTE(review): the number of time steps depends on the query's time
    # range, so the feature names do too -- confirm it is the same for every
    # sample that shares this query.
    n_time = ds.sizes['time']

    # stack multi-temporal measurements and rename them: measurement-major,
    # time-minor order, each slice named '<measurement>_<k>'
    stacked_measures = [
        ds[name].isel(time=k).rename(f'{name}_{k}')
        for name in list(ds.data_vars)
        for k in range(n_time)
    ]

    # compat='override' skips the conflict check between the slices, which
    # each carry their own leftover scalar 'time' coordinate
    return xr.merge(stacked_measures, compat='override')

### extract features

In [None]:
# detect the number of CPUs
# get_cpu_quota() returns None when no cgroup CPU limit is configured, so
# fall back to the machine's CPU count; clamp to at least one worker because
# a fractional quota below 0.5 would otherwise round down to zero
cpu_quota = get_cpu_quota()
if cpu_quota:
    ncpus = max(1, round(cpu_quota))
else:
    ncpus = os.cpu_count() or 1
print('ncpus = '+str(ncpus))

# collect training data: run feature_layers for the training points and
# gather the resulting feature values together with their class labels
# NOTE(review): zonal_stats=None and return_coords=True presumably mean
# "no per-polygon aggregation" and "append the sample coordinates as extra
# columns" -- confirm against the deafrica_tools documentation
column_names, model_input = collect_training_data(
    gdf=training_points,
    dc_query=query,
    ncpus=ncpus,
    field=class_name,
    zonal_stats=None,
    feature_func=feature_layers,
    return_coords=True)

### export training features

In [None]:
# assemble the extracted features into a pandas DataFrame, one column per
# entry in column_names
pd_training_features = pd.DataFrame(data=model_input, columns=column_names)

# save as a space-delimited text file with a header row and no index column
# (index=False is the documented boolean form of the original index=None)
output_file = 'Results/Training_features_Rwanda.txt'
pd_training_features.to_csv(output_file, header=True, index=False, sep=' ')