In [None]:
import numpy as np
import os
from osgeo import gdal, osr
import matplotlib.pyplot as plt
import sys
import tempfile
import os
import rasterio
import math
import pickle
import lithops
from lithops import Storage
from scipy import ndimage as ndi
%matplotlib inline

In [None]:
from skimage.segmentation import watershed
from skimage.feature import peak_local_max
from skimage.measure import regionprops
from sklearn.ensemble import RandomForestRegressor

In [None]:
# Bucket the worker functions download their input files from (passed to them
# as `bucket_name`); replace the placeholder before running.
BUCKET_NAME = 'YOUR_BUCKET_NAME'

In [None]:
# Lithops storage client used by the driver cells to upload the input file.
storage = Storage()

## Define functions

* `plot_band_array`: function to plot NEON spatial data.

In [None]:
def plot_band_array(band_array, image_extent, title, cmap_title, colormap, colormap_limits):
    """Draw a raster on the current matplotlib figure with a labelled colorbar.

    Args:
        band_array: 2-D array handed to ``plt.imshow``.
        image_extent: value forwarded as the ``extent`` of the image.
        title: figure title.
        cmap_title: label written next to the colorbar.
        colormap: colormap name applied with ``plt.set_cmap``.
        colormap_limits: value forwarded to ``plt.clim``.
    """
    plt.imshow(band_array, extent=image_extent)
    colorbar = plt.colorbar()
    plt.set_cmap(colormap)
    plt.clim(colormap_limits)
    colorbar.set_label(cmap_title, rotation=270, labelpad=20)
    plt.title(title)

    axes = plt.gca()
    # Print tick values in full instead of using an offset / scientific notation.
    axes.ticklabel_format(useOffset=False, style='plain')
    # Rotate the x tick labels so long values do not overlap.
    plt.setp(axes.get_xticklabels(), rotation=90)

* `array2raster`: function to output geotiff files.

In [None]:
def array2raster(newRasterfn, rasterOrigin_file, pixelWidth, pixelHeight, array_file, epsg):
    """Write a pickled array to a single-band GeoTIFF and upload the file.

    Args:
        newRasterfn: file name of the GeoTIFF to create; also used as the
            suffix of the storage key ("geotiff/<newRasterfn>").
        rasterOrigin_file: cloud-object reference to a pickled metadata dict;
            its ``ext_dict`` entries ``xMin`` and ``yMax`` give the raster origin.
        pixelWidth: pixel width written into the geotransform.
        pixelHeight: pixel height written into the geotransform.
        array_file: cloud-object reference to the pickled array to write.
        epsg: EPSG code of the output spatial reference.

    Raises:
        RuntimeError: if GDAL cannot create the output file.
    """
    storage = Storage()

    # NOTE: pickle.loads can execute arbitrary code; only use it on objects
    # this pipeline wrote itself.
    array = np.array(pickle.loads(storage.get_cloudobject(array_file)), dtype=float)
    metadata = pickle.loads(storage.get_cloudobject(rasterOrigin_file))

    originX = metadata['ext_dict']['xMin']
    originY = metadata['ext_dict']['yMax']
    rows = array.shape[0]
    cols = array.shape[1]

    driver = gdal.GetDriverByName('GTiff')
    outRaster = driver.Create(newRasterfn, cols, rows, 1, gdal.GDT_Float32)
    if outRaster is None:
        raise RuntimeError('GDAL could not create ' + str(newRasterfn))

    outRaster.SetGeoTransform((originX, pixelWidth, 0, originY, 0, pixelHeight))
    outband = outRaster.GetRasterBand(1)
    outband.WriteArray(array)

    outRasterSRS = osr.SpatialReference()
    outRasterSRS.ImportFromEPSG(epsg)
    outRaster.SetProjection(outRasterSRS.ExportToWkt())
    outband.FlushCache()

    # Drop the band and dataset references so GDAL closes the file and finishes
    # writing it before the bytes are read back for upload.
    outband = None
    outRaster = None

    with open(newRasterfn, 'rb') as tif_temp:
        storage.put_cloudobject(tif_temp.read(), key=("geotiff/" + newRasterfn))

* `raster2array`: function to convert rasters to an array.

In [None]:
def raster2array(geotif_file, bucket_name, ibm_cos):
    """Load a single-band raster chunk and store its array and metadata.

    The file is downloaded to ``/tmp``, read with GDAL, and two pickled objects
    are uploaded: the band as a float array (nodata replaced by NaN, divided by
    the band scale when one is set) and a metadata dict (size, projection,
    geotransform, extent, band statistics).

    Args:
        geotif_file: storage key of the raster; a "chunks/" prefix is removed
            for the local file name, and the part after "COB1_" (minus ".tif")
            becomes the suffix of the uploaded object keys.
        bucket_name: bucket the raster is downloaded from.
        ibm_cos: client object providing ``download_file``.

    Returns:
        ``(ref_array, ref_metadata)`` cloud-object references for a single-band
        raster; ``None`` (after printing a message) when the raster has more
        than one band.
    """
    storage = Storage()

    local_name = geotif_file.replace("chunks/", "")
    local_path = "/tmp/" + local_name
    ibm_cos.download_file(bucket_name, geotif_file, local_path)

    metadata = {}
    dataset = gdal.Open(local_path)
    metadata['array_rows'] = dataset.RasterYSize
    metadata['array_cols'] = dataset.RasterXSize
    metadata['bands'] = dataset.RasterCount
    metadata['driver'] = dataset.GetDriver().LongName
    metadata['projection'] = dataset.GetProjection()
    metadata['geotransform'] = dataset.GetGeoTransform()

    mapinfo = dataset.GetGeoTransform()
    metadata['pixelWidth'] = mapinfo[1]
    metadata['pixelHeight'] = mapinfo[5]

    # Far edges = origin + pixel count * pixel size. Dividing by the pixel size
    # is only right when the pixels are exactly 1 unit wide.
    metadata['ext_dict'] = {}
    metadata['ext_dict']['xMin'] = mapinfo[0]
    metadata['ext_dict']['xMax'] = mapinfo[0] + dataset.RasterXSize * mapinfo[1]
    metadata['ext_dict']['yMin'] = mapinfo[3] + dataset.RasterYSize * mapinfo[5]
    metadata['ext_dict']['yMax'] = mapinfo[3]

    metadata['extent'] = (metadata['ext_dict']['xMin'], metadata['ext_dict']['xMax'],
                          metadata['ext_dict']['yMin'], metadata['ext_dict']['yMax'])

    if metadata['bands'] == 1:
        raster = dataset.GetRasterBand(1)
        metadata['noDataValue'] = raster.GetNoDataValue()
        metadata['scaleFactor'] = raster.GetScale()

        # band statistics
        metadata['bandstats'] = {}
        stats = raster.GetStatistics(True, True)
        metadata['bandstats']['min'] = round(stats[0], 2)
        metadata['bandstats']['max'] = round(stats[1], 2)
        metadata['bandstats']['mean'] = round(stats[2], 2)
        metadata['bandstats']['stdev'] = round(stats[3], 2)

        # Built-in float replaces the removed np.float alias (same float64 dtype).
        array = raster.ReadAsArray(0, 0,
                                   metadata['array_cols'],
                                   metadata['array_rows']).astype(float)

        # A band may have no nodata value at all; only mask when one is set.
        no_data = metadata['noDataValue']
        if no_data is not None:
            array[array == no_data] = np.nan
        if metadata['scaleFactor'] is not None:
            array = array / metadata['scaleFactor']

        key_suffix = (local_name.split("COB1_")[1]).replace(".tif", "")
        ref_array = storage.put_cloudobject(pickle.dumps(array), key=('arrayAux/array_object_' + key_suffix))
        ref_metadata = storage.put_cloudobject(pickle.dumps(metadata), key=('arrayAux/metadata_object_' + key_suffix))

        return ref_array, ref_metadata

    elif metadata['bands'] > 1:
        print('More than one band ... need to modify function for case of multiple bands')

    return None

* `crown_geometric_volume_pth`: function to get the tree crown volume.

In [None]:
def crown_geometric_volume_pth(tree_data,min_tree_height,pth):
    """Sum the crown heights above a base height, capped at a percentile.

    Args:
        tree_data: sequence or array of height values of one tree crown.
        min_tree_height: base height subtracted from every (capped) value.
        pth: percentile (0-100) at which the heights are capped.

    Returns:
        ``(volume, p)`` where ``p`` is the ``pth`` percentile of ``tree_data``
        and ``volume`` is the sum of ``min(height, p) - min_tree_height``.
    """
    heights = np.asarray(tree_data, dtype=float)
    p = np.percentile(heights, pth)
    # Cap in array form: a Python list minus a plain scalar raises TypeError.
    capped_heights = np.minimum(heights, p)
    volume = np.sum(capped_heights - min_tree_height)
    return volume, p

* `get_predictors`: function to get the trees from the biomass data.

In [None]:
def get_predictors(tree,chm_array, labels):
    """Build the predictor row for one segmented tree.

    Args:
        tree: region-properties object; reads ``label``, ``area``,
            ``major_axis_length``, ``max_intensity`` and ``min_intensity``.
        chm_array: 2-D height array indexed by the pixel positions of the tree.
        labels: 2-D label array; pixels equal to ``tree.label`` belong to the tree.

    Returns:
        A 12-element list: label, area, major axis length, max and min
        intensity, the 50th/60th/70th height percentiles, the full crown volume
        and the crown volumes capped at those three percentiles.
    """
    rows, cols = np.nonzero(labels == tree.label)
    tree_crown_heights = chm_array[rows, cols]

    full_crown = np.sum(tree_crown_heights - np.min(tree_crown_heights))

    crown50, p50 = crown_geometric_volume_pth(tree_crown_heights,tree.min_intensity,50)
    crown60, p60 = crown_geometric_volume_pth(tree_crown_heights,tree.min_intensity,60)
    crown70, p70 = crown_geometric_volume_pth(tree_crown_heights,tree.min_intensity,70)

    return [tree.label,
            # Built-in float: the np.float alias no longer exists in current NumPy.
            float(tree.area),
            tree.major_axis_length,
            tree.max_intensity,
            tree.min_intensity,
            p50, p60, p70,
            full_crown, crown50, crown60, crown70]

* `asc_to_geotiff`: function to convert asc file to geotiff file.

In [None]:
def asc_to_geotiff(ref_file, bucket_name, ibm_cos):
    """Convert a raster from storage into a tiled, deflate-compressed GeoTIFF.

    The source is downloaded to ``/tmp``, rewritten with the GTiff driver
    (256x256 blocks, band interleave) and uploaded under ``<tile id>.tiff``.

    Args:
        ref_file: storage key of the source raster.
        bucket_name: bucket the source raster is downloaded from.
        ibm_cos: client object providing ``download_file``.

    Returns:
        The storage key of the uploaded GeoTIFF.
    """
    storage = Storage()
    local_asc = "/tmp/" + ref_file
    ibm_cos.download_file(bucket_name, ref_file, local_asc)

    tile_id, _ = os.path.splitext(os.path.basename(local_asc))
    out_key = tile_id + '.tiff'
    # Write next to the download in /tmp instead of the working directory;
    # the storage key stays the bare file name.
    out_path = os.path.join("/tmp", out_key)

    print(f'Converting {tile_id} to GeoTIFF...')
    with rasterio.open(local_asc, 'r') as src:
        profile = src.profile
        # Cloud optimized GeoTiff parameters
        profile.update(driver='GTiff',
                       blockxsize=256,
                       blockysize=256,
                       tiled=True,
                       compress='deflate',
                       interleave='band')

        with rasterio.open(out_path, 'w', **profile) as dest:
            dest.write(src.read())

    # Upload only after both datasets are closed.
    with open(out_path, 'rb') as tif_temp:
        storage.put_cloudobject(tif_temp.read(), key=out_key)

    return out_key

* `data_chunker`: function to split the geotiff file to data chunks.

In [None]:
def data_chunker(file, dst, n_splits, block_x, block_y, bucket_name, ibm_cos):
    """Cut one block out of a raster and upload it as a georeferenced chunk.

    The raster is divided into ``n_splits`` x ``n_splits`` blocks; ``block_x``
    selects the block along the height (rows) and ``block_y`` the block along
    the width (columns). The chunk is uploaded under
    ``chunks/<tile id>_<block_x>_<block_y>.tif``.

    Args:
        file: storage key of the source raster.
        dst: accepted for interface compatibility; the "chunks" key prefix is
            fixed and this value is not used.
        n_splits: number of blocks along each axis.
        block_x: block index along the raster height.
        block_y: block index along the raster width.
        bucket_name: bucket the source raster is downloaded from.
        ibm_cos: client object providing ``download_file``.
    """
    storage = Storage()

    local_path = "/tmp/" + file
    ibm_cos.download_file(bucket_name, file, local_path)

    tile_id, _ = os.path.splitext(os.path.basename(file))
    chunk_file = "/tmp/" + tile_id + '_' + str(block_x) + '_' + str(block_y) + '.tif'

    with rasterio.open(local_path) as src:
        # Compute working window
        step_w = src.width / n_splits
        step_h = src.height / n_splits

        offset_h = round(step_h * block_x)
        offset_w = round(step_w * block_y)

        # Clamp to the raster so rounding can never push the window past the
        # edge (a clipped read would not match the declared chunk size).
        width = min(math.ceil(step_w * (block_y + 1) - offset_w), src.width - offset_w)
        height = min(math.ceil(step_h * (block_x + 1) - offset_h), src.height - offset_h)

        window = rasterio.windows.Window(offset_w, offset_h, width, height)

        profile = src.profile
        # Give the chunk its own geotransform; copying the tile's transform
        # would place every chunk at the tile origin.
        profile.update(width=width,
                       height=height,
                       transform=src.window_transform(window))

        with rasterio.open(chunk_file, 'w', **profile) as dest:
            dest.write(src.read(window=window))

    with open(chunk_file, 'rb') as tif_temp:
        storage.put_cloudobject(tif_temp.read(), key=chunk_file.replace('/tmp', 'chunks'))

* `make_plots`: function to make plots of the obtained data.

In [None]:
def make_plots(array_file, metadata_file, j):
    """Plot one canopy-height array as a PNG and upload it.

    Args:
        array_file: cloud-object reference to the pickled array to plot.
        metadata_file: cloud-object reference to a pickled metadata dict; only
            its ``extent`` entry is used.
        j: index appended to the output file name.

    Reads the notebook-level global ``just_chm_file`` to build the file name,
    so that name must be defined when this function runs. The PNG is written
    to the working directory and uploaded under "plots/CHM/<file name>".
    """
    storage = Storage()

    # Downloading data
    # NOTE: pickle.loads can execute arbitrary code; only use it on objects
    # this pipeline wrote itself.
    array = pickle.loads(storage.get_cloudobject(array_file))
    metadata = pickle.loads(storage.get_cloudobject(metadata_file))

    # Making plots
    plt.clf()

    plt.figure(1)

    plot_band_array(array, metadata['extent'],
                    'Canopy height Model',
                    'Canopy height (m)',
                    'Greens', [0, 10])

    # Strip the extension rather than a fixed character count: the old [0:-5]
    # slice also cut the last character of the tile name, unlike the sibling
    # plotting functions.
    path = os.path.splitext(just_chm_file)[0] + '_CHM_' + str(j) + '.png'

    plt.savefig(path, dpi=300, orientation='landscape',
                bbox_inches='tight',
                pad_inches=0.1)

    # Uploading the plot to cloud storage
    with open(path, 'rb') as png_temp:
        storage.put_cloudobject(png_temp.read(), key=("plots/CHM/" + path))

* `smooth_CHM`: function to smooth the CHM.

In [None]:
#Smooth the CHM using a gaussian filter to remove spurious points
def smooth_CHM(array_file, i):
    """Gaussian-smooth a stored array and upload the result.

    Args:
        array_file: cloud-object reference to the pickled input array.
        i: index used as the suffix of the output object key.

    Returns:
        Cloud-object reference to the pickled smoothed array
        ("arrayAux/smooth_array_object_<i>").
    """
    storage = Storage()
    chm = pickle.loads(storage.get_cloudobject(array_file))

    # sigma 2, kernel cut off after 2 standard deviations, zero padding at the edges
    smoothed = ndi.gaussian_filter(chm, 2, mode='constant', cval=0, truncate=2.0)
    # Pixels that were exactly zero in the input stay zero after smoothing.
    smoothed[chm == 0] = 0

    smooth_key = "arrayAux/smooth_array_object_" + str(i)
    return storage.put_cloudobject(pickle.dumps(smoothed), key=smooth_key)

* `nan_to_num`: function to convert all NaN positions in an array to zero.

In [None]:
# Converting nan position to zero
def nan_to_num(array_file, i):
    """Replace NaNs in a stored array via ``np.nan_to_num`` and upload the result.

    Args:
        array_file: cloud-object reference to the pickled input array.
        i: index used as the suffix of the output object key.

    Returns:
        Cloud-object reference to the pickled cleaned array
        ("arrayAux/nan_array_object_<i>").
    """
    storage = Storage()
    source = pickle.loads(storage.get_cloudobject(array_file))

    cleaned = np.nan_to_num(source)

    cleaned_key = "arrayAux/nan_array_object_" + str(i)
    return storage.put_cloudobject(pickle.dumps(cleaned), key=cleaned_key)

* `peak_max`: function to find the positions of the local maxima in an array.

In [None]:
def peak_max(array_file, i):
    """Find local maxima of a stored array and upload them as a boolean mask.

    Args:
        array_file: cloud-object reference to the pickled input array.
        i: index used as the suffix of the output object key.

    Returns:
        Cloud-object reference to a pickled boolean array with the input's
        shape, ``True`` at each local maximum found with a 5x5 footprint
        ("arrayAux/local_maxi_array_object_<i>").
    """
    # Downloading data
    storage = Storage()
    array = pickle.loads(storage.get_cloudobject(array_file))

    # Ask for peak coordinates and build the mask here: the `indices=False`
    # switch no longer exists in newer scikit-image releases.
    peak_coords = peak_local_max(array, footprint=np.ones((5, 5)))
    array_peak = np.zeros(array.shape, dtype=bool)
    array_peak[tuple(peak_coords.T)] = True

    # Uploading the mask to cloud storage
    ref_array = storage.put_cloudobject(pickle.dumps(array_peak), key=("arrayAux/local_maxi_array_object_" + str(i)))

    return ref_array

* `make_plots_local_maximus`: function to make plots of the local maxima.

In [None]:
#Plot the local maximums
def make_plots_local_maximus(maxi_file, metadata_file, j):
    """Plot a stored local-maximum mask as a PNG and upload it.

    Args:
        maxi_file: cloud-object reference to the pickled mask array.
        metadata_file: cloud-object reference to a pickled metadata dict; only
            its ``extent`` entry is used.
        j: index appended to the output file name.

    Reads the notebook-level global ``just_chm_file`` for the file name; the
    PNG is uploaded under "plots/Maximums/<file name>".
    """
    storage = Storage()
    peaks = pickle.loads(storage.get_cloudobject(maxi_file))
    meta = pickle.loads(storage.get_cloudobject(metadata_file))

    plt.clf()
    plt.figure(2)
    # The mask is cast to integers so it can be drawn on a numeric colour scale.
    plot_band_array(peaks.astype(int), meta['extent'],
                    'Maximum', 'Maxi', 'Greys', [0, 1])

    # File name of the plot
    path = just_chm_file[0:-4] + '_Maximums_' + str(j) + '.png'
    plt.savefig(path, dpi=300, orientation='landscape',
                bbox_inches='tight', pad_inches=0.1)

    # Upload the rendered PNG
    with open(path, 'rb') as png_stream:
        storage.put_cloudobject(png_stream.read(), key=("plots/Maximums/" + path))

* `markers`: function to get the markers.

In [None]:
def markers(maxi_file, i):
    """Label the connected regions of a stored mask and upload the labels.

    Args:
        maxi_file: cloud-object reference to the pickled mask array.
        i: index used as the suffix of the output object key.

    Returns:
        Cloud-object reference to the pickled label array
        ("arrayAux/marker_object_<i>").
    """
    storage = Storage()
    peaks = pickle.loads(storage.get_cloudobject(maxi_file))

    # ndi.label returns (labelled array, number of features); keep the array only.
    labelled_peaks = ndi.label(peaks)[0]

    marker_key = "arrayAux/marker_object_" + str(i)
    return storage.put_cloudobject(pickle.dumps(labelled_peaks), key=marker_key)

* `perform_watershed`: function to perform the watershed of our data.

In [None]:
def perform_watershed(smooth_file, marker_file, mask_file, i):
    """Run a marker-based watershed on stored arrays and upload the results.

    Args:
        smooth_file: cloud-object reference to the pickled image array.
        marker_file: cloud-object reference to the pickled marker array.
        mask_file: cloud-object reference to the pickled mask array.
        i: index used as the suffix of the output object keys.

    Returns:
        Three cloud-object references: the label array, a float32 copy with
        zeros replaced by NaN (for plotting), and the maximum label value.
    """
    storage = Storage()

    image = pickle.loads(storage.get_cloudobject(smooth_file))
    seeds = pickle.loads(storage.get_cloudobject(marker_file))
    region_mask = pickle.loads(storage.get_cloudobject(mask_file))

    labels = watershed(image, seeds, mask=region_mask)

    # Float copy in which the 0 label becomes NaN.
    labels_for_plot = labels.astype(np.float32)
    labels_for_plot[labels_for_plot == 0] = np.nan

    max_labels = np.max(labels)

    suffix = str(i)
    ref_labels = storage.put_cloudobject(
        pickle.dumps(labels), key=("arrayAux/labels_object_" + suffix))
    ref_labels_for_plot = storage.put_cloudobject(
        pickle.dumps(labels_for_plot), key=("arrayAux/labels_for_plot_object_" + suffix))
    ref_max_labels = storage.put_cloudobject(
        pickle.dumps(max_labels), key=("arrayAux/max_labels_object_" + suffix))

    return ref_labels, ref_labels_for_plot, ref_max_labels

* `create_mask`: function to create a mask with the smooth array.

In [None]:
def create_mask(smooth_file, i):
    """Turn a stored array into a mask and upload it.

    Every element that is not equal to zero is set to 1; zeros are kept.

    Args:
        smooth_file: cloud-object reference to the pickled input array.
        i: index used as the suffix of the output object key.

    Returns:
        Cloud-object reference to the pickled mask ("arrayAux/mask_object_<i>").
    """
    storage = Storage()
    source = pickle.loads(storage.get_cloudobject(smooth_file))

    nonzero = source != 0
    chm_mask = source.copy()
    chm_mask[nonzero] = 1

    mask_key = "arrayAux/mask_object_" + str(i)
    return storage.put_cloudobject(pickle.dumps(chm_mask), key=mask_key)

* `make_plots_segments`: function to make plots of the segments.

In [None]:
def make_plots_segments(labels_for_plots_file, metadata_file, max_labels_file, j):
    """Plot a stored crown-segmentation raster as a PNG and upload it.

    Args:
        labels_for_plots_file: cloud-object reference to the pickled label
            array prepared for plotting.
        metadata_file: cloud-object reference to a pickled metadata dict; only
            its ``extent`` entry is used.
        max_labels_file: cloud-object reference to the pickled maximum label,
            used as the upper colour limit.
        j: index appended to the output file name.

    Reads the notebook-level global ``just_chm_file`` for the file name; the
    PNG is uploaded under "plots/Segmentation/<file name>".
    """
    storage = Storage()

    segments = pickle.loads(storage.get_cloudobject(labels_for_plots_file))
    meta = pickle.loads(storage.get_cloudobject(metadata_file))
    upper_label = pickle.loads(storage.get_cloudobject(max_labels_file))

    plt.clf()
    plt.figure(2)
    plot_band_array(segments, meta['extent'],
                    'Crown Segmentation', 'Tree Crown Number',
                    'Spectral', [0, upper_label])

    # File name of the plot
    path = just_chm_file[0:-4] + '_Segmentation_' + str(j) + '.png'
    plt.savefig(path, dpi=300, orientation='landscape',
                bbox_inches='tight', pad_inches=0.1)

    # Upload the rendered PNG
    with open(path, 'rb') as png_stream:
        storage.put_cloudobject(png_stream.read(), key=("plots/Segmentation/" + path))

* `region_props`: function to get the properties of each segment. 

In [None]:
def region_props(labels_file, array_file, i):
    """Compute per-tree predictors from stored labels and heights, and upload them.

    Args:
        labels_file: cloud-object reference to the pickled label array.
        array_file: cloud-object reference to the pickled height array.
        i: index used as the suffix of the output object keys.

    Returns:
        Three cloud-object references: the region-properties list, the
        predictor matrix ``X`` (11 columns, NaN replaced) and the matching
        1-D array of tree ids. When no tree is found, ``X`` is a single row of
        zeros and the tree ids are a single 0.
    """
    # Downloading data
    storage = Storage()

    labels = pickle.loads(storage.get_cloudobject(labels_file))
    array = pickle.loads(storage.get_cloudobject(array_file))

    tree_properties = regionprops(labels, array)

    predictors_chm = np.array([get_predictors(tree, array, labels) for tree in tree_properties])

    if predictors_chm.shape[0] != 0:
        # Column 0 is the tree label; the remaining 11 columns are predictors.
        X = np.nan_to_num(predictors_chm[:, 1:])
        tree_ids = np.nan_to_num(predictors_chm[:, 0])
    else:
        # No trees: one all-zero predictor row and ONE placeholder id, so
        # tree_ids stays one id per row of X as in the normal case.
        X = np.zeros((1, 11))
        tree_ids = np.zeros(1)

    # Uploading results to cloud storage
    ref_tree = storage.put_cloudobject(pickle.dumps(tree_properties), key=("arrayAux/tree_properties_object_" + str(i)))

    ref_X = storage.put_cloudobject(pickle.dumps(X), key=("arrayAux/X_object_" + str(i)))

    ref_tree_ids = storage.put_cloudobject(pickle.dumps(tree_ids), key=("arrayAux/tree_ids_object_" + str(i)))

    return ref_tree, ref_X, ref_tree_ids

* `calculate_estimated_biomass`: function to apply the model to the predictors.

In [None]:
def calculate_estimated_biomass(X_file, biomass_predictors_file, biomass_file, i):
    """Fit the regressor on stored training data and predict biomass for ``X``.

    Uses the notebook-level global ``regr_rf`` (an object with ``fit`` and
    ``predict``), which must be defined when this function runs.

    Args:
        X_file: cloud-object reference to the pickled predictors to score.
        biomass_predictors_file: cloud-object reference to the pickled
            training predictors.
        biomass_file: cloud-object reference to the pickled training targets.
        i: index used as the suffix of the output object key.

    Returns:
        Cloud-object reference to the pickled predictions
        ("arrayAux/estimated_biomass_object_<i>").
    """
    storage = Storage()

    X = pickle.loads(storage.get_cloudobject(X_file))
    train_predictors = pickle.loads(storage.get_cloudobject(biomass_predictors_file))
    train_targets = pickle.loads(storage.get_cloudobject(biomass_file))

    regr_rf.fit(train_predictors, train_targets)
    predictions = regr_rf.predict(X)

    result_key = "arrayAux/estimated_biomass_object_" + str(i)
    return storage.put_cloudobject(pickle.dumps(predictions), key=result_key)

* `out_raster`: function to set an out raster with the same size as the labels.

In [None]:
def out_raster(labels_file, tree_ids_file, estimated_biomass_file, i):
    """Build a biomass raster from stored labels and per-tree estimates.

    Args:
        labels_file: cloud-object reference to the pickled label array.
        tree_ids_file: cloud-object reference to the pickled tree ids.
        estimated_biomass_file: cloud-object reference to the pickled biomass
            estimates, in the same order as the tree ids.
        i: index used as the suffix of the output object key.

    Returns:
        Cloud-object reference to the pickled float raster in which each
        labelled pixel holds the biomass of its tree and label 0 is NaN
        ("arrayAux/biomass_map_object_<i>").
    """
    # Downloading data
    storage = Storage()

    labels = pickle.loads(storage.get_cloudobject(labels_file))
    tree_ids = pickle.loads(storage.get_cloudobject(tree_ids_file))
    estimated_biomass = pickle.loads(storage.get_cloudobject(estimated_biomass_file))

    biomass_map = np.array(labels, dtype=float)

    # Assign the appropriate biomass to the labels
    background = labels == 0
    biomass_map[background] = np.nan
    for tree_id, biomass_of_tree_id in zip(tree_ids, estimated_biomass):
        # Match on the untouched labels, not on the map being filled: a biomass
        # value already written could otherwise equal a later tree id and be
        # overwritten. Background pixels are never assigned.
        biomass_map[(labels == tree_id) & ~background] = biomass_of_tree_id

    # Uploading the map to cloud storage
    ref_biomass_map = storage.put_cloudobject(pickle.dumps(biomass_map), key=("arrayAux/biomass_map_object_" + str(i)))

    return ref_biomass_map

* `biomass_stats`: function to get biomass stats for plotting.

In [None]:
def biomass_stats(estimated_biomass_file, i):
    """Compute summary statistics of stored biomass estimates and upload them.

    Args:
        estimated_biomass_file: cloud-object reference to the pickled estimates.
        i: index used as the suffix of the output object keys.

    Returns:
        Four cloud-object references, in order: mean, standard deviation,
        minimum and sum of the estimates.
    """
    storage = Storage()
    estimates = pickle.loads(storage.get_cloudobject(estimated_biomass_file))

    mean_value = np.mean(estimates)
    std_value = np.std(estimates)
    min_value = np.min(estimates)
    sum_value = np.sum(estimates)

    suffix = str(i)
    ref_mean_biomass = storage.put_cloudobject(
        pickle.dumps(mean_value), key=("arrayAux/mean_biomass_object_" + suffix))
    ref_std_biomass = storage.put_cloudobject(
        pickle.dumps(std_value), key=("arrayAux/std_biomass_object_" + suffix))
    ref_min_biomass = storage.put_cloudobject(
        pickle.dumps(min_value), key=("arrayAux/min_biomass_object_" + suffix))
    ref_sum_biomass = storage.put_cloudobject(
        pickle.dumps(sum_value), key=("arrayAux/sum_biomass_object_" + suffix))

    return ref_mean_biomass, ref_std_biomass, ref_min_biomass, ref_sum_biomass

* `make_plots_biomass`: function to make plots of the biomass data.

In [None]:
def make_plots_biomass(biomass_map_file, metadata_file, mean_biomass_file, std_biomass_file, min_biomass_file, sum_biomass_file, i):
    """Plot a stored biomass raster as a PNG and upload it.

    The colour limits run from ``min + std`` to ``mean + 3 * std``.

    Args:
        biomass_map_file: cloud-object reference to the pickled biomass raster.
        metadata_file: cloud-object reference to a pickled metadata dict; only
            its ``extent`` entry is used.
        mean_biomass_file: cloud-object reference to the pickled mean.
        std_biomass_file: cloud-object reference to the pickled standard deviation.
        min_biomass_file: cloud-object reference to the pickled minimum.
        sum_biomass_file: cloud-object reference to the pickled sum; it is
            loaded but not used in the plot.
        i: index appended to the output file name.

    The PNG is uploaded under "plots/resultBiomass/ResultBiomass_<i>.png".
    """
    storage = Storage()

    def _load(reference):
        # Fetch one cloud object and unpickle it.
        return pickle.loads(storage.get_cloudobject(reference))

    biomass_map = _load(biomass_map_file)
    metadata = _load(metadata_file)
    mean_biomass = _load(mean_biomass_file)
    std_biomass = _load(std_biomass_file)
    min_biomass = _load(min_biomass_file)
    sum_biomass = _load(sum_biomass_file)

    plt.clf()
    plt.figure(5)
    colour_limits = [min_biomass+std_biomass, mean_biomass+std_biomass*3]
    plot_band_array(biomass_map, metadata['extent'],
                    'Biomass (kg)', 'Biomass (kg)',
                    'winter',
                    colour_limits)

    # File name of the plot
    path = 'ResultBiomass_' + str(i) + '.png'
    plt.savefig(path, dpi=300, orientation='landscape',
                bbox_inches='tight', pad_inches=0.1)

    # Upload the rendered PNG
    with open(path, 'rb') as png_stream:
        storage.put_cloudobject(png_stream.read(), key=("plots/resultBiomass/" + path))

## Data Preparation: Canopy Height Models

In [None]:
# Number of blocks along each raster axis; the tile is cut into SPLITS x SPLITS chunks.
SPLITS = 50

In [None]:
# Input raster (.asc); read from the current working directory and also used as its storage key.
chm_file = 'NDSM-Vegetacion-ETRS89-H31-0473-COB1.asc'

In [None]:
# Upload the raw input file under its own name as the storage key
with open(chm_file, mode='rb') as asc_stream:
    storage.put_cloudobject(asc_stream.read(), key=chm_file)

In [None]:
# Convert asc file to geotiff
# NOTE(review): asc_to_geotiff returns the key of the GeoTIFF it uploads, but
# that key is not captured here and the chunking cell passes the .asc file to
# data_chunker — confirm this is intended.
fexec = lithops.FunctionExecutor(runtime_memory=2048)

fexec.map(asc_to_geotiff, (chm_file, BUCKET_NAME))

fexec.get_result()

In [None]:
# Preparing data: one data_chunker call and one expected chunk key per (i, j) block
chm_chunk_files = []
iterdata = []
chunk_prefix = 'chunks/' + chm_file.replace('.asc', '')
for i in range(SPLITS):
    for j in range(SPLITS):
        file_name = chunk_prefix + '_' + str(i) + '_' + str(j) + '.tif'
        iterdata.append((chm_file, 'chunks', SPLITS, i, j, BUCKET_NAME))
        chm_chunk_files.append((file_name, BUCKET_NAME))


# Split the raster into small data chunks
fexec = lithops.FunctionExecutor(runtime_memory=1024)

fexec.map(data_chunker, iterdata)

fexec.get_result()

When we output the results, we will want to include the same file information as the input, so we will gather the file name information.

In [None]:
#Get info from chm file for outputting results
# The plotting functions read `just_chm_file` as a global to build their output file names.
just_chm_file = os.path.basename(chm_file)
print(just_chm_file)

Now we will get the CHM data...

In [None]:
# chm_chunk_files

In [None]:
# Converting chunk file to numpy array
# raster2array returns a pair of cloud-object references (array, metadata) for
# each single-band chunk; `results` collects those pairs.
fexec = lithops.FunctionExecutor()

fexec.map(raster2array, chm_chunk_files)

results = fexec.get_result()

..., plot it, and save the figure.

In [None]:
# Preparing data: split the (array, metadata) reference pairs and index them
array_only = [array_ref for array_ref, _ in results]
metadata_only = [metadata_ref for _, metadata_ref in results]
iterdata = [(array_ref, metadata_ref, idx)
            for idx, (array_ref, metadata_ref) in enumerate(results)]

#Plot the original CHM
fexec = lithops.FunctionExecutor()

fexec.map(make_plots, iterdata)

results = fexec.get_result()

It looks like SJER primarily has low vegetation with scattered taller trees.

## Create Filtered CHM

Now we will use a Gaussian smoothing kernel (convolution) across the data set to remove spurious high vegetation points. This will help ensure we are finding the treetops properly before running the watershed segmentation algorithm.

For different forest types it may be necessary to change the input parameters. Information on the function can be found in the <a href="https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.filters.gaussian_filter.html" target="_blank">SciPy documentation</a>.

Of most importance are the second and fifth inputs. The second input defines the standard deviation of the Gaussian smoothing kernel. Too large a value will apply too much smoothing; too small and some spurious high points may be left behind. The fifth, the truncate value, controls after how many standard deviations the Gaussian kernel will get cut off (since it theoretically goes to infinity).

In [None]:
# Preparing data: pair every array reference with its chunk index
iterdata = [(array_ref, idx) for idx, array_ref in enumerate(array_only)]

# Remove spurious points from array
fexec = lithops.FunctionExecutor()

fexec.map(smooth_CHM, iterdata)

results = fexec.get_result()

array_smooth_only = results

Now save a copy of filtered CHM. We will later use this in our code, so we'll output it into our data directory.

In [None]:
# Preparing data
# NOTE(review): pixel size (1, -1) and EPSG 32611 are hard-coded here; confirm
# they match the resolution and coordinate system of the input tile.
iterdata = [('chm_filter_' + str(idx) + '.tif', metadata_ref, 1, -1, smooth_ref, 32611)
            for idx, (metadata_ref, smooth_ref) in enumerate(zip(metadata_only, array_smooth_only))]

#Save the smoothed CHM
fexec = lithops.FunctionExecutor()

fexec.map(array2raster, iterdata)

results = fexec.get_result()

In [None]:
#Plot the filtered CHM
# Indices start after the ones used for the unfiltered plots, so the file
# names of the two sets do not collide.
first_index = len(metadata_only) + 1
iterdata = [(smooth_ref, metadata_ref, idx)
            for idx, (metadata_ref, smooth_ref)
            in enumerate(zip(metadata_only, array_smooth_only), start=first_index)]

fexec = lithops.FunctionExecutor()

fexec.map(make_plots, iterdata)

results = fexec.get_result()

In [None]:
# Preparing data
# NOTE(review): this reads the unsmoothed arrays (`array_only`) and then
# overwrites `array_smooth_only`, so the later steps work on NaN-cleaned raw
# data rather than on the Gaussian-filtered arrays — confirm this is intended.
iterdata = [(array_ref, idx) for idx, array_ref in enumerate(array_only)]

fexec = lithops.FunctionExecutor()

fexec.map(nan_to_num, iterdata)

array_smooth_only = fexec.get_result()

In [None]:
# array_smooth_only

## Determine local maximums

Now we will run an algorithm to determine local maximums within the image. Setting indices to 'False' returns a raster of the maximum points, as opposed to a list of coordinates. The footprint parameter is an area where only a single peak can be found. This should be approximately the size of the smallest tree. Information on more sophisticated methods to define the window can be found in Chen (2006).

In [None]:
# Calculate local maximum points in the smoothed CHM
# Preparing data
iterdata = [(array_ref, idx) for idx, array_ref in enumerate(array_smooth_only)]

fexec = lithops.FunctionExecutor(runtime_memory=2048)

fexec.map(peak_max, iterdata)

results = fexec.get_result()

local_maxi = results

Our new object `local_maxi` is an array of boolean values where each pixel is identified as either being the local maximum (`True`) or not being the local maximum (`False`).

In [None]:
# local_maxi

This is very helpful, but it can be difficult to visualize boolean values using our typical numeric plotting procedures as defined in the `plot_band_array` function above. Therefore, we will need to convert this boolean array to a numeric format to use this function. Booleans convert easily to integers with values of `False=0` and `True=1` using the `.astype(int)` method.

In [None]:
# local_maxi.astype(int)

Next, we can plot the raster of local maximums by coercing the boolean array into an array of integers inline. The following figure shows the difference in finding local maximums for a filtered vs. non-filtered CHM.

We will save the graphics (.png) in an outputs folder sister to our working directory and data outputs (.tif) to our data directory.

In [None]:
# Preparing data: pair every maxima raster with the metadata of its chunk
iterdata = [(maxi_ref, metadata_ref, idx)
            for idx, (maxi_ref, metadata_ref) in enumerate(zip(local_maxi, metadata_only))]

fexec = lithops.FunctionExecutor()

# Making plots of local_maximus
fexec.map(make_plots_local_maximus, iterdata)

fexec.get_result()

In [None]:
# Preparing data
# NOTE(review): pixel size (1, -1) and EPSG 32611 are hard-coded here; confirm
# they match the resolution and coordinate system of the input tile.
iterdata = [('maximum_' + str(idx) + '.tif', metadata_ref, 1, -1, maxi_ref, 32611)
            for idx, (metadata_ref, maxi_ref) in enumerate(zip(metadata_only, local_maxi))]

# Converting array to geotiff file
fexec = lithops.FunctionExecutor()

fexec.map(array2raster, iterdata)

fexec.get_result()

If we were to look at the overlap between the tree crowns and the local maxima from each method, it would appear a bit like this raster.

 <figure>
	<a href="https://raw.githubusercontent.com/NEONScience/NEON-Data-Skills/main/graphics/raster-general/raster-classification-filter-vs-nonfilter.jpg">
	<img src="https://raw.githubusercontent.com/NEONScience/NEON-Data-Skills/main/graphics/raster-general/raster-classification-filter-vs-nonfilter.jpg"></a>
	<figcaption> The difference in finding local maximums for a filtered vs.
	non-filtered CHM.
	Source: National Ecological Observatory Network (NEON)
	</figcaption>
</figure>


Apply labels to all of the local maximum points

In [None]:
#Identify all the maximum points
iterdata = [(maxi_ref, idx) for idx, maxi_ref in enumerate(local_maxi)]

fexec = lithops.FunctionExecutor()

fexec.map(markers, iterdata)

markers_only = fexec.get_result()

Next we will create a mask layer of all of the vegetation points so that the watershed segmentation will only occur on the trees and not extend into the surrounding ground points. Since 0 represents ground points in the CHM, setting the mask to 1 where the CHM is not zero will define the mask.

In [None]:
#Create a CHM mask so the segmentation will only occur on the trees
# Build the inputs from the CHM arrays explicitly. Reusing the `iterdata` left
# over from the markers cell would turn the local-maximum rasters into the
# mask, limiting the watershed to the peak pixels only.
iterdata = [(smooth_ref, idx) for idx, smooth_ref in enumerate(array_smooth_only)]

fexec = lithops.FunctionExecutor()

fexec.map(create_mask, iterdata)

mask_only = fexec.get_result()

## Watershed segmentation

As in a river system, a watershed is divided by a ridge that divides areas. Here our watersheds are the individual tree canopies and the ridge is the delineation between each one.

<figure>
	<a href="https://raw.githubusercontent.com/NEONScience/NEON-Data-Skills/main/graphics/raster-general/raster-classification-watershed-segments.png">
	<img src="https://raw.githubusercontent.com/NEONScience/NEON-Data-Skills/main/graphics/raster-general/raster-classification-watershed-segments.png"></a>
	<figcaption> A raster classified based on watershed segmentation.
	Source: National Ecological Observatory Network (NEON)
	</figcaption>
</figure>

Next, we will perform the watershed segmentation which produces a raster of labels.

In [None]:
# Preparing data: image, markers and mask of the same chunk go into one call
iterdata = [(smooth_ref, marker_ref, mask_ref, idx)
            for idx, (smooth_ref, marker_ref, mask_ref)
            in enumerate(zip(array_smooth_only, markers_only, mask_only))]

fexec = lithops.FunctionExecutor()

fexec.map(perform_watershed, iterdata)

results = fexec.get_result()

# Unzip the (labels, labels_for_plot, max_labels) reference triples
labels_only = [labels_ref for labels_ref, _, _ in results]
labels_for_plot_only = [plot_ref for _, plot_ref, _ in results]
max_labels_only = [max_ref for _, _, max_ref in results]

In [None]:
# Preparing data
iterdata = [(plot_ref, metadata_ref, max_ref, idx)
            for idx, (plot_ref, metadata_ref, max_ref)
            in enumerate(zip(labels_for_plot_only, metadata_only, max_labels_only))]

# Making plots of segments
fexec = lithops.FunctionExecutor()

fexec.map(make_plots_segments, iterdata)

fexec.get_result()

In [None]:
# Preparing data
# NOTE(review): pixel size (1, -1) and EPSG 32611 are hard-coded here; confirm
# they match the resolution and coordinate system of the input tile.
iterdata = [('labels' + str(idx) + '.tif', metadata_ref, 1, -1, labels_ref, 32611)
            for idx, (labels_ref, metadata_ref) in enumerate(zip(labels_only, metadata_only))]

# Converting labels to geotiff file
fexec = lithops.FunctionExecutor()

fexec.map(array2raster, iterdata)

fexec.get_result()

Now we will get several properties of the individual trees that will be used as predictor variables.

In [None]:
# Preparing data: pair every label raster with the CHM array of the same chunk
iterdata = [(labels_ref, array_ref, idx)
            for idx, (labels_ref, array_ref) in enumerate(zip(labels_only, array_only))]

fexec = lithops.FunctionExecutor()

fexec.map(region_props, iterdata)

results = fexec.get_result()

# Unzip the (tree_properties, X, tree_ids) reference triples
tree_properties_only = [props_ref for props_ref, _, _ in results]
X_only = [x_ref for _, x_ref, _ in results]
tree_ids_only = [ids_ref for _, _, ids_ref in results]