This notebook generates raster validation images with the same size as the Planet RGB image chunks. The input field boundary validation data can be provided in raster or vector format. 

## Load packages and modules

In [9]:
from glob import glob
import os
import rasterio
from rasterio import features
import numpy as np
import geopandas as gpd
import sys
import cv2
from shapely.geometry import box
from scipy import ndimage
from skimage import morphology,measure
import subprocess

## Set parameters

In [6]:
# Validation crop-field data: either a vector file (.shp / .geojson) or a raster (.tif).
# Alternative vector input: 'input_data/Crop field boundry mapping Mozambique.shp'
crop_field_path = 'input_data/nasa_rwanda_field_boundary_competition_labels_train_mosaic.tif'

# Folder holding the Planet RGB image chunks
input_folder = 'results/RGB_chunks'

# Folder that receives the generated validation rasters
out_folder = 'results/groundtruth'

# Country name, used to build the filename prefixes of the output
# field-extent and field-boundary rasters
country = 'Rwanda'
prefix_extent = f'{country}_groundtruth_crop_field_extent_'
prefix_bound = f'{country}_groundtruth_crop_field_bound_'

## Search for RGB image chunks

In [4]:
# Year and month (kept as strings) that select which RGB chunks to look for
str_year = '2021'
str_month = '12'
# Chunk filenames start with a country-specific prefix followed by year_month
fn_prefix = f'{country}_planet_medres_visual_'
# Collect every matching Planet chunk GeoTIFF in the input folder
images = glob(f'{input_folder}/{fn_prefix}{str_year}_{str_month}*.tif')
print(f'found {len(images)} Planet RGB chunks')

found 18113 Planet RGB chunks


In [5]:
# Create the output folder (and any missing parents). exist_ok=True makes the
# cell safe to re-run and avoids the check-then-create race of a separate
# os.path.isdir() test.
os.makedirs(out_folder, exist_ok=True)

## Prepare validation data chunks

If the field boundaries are provided in vector format, the following steps are carried out first:
- Rasterise field polygon shapefile to extent of Planet RGB chunks
- Extract field extent raster (boundary excluded to keep crop fields disconnected)
- Extract field boundary raster (optionally with a 2 pixel dilation)

Rasters are saved only if they contain crop fields.

In [None]:
# Build per-chunk validation rasters (field extent + field boundary) for every
# Planet RGB chunk listed in `images`.
#
# Two input formats are supported, selected by the extension of `crop_field_path`:
#   * vector (.shp / .geojson): polygons are rasterised onto each chunk grid;
#   * raster (.tif / .tiff): a boundary mosaic is converted to an extent mosaic
#     and both mosaics are clipped to each chunk with gdal_translate.
# Chunks without any field-extent pixel are not kept.
# Raises ValueError for any other file extension instead of silently doing nothing.
file_ext = os.path.splitext(crop_field_path)[1].lower()

if file_ext in ('.geojson', '.shp'):
    print('reading crop field boundary in vector format')

    if not images:
        raise FileNotFoundError('no Planet RGB chunks found; cannot determine the target crs')

    # open one raster to check crs; the context manager closes the dataset again
    # NOTE(review): assumes every chunk shares the crs of the first one - confirm
    with rasterio.open(images[0]) as ds:
        crs = ds.crs.to_string()

    # read in vector polygons and reproject them to the crs of the chunks
    crop_field = gpd.read_file(crop_field_path)
    crop_field = crop_field.to_crs(crs)

    print('{} polygons in the vector validation file'.format(len(crop_field)))
    # Geometries of all features; missing/empty ones are skipped because they
    # have no usable boundary and cannot be rasterised
    geoms = [geom for geom in crop_field.geometry
             if geom is not None and not geom.is_empty]
    geoms_bound = [geom.boundary for geom in geoms]

    for image in images:  # loop through chunks
        # read the grid definition and metadata, then release the file handle
        with rasterio.open(image) as ds:
            chunk_shape = ds.shape
            chunk_transform = ds.transform
            kwargs = ds.meta.copy()

        # Rasterize polygons to get initial extent (boundary included);
        # all_touched=False only burns pixels whose centre lies inside a polygon
        extent = rasterio.features.rasterize(geoms,
                                             out_shape=chunk_shape,
                                             fill=0,
                                             transform=chunk_transform,
                                             all_touched=False,
                                             default_value=1)
        # rasterise the polygon outlines; all_touched=True burns every pixel
        # the outline passes through
        bound = rasterio.features.rasterize(geoms_bound,
                                            out_shape=chunk_shape,
                                            fill=0,
                                            transform=chunk_transform,
                                            all_touched=True,
                                            default_value=1)

        # remove field boundary pixels from extent so that the fields are not connected
        extent[bound == 1] = 0

        # optional dilation of the field boundary (disabled)
        # bound = cv2.dilate(bound,cv2.getStructuringElement(cv2.MORPH_CROSS,(2,2)),iterations = 1)

        # export chunk only if it contains at least one field pixel
        if np.nansum(extent) > 0:
            # reuse the RGB chunk metadata, but as a single compressed uint8 band
            kwargs.update(dtype=rasterio.uint8, count=1, compress='deflate')
            chunk_id = os.path.basename(image)[:-4].split('_')[-2:]

            # save extent chunk raster
            out_exent = os.path.join(out_folder, prefix_extent + '_'.join(chunk_id) + '.tif')
            with rasterio.open(out_exent, 'w', **kwargs) as dst:
                dst.write(extent.astype(np.uint8), 1)
            # save boundary chunk raster
            out_bound = os.path.join(out_folder, prefix_bound + '_'.join(chunk_id) + '.tif')
            with rasterio.open(out_bound, 'w', **kwargs) as dst:
                dst.write(bound.astype(np.uint8), 1)

elif file_ext in ('.tif', '.tiff'):
    print('reading crop field boundary in raster format')

    # read in mosaic tif; everything needed later is copied out so the file
    # can be closed straight away
    with rasterio.open(crop_field_path) as ds_validation:
        np_label = ds_validation.read(1)
        validation_bbox = box(*ds_validation.bounds)
        kwargs = ds_validation.meta.copy()
    print('bbox of validation raster: ', validation_bbox)

    # get extent from boundary
    # get extent areas by filling holes of boundaries (returns a boolean array)
    np_extent = ndimage.binary_fill_holes(np_label)
    # close boundaries using diamond kernel size of 1 (not exactly equal to cv2.MORPH_CROSS) to fill small gaps between boundaries
    np_label_closed = morphology.closing(np_label, footprint=morphology.diamond(1))
    # exclude closed boundaries from extent
    # NOTE(review): assumes boundary pixels are coded as 1 in the label raster - confirm
    np_extent[np_label_closed == 1] = 0

    # export mosaic extent as geotiff; the boolean mask is cast explicitly to
    # match the uint8 band declared in the metadata
    kwargs.update(dtype=rasterio.uint8, count=1, compress='deflate')
    mosaic_extent = os.path.join(out_folder, country + '_valiadation_field_extent_mosaic.tif')
    with rasterio.open(mosaic_extent, 'w', **kwargs) as dst:
        dst.write(np_extent.astype(np.uint8), 1)

    # identify chunks intersecting the validation extent/boundary raster
    for image in images:
        with rasterio.open(image) as ds_chunk:
            crs = ds_chunk.crs.to_string()
            minx, miny, maxx, maxy = ds_chunk.bounds
        # NOTE(review): the intersection test compares chunk bounds with the
        # validation bounds directly, so both are presumably in the same crs - confirm
        chunk_geom = box(minx, miny, maxx, maxy)
        chunk_id = os.path.basename(image)[:-4].split('_')[-2:]
        if not chunk_geom.intersects(validation_bbox):
            continue

        # clip validation extent and boundary rasters to the extent of the identified chunk
        out_exent = os.path.join(out_folder, prefix_extent + '_'.join(chunk_id) + '.tif')
        out_bound = os.path.join(out_folder, prefix_bound + '_'.join(chunk_id) + '.tif')
        clip_results = []
        for src_path, dst_path in ((mosaic_extent, out_exent), (crop_field_path, out_bound)):
            # -projwin takes upper-left x/y followed by lower-right x/y
            gdal_cmd = ["gdal_translate", "-of", "GTiff", '-co', 'COMPRESS=DEFLATE',
                        "-projwin", str(minx), str(maxy), str(maxx), str(miny),
                        '-projwin_srs', crs, src_path, dst_path]
            clip_results.append(subprocess.run(gdal_cmd, stdout=subprocess.DEVNULL))

        if all(result.returncode == 0 for result in clip_results):
            # remove empty chunks (no field-extent pixel inside the chunk)
            with rasterio.open(out_exent) as ds_extent:
                n_valid = np.nansum(ds_extent.read(1))
            if not n_valid > 0:
                os.remove(out_exent)
                os.remove(out_bound)
        else:
            # surface the failure instead of silently leaving partial outputs behind
            print('warning: gdal_translate failed for chunk {}'.format('_'.join(chunk_id)))

else:
    raise ValueError('unsupported validation file format: {}'.format(crop_field_path))

reading crop field boundary in raster format
bbox of validation raster:  POLYGON ((3386733.6303 -177052.044, 3386733.6303 -154923.5244, 3372076.8301 -154923.5244, 3372076.8301 -177052.044, 3386733.6303 -177052.044))




In [15]:
# do mosaic to test if chunks correctly covered all fields
! gdal_merge.py -o results/groundtruth/Rwanda_extent_mosaic.tif -co COMPRESS=Deflate results/groundtruth/Rwanda_groundtruth_crop_field_extent*.tif
! gdal_merge.py -o results/groundtruth/Rwanda_bound_mosaic.tif -co COMPRESS=Deflate results/groundtruth/Rwanda_groundtruth_crop_field_bound*.tif

0...10...20...30...40...50...60...70...80...90...100 - done.
0...10...20...30...40...50...60...70...80...90...100 - done.
