Notebook to rasterise and clip crop field validation polygons, so that the validation extent and boundary chunks have the same size as the Planet RGB chunks. 

## Load packages and modules

In [1]:
from glob import glob
import os
import rasterio
from rasterio import features
import numpy as np
import geopandas as gpd
import sys
import cv2



## Set parameters

In [2]:
# shapefile holding the validation crop field polygons
crop_field_path = 'input_data/Crop field boundry mapping Mozambique.shp'

# directory containing the Planet RGB image chunks
input_folder = 'results/RGB_chunks'

# directory that receives the rasterised validation chunks
out_folder = 'results/groundtruth'

# file-name prefixes for the output field extent and field boundary rasters
country = 'Mozambique'
prefix_extent = '{}_groundtruth_crop_field_extent_'.format(country)
prefix_bound = '{}_groundtruth_crop_field_bound_'.format(country)

## Search for RGB image chunks and read validation polygons

In [3]:
# year and month (as strings) selecting which RGB chunks to pick up
str_year = '2021'
str_month = '12'

# glob for every Planet chunk named <prefix><year>_<month>*.tif in the input folder
fn_prefix = 'Mozambique_planet_medres_visual_'
search_pattern = '{}/{}{}_{}*.tif'.format(input_folder, fn_prefix, str_year, str_month)
images = glob(search_pattern)
print('found {} Planet RGB chunks'.format(len(images)))

found 1007 Planet RGB chunks


In [4]:
# load the validation polygons from the vector file and report how many were read
crop_field = gpd.read_file(crop_field_path)
n_polygons = len(crop_field)
print('{} polygons in the vector validation file'.format(n_polygons))

1691 polygons in the vector validation file


In [5]:
# Create the output folder (and any missing parents). exist_ok=True makes the
# call a no-op when the directory already exists, which removes the separate
# existence check and the race between checking and creating.
os.makedirs(out_folder, exist_ok=True)

## Rasterise field polygons to field extent and boundary chunks with empty chunks excluded
- Rasterise the field polygon shapefile to the extent of each Planet RGB chunk
- Extract the field extent raster (boundary pixels excluded so that neighbouring crop fields stay disconnected)
- Extract the field boundary raster (dilated once with a 2x2 cross-shaped kernel)
- Export the two rasters above only if the chunk contains field pixels

In [6]:
# Cache of (polygons, polygon boundaries) keyed by CRS string, so the vector
# layer is reprojected and its boundary lines derived once per distinct CRS
# instead of once per chunk. The loaded `crop_field` itself is left untouched.
geoms_by_crs = {}

# 2x2 cross-shaped structuring element used to dilate the rasterised boundaries
dilation_kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (2, 2))

for image in images:
    # Read the chunk's grid definition and metadata; the context manager
    # closes the dataset even if something fails later in the iteration.
    with rasterio.open(image) as ds:
        crs = ds.crs.to_string()
        chunk_shape = ds.shape
        chunk_transform = ds.transform
        kwargs = ds.meta

    # Reproject the polygons to the chunk CRS (first time this CRS is seen only)
    if crs not in geoms_by_crs:
        fields_reprojected = crop_field.to_crs(crs)
        # drop missing/empty geometries: they have nothing to rasterise and a
        # missing geometry has no `.boundary`
        polygons = [geom for geom in fields_reprojected.geometry
                    if geom is not None and not geom.is_empty]
        geoms_by_crs[crs] = (polygons, [geom.boundary for geom in polygons])
    geoms, geoms_bound = geoms_by_crs[crs]

    # Rasterise the polygons onto the chunk grid: 1 inside a field, 0 elsewhere
    # (this initial extent still includes the boundary pixels)
    extent = rasterio.features.rasterize(geoms,
                                         out_shape=chunk_shape,
                                         fill=0,
                                         transform=chunk_transform,
                                         all_touched=False,
                                         default_value=1,
                                         dtype=rasterio.uint8)

    # Rasterise the polygon outlines; all_touched=True marks every pixel the
    # outline passes through
    bound = rasterio.features.rasterize(geoms_bound,
                                        out_shape=chunk_shape,
                                        fill=0,
                                        transform=chunk_transform,
                                        all_touched=True,
                                        default_value=1,
                                        dtype=rasterio.uint8)

    # remove field boundary pixels from extent so that the fields are not connected
    extent[bound == 1] = 0

    # skip chunks without any field pixel: nothing is exported for them
    if not extent.any():
        continue

    # dilate field boundary
    bound = cv2.dilate(bound, dilation_kernel, iterations=1)

    # reuse the RGB chunk's metadata, overriding it for a single-band,
    # deflate-compressed uint8 raster
    kwargs.update(dtype=rasterio.uint8, count=1, compress='deflate')

    # the last two '_'-separated tokens of the chunk file name identify the chunk
    chunk_name = os.path.splitext(os.path.basename(image))[0]
    chunk_id = '_'.join(chunk_name.split('_')[-2:])

    # save the extent and the boundary chunk rasters
    for prefix, raster in ((prefix_extent, extent), (prefix_bound, bound)):
        out_path = os.path.join(out_folder, prefix + chunk_id + '.tif')
        with rasterio.open(out_path, 'w', **kwargs) as dst:
            dst.write(raster, 1)

In [22]:
# do mosaic to test if chunks correctly covered all fields
! gdal_merge.py -o results/groundtruth/Mozmabique_extent_mosaic.tif -co COMPRESS=Deflate results/groundtruth/Mozambique_groundtruth_crop_field_extent*.tif
! gdal_merge.py -o results/groundtruth/Mozmabique_bound_mosaic.tif -co COMPRESS=Deflate results/groundtruth/Mozambique_groundtruth_crop_field_bound*.tif

0...10...20...30...40...50...60...70...80...90...100 - done.
0...10...20...30...40...50...60...70...80...90...100 - done.
