Notebook to clip SPOT RGB mosaic images into chunks of the size required by the crop boundary deep learning model, and to remove chunks that fall entirely outside the AOIs or contain no crop according to DE Africa's 2019 crop mask.

## Load packages and modules

In [1]:
from glob import glob
import os
import subprocess
import rasterio
import datacube
import numpy as np
import geopandas as gpd
from shapely.geometry import box



## Set parameters

In [2]:
# Name of the country/region to process; it selects the AOI shapefile and
# is the prefix of the mosaic file names searched for further down.
country = 'Rwanda'
# country = 'Mozambique'

# When True, chunks are additionally checked against the crop mask and
# only exported if the mask contains crop within the chunk.
mask_crop = False

In [3]:
# Per-country inputs:
#   AOI_path - input shapefile of AOI used for downloading Planet images
#   str_year - year of data
COUNTRY_SETTINGS = {
    'Rwanda': {
        'AOI_path': 'input_data/Rwanda_Boundary.shp',
        'str_year': '2020',
    },
    'Mozambique': {
        'AOI_path': 'input_data/Mozambique_AOI_for_crop_boundary_validation.shp',
        'str_year': '2021',
    },
}

# Fail loudly on an unknown/mistyped country instead of silently pairing it
# with another country's AOI.
if country not in COUNTRY_SETTINGS:
    raise ValueError(
        'unknown country {!r}; expected one of {}'.format(
            country, sorted(COUNTRY_SETTINGS)))

AOI_path = COUNTRY_SETTINGS[country]['AOI_path']
str_year = COUNTRY_SETTINGS[country]['str_year']

# input folder for the mosaic images
input_folder = 'results'

# output folder to store image chunks
out_folder = input_folder+'/RGB_chunks'

# tiling parameters
tile_size = 256  # chunk size (pixels per side)

overlap = 8  # number of overlapping pixels between chunks

# tile_size - overlap is the stride between chunk origins, so it must be positive
if not 0 <= overlap < tile_size:
    raise ValueError('overlap must satisfy 0 <= overlap < tile_size')

In [4]:
# Make sure the chunk output folder exists (no-op when it is already there).
os.makedirs(out_folder, exist_ok=True)

## Clip mosaics into chunks and discard those entirely outside the AOI or with no crops

In [5]:
# Find the mosaics of the selected country inside the input folder.
mosaic_pattern = input_folder+'/'+country+'_*_mosaic.tif'
images = glob(mosaic_pattern)

n_found = len(images)
print('found {} mosaic images'.format(n_found))

found 1 mosaic images


In [6]:
# Read the AOI polygons for the selected country into a GeoDataFrame.
AOIs = gpd.read_file(AOI_path)

# Bare last expression so the notebook renders the table below.
AOIs

Unnamed: 0,ADM0_CODE,ADM0_NAME,CONTINENT,ISO3,ISO2,UNI,UNDP,FAOSTAT,GAUL,RIC_ISO3,REC_ISO3,HIH,geometry
0,205,Rwanda,Africa,RWA,RW,646.0,RWA,184.0,205.0,ICPAC-RWA,OTHER-RWA,1,"POLYGON ((30.46679 -1.06294, 30.46446 -1.06678..."


- Extract chunk window (starting from the top-left corner of the mosaic image)
- Query DE Africa crop mask within window only when chunk window intersects AOI
- Export chunk as RGB bands raster only when crop exists within chunk

In [7]:
%%time
for image in images:  # loop through all monthly mosaics
    print('clipping file', image, 'into chunks...')

    # read in and get information of the mosaic
    ds = rasterio.open(image)
    crs = ds.crs.to_string()
    # x_min,y_min,x_max,y_max=ds.bounds
    width, height = ds.width, ds.height
    AOIs = AOIs.to_crs(crs)

    id_column = 0  # record column id
    for column_start in range(0, width-tile_size+overlap, tile_size-overlap):
        id_column += 1
        id_row = 0  # record row id
        for row_start in range(0, height-tile_size+overlap, tile_size-overlap):
            id_row += 1

            # calculate clipping window
            row_end, column_end = row_start+tile_size, column_start+tile_size

            # load DE Africa crop mask 2019
            x_min, y_max = ds.xy(row_start, column_start)
            x_max, y_min = ds.xy(row_end, column_end)

            # get chunk bounding box
            chunk_geom = box(x_min, y_min, x_max, y_max)

            for index, row in AOIs.iterrows():  # only do clipping when quad intersects with AOI
                AOI = AOIs.iloc[[index]]
                if chunk_geom.intersects(AOI.geometry.iloc[0]):
                    cropped = True
                    if mask_crop:
                        dc = datacube.Datacube(app='cropland_extent')
                        query = {
                            'time': ('2019'),
                            'x': (x_min, x_max),
                            'y': (y_min, y_max),
                            'resolution': (-10, 10),
                            'crs': crs,
                        }
                        cm = dc.load(product='crop_mask', **query).squeeze()
                        np_crop_mask = cm['mask'].to_numpy()

                        # only do clipping and exporting if crop exists in the chunk
                        if np.nansum(np_crop_mask)==0: cropped=False
                    if cropped:
                        # NOTE: increase zfill when needed
                        outname = os.path.basename(image)
                        outname = outname[:outname.index('_mosaic')]
                        outname = os.path.join(out_folder, '_'.join(
                            [outname, str(id_row).zfill(3), str(id_column).zfill(3)])+'.tif')
                        if not os.path.exists(outname):
                            # print('writing to file ',outname)
                            gdal_cmd = ["gdal_translate", "-of", "GTiff", "-b", "1", "-b", "2", "-b", "3", '-co', 'COMPRESS=DEFLATE',
                                        "-srcwin", str(column_start), str(row_start), str(tile_size), str(tile_size), image, outname]
                            subprocess.call(
                                gdal_cmd, stdout=subprocess.DEVNULL)
                            # remove empty tiles
                            image_tile = rasterio.open(outname)
                            if image_tile.statistics(2).std==0:
                                #print("Empty tile removed")
                                os.system(f"rm -f {outname}")
    ds = None

clipping file results/Rwanda_2020-02-09_mosaic.tif into chunks...
CPU times: user 22.3 s, sys: 3.31 s, total: 25.6 s
Wall time: 1min 29s


In [8]:
# Optional manual check (disabled): merge the exported chunks back into a single mosaic to verify that unwanted chunks were removed
# ! gdal_merge.py -o ../experiments/Mozambique_RGB/clipped/mosaic_test.tif -co COMPRESS=Deflate ../experiments/Mozambique_RGB/clipped/*.tif