This notebook crops the borders of the field extent, boundary and instance prediction chunks, masks out field instances that contain no crop pixels using the DE Africa crop mask, and mosaics all chunks.

## Load packages and modules

In [1]:
from glob import glob
import os
import subprocess
import rasterio
import datacube
import numpy as np
import geopandas as gpd
import rioxarray
from skimage import measure,morphology
from osgeo import gdal



## Define parameters

In [2]:
# --- run configuration: edit these values to process another area or year ---

# country name and year; both are part of the chunk file names
country = "Mozambique"
str_year = "2021"

# folder holding the input prediction chunks
input_folder = "results"

# folder receiving the cropped / masked chunks
out_folder = "results/processed"

# width (in pixels) of the border stripped from every side of a chunk
crop_size = 2

# radius (in pixels) for the optional crop mask dilation
n_dilate = 2

In [3]:
# create the output folder (and any missing parents); exist_ok avoids the
# check-then-create race and makes the cell safe to re-run
os.makedirs(out_folder, exist_ok=True)

## Identify list of segmented instances chunks

In [4]:
# every field instance chunk of the selected country and year in the input folder
instance_pattern = '{}/{}_average_field_instance_{}*.tif'.format(input_folder, country, str_year)
files_instances = glob(instance_pattern)

n_chunks_found = len(files_instances)
print('found {} cropped field instances chunks'.format(n_chunks_found))

found 1007 cropped field instances chunks


## Crop extent, boundary and instance chunks

In [5]:
%%time
def _crop_chunk(src_path, dst_folder, xoff, yoff, xsize, ysize):
    """Crop a pixel window out of a raster with gdal_translate.

    Parameters
    ----------
    src_path : str
        Path of the raster to crop.
    dst_folder : str
        Folder receiving the cropped GeoTIFF.
    xoff, yoff : int
        Column / row offset of the window's upper-left corner.
    xsize, ysize : int
        Width / height of the window in pixels.

    Returns
    -------
    str
        Path of the cropped file: source base name plus '_cropped.tif'.

    Raises
    ------
    subprocess.CalledProcessError
        If gdal_translate exits with a non-zero status.
    """
    stem = os.path.splitext(os.path.basename(src_path))[0]
    dst_path = os.path.join(dst_folder, stem + '_cropped.tif')
    gdal_cmd = ["gdal_translate", "-of", "GTiff",
                "-srcwin", str(xoff), str(yoff), str(xsize), str(ysize),
                src_path, dst_path]
    # check=True: a failed crop stops the run instead of silently leaving a
    # missing or partial chunk for the later cells
    subprocess.run(gdal_cmd, stdout=subprocess.DEVNULL, check=True)
    return dst_path


# list of file names (cropped)
files_instances_cropped=[]
files_bound_cropped=[]
files_extent_cropped=[]
for file_instances in files_instances:
    # extract size info from the instance chunk; the same window is applied to
    # the boundary and extent chunks derived from the same file name
    ds = gdal.Open(file_instances)
    if ds is None:
        raise RuntimeError('could not open {}'.format(file_instances))
    xsize = ds.RasterXSize - 2*crop_size
    ysize = ds.RasterYSize - 2*crop_size
    ds = None  # dropping the reference closes the GDAL dataset
    if xsize <= 0 or ysize <= 0:
        raise ValueError('{} is too small to crop {} pixels from each border'.format(file_instances, crop_size))

    # derive the sibling file names from the base name only, so a folder name
    # containing 'field_instance' is never rewritten
    chunk_dir, chunk_name = os.path.split(file_instances)
    file_bound_pred = os.path.join(chunk_dir, chunk_name.replace('field_instance', 'bound_prob'))
    file_extent_pred = os.path.join(chunk_dir, chunk_name.replace('field_instance', 'extent_prob'))

    # crop field instances, boundary prediction and extent prediction chunks
    files_instances_cropped.append(_crop_chunk(file_instances, out_folder, crop_size, crop_size, xsize, ysize))
    files_bound_cropped.append(_crop_chunk(file_bound_pred, out_folder, crop_size, crop_size, xsize, ysize))
    files_extent_cropped.append(_crop_chunk(file_extent_pred, out_folder, crop_size, crop_size, xsize, ysize))

CPU times: user 1.27 s, sys: 10.4 s, total: 11.7 s
Wall time: 3min 22s


## Mask noncrop instances for all chunks and export as geotiff

- Read in chunk and load/resample DE Africa crop mask layer; 
- Only instances with no overlapping crop pixels will be masked out;
- Export masked chunk as geotiff

In [6]:
%%time
files_instances_masked=[] # list of file names of masked field instance chunks

# one datacube connection is enough for all chunks; creating it inside the
# loop repeated the same setup for every file
dc = datacube.Datacube(app='cropland_extent')

for file_instances_cropped in files_instances_cropped:
    # read in raster; the context manager closes the file handle, which the
    # original code left open for every chunk
    with rasterio.open(file_instances_cropped) as ds:
        out_meta = ds.meta
        arr_instances = ds.read(1)

    # load DE Africa crop mask 2019 on the grid of the chunk
    xr_ds = rioxarray.open_rasterio(file_instances_cropped).to_dataset(name='field_instance')
    cm = dc.load(product='crop_mask', like=xr_ds, time=('2019')).squeeze()

    # extract numpy array
    # NOTE(review): assumes the squeezed mask is 2-D with the same shape as
    # arr_instances (it is loaded with like=xr_ds) - confirm
    np_crop_mask = cm['mask'].to_numpy()

#     # dilate crop mask to keep more crop fields
#     np_crop_mask=morphology.binary_dilation(np_crop_mask,footprint=morphology.disk(n_dilate))

    # mask noncrop instances as background (0):
    # ids touching at least one crop pixel are kept, every other positive id
    # is zeroed. Same result as testing each id in 1..max one by one, but
    # without a full-array comparison per instance.
    ids_with_crop = np.unique(arr_instances[np_crop_mask == 1])
    is_noncrop = (arr_instances > 0) & ~np.isin(arr_instances, ids_with_crop)
    arr_instances_masked = arr_instances.copy()
    arr_instances_masked[is_noncrop] = 0

    # export as geotiff, reusing the metadata of the cropped chunk
    outname = file_instances_cropped[:-4]+'_masked.tif'
    files_instances_masked.append(outname)
    with rasterio.open(outname, 'w', **out_meta) as dst:
        dst.write(arr_instances_masked, 1)

CPU times: user 3min 31s, sys: 1min 25s, total: 4min 56s
Wall time: 3min 15s


## Mosaic all chunks

In [7]:
# mosaic boundary probabilities
! gdal_merge.py -o results/processed/Mozambique_bound_prob_2021_04_08_12_mosaic.tif -co COMPRESS=Deflate results/processed/*average_bound_prob*_cropped.tif

0...10...20...30...40...50...60...70...80...90...100 - done.


In [8]:
# mosaic extent results
! gdal_merge.py -o results/processed/Mozambique_extent_prob_2021_04_08_12_mosaic.tif -co COMPRESS=Deflate results/processed/*average_extent_prob*_cropped.tif

0...10...20...30...40...50...60...70...80...90...100 - done.


In [9]:
# mosaic masked instance results
! gdal_merge.py -o results/processed/Mozambique_field_instance_2021_04_08_12_mosaic.tif -co COMPRESS=Deflate results/processed/*average_field_instance*_cropped_masked.tif

0...10...20...30...40...50...60...70...80...90...100 - done.


## Delete intermediate cropped chunks to save space (optional)

In [10]:
# remove every cropped instance chunk that is still on disk
# (filter is lazy, so each path is checked right before it is removed)
for cropped_path in filter(os.path.exists, files_instances_cropped):
    os.remove(cropped_path)

In [11]:
# remove every cropped boundary probability chunk that is still on disk
# (filter is lazy, so each path is checked right before it is removed)
for cropped_path in filter(os.path.exists, files_bound_cropped):
    os.remove(cropped_path)

In [12]:
# remove every cropped extent probability chunk that is still on disk
# (filter is lazy, so each path is checked right before it is removed)
for cropped_path in filter(os.path.exists, files_extent_cropped):
    os.remove(cropped_path)