This notebook implements retiling of the predictions into larger tiles, instance segmentation and postprocessing of the segmentation results.

## Load packages and modules

In [None]:
import numpy as np
import imageio.v2 as imageio
from osgeo import gdal
import os
from glob import glob
from skimage import measure, util
import pandas as pd
import geopandas as gpd
from shapely.ops import unary_union
import rasterio
from deafrica_tools.spatial import xr_rasterize
import subprocess
import sys
from shapely.geometry import box
import rioxarray
import datacube
# Folders holding the project-local helper modules imported further down
# (relative to the notebook's working directory).
module_paths=['../1_Identify_months_thresholds_model_evaluation']
for module_path in module_paths:
    # already importable from here: leave sys.path untouched
    if module_path in sys.path:
        continue
    sys.path.append(module_path)

In [2]:
# import functions from modules
from datasets import export_geotiff
from instance_segment import InstSegm

## Define parameters

In [3]:
# tiles_shp='input_data/Rwanda_tiles_edited.shp'
# extent_mosaic='results/processed/Rwanda_extent_prob_2021_04_10_12_mosaic.tif'
# bound_mosaic='results/processed/Rwanda_bound_prob_2021_04_10_12_mosaic.tif'
# instance_mosaic='results/processed/Rwanda_field_instance_2021_04_10_12_mosaic.tif'
# grd_search_df='../1_Identify_months_thresholds_model_evaluation/results/averaged/Rwanda_grid_search_thresholds.csv' # grid search results of thresholds
# country = 'Rwanda'
# str_year='2021'
# out_crs='epsg:3857'

In [8]:
# run configuration: area of interest, year and output projection
country = 'Mozambique'
str_year='2021'
out_crs='epsg:3857'

# polygons defining the tiles the mosaics are cut into
tiles_shp='input_data/'+country+'_AOI_for_crop_boundary_validation.shp'

# prediction mosaics; they share a folder, country prefix and period tag
processed_dir='results/processed'
period_tag='2021_04_08_12'
extent_mosaic=f'{processed_dir}/{country}_extent_prob_{period_tag}_mosaic.tif'
bound_mosaic=f'{processed_dir}/{country}_bound_prob_{period_tag}_mosaic.tif'
instance_mosaic=f'{processed_dir}/{country}_field_instance_{period_tag}_mosaic.tif'

# grid search results of thresholds (set to None to fall back to fixed thresholds)
grd_search_df='../1_Identify_months_thresholds_model_evaluation/results/averaged/grid_search_thresholds.csv'

In [9]:
# Folder receiving the per-tile rasters and shapefiles produced below.
out_folder='results_retiled_new'
# exist_ok=True makes the cell safe to re-run and avoids the check-then-create race
os.makedirs(out_folder, exist_ok=True)

## Read in tiles

In [10]:
# load the tile polygons, then reproject them to the output CRS
tiles_native=gpd.read_file(tiles_shp)
tiles=tiles_native.to_crs(out_crs)

# per-tile bounding boxes; unpacked later as minx, miny, maxx, maxy
bboxes=tiles.bounds

# CRS of the reprojected tiles as a string (handed to gdal_translate's -projwin_srs)
crs=tiles.crs.to_string()

# show the tile table
tiles

Unnamed: 0,ADM2_PT,ADM1_PT,Area_km2,geometry
0,Nicoadala,Zambezia,1202.755,"POLYGON ((4080024.698 -1975813.774, 4081285.53..."


In [11]:
# Read in or provide the best thresholds.
# With a grid-search table available, use the extent/boundary thresholds of
# the row with the highest mIoU; otherwise fall back to fixed values.
if grd_search_df is not None:
    hp_df=pd.read_csv(grd_search_df)
    # idxmax() returns an index *label*, so the row must be fetched with
    # label-based .loc (positional .iloc only agrees for a default RangeIndex)
    best_row=hp_df.loc[hp_df['mIoU'].idxmax()]
    t_ext_best=best_row['t_ext']
    t_bnd_best=best_row['t_bound']
else:
    t_ext_best=0.3
    t_bnd_best=0.1

### Generating vector and raster field boundary outputs
The following steps are implemented:
* clip boundary and extent probabilities mosaics to tile extent
* do instance segmentation on the tiled predictions
* label individual crop fields
* filter out field instances that contain no cropland pixels outside frequently inundated areas, using the DE Africa crop extent (crop mask) layer and the WOfS layer
* polygonise and export masked fields in vector and raster formats

In [12]:
# Threshold on the share of valid crop pixels within a field object: a field is
# kept only when the mean of the crop/water mask over its pixels is strictly
# greater than this value.
t_pct_crop=0

# Threshold wet frequency for WOfS: pixels with a frequency below this value
# (or NaN) are treated as not water when building the mask.
t_wofs=0.5

In [13]:
def _clip_mosaic_to_tile(src_mosaic,dst_tile,minx,miny,maxx,maxy,window_crs):
    """Clip `src_mosaic` to a bounding box with gdal_translate and write `dst_tile`.

    The window is passed to -projwin as upper-left x/y followed by
    lower-right x/y, with its coordinates expressed in `window_crs`
    (-projwin_srs).

    Raises:
        subprocess.CalledProcessError: if gdal_translate exits with a non-zero
            status, so a failed clip is not silently carried into later steps.
    """
    gdal_cmd=["gdal_translate", "-of", "GTiff",
              "-projwin",str(minx),str(maxy),str(maxx),str(miny),
              '-projwin_srs',window_crs,src_mosaic,dst_tile]
    subprocess.run(gdal_cmd,stdout=subprocess.DEVNULL,check=True)


# One datacube handle serves every tile and both products (crop mask and WOfS),
# so it is created once instead of twice per iteration.
dc = datacube.Datacube(app='crop_field_postprocessing')

# paths of the per-tile shapefiles of masked field instances
shp_tiles_masked=[]
for index,tile in tiles.iterrows():
    print('processing tile ',index)
    # get bbox; iterrows() yields index labels, so look the row up by label
    minx,miny,maxx,maxy=bboxes.loc[index]

    # clip predictions mosaic using tile
    print('retiling the boundary, extent and instance rasters...')
    # build each temporary tile name explicitly rather than via str.replace on
    # the full path, which would also rewrite matching text in the folder name
    tile_suffix='_tile_'+str(index)+'.tif'
    out_bound=os.path.join(out_folder, country+'_bound_prob'+tile_suffix)
    out_extent=os.path.join(out_folder, country+'_extent_prob'+tile_suffix)
    out_instance=os.path.join(out_folder, country+'_field_instance'+tile_suffix)
    for src_mosaic,dst_tile in ((bound_mosaic,out_bound),
                                (extent_mosaic,out_extent),
                                (instance_mosaic,out_instance)):
        _clip_mosaic_to_tile(src_mosaic,dst_tile,minx,miny,maxx,maxy,crs)

    # read in clipped predictions; the georeferencing of the extent tile is
    # reused for every raster exported for this tile
    ds_extent = gdal.Open(out_extent)
    if ds_extent is None:
        raise RuntimeError('could not open clipped extent raster '+out_extent)
    geotrans=ds_extent.GetGeoTransform()
    proj=ds_extent.GetProjection()
    ds_extent=None # release the GDAL dataset handle

    extent_prob=imageio.imread(out_extent)
    bound_prob=imageio.imread(out_bound)
    field_instance=imageio.imread(out_instance)

    # do segmentation
    print('doing segmentation...')
    instances_predicted=InstSegm(extent_prob, bound_prob, t_ext=t_ext_best, t_bound=t_bnd_best)
    instances_predicted+=1 # non-field/background value from -1 to 0
    print('number of field instances after segmentation: ',len(np.unique(instances_predicted))-1)

    # label connected regions, non-field will be labelled as 0
    instances_labelled,n_features= measure.label(instances_predicted, background=0,return_num=True)
    print('number of field instances after labelling: ',n_features)

    # mask out originally nodata pixels
    instances_labelled[field_instance==0]=0

    # export labelled instances as geotiff
    outname=os.path.join(out_folder,os.path.basename(extent_mosaic).replace('mosaic','unmasked_tile_')[:-4]+str(index)+'.tif')
    outname=outname.replace('extent_prob','field_instance')
    export_geotiff(outname,instances_labelled,geotrans,proj,gdal.GDT_Int32)

    # mask using DE Africa crop mask and wofs
    # load DE Africa crop mask 2019 on the grid of the exported tile
    xr_ds=rioxarray.open_rasterio(outname).to_dataset(name='field_instance')
    cm = dc.load(product='crop_mask',measurements=['filtered'],like=xr_ds,time=('2019'))
    np_crop_mask=cm['filtered'].squeeze().to_numpy()
    np_crop_mask=np_crop_mask==1

    # load wofs layer
    wofs = dc.load(product="wofs_ls_summary_alltime",measurements=['frequency'],like=xr_ds,resampling='nearest')
#     wofs = dc.load(product="wofs_ls_summary_annual",measurements=['frequency'],like=xr_ds,time=('2021'),resampling='nearest')
    water_mask=wofs['frequency'].squeeze().to_numpy()

    # valid pixel = crop-mask pixel whose wet frequency is below t_wofs or missing
    overall_mask=(np_crop_mask)&((water_mask<t_wofs)|(np.isnan(water_mask)))

    # mean of the boolean mask over each labelled field
    table = measure.regionprops_table(
        instances_labelled,
        overall_mask,
        properties=('label','intensity_mean'),
    )
    condition = table['intensity_mean']>t_pct_crop
    # zero out labels not meeting condition
    input_labels = table['label']
    output_labels = input_labels * condition
    instances_labelled_masked = util.map_array(
        instances_labelled, input_labels, output_labels
    )

    # export masked as geotiff
    outname_masked=outname.replace('unmasked','masked')
    export_geotiff(outname_masked,instances_labelled_masked,geotrans,proj,gdal.GDT_Int32)

    # polygonise masked
    print('polygonizing filtered field instances...')
    outname_shp=outname_masked.replace('.tif','.shp')
    shp_tiles_masked.append(outname_shp)
    if os.path.exists(outname_shp):
        print('file existing, skipping...')
    else:
        # the masked raster doubles as its own mask (-mask)
        cmd=['gdal_polygonize.py','-8','-mask',outname_masked,'-b','1',outname_masked,outname_shp]
        # check=True: fail loudly instead of recording a shapefile that was never written
        subprocess.run(cmd,check=True)

    # remove rasters to save space (the masked tile raster is kept)
    os.remove(out_bound)
    os.remove(out_extent)
    os.remove(out_instance)
    os.remove(outname)

processing tile  0
retiling the boundary, extent and instance rasters...
doing segmentation...
number of field instances after segmentation:  322687
number of field instances after labelling:  328255
polygonizing filtered field instances...
0...10...20...30...40...50...60...70...80...90...Creating output results_retiled_new/Mozambique_field_instance_2021_04_08_12_masked_tile_0.shp of format ESRI Shapefile.
100 - done.


In [None]:
# gdf_tiles_masked=[gpd.read_file(x) for x in shp_tiles_masked]
# gdf_merged_masked=gpd.GeoDataFrame(pd.concat(gdf_tiles_masked))
# crs=gdf_merged_masked.crs
# gdf_merged_masked=gdf_merged_masked.buffer(-2,resolution=1)
# geoms_dissolved = gdf_merged_masked.unary_union
# geoms_dissolved=geoms_dissolved.buffer(2,resolution=1)
# gdf_dissolved=gpd.GeoDataFrame({"geometry": [geoms_dissolved]}, crs=crs)
# gdf_dissolved.to_file('results_retiled/Rwanda_field_instance_2021_04_10_12_tiles_masked_merged.shp')

## Do mosaic

In [2]:
# Mosaic the masked tile rasters written by the per-tile loop.
# File names are derived from the run parameters (out_folder, extent_mosaic)
# instead of being hard-coded, so the command follows the configured
# country/period rather than a previous run's file names.
masked_tile_pattern=os.path.join(
    out_folder,
    os.path.basename(extent_mosaic).replace('extent_prob','field_instance').replace('mosaic','masked_tile_*'),
)
masked_mosaic=masked_tile_pattern.replace('masked_tile_*','masked_mosaic')

masked_tiles=sorted(glob(masked_tile_pattern))
if not masked_tiles:
    raise FileNotFoundError('no masked tiles found matching '+masked_tile_pattern)

# check=True surfaces a gdal_merge.py failure instead of ignoring it
subprocess.run(['gdal_merge.py','-o',masked_mosaic,*masked_tiles,'-co','COMPRESS=DEFLATE'],check=True)

0...10...20...30...40...50...60...70...80...90...100 - done.
