Test vectorising field instances in each chunk and merging the results.

For each region of interest, e.g. a district, the following steps can be used to extract all field instances:
- find all field instance chunks
- trim the tile edges while preserving small overlap between tiles
- convert instances to polygons
- shrink the polygons so they don't overlap
- dissolve all polygons, merging overlapping shapes
- explode multi-part geometries into separate single geometries
- expand the instances to reduce boundary area
- (optionally) remove geometries that are too small
- simplify the geometries and save the results

In [1]:
import os
import subprocess
from glob import glob

import datacube
import geopandas as gpd
import numpy as np
import pandas as pd
import rasterio
import rioxarray
import rioxarray as rxr
from osgeo import gdal
from skimage import measure, morphology

from datacube.utils.geometry import assign_crs
from deafrica_tools.spatial import xr_vectorize




In [2]:
# --- Run configuration -------------------------------------------------------
# Country and year; both are part of the chunk file names searched for below.
country = 'Mozambique'
str_year = '2021'

# Folder holding the input field-instance chunks.
input_folder = 'results'
# Folder that receives the exported results.
out_folder = 'results/processed'

# Pixels trimmed from every chunk border
# (note: the actual overlap between chunks is 4 pixels).
crop_size = 2
# Pixels used for dilation of the crop mask.
n_dilate = 2

In [3]:
# Collect every field-instance chunk for the configured country and year.
# glob() returns matches in arbitrary order, so sort them to make the
# processing order (and the row order of the merged result) reproducible.
files_instances = sorted(glob(os.path.join(
    input_folder, country + '_average_field_instance_' + str_year + '*.tif')))
print('found {} cropped field instances chunks'.format(len(files_instances)))

found 456 cropped field instances chunks


In [4]:
%%time
# list of file names (cropped)
files_instances_cropped = []

field_instances_all = []
for file_instances in files_instances:
    # extract size info
    ds = rasterio.open(file_instances)
    crs = ds.crs.to_string()
    xsize = ds.width-2*crop_size
    ysize = ds.height-2*crop_size
    ds = None

    # crop field instances chunk
    outname_instances = os.path.join(
        out_folder, os.path.basename(file_instances)[:-4]+'_edgecropped.tif')
    files_instances_cropped.append(outname_instances)
#     if not os.path.exists(outname):
    gdal_cmd = ["gdal_translate", "-of", "GTiff", "-srcwin", str(crop_size), str(
        crop_size), str(xsize), str(ysize), file_instances, outname_instances]
    subprocess.run(gdal_cmd, stdout=subprocess.DEVNULL)

    # vector output
    outname_vector = os.path.join(out_folder, os.path.basename(
        file_instances)[:-4]+'_edgecropped_vector.geojson')
    da = rxr.open_rasterio(outname_instances)
    da = assign_crs(da, crs)
    field_instances = xr_vectorize(da,
                                   attribute_col='attribute',
                                   # transform=None,
                                   crs=da.geobox.crs,
                                   dtype='float32',
                                   export_shp=outname_vector,
                                   verbose=False
                                   )
    da = None
    field_instances_all.append(field_instances)

CPU times: user 1min, sys: 4.73 s, total: 1min 5s
Wall time: 1min 35s


In [5]:
# Stack the per-chunk GeoDataFrames into a single table.
# Each chunk carries its own index, so the labels repeat across chunks;
# ignore_index=True gives the merged table one unique, consecutive index.
field_instances_merged = pd.concat(field_instances_all, ignore_index=True)

In [6]:
# number of rows (vectorised shapes) across all chunks before dissolving
field_instances_merged.shape[0]

49983

In [33]:
# Work on a copy so the merged table stays untouched, then pull every polygon
# boundary inwards by 3 CRS units (negative buffer) so that shapes from
# neighbouring chunks no longer overlap.
field_instances_combined = field_instances_merged.copy()
field_instances_combined['geometry'] = (
    field_instances_combined.geometry.buffer(-3)
)

In [34]:
# Dissolve everything into one geometry (merging overlapping shapes), split
# the result back into single-part geometries, and keep only the part number
# of the resulting two-level index as the new row index.
field_instances_combined = (
    field_instances_combined
    .dissolve()
    .explode(index_parts=True)
    .reset_index(level=0, drop=True)
)

In [35]:
# number of single-part geometries after dissolving and exploding
field_instances_combined.shape[0]

31025

In [36]:
# Re-label each geometry with its row index.
field_instances_combined['attribute'] = field_instances_combined.index.to_numpy()
# Grow the geometries back by 2 CRS units, then simplify with a tolerance of 1.
# NOTE: this overwrites the geometry column, so re-running the cell buffers again.
field_instances_combined['geometry'] = (
    field_instances_combined.geometry
    .buffer(2)
    .simplify(1)
)

In [37]:
# preview the first five merged field instances
field_instances_combined.head(n=5)

Unnamed: 0,attribute,geometry
0,0,"POLYGON ((4100053.977 -2002149.593, 4100058.34..."
1,1,"POLYGON ((4100080.817 -2002040.659, 4100104.95..."
2,2,"POLYGON ((4099915.154 -2001949.226, 4099923.68..."
3,3,"POLYGON ((4099927.390 -2001483.070, 4099926.18..."
4,4,"POLYGON ((4099902.269 -2001907.945, 4099904.15..."


In [38]:
# Build the output path from the configured folder and country rather than
# hard-coding them, so changing the configuration cannot silently write to
# (or overwrite) the file of a different country.
path = os.path.join(out_folder, country + '_field_instances_vector.geojson')
field_instances_combined.to_file(path, driver="GeoJSON")

In [48]:
# number of geometries whose area exceeds 20 (in squared CRS units)
int((field_instances_combined.area > 20).sum())

30727