This notebook implements morphological filtering and rule-based reclassification using external layers.

## load packages

In [None]:
%matplotlib inline
import os
import datacube
import warnings
import numpy as np
import geopandas as gpd
import pandas as pd
import xarray as xr
import rioxarray
from rasterio.enums import Resampling
from datacube.utils.cog import write_cog
from deafrica_tools.spatial import xr_rasterize
from skimage.morphology import binary_dilation,disk,area_closing,remove_small_holes
from skimage.filters.rank import modal
from skimage.segmentation import expand_labels
from odc.algo import xr_reproject
import matplotlib.pyplot as plt
from deafrica_tools.bandindices import calculate_indices
from deafrica_tools.coastal import get_coastlines
from glob import glob
import subprocess

### set input layers and parameters

In [None]:
# output CRS: WGS84 / UTM zone 36S
output_crs = 'epsg:32736'

# pixel value used for each land cover class in the classification rasters
dict_map = {
    'Tree crops': 11,
    'Field crops': 12,
    'Forest plantations': 21,
    'Grassland': 31,
    'Wetland': 41,
    'Water body': 44,
    'Settlements': 51,
    'Bare soils': 61,
    'Mangrove': 70,
    'Mecrusse': 71,
    'Broadleaved (Semi-) evergreen forest': 72,
    'Broadleaved (Semi-) deciduous forest': 74,
    'Mopane': 75,
}

# --- input file paths ---
# country boundary
mozambique_boundary_shp = 'Data/Mozambique_boundary.shp'
# OSM river network data
river_network_shp = 'Data/hotosm_moz_waterways_lines_filtered.shp'
# OSM road network data
road_network_shp = 'Data/hotosm_moz_roads_lines_filtered.shp'
# rasterised Google building layer
google_building_raster = 'Data/GoogleBuildingLayer_Mozambique_rasterised.tif'
# hydrologically adjusted elevations, i.e. height above the nearest drainage (HAND)
hand_raster = 'Data/hand_Mozambique_UInt16.tif'
# 2019 WSF raster
wsf2019_raster = 'Data/WSF2019_v1_Mozambique_clipped.tif'

## load layers

In [None]:
# import the Mozambique boundary and reproject it to the output CRS
mozambique_boundary=gpd.read_file(mozambique_boundary_shp).to_crs(output_crs)

# load and pre-process external vector layers
road_network=gpd.read_file(road_network_shp).to_crs(output_crs) # import OSM road network data and reproject
# select roads by their 'surface' attribute; .copy() gives an independent
# GeoDataFrame so the geometry assignment below does not act on a selection
# of the original frame (avoids pandas' SettingWithCopyWarning)
road_network=road_network.loc[road_network['surface'].isin(['asphalt', 'paved', 'compacted', 'cobblestone',
                                                             'concrete', 'metal', 'paving_stones',
                                                             'paving_stones:30'])].copy()
road_network.geometry=road_network.geometry.buffer(10) # buffer the road network by 10 CRS units (metres in UTM)

river_network=gpd.read_file(river_network_shp).to_crs(output_crs) # import OSM river network data and reproject
river_network=river_network.loc[river_network['waterway'].isin(['canal','river'])] # keep canals and rivers only

# load the coastline layer for the bounding box of the first boundary feature
shorelines_gdf = get_coastlines(mozambique_boundary.bounds.iloc[0],crs=output_crs,layer='shorelines').to_crs(output_crs)
# select only the 2021 shoreline; .copy() for the same reason as for the roads
# NOTE(review): assumes the 'year' attribute is stored as a string - confirm
shorelines_gdf_2021=shorelines_gdf[shorelines_gdf['year']=='2021'].copy()
shorelines_gdf_2021.geometry=shorelines_gdf_2021.geometry.buffer(50000) # buffer the shoreline by 50km

### Find prediction tiles

In [None]:
# Collect the per-tile land cover prediction rasters.
# NOTE(review): the 'Mozmabique' spelling presumably matches the file names
# written by the prediction step - confirm before changing it.
tile_pattern = "Results/Mozmabique_land_cover_prediction_tile_*.tif"
classification2021_rasters = glob(tile_pattern)

# number of tiles found (shown as the cell output)
len(classification2021_rasters)

### loop through tiles for reclassification and export as geotiffs

In [None]:
# Loop through the prediction tiles. For each tile: clip the external rasters
# to the tile extent, smooth the classification with a mode filter, apply the
# rule-based reclassification (the order of the rules matters, later rules
# override earlier ones) and export the result as a cloud-optimised GeoTIFF.
for classification2021_raster in classification2021_rasters:
    print('processing ',classification2021_raster)
    outname_postprocessed=classification2021_raster.replace('prediction','prediction_postprocessed')
    if os.path.exists(outname_postprocessed):
        print('tile processed, skipping...')
        continue

    # clip rasters to tile extent
    tile_shp='Results/Mozambique_tile_extent.shp' # output tile extent
    # gdaltindex appends to an existing index, so remove the previous tile's
    # shapefile together with its sidecar files to start from a clean index
    for ext in ('.shp','.shx','.dbf','.prj','.cpg'):
        stale_file=tile_shp[:-4]+ext
        if os.path.exists(stale_file):
            os.remove(stale_file)
    google_building_clipped=google_building_raster[:-4]+'_clipped.tif' # clipped google building mask layer
    hand_raster_clipped=hand_raster[:-4]+'_clipped.tif' # clipped hand layer
    wsf2019_raster_clipped=wsf2019_raster[:-4]+'_clipped.tif' # clipped WSF 2019 layer
    # check=True: the clipped file names are shared by all tiles, so a GDAL
    # failure must stop processing instead of silently reusing the clipped
    # rasters left over from a previous tile
    subprocess.run(['gdaltindex',tile_shp,classification2021_raster],check=True)
    subprocess.run(['gdalwarp','-cutline',tile_shp,'-crop_to_cutline', '-t_srs',output_crs,'-tr','10','10',
                    '-r','near',google_building_raster,google_building_clipped,'-overwrite','-ot','Byte'],check=True)
    subprocess.run(['gdalwarp','-cutline',tile_shp,'-crop_to_cutline','-t_srs',output_crs,'-tr','10','10',
                    '-r','bilinear',hand_raster,hand_raster_clipped,'-overwrite','-ot','UInt16'],check=True)
    subprocess.run(['gdalwarp','-cutline',tile_shp,'-crop_to_cutline', '-t_srs',output_crs,'-tr','10','10',
                    '-r','near',wsf2019_raster,wsf2019_raster_clipped,'-overwrite','-ot','Byte'],check=True)

    # read in tiled raster
    landcover2021=rioxarray.open_rasterio(classification2021_raster).astype(np.uint8).squeeze()
    # get geobox (target grid for all other layers)
    ds_geobox=landcover2021.geobox

    # load s2 annual geomedian and calculate MNDWI
    # NOTE(review): the arrays loaded via x/y/resolution are combined
    # element-wise with the tile array below, so they are assumed to come back
    # on exactly the tile grid - confirm
    bbox=ds_geobox.extent.boundingbox
    dc = datacube.Datacube(app='s2_geomedian')
    query_geomedian= {
        'time': ('2021'),
        'x': (bbox[0],bbox[2]),
        'y': (bbox[1],bbox[3]),
        'resolution':(-10, 10),
        'crs':output_crs,
        'output_crs': output_crs,
        'measurements':['green','swir_1']
    }
    ds_geomedian = dc.load(product="gm_s2_annual", **query_geomedian)
    ds_MNDWI = calculate_indices(ds=ds_geomedian, index='MNDWI', satellite_mission='s2',drop=True).squeeze()

    # load DE Africa crop mask 2019
    dc = datacube.Datacube(app='cropland_extent')
    query = {
        'time': ('2019'),
        'x': (bbox[0],bbox[2]),
        'y': (bbox[1],bbox[3]),
        'resolution':(-10, 10),
        'crs':output_crs,
        'output_crs': output_crs,
    }
    # now load the crop-mask using the query
    cm = dc.load(product='crop_mask',**query).squeeze()
    np_crop_mask=cm['mask'].to_numpy()

    # data array to numpy array
    np_landcover2021=landcover2021.squeeze().to_numpy()

    # mode filtering for a smoother classification map; pixels with value 0
    # are excluded from the filter through the mask
    np_landcover2021_post=modal(np_landcover2021,footprint=disk(2),mask=np_landcover2021!=0)

#     # (disabled rule) assign Field crops pixels outside DE Africa 2019 cropland mask as Grassland
#     np_landcover2021_post[(np_landcover2021_post==dict_map['Field crops'])&(np_crop_mask!=1)]=dict_map['Grassland']

    # assign Grassland pixels inside DE Africa 2019 cropland mask as Tree crops
    np_landcover2021_post[(np_landcover2021_post==dict_map['Grassland'])&(np_crop_mask==1)]=dict_map['Tree crops']

    # assign Bare soils pixels inside DE Africa 2019 cropland mask as Field crops
    np_landcover2021_post[(np_landcover2021_post==dict_map['Bare soils'])&(np_crop_mask==1)]=dict_map['Field crops']

    # assign Wetland pixels inside DE Africa 2019 cropland mask as Field crops
    np_landcover2021_post[(np_landcover2021_post==dict_map['Wetland'])&(np_crop_mask==1)]=dict_map['Field crops']

    # reassign forest patches smaller than 100 pixels (1 hectare at 10m) to surrounding class
    forest_mask=np.full(np_landcover2021_post.shape,True)
    forest_mask[(np_landcover2021_post==dict_map['Broadleaved (Semi-) evergreen forest'])
                |(np_landcover2021_post==dict_map['Broadleaved (Semi-) deciduous forest'])
                |(np_landcover2021_post==dict_map['Forest plantations'])]=0 # forest pixels become the "holes"
    forest_mask_filled=remove_small_holes(forest_mask, area_threshold=100, connectivity=2) # fill holes smaller than 100 pixels
    forest_mask=(forest_mask!=forest_mask_filled) # identify the filled small regions
    lc_copy=np_landcover2021_post.copy()
    lc_copy[forest_mask==1]=0 # assign the regions as background
    lc_copy_filled=expand_labels(lc_copy,distance=10000) # expand surrounding classes into the background
    mask=(lc_copy_filled!=lc_copy) # identify filled/changed areas
    np_landcover2021_post[mask]=lc_copy_filled[mask] # copy the filled/changed pixels

    # Make sure water only occurs at the bottom of watersheds (HAND<=45) or within OSM river networks;
    # assign water pixels outside these areas as surrounding class
    hand=xr.open_dataset(hand_raster_clipped,engine="rasterio").squeeze() # import hand layer
    hand=xr_reproject(hand, ds_geobox, resampling="average")
    np_hand=hand.to_array().squeeze().to_numpy()
    del hand
    np_river_network_mask=xr_rasterize(gdf=river_network,da=landcover2021.squeeze(),
                                    transform=landcover2021.geobox.transform,crs=output_crs) # rasterise OSM river network layer
    np_river_network_mask=xr_reproject(np_river_network_mask, ds_geobox, resampling="nearest")
    np_river_network_mask=np_river_network_mask.squeeze().to_numpy() # data array to numpy array
    temp=np_landcover2021_post.copy()
    temp[(np_landcover2021_post==dict_map['Water body'])&(np_hand>45)&(np_river_network_mask!=1)]=0
    temp_closed=expand_labels(temp,distance=10000)
    mask=(temp!=temp_closed)
    np_landcover2021_post[mask]=temp_closed[mask]

    # assign pixels overlapping OSM river network as Water body
    np_landcover2021_post[np_river_network_mask==1]=dict_map['Water body']

    # assign Wetland pixels outside DE Africa 2019 cropland mask and with HAND>90 as surrounding classes
    temp=np_landcover2021_post.copy()
    temp[(np_landcover2021_post==dict_map['Wetland'])&(np_hand>90)&(np_crop_mask!=1)]=0
    temp_closed=expand_labels(temp,distance=10000)
    mask=(temp!=temp_closed)
    np_landcover2021_post[mask]=temp_closed[mask]

    # assign pixels overlapping google building polygons (value 1) or WSF 2019 (value 255) as built-up
    google_buildings=xr.open_dataset(google_building_clipped,engine="rasterio").astype(np.int8).squeeze() # import google building layer
    google_buildings=xr_reproject(google_buildings, ds_geobox, resampling="nearest")
    np_google_buildings=google_buildings.to_array().squeeze().to_numpy() # data array to numpy array
    del google_buildings
    np_wsf2019=xr.open_dataset(wsf2019_raster_clipped,engine="rasterio").astype(np.int32).squeeze() # import WSF2019 layer
    np_wsf2019=xr_reproject(np_wsf2019, ds_geobox, resampling="nearest") # resample WSF layer to the tile grid
    np_wsf2019=np_wsf2019.to_array().squeeze().to_numpy()
    np_landcover2021_post[(np_google_buildings==1)|(np_wsf2019==255)]=dict_map['Settlements'] # apply rules

    # assign pixels overlapping buffered OSM road network as built-up class
    np_road_network_mask=xr_rasterize(gdf=road_network,da=landcover2021.squeeze(),
                                   transform=ds_geobox.transform,crs=output_crs) # rasterise buffered OSM road network layer
    np_road_network_mask=np_road_network_mask.squeeze().to_numpy() # data array to numpy array
    np_landcover2021_post[np_road_network_mask==1]=dict_map['Settlements'] # burn in buffered OSM road network polygons

#     # (disabled rule) reclassify wetlands around (within 50m of) built-up areas as tree crops
#     urban_buffered=binary_dilation(np_landcover2021_post==51,footprint=disk(5)) # dilating built-up regions
#     np_landcover2021_post[(urban_buffered==1)&(np_landcover2021_post==41)]=11 # apply rule

    # reassign water using MNDWI calculated from annual S2 geomedian (MNDWI>=0 -> Water body)
    np_MNDWI=ds_MNDWI['MNDWI'].to_numpy()
    np_landcover2021_post[np_MNDWI>=0]=dict_map['Water body']

    # reassign mangroves outside the buffered 2021 shoreline as Forest plantations
    np_shorelines_2021_mask=xr_rasterize(gdf=shorelines_gdf_2021,da=landcover2021.squeeze(),
                                    transform=ds_geobox.transform,crs=output_crs) # rasterise layer
    np_shorelines_2021_mask=np_shorelines_2021_mask.squeeze().to_numpy()
    np_landcover2021_post[(np_shorelines_2021_mask==0)&(np_landcover2021_post==dict_map['Mangrove'])]=dict_map['Forest plantations']

    # convert the result back to a DataArray on the tile's coordinates
    landcover2021_post=xr.DataArray(data=np_landcover2021_post,dims=['y','x'],coords={'y':landcover2021.y.to_numpy(), 'x':landcover2021.x.to_numpy()})
    landcover2021_post.rio.write_crs(output_crs, inplace=True)

    # export as geotiff
    write_cog(landcover2021_post, outname_postprocessed, overwrite=True)