In [1]:
#Standard libraries
import os
import time
import sys
#Third party libraries
import rasterio
import rasterio.plot
import rasterio.mask
import rasterio.io
import matplotlib.pyplot as plt
import numpy as np
np.set_printoptions(legacy='1.25')
import geopandas
import pandas
import fiona
from tqdm.notebook import tqdm
from shapely.geometry import Polygon
#Local applications
package_dir = os.path.dirname(os.getcwd())
if package_dir not in sys.path:
    sys.path.insert(0, package_dir)
from hpr_detection_toolkit import utils
from hpr_detection_toolkit.line_detection import LineSegmentDetector
from hpr_detection_toolkit.hpr_detection import HprDitchDetector

# Setting up
First we define the coordinate reference system in which we want to work in. 

In [2]:
target_crs = "EPSG:31370"

Next, we get point our program to the necessary data. Later, we load the data using the `utils` module of this package, which will check if the crs of the data sets and performs a reprojection on the fly if needed.

In this example, we will use the vector data of the biological value map (BWK) and the agricultural usages plots (Lgp or Lbgebrperc), and the raster data of the VITO AI map regarding microrelief.

> **Adjust the filepath for your own local set-up!**

In [13]:
bwk_filename = 'E:/Stage INBO/Data/BWK_2_20231107_GML/GML/BwkHab.gml'  # change this file path!
lgp_filename = 'E:/Stage INBO/Data/Landbouwaangifte 2016-2020/Lbgb2016_2020.gdb'  # change this file path!
VITO_dirname = 'E:/Stage INBO/Data/VITO_microrelief'  # change this file path!

map_sheet_numbers = range(19,22)  # inspect KB19, KB20 and KB21
map_sheet_numbers = [20]  # inspect only KB20

output_dirname = 'DetectionMicrorelief'
if not os.path.isdir(output_dirname): os.makedirs(output_dirname)

# Masking the search region in the BWK
Not all grasslands should be inspected. The once that are already categorised as valuable don't need to be processed. To evaluate this, we use the BWK, making a filter out of it to select the right grasslands to perform the analyses on. 

1. We should only analyse grasslands that are not a habitat (HAB1 contains `gh` or `rbb`)
2. Grasslands that are already categorised as `hpr`of `hpr+` in EENH1, don't need to be analysed

To easily work with the search and nosearch region, we unify all the polygons into one geometry and store it as a geopandas.DataFrame. This step may already been preprocessed in the past. Therefore we check if the file already exist, and if not we calculate it and store it for future analyses.

In [4]:
preprocessing_filename = 'data/DetectionMicrorelief_preprocessing.gpkg'
preprocessed_layers = {}

layers_to_load = ['search_region', 'nosearch_region']
if os.path.isfile(preprocessing_filename):
    print(f'Preprocessed file detected.')
    print(f'Checking layers in {preprocessing_filename}')
    for layer_name in layers_to_load:
        if layer_name in fiona.listlayers(preprocessing_filename):
            preprocessed_layers[layer_name] = geopandas.read_file(preprocessing_filename, layer=layer_name)
            print(f'Layer {layer_name} found in {preprocessing_filename} and loaded in')

for layer_name in layers_to_load:
    if layer_name not in preprocessed_layers.keys():
        print(f'Layer {layer_name} not found in {preprocessing_filename}')
        if layer_name == 'nosearch_region':
            nosearch_region = bwk[~mask].geometry.union_all()
            preprocessed_layer['nosearch_region'] = geopandas.GeoDataFrame(geometry=[nosearch_region], crs=target_crs)
            preprocessed_layer['nosearch_region'].to_file(preprocessing_filename, driver='GPKG', mode='a', layer='nosearch_region')
            print(f'Layer {layer_name} calculated and saved to {preprocessing_filename}')
        if layer_name == 'search_region':
            search_region = bwk[mask].geometry.union_all()
            preprocessed_layer['search_region'] = geopandas.GeoDataFrame(geometry=[search_region], crs=target_crs)
            preprocessed_layer['search_region'].to_file(preprocessing_filename, driver='GPKG', mode='a', layer='search_region')
            print(f'Layer {layer_name} calculated and saved to {preprocessing_filename}')

Preprocessed file detected.
Checking layers in data/DetectionMicrorelief_preprocessing.gpkg
Layer search_region found in data/DetectionMicrorelief_preprocessing.gpkg and loaded in
Layer nosearch_region found in data/DetectionMicrorelief_preprocessing.gpkg and loaded in


# Analysing the permanent grasslands
The categorisation of the grassland is based on the presence of ditches in the landplot. When a buffer zone around the ditches of 15 m covers 70% of the landplot's area, the grassland can be categorised as HPR. Therefore, we will try to detect the ditches and calculate the buffer fraction, which we store as a new attribute for each landplot.

The landplots that are grassland can be obtained from the agriculture usage. We look only at permanent grassland, meaning the landplot has always been categorised as grassland during a certain time period (here 2016 to 2023)

In [5]:
lgp = utils.open_vector_data(lgp_filename, layer='Lbgebrperc2016_2023', target_crs=target_crs)
grasslands = lgp[lgp['lgp_7j_BWK'] == 'Permanent grasland - hp']

Vector data in E:/Stage INBO/Data/Landbouwaangifte 2016-2020/Lbgb2016_2020.gdb in EPSG:31370, no reprojection.


Next we analyse the permanent grasslands. Here we loop over all the different map sheets of the VITO map that we want to inspect. Because of this loop the process cannot be broken up to explain the different steps. For a more detailed explanation, look into the notebook that only handles map sheet 20 (KB20).

In [6]:
for KBnr in map_sheet_numbers:
    title_string = f'##### Analysing KB{KBnr} #####'
    print('-' * len(title_string))
    print(title_string)
    print('-' * len(title_string))

    
    output_filename = output_dirname + f'/KB{KBnr}.gpkg'
    
    
    VITO_map = utils.open_raster_data(VITO_dirname + f'/KB{KBnr}_predicted_logits.tif', target_crs=target_crs)
    left, bottom, right, top = VITO_map.bounds
    KB_boarder = Polygon([(left, bottom), (left, top), (right, top), (right, bottom)])
    grasslands_KB = grasslands[grasslands.within(KB_boarder)]
    print(f'KB{KBnr} contains {len(lgp[lgp.within(KB_boarder)])} landplots,' +
          f' of which {len(grasslands_KB)} ({len(grasslands_KB)/len(lgp[lgp.within(KB_boarder)])*100:.0f}%) are permanent grasslands')

    
    print(f'Evaluating which grasslands to process')
    grasslands_to_inspect = None
    if os.path.isfile(output_filename):
        print(f"> Output file '{output_filename}' detected.")
        print(f"> Checking if file contains selection of grasslands to inspect.")
        if 'grassland_selection' in fiona.listlayers(output_filename):
            grasslands_to_inspect = geopandas.read_file(output_filename, layer='grassland_selection')
            print(f'> Grassland selection found in and loaded in')
    if grasslands_to_inspect is None:
        mask_grasslands = np.zeros(len(grasslands), dtype=bool)
        for i in tqdm(range(len(grasslands_KB)), total=len(grasslands_KB), desc="Selecting grasslands to analyse"):
            intersection = grasslands_KB.iloc[i]['geometry'].intersection(preprocessed_layers['search_region'].loc[0,'geometry'])
            overlap_fraction = intersection.area / grasslands_KB.iloc[i]['geometry'].area
            if overlap_fraction > .75:
                mask_grasslands[i] = True
        grasslands_to_inspect = grasslands_KB[mask_grasslands]
        grasslands_to_inspect.to_file(output_filename, driver='GPKG', mode='a', layer='grassland_selection')
        print(f"> Grasslands selected and stored to layer 'grassland_selection' in {processed_filename}.")    
    print(f'KB{KBnr} contains {len(grasslands_KB)} permanent grasslands, of which {len(grasslands_to_inspect)}' + 
          f' ({len(grasslands_to_inspect)/len(grasslands_KB)*100:.0f}%) need to be analysed.')

    
    print('Initialising the hpr ditch detector')
    grasslands_to_inspect['ditch_buffer_fraction'] = np.nan
    lsd = LineSegmentDetector()
    user_config = {'buffer_zone': {
                        'distance': 15.},
                   'filter_background': {
                        'threshold_value': .5*255}
                  }
    hpr_detector = HprDitchDetector(VITO_map, lsd, config=user_config)

    
    ditches = [None] *len(grasslands_to_inspect)
    buffer_zones = [None] *len(grasslands_to_inspect)
    for i in tqdm(range(len(grasslands_to_inspect)), desc="Processeing selected grasslands"):
        index = grasslands_to_inspect.index[i]
        landplot = grasslands_to_inspect.iloc[i:i+1]
        hpr_detector.process(landplot)
        ditches[i] = hpr_detector.get_ditches(multilinestring=True).geometry.loc[0]
        buffer_zones[i] = hpr_detector.get_buffer_zone().geometry
        if buffer_zones[i] is not None: buffer_zones[i] = buffer_zones[i].loc[0]
        grasslands_to_inspect.loc[index,'ditch_buffer_fraction'] = hpr_detector.get_hpr_fraction()  
    ditches = geopandas.GeoDataFrame(geometry=ditches, crs=target_crs)
    buffer_zones = geopandas.GeoDataFrame(geometry=buffer_zones, crs=target_crs)
    print(f'Of the {len(grasslands_to_inspect)} inspected grasslands in KB{KBnr}, ' + 
      f'{len(grasslands_to_inspect[grasslands_to_inspect.ditch_buffer_fraction > .65])} ' + 
      f'of them are hpr candidates based on ditch detection during this processing')

    print('Saving the processed grasslands, ditches and buffer zones')
    grasslands_to_inspect.to_file(output_filename, driver='GPKG', layer='grasslands_processed')
    ditches.to_file(output_filename, driver='GPKG', layer='ditches')
    buffer_zones.to_file(output_filename, driver='GPKG', layer='buffer_zones')
    

    plot_directory = output_dirname + '/plot')
    if not os.path.isdir(plot_directory): os.makedirs(plot_directory)
    for i in tqdm(range(len(grasslands_to_inspect)), desc="Plotting newly detected hpr grasslands"):
        grassland = grasslands_to_inspect.iloc[i]
        
        if grassland.ditch_buffer_fraction > .65:          
            clipped_image = utils.clip_raster(VITO_map, grassland.geometry.geoms)
        
            fig, ax = plt.subplots()
            # Display the raster image
            rasterio.plot.show(clipped_image, ax=ax, cmap='gray_r')
            # Plot the detected lines
            grasslands_to_inspect[i:i+1].plot(ax=ax, edgecolor='green', lw=1, label='Perceel', facecolor='none')
            buffer_zones.iloc[i:i+1].plot(ax=ax, color='orange', alpha=0.5, label='30m Buffer')
            ditches.iloc[i:i+1].plot(ax=ax, color='purple', linewidth=2, label='Grachtjes')
            
            fig.suptitle("Detected Lines on Georeferenced Image")
            ax.set_title(f"Ditches detected with a buffer zone fraction of {grassland.ditch_buffer_fraction}")
            ax.set_xlabel("Longitude")
            ax.set_ylabel("Latitude")
        
            fig.savefig(plot_directory + f'/KB20_grassland-{grassland.OBJECTID}.png')
            plt.close(fig)

    print('-' * len(title_string))

--------------------------
##### Analysing KB20 #####
--------------------------
Raster data in E:/Stage INBO/Data/VITO_microrelief/KB20_predicted_logits.tif in EPSG:31370, no reprojection.
KB20 contains 44999 landplots, of which 10882 (24%) are permanent grasslands
Evaluating which grasslands to process


Selecting grasslands to analyse:   0%|          | 0/10882 [00:00<?, ?it/s]

DataSourceError: sqlite3_open(./DetectionMicrorelief\KB20.gpkg) failed: unable to open database file