In [9]:
import os

import fiona
import geopandas as gpd
import numpy as np
import pandas as pd
import rasterio
from rasterio.features import geometry_mask, rasterize
from rasterio.transform import from_bounds
from shapely.geometry import mapping

In [10]:
# Input / output locations used by the cells below.
labels_dir, masks_dir = "../data/labels", "../data/masks"
metadata_file = "../data/metadata.csv"

# Load the per-image metadata table and print a summary of its columns.
df_meta = pd.read_csv(metadata_file)
df_meta.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 174 entries, 0 to 173
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   filename  174 non-null    object 
 1   left      174 non-null    float64
 2   right     174 non-null    float64
 3   bottom    174 non-null    float64
 4   top       174 non-null    float64
 5   width     174 non-null    int64  
 6   height    174 non-null    int64  
dtypes: float64(4), int64(2), object(1)
memory usage: 9.6+ KB


In [11]:
# Add a "filename_map" column (the image filename without its "_ortho.tif"
# part) that is used as the lookup key to find the metadata for each image.
# regex=False makes the match literal: the default for `regex` differs between
# pandas versions, and as a regular expression the "." would match any
# character instead of only a dot.
df_meta["filename_map"] = df_meta["filename"].str.replace(
    "_ortho.tif", "", regex=False
)
df_meta.head()

Unnamed: 0,filename,left,right,bottom,top,width,height,filename_map
0,uavforsat_2017_CFB017_ortho.tif,8.220366,8.222638,47.812257,47.814575,28922,29500,uavforsat_2017_CFB017
1,uavforsat_2017_CFB129_ortho.tif,7.977607,7.980319,48.012641,48.014601,22612,16339,uavforsat_2017_CFB129
2,uavforsat_2020_CFB181_ortho.tif,8.16446,8.167462,48.163655,48.165445,28367,16914,uavforsat_2020_CFB181
3,uavforsat_2020_CFB132_ortho.tif,8.366478,8.36945,48.14807,48.150446,33871,27088,uavforsat_2020_CFB132
4,uavforsat_2019_CFB028_ortho.tif,8.010098,8.01328,47.914517,47.917256,20292,17465,uavforsat_2019_CFB028


In [12]:
# Sanity check: every entry in the labels directory must be a .gpkg file whose
# base name (filename without "_ortho_polygons.gpkg") has a row in df_meta.
# The assertion messages name the offending file so a failure is actionable.
known_filename_maps = set(df_meta["filename_map"])
for filename in os.listdir(labels_dir):
    assert filename.endswith(
        ".gpkg"
    ), f"unexpected non-gpkg entry in {labels_dir}: {filename}"
    assert (
        filename.replace("_ortho_polygons.gpkg", "") in known_filename_maps
    ), f"no metadata entry found for label file: {filename}"

In [13]:
# For every gpkg file in labels_dir, write a single-band uint8 GeoTIFF mask to
# masks_dir. The mask covers the bounding box of the file's "aoi" layer; pixels
# covered by a "standing_deadwood" polygon are 1, all other pixels are 0.

# The GeoTIFF writer does not create missing directories, so make sure the
# output directory exists before the first write.
os.makedirs(masks_dir, exist_ok=True)

# sorted() gives a deterministic processing order (os.listdir order is arbitrary)
for filename in sorted(os.listdir(labels_dir)):
    if not filename.endswith(".gpkg"):
        continue
    filepath = os.path.join(labels_dir, filename)

    # Get metadata for current gpkg file
    filename_map = filename.replace("_ortho_polygons.gpkg", "")
    file_meta = df_meta.loc[df_meta["filename_map"] == filename_map].to_dict(
        "records"
    )[0]
    # Pixels per coordinate unit along the x axis of the source image.
    # NOTE(review): the same value is used for the y axis below, i.e. pixels
    # are assumed to be square in coordinate units — confirm, or derive a
    # separate y resolution from "height" / ("top" - "bottom").
    resolution = file_meta["width"] / abs(file_meta["right"] - file_meta["left"])

    # Crop the image based on the extent of the aoi layer.
    # total_bounds is (minx, miny, maxx, maxy).
    gdf_aoi = gpd.read_file(filepath, layer="aoi")
    aoi_bounds = gdf_aoi.total_bounds
    out_image_height = int(abs(aoi_bounds[1] - aoi_bounds[3]) * resolution)
    out_image_width = int(abs(aoi_bounds[0] - aoi_bounds[2]) * resolution)

    out_image = np.zeros((out_image_height, out_image_width), dtype=np.uint8)
    transform = from_bounds(
        *aoi_bounds, width=out_image_width, height=out_image_height
    )

    # Read in gpkg file and determine if standing deadwood is present;
    # files without that layer produce an all-zero mask.
    layers = fiona.listlayers(filepath)
    if "standing_deadwood" in layers:
        gdf_label = gpd.read_file(filepath, layer="standing_deadwood")

        # Drop null/empty geometries: they cannot be rasterized and would
        # otherwise raise inside mapping() / geometry_mask().
        label_geoms = gdf_label["geometry"]
        label_geoms = label_geoms[~(label_geoms.isna() | label_geoms.is_empty)]

        # Rasterize all polygons in one call instead of one full-size boolean
        # mask per polygon; the union of the per-polygon masks is identical.
        # Guarded because geometry_mask() rejects an empty geometry list.
        if len(label_geoms) > 0:
            mask = geometry_mask(
                [mapping(geom) for geom in label_geoms],
                transform=transform,
                invert=True,  # True inside the polygons
                out_shape=out_image.shape,
            )
            out_image[mask] = 1

    # Save image
    # NOTE(review): the CRS is hard-coded; this presumes the gpkg layers and
    # the metadata bounds are both in EPSG:4326 — confirm against the data.
    with rasterio.open(
        os.path.join(masks_dir, filename_map + "_ortho_mask.tif"),
        "w",
        driver="GTiff",
        height=out_image.shape[0],
        width=out_image.shape[1],
        count=1,
        dtype="uint8",
        crs="EPSG:4326",
        transform=transform,
    ) as dst:
        dst.write(out_image, 1)