# **Semantic Segmentation of Aerial Imagery for Building Footprint Extraction Using U-Net: Part 1**

## Install required packages
Before starting the building footprint extraction workflow with U-Net, we need to install a few essential Python libraries that are not included by default in Google Colab. The packages are installed with the `pip` package manager; the `--quiet` flag suppresses verbose output to keep the notebook clean.

In [None]:
# Install required packages
!pip install torchgeo --quiet
!pip install buildingregulariser --quiet
!pip install torchsummary --quiet

## Import required libraries

In [None]:
# Import libraries
import os
import numpy as np
import pandas as pd
import geopandas as gpd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchsummary import summary
import matplotlib.pyplot as plt
import seaborn as sns
import rasterio
from rasterio import features
import shapely.geometry as shp_geom
from buildingregulariser import regularize_geodataframe
from sklearn.metrics import f1_score, jaccard_score
import scipy.ndimage as nd

## Mount Google Drive and set the working directory

In [None]:
# Mount Google Drive
# The google.colab module is imported here rather than with the other imports
# because this cell is the only place it is used.
from google.colab import drive

# Attach Google Drive under /content/drive/
drive.mount('/content/drive/')

# Switch the working directory to the project folder so that the relative
# file names used later (e.g. 'TR9207a.tif') resolve inside it
os.chdir('/content/drive/MyDrive/Chit_TR9207')

## Load the raster image and rasterize building footprints

In [None]:
# Load raster and geojson

# Define the path to the aerial image (GeoTIFF file)
img_path = 'TR9207a.tif'

# Open the raster image using rasterio; the `with` block closes the file once
# the metadata and pixel values have been copied into memory
with rasterio.open(img_path) as src:
    transform = src.transform               # Get the affine transformation (pixel-to-coordinate mapping)
    height, width = src.height, src.width   # Extract image dimensions (rows and columns)
    raster_crs = src.crs                    # Get the coordinate reference system (CRS) of the raster
    image = src.read()                      # Read the raster data as a NumPy array (shape: [bands, height, width])

# Read building footprint polygons from the GeoJSON file into a GeoDataFrame
gdf = gpd.read_file('Bld_DSG_TR9207.geojson')

# The rasterization below places polygons on the pixel grid through the
# raster's affine transform, so the polygon coordinates must be expressed in
# the raster's CRS. Reproject a copy when both CRSs are known; `gdf` itself is
# left untouched for any later use. If either CRS is missing there is nothing
# to reproject from/to, so the geometries are used as they are.
gdf_for_mask = gdf
if raster_crs is not None and gdf.crs is not None:
    gdf_for_mask = gdf.to_crs(raster_crs)

# Build the (geometry, value) pairs, skipping missing or empty geometries
# because they cannot be burned into the mask
shapes = [
    (geom, 1)
    for geom in gdf_for_mask.geometry
    if geom is not None and not geom.is_empty
]

# Rasterize the vector geometries into a binary mask
# Each building polygon is assigned a value of 1, and the background is filled with 0
mask = features.rasterize(
    shapes,                                # List of (geometry, value) pairs
    out_shape=(height, width),             # Output mask will match the raster's dimensions
    transform=transform,                   # Use the same affine transform as the raster
    fill=0,                                # Set background (non-building) pixels to 0
    dtype=np.uint8                         # Use 8-bit unsigned integer type for the mask
)

## Define the custom dataset for patch extraction

In [None]:
# Define a custom PyTorch dataset for aerial image building segmentation
class AerialBuildingDataset(torch.utils.data.Dataset):
    """Serve non-overlapping square patches cut from an image and its mask.

    Patches are laid out on a regular grid starting at the top-left corner.
    Rows/columns left over at the bottom/right edge that cannot fill a whole
    patch are not used, so an image smaller than ``patch_size`` in either
    dimension yields an empty dataset.

    Args:
        image: Array indexed as [bands, height, width].
        mask: Array indexed as [height, width]; must match the image's
            height and width.
        patch_size: Side length of each square patch in pixels.

    Raises:
        ValueError: If ``patch_size`` is not positive, or if the mask's
            shape differs from the image's (height, width).
    """

    def __init__(self, image, mask, patch_size=256):
        if patch_size <= 0:
            raise ValueError(f"patch_size must be positive, got {patch_size}")

        self.image = image                  # Multi-band aerial image ([bands, height, width])
        self.mask = mask                    # Corresponding mask ([height, width])
        self.patch_size = patch_size        # Side length of each square patch
        self.bands, self.height, self.width = image.shape  # Extract dimensions from the image

        # A mask on a different grid would silently produce mis-sized or
        # mis-registered label patches, so fail early instead
        if tuple(mask.shape) != (self.height, self.width):
            raise ValueError(
                f"mask shape {tuple(mask.shape)} does not match image "
                f"height/width {(self.height, self.width)}"
            )

        # Generate top-left coordinates for each non-overlapping patch in the image
        self.patch_coords = [
            (i, j)
            for i in range(0, self.height - patch_size + 1, patch_size)
            for j in range(0, self.width - patch_size + 1, patch_size)
        ]

    def __len__(self):
        """Return the total number of patches available."""
        return len(self.patch_coords)

    def __getitem__(self, idx):
        """Return the ``(image_patch, mask_patch)`` tensor pair at ``idx``.

        The image patch is a float32 tensor of shape
        [bands, patch_size, patch_size]; the mask patch is an int64 (long)
        tensor of shape [patch_size, patch_size].
        """
        # Get top-left corner (i, j) of the patch based on index
        i, j = self.patch_coords[idx]
        rows = slice(i, i + self.patch_size)
        cols = slice(j, j + self.patch_size)

        # Cast in NumPy first: astype() always returns a fresh array in the
        # target dtype, so the tensors never alias the source arrays and the
        # conversion does not depend on which NumPy dtypes the installed
        # torch version can ingest directly
        img_patch = torch.from_numpy(self.image[:, rows, cols].astype(np.float32))
        mask_patch = torch.from_numpy(self.mask[rows, cols].astype(np.int64))

        return img_patch, mask_patch

## Create the dataset and data loaders

In [None]:
# Create an instance of the custom dataset using the full image and mask
dataset = AerialBuildingDataset(image, mask)

# Define the training set size as 80% of the total dataset
train_size = int(0.8 * len(dataset))

# Define the test set size as the remaining 20%
test_size = len(dataset) - train_size

# Randomly split the dataset into training and testing subsets.
# A dedicated, seeded generator makes the split identical on every run, so
# evaluation results are comparable across sessions and a model reloaded
# after a restart is never tested on patches it was trained on.
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = random_split(
    dataset,
    [train_size, test_size],
    generator=split_generator,
)

# Create a DataLoader for the training set
# - batch_size=4: feed 4 patches at a time
# - shuffle=True: randomly shuffle data each epoch to improve generalization
# - drop_last=True: drop the last batch if it's smaller than the batch size
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, drop_last=True)

# Create a DataLoader for the test set
# - shuffle=False: maintain original order for evaluation consistency
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False)