# Load packages and set wd

In [879]:
import ast
import glob
import os
import shutil

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio
import rasterio.features
from numpy import random
from PIL import Image
from rasterio.features import geometry_mask
from rasterio.plot import show
from rasterio.windows import Window
from shapely import box

In [880]:
# Set wd
# The project root can be overridden through the PARKING_PROJECT_ROOT environment
# variable so the notebook also runs on other machines; without the variable the
# original hard-coded location is used.
PROJECT_ROOT = os.environ.get(
    "PARKING_PROJECT_ROOT",
    '/Users/benediktkorbach/Documents/GitHub/remote-sensing-of-parking-areas',
)
os.chdir(PROJECT_ROOT)

print("Working directory:", os.getcwd())

Working directory: /Users/benediktkorbach/Documents/GitHub/remote-sensing-of-parking-areas


# Import parking area data

In [881]:
# Verified rest stops: parking polygons (car and truck) and rest stop polygons
parking_areas_ver = gpd.read_file("02_data_acquisition/verified_parking_data/parking_areas_ver_final.geojson")
rest_stations_ver = gpd.read_file("02_data_acquisition/verified_parking_data/rest_stations_ver_final.geojson")

# Wrongly annotated rest stops: parking polygons (car and truck) and rest stop polygons
parking_areas_wrong = gpd.read_file("02_data_acquisition/verified_parking_data/parking_areas_wrong.geojson")
rest_stations_wrong = gpd.read_file("02_data_acquisition/verified_parking_data/rest_stations_wrong.geojson")

# Bounding boxes of the verified rest stops; the CSV holds each bbox as the text
# of a Python literal, so it is parsed back into a list right after loading
rest_stations_ver_bbox = pd.read_csv("02_data_acquisition/verified_parking_data/rest_stations_ver_bbox_512_final.csv")
rest_stations_ver_bbox['bbox'] = rest_stations_ver_bbox['bbox'].apply(ast.literal_eval)

In [882]:
# Report the (rows, columns) shape of every loaded table
loaded_tables = {
    "parking_areas_ver": parking_areas_ver,
    "rest_stations_ver": rest_stations_ver,
    "rest_stations_ver_bbox": rest_stations_ver_bbox,
    "parking_areas_wrong": parking_areas_wrong,
    "rest_stations_wrong": rest_stations_wrong,
}
for table_name, table in loaded_tables.items():
    print(f"{table_name}:", table.shape)

parking_areas_ver: (1184, 4)
rest_stations_ver: (265, 4)
rest_stations_ver_bbox: (265, 4)
parking_areas_wrong: (416, 4)
rest_stations_wrong: (239, 4)


# Show polygons on images

In [883]:
def plot_image_with_masks(id, output_path=None, attribution_text=None):
    """
    Plot the satellite image of a rest station with the annotation polygons drawn on top.

    Overlays, in drawing order:
    - all verified rest stations intersecting the image bounding box (yellow fill, blue edge)
      together with their car (red outline) and truck (green outline) parking areas,
    - the selected rest station (green fill, blue edge) with its car and truck parking areas,
    - wrongly annotated rest stations intersecting the bounding box (red fill, blue edge).

    Reads the module-level tables rest_stations_ver, rest_stations_ver_bbox,
    parking_areas_ver and rest_stations_wrong.

    Parameters:
    id (str): The rest_id of the selected rest station
    output_path (path): The path to save the plot; nothing is saved if None
    attribution_text (str): The text to display at the bottom right corner of the plot

    Raises:
    IndexError: If id has no entry in rest_stations_ver or rest_stations_ver_bbox
    """

    # Get the rest station, name, image path and the bounding box of the selected rest station
    selected_rest_station = rest_stations_ver[rest_stations_ver["id_rest"] == id]
    selected_rest_station_name = selected_rest_station["name"].values[0]
    image_path = os.path.join("02_data_acquisition/tif_download", f"{id}_{selected_rest_station_name}.tif")
    image_bbox = rest_stations_ver_bbox.loc[rest_stations_ver_bbox["id_rest"] == id, "bbox"].values[0]

    # Get the bounding box as a shapely geometry
    bbox = box(*image_bbox)

    # Verified and wrongly annotated service stations that intersect the image
    intersecting_rest_stations_correct = rest_stations_ver[rest_stations_ver.intersects(bbox)]
    intersecting_rest_stations_wrong = rest_stations_wrong[rest_stations_wrong.intersects(bbox)]

    # Parking areas of the intersecting verified service stations
    parking_correct = parking_areas_ver[parking_areas_ver["id_rest"].isin(intersecting_rest_stations_correct["id_rest"])]
    car_parking_correct = parking_correct[parking_correct["type"] == "car"]
    truck_parking_correct = parking_correct[parking_correct["type"] == "truck"]

    # Parking areas of the selected rest station
    parking_selected = parking_areas_ver[parking_areas_ver["id_rest"] == id]
    car_parking_selected = parking_selected[parking_selected["type"] == "car"]
    truck_parking_selected = parking_selected[parking_selected["type"] == "truck"]

    # Load the satellite image; the file is only needed until the raster has been drawn
    with rasterio.open(image_path) as src:
        # Calculate the figure size to maintain the image resolution
        dpi = 600
        fig_width = src.width / dpi
        fig_height = src.height / dpi

        # Create new figure
        plt.ioff()  # Turn off interactive mode to avoid backend GUI elements
        fig, ax = plt.subplots(figsize=(fig_width, fig_height), dpi=dpi)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)

        # Show the satellite image
        rasterio.plot.show(src, ax=ax)

    try:
        # Plot correct polygons (service station, car parking, truck parking).
        # The parking frames are empty whenever no verified station intersects the image.
        if not intersecting_rest_stations_correct.empty:
            intersecting_rest_stations_correct.plot(ax=ax, facecolor="yellow", alpha=0.2, edgecolor="blue", label="Rest Station Correct")
        if not car_parking_correct.empty:
            car_parking_correct.plot(ax=ax, facecolor="none", edgecolor="red", label="Car Parking")
        if not truck_parking_correct.empty:
            truck_parking_correct.plot(ax=ax, facecolor="none", edgecolor="green", label="Truck Parking")

        # Plot polygons (service station, car parking, truck parking) for selected rest_station
        if not selected_rest_station.empty:
            selected_rest_station.plot(ax=ax, facecolor="green", alpha=0.2, edgecolor="blue", label="Selected Rest Station")
        if not car_parking_selected.empty:
            car_parking_selected.plot(ax=ax, facecolor="none", edgecolor="red", label="Car Parking")
        if not truck_parking_selected.empty:
            truck_parking_selected.plot(ax=ax, facecolor="none", edgecolor="green", label="Truck Parking")

        # Plot wrongly annotated service station polygons (frame is already filtered to the bbox)
        if not intersecting_rest_stations_wrong.empty:
            intersecting_rest_stations_wrong.plot(ax=ax, facecolor="red", alpha=0.2, edgecolor="blue", label="Rest Stations Wrong")

        # Add attribution text at the bottom right corner
        if attribution_text is not None:
            ax.text(x=1, y=0, s=attribution_text,
                    horizontalalignment='right',
                    verticalalignment='bottom',
                    transform=ax.transAxes,
                    color="white",
                    fontsize=7,
                    bbox=dict(facecolor='black', alpha=0.5, edgecolor='none', boxstyle='round,pad=0.1'))

        # Hide the axes for clarity
        ax.set_axis_off()

        # Save the figure
        if output_path is not None:
            fig.savefig(output_path, bbox_inches="tight", pad_inches=0)

        plt.show()
    finally:
        # Release the figure so repeated calls do not accumulate open figures
        plt.close(fig)

In [885]:
## Test the function

# Define test index (row label in rest_stations_ver)
test_index = 19

# Get the rest_id and name of the rest station
test_id = rest_stations_ver.loc[test_index, "id_rest"]
test_name = rest_stations_ver.loc[test_index, "name"]

# Look the bbox up by id_rest, as the plotting and mask functions do, so the
# result does not depend on both tables sharing the same row order
test_bbox = rest_stations_ver_bbox.loc[rest_stations_ver_bbox["id_rest"] == test_id, "bbox"].values[0]

print("Test ID:", test_id)
print("Test bbox:", test_bbox)
print("Test name:", test_name)

Test ID: lon_10.0214344_lat_54.2446698
Test bbox: [10.017521837999134, 54.241986887074795, 10.025191482830632, 54.246478865064965]
Test name: Rumohr


Index 19 shows a service station without polygon -> Wrongly labelled?

In [None]:
# Draw the overlay plot for the test rest station (nothing is saved, no attribution text)
plot_image_with_masks(id=test_id)

# Show and download masks

In this section, a mask is created for every selected service station in rest_stations_ver.

For the creation of training/validation data, the masks should denote parking areas of other, non-selected service stations specifically, so that they can be excluded during tiling. This leads to the following intensity annotations:

- background: 0
- car parking selected service station: 100
- truck parking selected service station: 200
- other service station parking areas: 255

For the creation of test data, the masks should include correctly annotated parking areas of non-selected service stations. Wrongly annotated, non-selected parking areas should be flagged. This leads to the following intensity annotations:

- background: 0
- car parking selected service station: 100
- car parking at non-selected, correctly annotated service stations: 100
- truck parking selected service station: 200
- truck parking at non-selected, correctly annotated service stations: 200
- wrongly annotated parking areas at non-selected service stations: 255

To select between including and excluding correctly annotated non-selected parking areas, set other_correct = "include" or other_correct = "exclude".

In [888]:
def create_mask_png(id, other_correct = "exclude", output_path=None, attribution_text=None, size=2560):
    """
    Create a mask of the verified (and of closeby correctly/wrongly) annotated rest stations and parking areas and save it as a PNG file

    Mask intensities:
    - 0: background
    - 100 / 200: car / truck parking of the selected rest station
    - correctly annotated parking of other intersecting rest stations:
      100 / 200 if other_correct is "include", 255 if other_correct is "exclude"
    - 255: parking areas of wrongly annotated rest stations

    The layers are summed and the sum is clipped to [0, 255], so pixels where
    different layers overlap can end up as 255.

    Reads the module-level tables rest_stations_ver, rest_stations_ver_bbox,
    rest_stations_wrong, parking_areas_ver and parking_areas_wrong.

    Parameters:
    id (str): The rest_id of the rest station
    other_correct (str): "exclude" or "include", see the intensity list above
    output_path (path): The path to save the mask; nothing is saved if None
    attribution_text (str): Currently unused
    size (int): Edge length in pixels of the square mask (default 2560)

    Raises:
    ValueError: If other_correct is neither "exclude" nor "include"
    IndexError: If id has no entry in rest_stations_ver or rest_stations_ver_bbox
    """

    # Set the intensity of the verified and correctly annotated parking areas
    if other_correct == "exclude":
        car_parking_correct_intensity = 255
        truck_parking_correct_intensity = 255
    elif other_correct == "include":
        car_parking_correct_intensity = 100
        truck_parking_correct_intensity = 200
    else:
        raise ValueError("other_correct must be 'exclude' or 'include'")

    # Get the name, image path and the bounding box of the selected rest station
    rest_station_name = rest_stations_ver.loc[rest_stations_ver["id_rest"] == id, "name"].values[0]
    image_path = os.path.join("02_data_acquisition/tif_download", f"{id}_{rest_station_name}.tif")
    image_bbox = rest_stations_ver_bbox.loc[rest_stations_ver_bbox["id_rest"] == id, "bbox"].values[0]

    print("Downloading image:", image_path, "\n")

    # The image content is not used for the mask; opening it only verifies that
    # the corresponding satellite image exists and is readable
    with rasterio.open(image_path):
        pass

    # Get the bounding box and create a shapely geometry
    bbox = box(*image_bbox)

    # Get the parking areas for the selected rest_station
    parking_selected = parking_areas_ver[parking_areas_ver["id_rest"] == id]
    car_parking_selected = parking_selected[parking_selected["type"] == "car"]
    truck_parking_selected = parking_selected[parking_selected["type"] == "truck"]

    # Find intersecting service stations
    intersecting_rest_stations_correct = rest_stations_ver[rest_stations_ver.intersects(bbox)]
    intersecting_rest_stations_wrong = rest_stations_wrong[rest_stations_wrong.intersects(bbox)]

    # Get the parking areas for the verified and correctly annotated service stations
    parking_correct = parking_areas_ver[parking_areas_ver["id_rest"].isin(intersecting_rest_stations_correct["id_rest"])]
    car_parking_correct = parking_correct[parking_correct["type"] == "car"]
    truck_parking_correct = parking_correct[parking_correct["type"] == "truck"]

    # Get the parking areas for the wrongly annotated service stations
    parking_wrong = parking_areas_wrong[parking_areas_wrong["id_rest"].isin(intersecting_rest_stations_wrong["id_rest"])]
    car_parking_wrong = parking_wrong[parking_wrong["type"] == "car"]
    truck_parking_wrong = parking_wrong[parking_wrong["type"] == "truck"]

    # Affine transform mapping the bounding box onto a size x size pixel grid
    transform = rasterio.transform.from_bounds(*image_bbox, size, size)

    # Float accumulator: intermediate sums may exceed 255 or become negative before clipping
    intensity_sum = np.zeros((size, size))

    def add_layer(geodataframe, intensity):
        """Add `intensity` to every pixel covered by a geometry of `geodataframe`."""
        covered = geometry_mask(geodataframe.geometry, transform=transform, invert=True, out_shape=(size, size))
        # Adding on the boolean selection avoids multiplying a uint8 array by a
        # negative Python int, which is rejected under NumPy 2 promotion rules
        intensity_sum[covered] += intensity

    # Layers for the correctly and wrongly annotated parking areas of all intersecting stations
    layers = [
        (car_parking_correct, car_parking_correct_intensity),
        (truck_parking_correct, truck_parking_correct_intensity),
        (car_parking_wrong, 255),
        (truck_parking_wrong, 255),
    ]
    for layer_frame, layer_intensity in layers:
        if not layer_frame.empty:
            add_layer(layer_frame, layer_intensity)

    # In "exclude" mode the selected station was painted with 255 above:
    # neutralize that value and set the selected parking areas to 100 / 200
    if other_correct == "exclude":
        if not car_parking_selected.empty:
            add_layer(car_parking_selected, 100 - car_parking_correct_intensity)
        else:
            print("No car parking found")
        if not truck_parking_selected.empty:
            add_layer(truck_parking_selected, 200 - truck_parking_correct_intensity)
        else:
            print("No truck parking found")

    # Print all unique intensity values present in the mask
    unique_intensities = np.unique(intensity_sum)
    print(f"All intensity values present in the mask before clipping: {unique_intensities}")

    # Clip the mask to the range [0, 255] and convert it to a uint8 array
    mask = np.clip(intensity_sum, 0, 255).astype(np.uint8)

    # Print all unique intensity values present in the mask
    unique_intensities = np.unique(mask)
    print(f"All intensity values present in the mask after clipping: {unique_intensities}")

    # Plot the mask and release the figure afterwards
    fig, ax = plt.subplots()
    try:
        ax.imshow(mask, cmap="gray", vmin=0, vmax=255)
        ax.set_title("Mask of all Geometries")
        plt.show()
    finally:
        plt.close(fig)

    # Save the mask as a PNG file, creating the target folder if necessary
    if output_path is not None:
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        Image.fromarray(mask).save(output_path)

    print("Downloaded image:", image_path)
    print("--------------------------------------------------------------------------", "\n")

In [None]:
## Mass download (including other annotated parking areas, therefore set to 100/200)

# One mask per verified rest station; other_correct="include" keeps correctly
# annotated parking areas of other rest stations at 100/200
for id, name in zip(rest_stations_ver["id_rest"], rest_stations_ver["name"]):
    output_path = f"02_data_acquisition/mask_download_512_include_other_correct/{id}_{name}.png"
    create_mask_png(id=id, other_correct="include", output_path=output_path)

In [None]:
## Mass download (excluding other annotated parking areas, therefore set to 255)

# One mask per verified rest station; other_correct="exclude" marks correctly
# annotated parking areas of other rest stations with 255
for id, name in zip(rest_stations_ver["id_rest"], rest_stations_ver["name"]):
    output_path = f"02_data_acquisition/mask_download_512_exclude_other_correct/{id}_{name}.png"
    create_mask_png(id=id, other_correct="exclude", output_path=output_path)

# Train-Val / Test Split

In this section, the train/validation - test split is performed.

For the test data, whole images/full masks are moved to a separate folder. The images only include correctly labelled parking spaces of non-selected service stations. If another service station is included somewhere in a test image, it is not selected as another training/testing instance.

For the training data, the remaining images are split into tiles in the next section of this script. Here, tiles that include parking areas of other non-selected service stations are excluded.

## Create Test Set

In [891]:
# Create dataframe of all verified rest stations that do not have any wrongly annotated service stations or parking areas in their corresponding mask

# Create a list of all the mask files
mask_files = os.listdir("02_data_acquisition/mask_download_512_include_other_correct")

# Create a list of all the verified rest stations
verified_rest_stations = rest_stations_ver["id_rest"].tolist()

# Ids of all verified rest stations whose mask contains no pixel of value 255
# (255 marks wrongly annotated areas in the "include" masks)
verified_rest_stations_no_wrong = []

# Iterate through all the verified rest stations
for id, name in zip(rest_stations_ver["id_rest"], rest_stations_ver["name"]):

    # Skip rest stations for which no mask file exists
    if f"{id}_{name}.png" not in mask_files:
        continue

    # Open the mask file; the context manager closes it again after the check
    with Image.open(f"02_data_acquisition/mask_download_512_include_other_correct/{id}_{name}.png") as mask:
        # Check if the mask contains any wrong labels
        if 255 not in np.array(mask):
            verified_rest_stations_no_wrong.append(id)

# Create a dataframe of all the verified rest stations that do not have any wrongly annotated service stations or parking areas in their corresponding mask
verified_rest_stations_no_wrong_df = rest_stations_ver[rest_stations_ver["id_rest"].isin(verified_rest_stations_no_wrong)]

In [892]:
# Print the number of verified rest stations whose mask contains no wrongly annotated areas
print("Number of service stations without wrongly annotated adjacent service stations:", len(verified_rest_stations_no_wrong_df))

Number of service stations without wrongly annotated adjacent service statiosn: (246, 4)


In total, 265 verified service stations exist. To create the test cases, we only want to select service stations whose corresponding satellite images do not contain other wrongly annotated service stations.

We want to perform a 85/15 split for training/validation and test cases.

We therefore select 40 service stations from the 246 verified service stations, as they do not contain any other wrongly labelled service stations.

In [893]:
# Shuffle verified_rest_stations_no_wrong_df into a new dataframe (frac=1 keeps every row);
# the fixed random_state makes the resulting order reproducible between runs
verified_rest_stations_no_wrong_random_order_df = verified_rest_stations_no_wrong_df.sample(frac=1, random_state=123)

In [894]:
# Shape of the shuffled candidate dataframe
verified_rest_stations_no_wrong_random_order_df.shape

(246, 4)

In [895]:
# Select test rest stations from the shuffled candidates and copy their masks
# (from the mask download folder) and images (from the image folder) to the test folders
test_image_counter = 0
test_rest_stations = []

# Make sure the test folders exist before anything is copied into them
os.makedirs("03_create_masks/test_512/masks", exist_ok=True)
os.makedirs("03_create_masks/test_512/images", exist_ok=True)


def _copy_file(source_path, target_path):
    """Copy source_path to target_path; return True on success and False if the copy failed."""
    try:
        shutil.copy(source_path, target_path)
    except OSError:
        return False
    return True


while test_image_counter < 40:
    # Stop instead of failing on .iloc[0] when no candidate is left
    if verified_rest_stations_no_wrong_random_order_df.empty:
        print("No candidate rest stations left; stopping before the target was reached.")
        break

    # Get the first row of the verified_rest_stations_no_wrong_random_order_df
    row = verified_rest_stations_no_wrong_random_order_df.iloc[0]

    # Get the rest_id and name
    id = row["id_rest"]
    name = row["name"]

    # Get the image path, mask path and the bounding box of the rest station
    image_path = f"02_data_acquisition/tif_download_512/{id}_{name}.tif"
    mask_path = f"02_data_acquisition/mask_download_512_include_other_correct/{id}_{name}.png"
    image_bbox = rest_stations_ver_bbox.loc[rest_stations_ver_bbox["id_rest"] == id, "bbox"].values[0]

    # Opening the image is only a sanity check that the file exists and is readable
    with rasterio.open(image_path):
        pass

    # Get the bounding box
    bbox_pol = box(*image_bbox)

    # Find the verified service stations that intersect the image
    intersecting_rest_stations_correct = rest_stations_ver[rest_stations_ver.intersects(bbox_pol)]

    # Get the number of intersecting service station rest_ids
    intersecting_rest_stations_correct_count = len(intersecting_rest_stations_correct)

    # Print the number of intersecting service station rest_ids
    print(f"Number of intersecting correct rest stations: {intersecting_rest_stations_correct_count}")

    # Ids covered by this image. The selected station is always included so that
    # it is removed from the candidates and the loop is guaranteed to progress.
    intersecting_rest_stations_correct_ids = intersecting_rest_stations_correct["id_rest"].tolist()
    covered_ids = list(dict.fromkeys([*intersecting_rest_stations_correct_ids, id]))

    # Drop the covered rest stations from the verified_rest_stations_no_wrong_random_order_df via ids
    verified_rest_stations_no_wrong_random_order_df = verified_rest_stations_no_wrong_random_order_df[~verified_rest_stations_no_wrong_random_order_df["id_rest"].isin(covered_ids)]

    # Print ids and names of dropped rest stations
    print(f"Dropped correct rest stations: {intersecting_rest_stations_correct_ids}")
    print(f"Dropped correct rest stations names: {intersecting_rest_stations_correct['name'].tolist()}")

    # Add only ids that are not yet part of the test set, so the counter equals
    # the number of distinct test rest stations
    new_test_ids = [station_id for station_id in covered_ids if station_id not in test_rest_stations]
    test_rest_stations.extend(new_test_ids)
    test_image_counter += len(new_test_ids)

    # Print the test_image_counter
    print(f"Test image counter: {test_image_counter}")

    # Copy the mask to the test folder and report whether this copy succeeded
    mask_output_path = f"03_create_masks/test_512/masks/{id}_{name}.png"
    if _copy_file(mask_path, mask_output_path):
        print(f"Mask copied: {id}_{name}.png")
    else:
        print(f"Mask not copied: {id}_{name}.png")

    # Copy the image to the test folder and report whether this copy succeeded
    image_output_path = f"03_create_masks/test_512/images/{id}_{name}.tif"
    if _copy_file(image_path, image_output_path):
        print(f"Image copied: {id}_{name}.tif")
    else:
        print(f"Image not copied: {id}_{name}.tif")

Number of intersecting correct rest stations: 2
Dropped correct rest stations: ['lon_12.0055212_lat_51.5564663', 'lon_12.0026469_lat_51.5585101']
Dropped correct rest stations names: ['Petersberg', 'Petersberg']
Test image counter: 2
Mask copied: lon_12.0055212_lat_51.5564663_Petersberg.png
Image copied: lon_12.0055212_lat_51.5564663_Petersberg.tif
Number of intersecting correct rest stations: 2
Dropped correct rest stations: ['lon_8.4094366_lat_50.0433047', 'lon_8.409903_lat_50.0435443']
Dropped correct rest stations names: ['Johannispfad', 'Oberbach']
Test image counter: 4
Mask copied: lon_8.4094366_lat_50.0433047_Johannispfad.png
Image copied: lon_8.4094366_lat_50.0433047_Johannispfad.tif
Number of intersecting correct rest stations: 2
Dropped correct rest stations: ['lon_12.8828997_lat_52.2006051', 'lon_12.8828606_lat_52.1997595']
Dropped correct rest stations names: ['Borker Heide', 'Zauche']
Test image counter: 6
Mask copied: lon_12.8828606_lat_52.1997595_Zauche.png
Image copied:

In [896]:
# Shape of the candidate dataframe after the test selection loop removed rows from it
verified_rest_stations_no_wrong_random_order_df.shape

(205, 4)

In [897]:
# Ids of the rest stations collected for the test set
test_rest_stations

['lon_12.0055212_lat_51.5564663',
 'lon_12.0026469_lat_51.5585101',
 'lon_8.4094366_lat_50.0433047',
 'lon_8.409903_lat_50.0435443',
 'lon_12.8828997_lat_52.2006051',
 'lon_12.8828606_lat_52.1997595',
 'lon_12.0740424_lat_53.2849259',
 'lon_12.0732519_lat_53.2843058',
 'lon_11.8452583_lat_50.957723',
 'lon_11.8511459_lat_50.9610243',
 'lon_13.9559485_lat_53.2109455',
 'lon_10.4537487_lat_50.4697772',
 'lon_10.4530068_lat_50.4711034',
 'lon_9.7447582_lat_50.4360417',
 'lon_10.6158177_lat_53.5536922',
 'lon_11.8748503_lat_50.7596459',
 'lon_11.8732122_lat_50.759018',
 'lon_9.9242982_lat_53.8199638',
 'lon_9.9208466_lat_53.8213382',
 'lon_9.4507501_lat_50.3248577',
 'lon_9.4544286_lat_50.3278031',
 'lon_10.8893354_lat_50.4230805',
 'lon_10.8907057_lat_50.4209612',
 'lon_9.2831674_lat_51.3432322',
 'lon_10.5152409_lat_52.0369274',
 'lon_8.2810289_lat_52.8853475',
 'lon_9.6802586_lat_50.6048477',
 'lon_8.6692184_lat_50.618922',
 'lon_8.6698347_lat_50.6184956',
 'lon_9.9061758_lat_51.6070521

In [898]:
# Number of ids collected for the test set
len(test_rest_stations)

41

In [899]:
# Build the test dataframe: every row of rest_stations_ver whose id_rest was collected for the test set
is_test_station = rest_stations_ver["id_rest"].isin(test_rest_stations)
test_rest_stations_df = rest_stations_ver.loc[is_test_station]

test_rest_stations_df.shape

(41, 4)

In total, 26 test images containing 26 service stations fully, and 16 service stations at least partly, are created.

## Create Training/Validation Set

In [900]:
# Build the train/validation dataframe: every row of rest_stations_ver whose id_rest is not part of the test set
in_test_set = rest_stations_ver["id_rest"].isin(test_rest_stations)
train_rest_stations_df = rest_stations_ver.loc[~in_test_set]

train_rest_stations_df.shape

(224, 4)

# Create tiles for train/val from images and masks

In [907]:
def crop_and_save_tiles_restricted(id, name, image_path, mask_path, tile_width, tile_height, image_output_folder, mask_output_folder, output_format="png"):
    """
    Crop and save image and mask tiles of the given size from the given image and mask, not including other, non-selected service stations

    The image is cut into a grid of full tiles starting at the top-left corner;
    pixels that do not fill a complete tile at the right or bottom edge are ignored.
    A tile is skipped when its mask contains the value 255. Image and mask tiles
    are both named "{id}_{name}_{row}_{col}", so with output_format="png" the two
    output folders must differ or the mask tile overwrites the image tile.

    Parameters:
    id (str): The rest_id of the rest station
    name (str): The name of the rest station
    image_path (path): The path to the image file
    mask_path (path): The path to the mask file
    tile_width (int): The width of the tiles
    tile_height (int): The height of the tiles
    image_output_folder (path): The path to the folder to save the image tiles
    mask_output_folder (path): The path to the folder to save the mask tiles
    output_format (str): Format of the image tiles, "png" or "tif" (mask tiles are always PNG)

    Returns:
    tuple: (number of skipped tiles, number of skipped tiles that also contain
    mask values other than 0 and 255)

    Raises:
    ValueError: If output_format is not supported or image and mask dimensions differ
    """

    # Reject an unsupported format up front instead of on the first kept tile
    if output_format not in ("png", "tif"):
        raise ValueError("Output format not supported.")

    excluded_tiles_count = 0
    excluded_of_interest_count = 0

    # Create output folders if they don't exist
    os.makedirs(image_output_folder, exist_ok=True)
    os.makedirs(mask_output_folder, exist_ok=True)

    # Open the image and mask
    with rasterio.open(image_path) as src, rasterio.open(mask_path) as mask_src:
        # Ensure image and mask dimensions are the same
        if (src.width, src.height) != (mask_src.width, mask_src.height):
            raise ValueError("Image and mask dimensions do not match")

        # Calculate number of full tile rows and columns
        num_rows = src.height // tile_height
        num_cols = src.width // tile_width

        # Loop through each tile
        for row in range(num_rows):
            for col in range(num_cols):
                # Calculate window for current tile
                window = Window(col * tile_width, row * tile_height, tile_width, tile_height)

                # Process mask tile first to check for 255 values
                mask_tile_data = mask_src.read(1, window=window)
                if np.any(mask_tile_data == 255):
                    print(f"tile_{row}_{col} contains restricted areas (255 value) and is skipped.")
                    excluded_tiles_count += 1
                    # Count skipped tiles that also hold values other than 0 and 255
                    if np.any(~np.isin(mask_tile_data, [0, 255])):
                        excluded_of_interest_count += 1
                    continue  # Skip this tile

                # Since no 255 values are present, process image tile
                image_tile_data = src.read(window=window)
                if output_format == "png":
                    if image_tile_data.shape[0] == 1:
                        # Single band: hand PIL a 2-D array instead of (H, W, 1)
                        image_tile_array = image_tile_data[0]
                    else:
                        # Convert from CHW to HWC format
                        image_tile_array = image_tile_data.transpose(1, 2, 0)
                    image_tile_output_path = os.path.join(image_output_folder, f"{id}_{name}_{row}_{col}.png")
                    Image.fromarray(image_tile_array).save(image_tile_output_path)
                else:
                    # GeoTIFF tile carrying the georeferencing of the window
                    image_transform = rasterio.windows.transform(window, src.transform)
                    image_tile_output_path = os.path.join(image_output_folder, f"{id}_{name}_{row}_{col}.tif")
                    with rasterio.open(image_tile_output_path, 'w', driver='GTiff', width=tile_width, height=tile_height, count=src.count, dtype=src.dtypes[0], crs=src.crs, transform=image_transform) as dst:
                        dst.write(image_tile_data)

                # Process and save mask tile
                mask_tile_output_path = os.path.join(mask_output_folder, f"{id}_{name}_{row}_{col}.png")
                Image.fromarray(mask_tile_data.astype(np.uint8)).save(mask_tile_output_path)

    print(f"Excluded tiles: {excluded_tiles_count}")
    print(f"Excluded tiles of interest: {excluded_of_interest_count}")

    return excluded_tiles_count, excluded_of_interest_count

In [850]:
# Example usage: tile a single rest station into 500x500 PNG tiles
id = "lon_7.0388979_lat_52.3124962"
name = "Bentheimer Wald"
image_path = "02_data_acquisition/tif_download/lon_7.0388979_lat_52.3124962_Bentheimer Wald.tif"
mask_path = "02_data_acquisition/mask_download_exclude_other_correct/lon_7.0388979_lat_52.3124962_Bentheimer Wald.png"
image_tile_output_path = "02_data_acquisition/test_image_download/Test Download image tiles restricted"
mask_tile_output_path = "02_data_acquisition/test_image_download/Test Download mask tiles restricted"
crop_and_save_tiles_restricted(
    id=id,
    name=name,
    image_path=image_path,
    mask_path=mask_path,
    tile_width=500,
    tile_height=500,
    image_output_folder=image_tile_output_path,
    mask_output_folder=mask_tile_output_path,
    output_format="png",
)

tile_0_1 contains restricted areas (255 value) and is skipped.
tile_0_2 contains restricted areas (255 value) and is skipped.
tile_0_3 contains restricted areas (255 value) and is skipped.
tile_0_4 contains restricted areas (255 value) and is skipped.
tile_1_1 contains restricted areas (255 value) and is skipped.
tile_1_2 contains restricted areas (255 value) and is skipped.


In [910]:
# Mass download of tiles
total_excluded_tiles_count = 0
total_excluded_of_interest_count = 0

# Tile every train/validation rest station and keep running totals of the skipped tiles
for id, name in zip(train_rest_stations_df["id_rest"], train_rest_stations_df["name"]):
    image_path = f"02_data_acquisition/tif_download_512/{id}_{name}.tif"
    mask_path = f"02_data_acquisition/mask_download_512_exclude_other_correct/{id}_{name}.png"
    image_output_folder = "03_create_masks/train_validate_256/images"
    mask_output_folder = "03_create_masks/train_validate_256/masks"
    tile_width, tile_height = 256, 256

    print(f"Start tiling for: {id}_{name}")
    excluded_tiles_count, excluded_of_interest_count = crop_and_save_tiles_restricted(
        id, name, image_path, mask_path, tile_width, tile_height,
        image_output_folder, mask_output_folder, output_format="png",
    )
    total_excluded_tiles_count += excluded_tiles_count
    total_excluded_of_interest_count += excluded_of_interest_count
    print(f"Total excluded tiles count: {total_excluded_tiles_count}")
    print(f"Total excluded of interest count: {total_excluded_of_interest_count}")
    print(f"Created tiles for: {id}_{name}")
    print("\n")

Start tiling for: lon_8.6630029_lat_50.2542348_Schäferborn
Excluded tiles: 0
Excluded tiles of interest: 0
Total excluded tiles count: 0
Total excluded of interest count: 0
Created tiles for: lon_8.6630029_lat_50.2542348_Schäferborn


Start tiling for: lon_11.3531491_lat_50.9378362_Habichtsfang
tile_8_2 contains restricted areas (255 value) and is skipped.
tile_8_3 contains restricted areas (255 value) and is skipped.
tile_8_4 contains restricted areas (255 value) and is skipped.
tile_8_5 contains restricted areas (255 value) and is skipped.
tile_8_6 contains restricted areas (255 value) and is skipped.
tile_9_2 contains restricted areas (255 value) and is skipped.
tile_9_3 contains restricted areas (255 value) and is skipped.
tile_9_4 contains restricted areas (255 value) and is skipped.
tile_9_5 contains restricted areas (255 value) and is skipped.
tile_9_6 contains restricted areas (255 value) and is skipped.
Excluded tiles: 10
Excluded tiles of interest: 0
Total excluded tiles coun

For the 512x512 tiles, 394 tiles were excluded as they contain excluded areas. 103 of those also contained areas of interest (parking spaces of the rest station in question).

For the 256x256 tiles, 813 tiles were excluded as they contain excluded areas. 12 of those also contained areas of interest (parking spaces of the rest station in question).

# Inspect tiles

In [866]:
def analyze_image_tiles(folder_path):
    """
    Analyze grayscale PNG image tiles in a specified folder.

    Every file matching ``*.png`` directly inside ``folder_path`` is opened,
    converted to 8-bit grayscale (mode 'L') and inspected for the pixel
    intensities it contains. Sub-folders are not searched. If no file matches
    (including when the folder does not exist), all counts are reported as 0.

    Parameters:
    folder_path (str): Path to the folder containing the image tiles.

    Returns:
    None. The statistics are only printed.

    Prints:
    - The total number of tiles.
    - The number of tiles that contain only pixel intensities of 0
      (reported as tiles without parking areas).
    - The number of tiles that contain a pixel intensity other than 0
      (reported as tiles with parking areas).
    - The number of tiles that contain a pixel intensity of 100 (reported as cars).
    - The number of tiles that contain a pixel intensity of 200 (reported as trucks).
    - All unique pixel intensities found across all tiles, sorted ascending.
    """
    tiles_with_non_zero = 0
    tiles_with_only_zero = 0
    tiles_with_100 = 0
    tiles_with_200 = 0
    total_tiles = 0
    all_intensities = set()

    pattern = os.path.join(folder_path, "*.png")
    for image_path in glob.glob(pattern):
        total_tiles += 1

        # The context manager releases the file handle as soon as the pixel
        # data has been copied into the array, instead of relying on garbage
        # collection while iterating over thousands of tiles.
        with Image.open(image_path) as image:
            image_array = np.array(image.convert('L'))

        # .tolist() turns the numpy scalars into plain Python ints so that the
        # printed summary reads "[0, 100, 200]" on every numpy version
        # (numpy >= 2 would otherwise print "np.uint8(0)", ...).
        unique_intensities = np.unique(image_array).tolist()
        all_intensities.update(unique_intensities)

        if np.any(image_array):
            tiles_with_non_zero += 1
            # 100 and 200 are non-zero, so they can only occur in this branch
            if 100 in unique_intensities:
                tiles_with_100 += 1
            if 200 in unique_intensities:
                tiles_with_200 += 1
        else:
            tiles_with_only_zero += 1

    print(f"Total number of tiles: {total_tiles}")
    print(f"Number of tiles that contain no parking areas: {tiles_with_only_zero}")
    print(f"Number of tiles that contain parking areas: {tiles_with_non_zero}")
    print(f"   of that number of tiles containing cars: {tiles_with_100}")
    print(f"   of that number of tiles containing trucks: {tiles_with_200}")
    print(f"All pixel intensities found across all tiles: {sorted(all_intensities)}")

In [867]:
# Example usage: print the tile statistics for the mask tiles stored in
# 03_create_masks/train_validate/masks (path is relative to the working directory).
folder_path = "03_create_masks/train_validate/masks"
analyze_image_tiles(folder_path)

Total number of tiles: 7641
Number of tiles that contain no parking areas: 6806
Number of tiles that contain parking areas: 835
   of that number of tiles containing cars: 542
   of that number of tiles containing trucks: 682
All pixel intensities found across all tiles: [0, 100, 200]
