<a href="https://colab.research.google.com/github/benedikt-korbach/remote-sensing-of-parking-areas/blob/main/03d_create_train_val.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import packages, mount drive, set wd

In [1]:
import os
import cv2
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
import datetime
import random
import shutil

In [None]:
#from google.colab import drive
#drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Set wd
#os.chdir("/Users/benediktkorbach/Documents/GitHub/remote-sensing-of-parking-areas")
#os.chdir("/content/drive/MyDrive/Master Thesis")

# Set wd
os.chdir('/Users/benediktkorbach/Documents/GitHub/remote-sensing-of-parking-areas')

print("Working directory:", os.getcwd())


Working directory: /Users/benediktkorbach/Documents/GitHub/remote-sensing-of-parking-areas


# Check train_validate_512

In [3]:
def count_files_in_folder(folder_path):
    """Counts the number of files in the specified folder."""
    # List all entries in the folder
    entries = os.listdir(folder_path)

    # Count only files
    file_count = sum(os.path.isfile(os.path.join(folder_path, entry)) for entry in entries)

    return file_count

In [5]:
# Count images and masks
mask_path = "03_create_masks/train_validate_512_incl_quick/masks"
image_path = "03_create_masks/train_validate_512_incl_quick/images"

num_masks = count_files_in_folder(mask_path)
num_images = count_files_in_folder(image_path)

print(f"There are {num_images} images and {num_masks} masks in the folders {image_path} and {mask_path} respectively.")

There are 5793 images and 5793 masks in the folders 03_create_masks/train_validate_512_incl_quick/images and 03_create_masks/train_validate_512_incl_quick/masks respectively.


There are 5,793 images/masks in the train_validate_512_incl_quick folder.

# Reduce background images/masks

In [6]:
def create_target_dirs(target_dir):
    """Create target directories for images and masks."""
    os.makedirs(target_dir, exist_ok=True)
    os.makedirs(os.path.join(target_dir, "images"), exist_ok=True)
    os.makedirs(os.path.join(target_dir, "masks"), exist_ok=True)

def reduce_background_images(image_folder, mask_folder, target_folder, black_mask_inclusion_prob=0.2):
    """
    Reduces the number of pure background images by selecting black_mask_inclusion_prob percent of pure background images.
    """
    # Create target directory
    create_target_dirs(target_folder)

    # Initialize counters for sanity check
    image_counter = 0
    parking_images_copied = 0
    background_images_copied = 0
    background_images_discarded = 0

    # Initialize start time
    start_time = datetime.datetime.now()
    print(f"Copying started at {start_time.strftime('%Y-%m-%d %H:%M:%S')}")

    # Loop through every image/mask
    for img_name in sorted(os.listdir(image_folder)):
        if not img_name.lower().endswith(".png"):
            continue

        image_counter += 1

        img_path = os.path.join(image_folder, img_name)
        mask_path = os.path.join(mask_folder, img_name)

        # Check if the mask is completely black (background)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if np.unique(mask).size == 1 and np.unique(mask)[0] == 0:
            # Include the black mask with a certain probability
            if random.random() < black_mask_inclusion_prob:
                shutil.copy(img_path, os.path.join(target_folder, "images", img_name))
                shutil.copy(mask_path, os.path.join(target_folder, "masks", img_name))
                print(f"Background added: {img_name}")
                background_images_copied += 1
            else:
                background_images_discarded += 1
        else:
            # Include every non-background image
            shutil.copy(img_path, os.path.join(target_folder, "images", img_name))
            shutil.copy(mask_path, os.path.join(target_folder, "masks", img_name))
            print(f"Parking added: {img_name}")
            parking_images_copied += 1

    # Stop time and calculate total copied
    now = datetime.datetime.now()
    duration = (now - start_time).total_seconds()
    total_copied = background_images_copied + parking_images_copied

    # Output copies made by category
    print(f"Finished copying at {now.strftime('%Y-%m-%d %H:%M:%S')} in {duration:.2f} seconds.")
    print(f"Total evaluated: {image_counter}")
    print(f"Total background discarded: {background_images_discarded}")
    print(f"Total copied: {total_copied}")
    print(f"thereof parking: {parking_images_copied}")
    print(f"thereof background: {background_images_copied}")

In [7]:
# Reduce the number of pure background images and save them in the target folder

image_folder = "03_create_masks/train_validate_512_incl_quick/images"
mask_folder = "03_create_masks/train_validate_512_incl_quick/masks"
target_folder = "03_create_masks/train_validate_512_incl_quick/train_validate_512_incl_quick_reduced_background"
reduce_background_images(image_folder, mask_folder, target_folder)

Copying started at 2024-03-24 13:38:06
Background added: lon_10.0214344_lat_54.2446698_Rumohr_0_1.png
Parking added: lon_10.0214344_lat_54.2446698_Rumohr_0_3.png
Background added: lon_10.0214344_lat_54.2446698_Rumohr_0_4.png
Background added: lon_10.0214344_lat_54.2446698_Rumohr_1_0.png
Parking added: lon_10.0214344_lat_54.2446698_Rumohr_1_3.png
Background added: lon_10.0214344_lat_54.2446698_Rumohr_4_0.png
Background added: lon_10.0335813_lat_51.7803957_Am Bierberg-Ost_0_2.png
Background added: lon_10.0335813_lat_51.7803957_Am Bierberg-Ost_0_4.png
Parking added: lon_10.0335813_lat_51.7803957_Am Bierberg-Ost_1_1.png
Parking added: lon_10.0335813_lat_51.7803957_Am Bierberg-Ost_1_2.png
Parking added: lon_10.0335813_lat_51.7803957_Am Bierberg-Ost_2_1.png
Parking added: lon_10.0335813_lat_51.7803957_Am Bierberg-Ost_2_2.png
Background added: lon_10.0335813_lat_51.7803957_Am Bierberg-Ost_3_4.png
Parking added: lon_10.0338812_lat_51.7811423_Am Bierberg_1_1.png
Parking added: lon_10.0338812_la

From the 5,793 evaluated images, 3,871 were discarded as pure background images. The final dataset contains 1,922 images, of which 921 include parking spaces (both truck and car) as well as 1001 pure background images.

# Create train/val folder

In [8]:
def split_and_copy_data(base_input_path, test_size=0.2, random_seed=42):
    """
    Splits data into training and validation and copies images/masks to respective folders.
    """
    # Define directories
    images_dir = os.path.join(base_input_path, "images")
    masks_dir = os.path.join(base_input_path, "masks")
    train_image_dir = os.path.join(base_input_path, "train", "images")
    train_mask_dir = os.path.join(base_input_path, "train", "masks")
    val_image_dir = os.path.join(base_input_path, "val", "images")
    val_mask_dir = os.path.join(base_input_path, "val", "masks")

    # Create directories
    os.makedirs(train_image_dir, exist_ok=True)
    os.makedirs(train_mask_dir, exist_ok=True)
    os.makedirs(val_image_dir, exist_ok=True)
    os.makedirs(val_mask_dir, exist_ok=True)

    # Retrieve and sort filenames
    image_files = [f for f in os.listdir(images_dir) if f.endswith('.png')]
    mask_files = [f for f in os.listdir(masks_dir) if f.endswith('.png')]
    image_files.sort()
    mask_files.sort()

    # Split data into training and validation sets
    image_train, image_val, mask_train, mask_val = train_test_split(
        image_files, mask_files, test_size=test_size, random_state=random_seed)

    # Function to copy files
    def copy_files(file_list, src_dir, dest_dir):
        for file in file_list:
            src_file_path = os.path.join(src_dir, file)
            dest_file_path = os.path.join(dest_dir, file)
            shutil.copy(src_file_path, dest_file_path)

    # Copy files to respective directories
    copy_files(image_train, images_dir, train_image_dir)
    copy_files(mask_train, masks_dir, train_mask_dir)
    copy_files(image_val, images_dir, val_image_dir)
    copy_files(mask_val, masks_dir, val_mask_dir)

    print(f"Data successfully split and copied.\nTraining images: {len(image_train)}, Validation images: {len(image_val)}")

In [9]:
# Create training and validation folders
base_input_path = "03_create_masks/train_validate_512_incl_quick/train_validate_512_incl_quick_reduced_background"
split_and_copy_data(base_input_path, test_size=0.2, random_seed=42)

Data successfully split and copied.
Training images: 1537, Validation images: 385


The training folder contains 1,537 images while the validation folder includes 385 images.

In [14]:
def count_image_intensities(folder_dir):
    """
    Count car, truck and pure background masks in folder.
    """
    count_all_zero = 0
    count_include_100 = 0
    count_include_200 = 0
    count_include_250 = 0
    count_include_100_200_250 = 0

    # Iterate through all image files in the folder
    for filename in os.listdir(folder_dir):
        if filename.lower().endswith('.png'):
            img_path = os.path.join(folder_dir, filename)
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

            # Check if the entire image is black
            if np.all(img == 0):
                count_all_zero += 1

            # Check if the image includes pixel intensities of 100
            if np.any(img == 100):
                count_include_100 += 1

            # Check if the image includes pixel intensities of 200
            if np.any(img == 200):
                count_include_200 += 1

            # Check if the image includes pixel intensities of 250
            if np.any(img == 250):
                count_include_250 += 1

            # Check if the image includes pixel intensities of 100 or 200
            if np.any(img == 100) or np.any(img == 200) or np.any(img == 250):
                count_include_100_200_250 += 1

            total_count = count_all_zero + count_include_100_200_250

    return total_count, count_all_zero, count_include_100, count_include_200, count_include_250, count_include_100_200_250

In [19]:
# Count the instances of car and truck parking as well as background in the train/val folder
train_dir = "03_create_masks/train_validate_512_incl_quick/train_validate_512_incl_quick_reduced_background/train/masks"
val_dir = "03_create_masks/train_validate_512_incl_quick/train_validate_512_incl_quick_reduced_background/val/masks"

total_count, count_all_zero, count_include_100, count_include_200, count_include_250, count_include_100_200_250 = count_image_intensities(val_dir)

print(f"Total count: {total_count}")
print(f"Background images: {count_all_zero}")
print(f"Images including car parking: {count_include_100}")
print(f"Images including truck parking: {count_include_200}")
print(f"Images including quick parking: {count_include_250}")
print(f"Images including car, truck or quick parking: {count_include_100_200_250}")

Total count: 385
Background images: 201
Images including car parking: 117
Images including truck parking: 125
Images including quick parking: 58
Images including car, truck or quick parking: 184


The training set includes 1,537 images, of which 415 include car parking, 461 include truck parking, 304 include quick parking and 737 include car, truck or quick parking. 800 images are pure background images.

The validation set includes 385 images, of which 117 include car parking, 125 include truck parking, 58 include quick parking and 184 include car, truck or quick parking. 117 images are pure background images.