Imports and Setup

In [None]:
# Mount Google Drive at /content/drive; the data folders used by the cells
# below all live under /content/drive/MyDrive. Requires the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install rasterio numpy

Collecting rasterio
  Downloading rasterio-1.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.1 kB)
Collecting affine (from rasterio)
  Downloading affine-2.4.0-py3-none-any.whl.metadata (4.0 kB)
Collecting cligj>=0.5 (from rasterio)
  Downloading cligj-0.7.2-py3-none-any.whl.metadata (5.0 kB)
Collecting click-plugins (from rasterio)
  Downloading click_plugins-1.1.1.2-py2.py3-none-any.whl.metadata (6.5 kB)
Downloading rasterio-1.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (22.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m22.2/22.2 MB[0m [31m79.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Downloading affine-2.4.0-py3-none-any.whl (15 kB)
Downloading click_plugins-1.1.1.2-py2.py3-none-any.whl (11 kB)
Installing collected packages: cligj, click-plugins, affine, rasterio
Successfully installed affine-2.4.0 click-plugins-1.1.1.2 cligj-0.7.2 rasterio-1.4.3


In [None]:
import os
import numpy as np
import rasterio
from rasterio.enums import Resampling
from rasterio.errors import NotGeoreferencedWarning
import warnings

Identify NaN and Inf values in the dataset and replace them with 0.

In [None]:

# Scan every GeoTIFF in `image_dir` for NaN / Inf pixels and overwrite them
# with 0 in place. Files without such pixels are left untouched.

# Suppress georeferencing warnings
warnings.filterwarnings("ignore", category=NotGeoreferencedWarning)

# Set your image folder path (modify this for your case)
image_dir = "/content/drive/MyDrive/Sentinel_training"

# Loop through all TIF files
for filename in os.listdir(image_dir):
    if filename.lower().endswith(".tif"):
        filepath = os.path.join(image_dir, filename)
        print(f"Checking {filename}...")

        # "r+" opens the existing file for in-place update
        with rasterio.open(filepath, "r+") as dataset:
            # Read in the file's native dtype. Casting to float32 first would
            # turn float64 values outside the float32 range into Inf (which
            # would then be zeroed) and would truncate precision on rewrite.
            data = dataset.read()
            nan_mask = np.isnan(data)
            inf_mask = np.isinf(data)

            num_nan = nan_mask.sum()
            num_inf = inf_mask.sum()

            if num_nan > 0 or num_inf > 0:
                print(f"  ⚠️ Found NaNs: {num_nan}, Infs: {num_inf} → Replacing with 0.0")

                # Replace NaNs and Infs with 0; nan_to_num keeps the input dtype
                data = np.nan_to_num(data, nan=0.0, posinf=0.0, neginf=0.0)

                # Overwrite the file in-place (data already has the band dtype)
                dataset.write(data)
                print(f"  ✅ Fixed and saved: {filename}")
            else:
                print("  ✅ No issues found.")

print("✅ Finished checking and fixing all GeoTIFFs.")

Checking Sentinel_AllBands_Training_Id_1.tif...
  ✅ No issues found.
Checking Sentinel_AllBands_Training_Id_2.tif...
  ✅ No issues found.
Checking Sentinel_AllBands_Training_Id_17.tif...
  ✅ No issues found.
Checking Sentinel_AllBands_Training_Id_19.tif...
  ✅ No issues found.
Checking Sentinel_AllBands_Training_Id_18.tif...
  ✅ No issues found.
Checking Sentinel_AllBands_Training_Id_32.tif...
  ✅ No issues found.
Checking Sentinel_AllBands_Training_Id_33.tif...
  ✅ No issues found.
Checking Sentinel_AllBands_Training_Id_38.tif...
  ✅ No issues found.
Checking Sentinel_AllBands_Training_Id_36.tif...
  ✅ No issues found.
Checking Sentinel_AllBands_Training_Id_37.tif...
  ✅ No issues found.
Checking Sentinel_AllBands_Training_Id_39.tif...
  ✅ No issues found.
Checking Sentinel_AllBands_Training_Id_40.tif...
  ✅ No issues found.
Checking Sentinel_AllBands_Training_Id_45.tif...
  ✅ No issues found.
Checking Sentinel_AllBands_Training_Id_44.tif...
  ✅ No issues found.
Checking Sentinel_AllB

Check for mismatched pairs.

In [None]:

# Compare each exported image with its mask and collect the pairs whose pixel
# dimensions differ. Results are accumulated in `bad_pairs` as
# (image_path, mask_path) tuples.
image_dir = "/content/drive/MyDrive/P2 - Amazon ITU - PESU/GEE_Exports_all"
mask_dir = "/content/drive/MyDrive/P2 - Amazon ITU - PESU/All_Masks_TIF"

bad_pairs = []

for img_name in os.listdir(image_dir):
    if not img_name.endswith(".tif"):
        continue
    img_path = os.path.join(image_dir, img_name)

    # Masks are named "0_<image name>" or "1_<image name>". os.path.exists does
    # not expand glob patterns such as "[01]", so probe both concrete names.
    mask_path = None
    for label in ("0", "1"):
        mask_name = f"{label}_{img_name}"
        candidate = os.path.join(mask_dir, mask_name)
        if os.path.exists(candidate):
            mask_path = candidate
            break

    if mask_path is None:
        print(f"❌ Mask not found for {img_name}")
        continue

    with rasterio.open(img_path) as img_src, rasterio.open(mask_path) as mask_src:
        img_shape = (img_src.height, img_src.width)
        mask_shape = (mask_src.height, mask_src.width)

        if img_shape != mask_shape:
            print(f"⚠️ Mismatch: {img_name} → Image: {img_shape}, Mask: {mask_shape}")
            bad_pairs.append((img_path, mask_path))

print(f"\n🔎 Found {len(bad_pairs)} mismatched pairs.")


❌ Mask not found for id_1185.tif
❌ Mask not found for id_472.tif
❌ Mask not found for id_478.tif
❌ Mask not found for id_115.tif
❌ Mask not found for id_1017.tif
❌ Mask not found for id_94.tif
❌ Mask not found for id_823.tif
❌ Mask not found for id_1183.tif
❌ Mask not found for id_818.tif
❌ Mask not found for id_91.tif
❌ Mask not found for id_1025.tif
❌ Mask not found for id_1182.tif
❌ Mask not found for id_89.tif
❌ Mask not found for id_454.tif
❌ Mask not found for id_825.tif
❌ Mask not found for id_1198.tif
❌ Mask not found for id_820.tif
❌ Mask not found for id_480.tif
❌ Mask not found for id_468.tif
❌ Mask not found for id_1013.tif
❌ Mask not found for id_1193.tif
❌ Mask not found for id_79.tif
❌ Mask not found for id_1020.tif
❌ Mask not found for id_822.tif
❌ Mask not found for id_1177.tif
❌ Mask not found for id_95.tif
❌ Mask not found for id_824.tif
❌ Mask not found for id_461.tif
❌ Mask not found for id_826.tif
❌ Mask not found for id_450.tif
❌ Mask not found for id_1018.tif
❌ 

Resize masks whose dimensions don't match their image.

In [None]:
from rasterio.enums import Resampling

def resize_mask_to_match_image(image_path, mask_path, output_path=None):
    """Resample a mask raster to the pixel dimensions of its image.

    The mask is read with nearest-neighbour resampling at the image's
    height/width, its affine transform is rescaled by the same ratio, and the
    result is written as a new raster using the mask's original profile.

    Args:
        image_path: Path of the reference image; only its height and width
            are used.
        mask_path: Path of the mask raster to resample.
        output_path: Where to write the resized mask. When omitted (or falsy)
            the mask at `mask_path` is overwritten.
    """
    with rasterio.open(image_path) as img_src:
        image_shape = (img_src.height, img_src.width)

    with rasterio.open(mask_path) as mask_src:
        # Nearest-neighbour resampling does not interpolate between values.
        mask_data = mask_src.read(
            out_shape=(mask_src.count, image_shape[0], image_shape[1]),
            resampling=Resampling.nearest
        )

        # Rescale the pixel size so the resized raster covers the same extent.
        transform = mask_src.transform * mask_src.transform.scale(
            (mask_src.width / mask_data.shape[-1]),
            (mask_src.height / mask_data.shape[-2])
        )

        profile = mask_src.profile

    profile.update(height=image_shape[0], width=image_shape[1], transform=transform)

    # The source handle is closed before this point, so overwriting the mask
    # in place never truncates a file that is still open for reading.
    save_path = output_path if output_path else mask_path
    with rasterio.open(save_path, "w", **profile) as dst:
        dst.write(mask_data)


In [None]:
# Resample every mismatched mask to its image's dimensions, overwriting the
# mask file in place (no separate output path is given).
for img_path, mask_path in bad_pairs:
    print("Resizing mask: " + str(mask_path))
    resize_mask_to_match_image(img_path, mask_path, output_path=None)


Resizing mask: /content/drive/MyDrive/training_masks/Mask_Buffer20m_Id_212.tif
Resizing mask: /content/drive/MyDrive/training_masks/Mask_Buffer20m_Id_195.tif
Resizing mask: /content/drive/MyDrive/training_masks/Mask_Buffer20m_Id_226.tif


Copy masks to a new folder, dropping the runway / no-runway prefix from the filenames.

In [None]:

import shutil
import re

# Copy masks with IDs 800-1650 into a new folder, renaming
# "<0|1>_id_<N>.tif" to "Mask_Buffer20m_Id_<N>.tif".
source_folder = "/content/drive/MyDrive/P2 - Amazon ITU - PESU/All_Masks_TIF"   # adjust to your Drive layout
destination_folder = "/content/drive/MyDrive/Masks 800-1650"

# The destination may not exist yet
os.makedirs(destination_folder, exist_ok=True)

# Captures the numeric ID from names such as "0_id_800.tif"
pattern = re.compile(r"[01]_id_(\d+)\.tif")

for filename in os.listdir(source_folder):
    # Guard clauses: skip anything that is not a mask in the wanted ID range
    if not filename.endswith(".tif"):
        continue

    match = pattern.match(filename)
    if not match:
        continue

    id_number = int(match.group(1))
    if id_number < 800 or id_number > 1650:
        continue

    new_filename = f"Mask_Buffer20m_Id_{id_number}.tif"
    shutil.copyfile(
        os.path.join(source_folder, filename),
        os.path.join(destination_folder, new_filename),
    )
    print(f"✅ Copied: {filename} → {new_filename}")

print("🎉 Done copying and renaming mask files.")

✅ Copied: 0_id_1211.tif → Mask_Buffer20m_Id_1211.tif
✅ Copied: 0_id_1216.tif → Mask_Buffer20m_Id_1216.tif
✅ Copied: 1_id_1217.tif → Mask_Buffer20m_Id_1217.tif
✅ Copied: 1_id_1219.tif → Mask_Buffer20m_Id_1219.tif
✅ Copied: 1_id_1222.tif → Mask_Buffer20m_Id_1222.tif
✅ Copied: 0_id_1224.tif → Mask_Buffer20m_Id_1224.tif
✅ Copied: 0_id_1226.tif → Mask_Buffer20m_Id_1226.tif
✅ Copied: 0_id_1227.tif → Mask_Buffer20m_Id_1227.tif
✅ Copied: 0_id_1228.tif → Mask_Buffer20m_Id_1228.tif
✅ Copied: 1_id_1229.tif → Mask_Buffer20m_Id_1229.tif
✅ Copied: 0_id_1231.tif → Mask_Buffer20m_Id_1231.tif
✅ Copied: 1_id_1232.tif → Mask_Buffer20m_Id_1232.tif
✅ Copied: 0_id_1233.tif → Mask_Buffer20m_Id_1233.tif
✅ Copied: 0_id_1234.tif → Mask_Buffer20m_Id_1234.tif
✅ Copied: 1_id_1236.tif → Mask_Buffer20m_Id_1236.tif
✅ Copied: 0_id_1237.tif → Mask_Buffer20m_Id_1237.tif
✅ Copied: 1_id_1239.tif → Mask_Buffer20m_Id_1239.tif
✅ Copied: 1_id_1240.tif → Mask_Buffer20m_Id_1240.tif
✅ Copied: 0_id_1241.tif → Mask_Buffer20m_Id_12

In [None]:
# Move the renamed masks into the training_masks folder.
source_folder = "/content/drive/MyDrive/Masks 800-1650"
destination_folder = "/content/drive/MyDrive/training_masks"

# The destination may not exist yet
os.makedirs(destination_folder, exist_ok=True)

# Only .tif files are moved (extension compared case-insensitively)
for filename in os.listdir(source_folder):
    if not filename.lower().endswith(".tif"):
        continue

    shutil.move(
        os.path.join(source_folder, filename),
        os.path.join(destination_folder, filename),
    )
    print(f"🚚 Moved: {filename}")

print("✅ All .tif files moved and deleted from source.")

🚚 Moved: Mask_Buffer20m_Id_1211.tif
🚚 Moved: Mask_Buffer20m_Id_1216.tif
🚚 Moved: Mask_Buffer20m_Id_1217.tif
🚚 Moved: Mask_Buffer20m_Id_1219.tif
🚚 Moved: Mask_Buffer20m_Id_1222.tif
🚚 Moved: Mask_Buffer20m_Id_1224.tif
🚚 Moved: Mask_Buffer20m_Id_1226.tif
🚚 Moved: Mask_Buffer20m_Id_1227.tif
🚚 Moved: Mask_Buffer20m_Id_1228.tif
🚚 Moved: Mask_Buffer20m_Id_1229.tif
🚚 Moved: Mask_Buffer20m_Id_1231.tif
🚚 Moved: Mask_Buffer20m_Id_1232.tif
🚚 Moved: Mask_Buffer20m_Id_1233.tif
🚚 Moved: Mask_Buffer20m_Id_1234.tif
🚚 Moved: Mask_Buffer20m_Id_1236.tif
🚚 Moved: Mask_Buffer20m_Id_1237.tif
🚚 Moved: Mask_Buffer20m_Id_1239.tif
🚚 Moved: Mask_Buffer20m_Id_1240.tif
🚚 Moved: Mask_Buffer20m_Id_1241.tif
🚚 Moved: Mask_Buffer20m_Id_1242.tif
🚚 Moved: Mask_Buffer20m_Id_1244.tif
🚚 Moved: Mask_Buffer20m_Id_1245.tif
🚚 Moved: Mask_Buffer20m_Id_1248.tif
🚚 Moved: Mask_Buffer20m_Id_1252.tif
🚚 Moved: Mask_Buffer20m_Id_1254.tif
🚚 Moved: Mask_Buffer20m_Id_1255.tif
🚚 Moved: Mask_Buffer20m_Id_1256.tif
🚚 Moved: Mask_Buffer20m_Id_1

Check whether any of the new masks is missing its corresponding image, and copy the missing images in.

In [None]:

# For every mask in `mask_folder`, make sure the matching image exists in
# `primary_image_folder`; if it does not, copy it over from
# `third_image_folder`, renaming "id_<N>.tif" to
# "Sentinel_AllBands_Training_Id_<N>.tif".
mask_folder = "/content/drive/MyDrive/training_masks"
primary_image_folder = "/content/drive/MyDrive/Sentinel_training"
third_image_folder = "/content/drive/MyDrive/P2 - Amazon ITU - PESU/GEE_Exports_all"

# Captures the ID from names such as "Mask_Buffer20m_Id_800.tif"
pattern = re.compile(r"Mask_Buffer20m_Id_(\d+)\.tif")

for mask_file in os.listdir(mask_folder):
    # Non-.tif entries and names that do not follow the mask pattern are skipped
    match = pattern.match(mask_file) if mask_file.lower().endswith(".tif") else None
    if not match:
        continue

    id_number = match.group(1)

    target_filename = f"Sentinel_AllBands_Training_Id_{id_number}.tif"
    target_path = os.path.join(primary_image_folder, target_filename)

    source_filename = f"id_{id_number}.tif"
    source_path = os.path.join(third_image_folder, source_filename)

    if os.path.exists(target_path):
        # Nothing to do: the image is already where it should be
        print(f"✔️ Image already exists: {target_filename}")
    elif os.path.exists(source_path):
        shutil.copyfile(source_path, target_path)
        print(f"✅ Copied and renamed: {source_filename} → {target_filename}")
    else:
        print(f"⚠️ Not found in backup: {source_filename}")

print("🎉 Done copying missing images.")


✔️ Image already exists: Sentinel_AllBands_Training_Id_1.tif
✔️ Image already exists: Sentinel_AllBands_Training_Id_2.tif
✔️ Image already exists: Sentinel_AllBands_Training_Id_9.tif
✔️ Image already exists: Sentinel_AllBands_Training_Id_10.tif
✔️ Image already exists: Sentinel_AllBands_Training_Id_11.tif
✔️ Image already exists: Sentinel_AllBands_Training_Id_17.tif
✔️ Image already exists: Sentinel_AllBands_Training_Id_18.tif
✔️ Image already exists: Sentinel_AllBands_Training_Id_19.tif
✔️ Image already exists: Sentinel_AllBands_Training_Id_20.tif
✔️ Image already exists: Sentinel_AllBands_Training_Id_21.tif
✔️ Image already exists: Sentinel_AllBands_Training_Id_22.tif
✔️ Image already exists: Sentinel_AllBands_Training_Id_23.tif
✔️ Image already exists: Sentinel_AllBands_Training_Id_28.tif
✔️ Image already exists: Sentinel_AllBands_Training_Id_30.tif
✔️ Image already exists: Sentinel_AllBands_Training_Id_31.tif
✔️ Image already exists: Sentinel_AllBands_Training_Id_32.tif
✔️ Image al

In [None]:
# Mount Google Drive at /content/drive and import the modules used by the
# copy cell that follows.
# NOTE(review): this repeats the mount/imports from the top of the notebook —
# presumably so this part can run in a fresh session; confirm before removing.
from google.colab import drive
drive.mount('/content/drive')
import os
import shutil


Mounted at /content/drive


In [None]:
# Copy every .tif from `src_folder` into `dst_folder`.
# The copy is resumable: a file that already exists at the destination with
# the same size is skipped, so rerunning the cell after an interruption
# (e.g. a dropped Drive mount) only copies what is still missing or partial.

# Replace these paths with yours
src_folder = "/content/drive/MyDrive/P2 - Amazon ITU - PESU/GEE_Exports_all"
dst_folder = "/content/drive/MyDrive/P2 - Amazon ITU - PESU/Copy_GEE_Exports_all"

# Create destination folder if it doesn't exist
os.makedirs(dst_folder, exist_ok=True)

# Copy only .tif files
for file in os.listdir(src_folder):
    if not file.lower().endswith(".tif"):
        continue

    src_path = os.path.join(src_folder, file)
    dst_path = os.path.join(dst_folder, file)

    # A size mismatch indicates a partial copy from an aborted run; redo it.
    if os.path.exists(dst_path) and os.path.getsize(dst_path) == os.path.getsize(src_path):
        continue

    shutil.copy(src_path, dst_path)

print("Copy completed.")

OSError: [Errno 107] Transport endpoint is not connected