<a href="https://colab.research.google.com/github/liangchow/zindi-amazon-secret-runway/blob/main/utils/Generate_airstrip_masks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports and Setup.

In [None]:
%%capture
!pip -q install rasterio
!pip -q install gdown

In [None]:
import rasterio
import geopandas as gpd
from rasterio.features import rasterize
import numpy as np
import os
from shapely.geometry import LineString
from shapely.ops import transform
from pyproj import Transformer
import gdown
import shutil

## Create folder to store masks in Colab Runtime
This folder is deleted at the end of your session.

In [None]:
# Folder in the Colab runtime where the generated masks are written
working_path = '/content/masks'

# Report whether the folder was already there; create it only when missing
if os.path.exists(working_path):
    print(f"Folder already exists: {working_path}")
else:
    os.makedirs(working_path)
    print(f"Folder created: {working_path}")

Folder created: /content/masks


# Download data to local compute node

In [None]:
# Mount Google Drive at /content/drive so that later cells can read the imagery
# from, and copy the generated masks to, paths under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# List the contents of your Google Drive path to verify the folder name and structure
# NOTE: `drive_path` is only used here for the listing; the download cell below
# reassigns it to the image sub-folder.
drive_path = '/content/drive/MyDrive/P2 - Amazon ITU - PESU' # Adjust this path if necessary
!ls "{drive_path}"

'Amazon ITU - Tracker.gsheet'  'Geojson_tif_small_2(ONLY RUNWAYS)'
'Annotated images'	        Geojson_tif_small_2.zip
'Dataset extend.zip'	        Images
'Explore Sample.gdoc'	        Sentinel_inference
'Exploring Training.gdoc'      'Week 1 - Understanding the Project.gslides'
 generated_tif		       'Zindi_Amazon_Results (Sentinel_inf)'
 Geojson_tif_small


## Download training images from your Google Drive

**Note**: If you are working with your own data, edit the path to your training images in the cell below

In [None]:
# Define the path to your training images in Google Drive
drive_path = '/content/drive/MyDrive/P2 - Amazon ITU - PESU/Sentinel_inference'
local_zip_path = '/content/Sentinel_inference.zip'
local_unzip_path = '/content/'

# Copy the folder from Google Drive to the local Colab environment
!cp -r "{drive_path}" /content/

# Navigate to the local folder
%cd /content/Sentinel_inference

# Zip the data
!zip -r {local_zip_path} .

# Navigate back to content before unzipping
%cd /content

# Unzip the files to the content directory
!unzip {local_zip_path} -d {local_unzip_path}

# Navigate back to content
%cd /content

/content/drive/.shortcut-targets-by-id/1-DbkyCW1BaeQ_nKLgfuH-kj036N57maK/Sentinel_inference
  adding: Sentinel_AllBands_Inference_2020_03.tif (deflated 5%)
  adding: Sentinel_AllBands_Inference_2020_02.tif (deflated 5%)
  adding: Sentinel_AllBands_Inference_2020_01.tif (deflated 5%)
  adding: Sentinel_AllBands_Inference_2021_02.tif (deflated 5%)
  adding: Sentinel_AllBands_Inference_2021_01.tif (deflated 5%)
  adding: Sentinel_AllBands_Inference_2021_04.tif (deflated 4%)
  adding: Sentinel_AllBands_Inference_2021_03.tif (deflated 5%)
  adding: Sentinel_AllBands_Inference_2022_01.tif (deflated 5%)
  adding: Sentinel_AllBands_Inference_2022_02.tif (deflated 5%)
  adding: Sentinel_AllBands_Inference_2024_01.tif (deflated 5%)
  adding: Sentinel_AllBands_Inference_2023_01.tif (deflated 5%)
/content
Archive:  /content/Sentinel_inference.zip
  inflating: /content/Sentinel_AllBands_Inference_2020_03.tif  
  inflating: /content/Sentinel_AllBands_Inference_2020_02.tif  
  inflating: /content/Sen

## Connect to team GitHub Repo

Clone the main branch of the TerraPulse team's GitHub repo to access its data and files.

In [None]:
# clone the main branch from GitHub to get all the data and files from there onto the current runtime session
!apt-get install git
!git clone https://github.com/liangchow/zindi-amazon-secret-runway.git
!git pull # pulls the latest changes from repo

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
git is already the newest version (1:2.34.1-1ubuntu1.15).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.
Cloning into 'zindi-amazon-secret-runway'...
remote: Enumerating objects: 510, done.[K
remote: Counting objects: 100% (122/122), done.[K
remote: Compressing objects: 100% (87/87), done.[K
remote: Total 510 (delta 72), reused 39 (delta 35), pack-reused 388 (from 1)[K
Receiving objects: 100% (510/510), 64.14 MiB | 17.28 MiB/s, done.
Resolving deltas: 100% (240/240), done.
fatal: not a git repository (or any of the parent directories): .git


## Load training airstrips

In [None]:
# Load the training airstrip shapefile from the cloned GitHub repo into a GeoDataFrame.
# The path only exists after the repo clone cell above has run.
# NOTE(review): the notebook text further down describes the airstrips as polylines
# that get buffered — confirm the geometry type of this layer.
airstrips_gdf = gpd.read_file("/content/zindi-amazon-secret-runway/Data_Visualization/data/pac_2024_training/pac_2024_training.shp")

# Functions for creating binary masks

In [None]:
# Define buffer distance
# The airstrips are buffered by this amount after being reprojected to each image's
# CRS, so the value is interpreted in the units of that CRS. The output filenames
# label it as metres ("Mask_Buffer{buffer_distance}m_..."), which presumably assumes
# the images use a metre-based projected CRS — TODO confirm for your imagery.
buffer_distance = 20

In [None]:
def check_mask_values(mask):
    """
    Report which of the two binary classes (0 and 1) occur in a mask.

    Args:
      mask (np.ndarray): Array holding the mask; its values are expected to be 0 or 1.

    Returns:
      bool: True only when the mask holds at least one 0 and at least one 1.

    Prints:
      Exactly one of the following messages, depending on the values found:
      - "The mask contains both 0 and 1."
      - "The mask contains only 0."
      - "The mask contains only 1."
      - "The mask does not contain 0 or 1."
    """

    zero_found = bool(np.any(mask == 0))
    one_found = bool(np.any(mask == 1))

    # One message per (zero_found, one_found) combination
    message_for = {
        (True, True): "The mask contains both 0 and 1.",
        (True, False): "The mask contains only 0.",
        (False, True): "The mask contains only 1.",
        (False, False): "The mask does not contain 0 or 1.",
    }
    print(message_for[(zero_found, one_found)])

    return zero_found and one_found

In [None]:
def reproject_geometry(geometry, from_crs, to_crs):
    """
    Convert a geometry's coordinates from one coordinate reference system (CRS) to another.

    Args:
      geometry (shapely.geometry.base.BaseGeometry): Geometry to convert (e.g., Polygon, Point).
      from_crs (str): CRS the geometry is currently expressed in, e.g. 'EPSG:4326'.
      to_crs (str): CRS the returned geometry should be expressed in.

    Returns:
      shapely.geometry.base.BaseGeometry: The same geometry with its coordinates in `to_crs`.
    """

    # always_xy=True makes the transformer take and return coordinates in (x, y) order
    coordinate_mapper = Transformer.from_crs(from_crs, to_crs, always_xy=True).transform
    return transform(coordinate_mapper, geometry)

# Create binary mask

The code below does the following for each training image:
- Find the airstrips that intersect the image footprint (images with no intersecting airstrip are skipped and get no mask)
- Buffer each intersecting airstrip polyline by `buffer_distance`
- Create a binary mask with the same dimensions as the training image
- Set all pixel values inside the buffers to 1 and outside the buffers to 0


In [None]:
# os, rasterio, rasterize and np come from the imports cell at the top of the
# notebook; only the names that cell does not provide are imported here.
import re

from shapely.geometry import box, mapping

# Define the directory containing the TIFF files
image_directory = '/content/Sentinel_inference/'

# Masks go to the folder created at the top of the notebook
mask_directory = working_path
os.makedirs(mask_directory, exist_ok=True)

# List all TIFF files in the directory (sorted so the processing order is reproducible)
image_files = sorted(f for f in os.listdir(image_directory) if f.endswith('.tif'))

# Iterate through each image file
for image_filename in image_files:
    image_path = os.path.join(image_directory, image_filename)
    print(f"Processing image: {image_filename}")

    # Only the georeferencing metadata is needed, so read it and close the image
    # before doing any further work.
    with rasterio.open(image_path) as img_src:
        img_meta = img_src.meta.copy()
        img_transform = img_src.transform
        img_crs = img_src.crs
        img_width = img_src.width
        img_height = img_src.height
        img_bounds = img_src.bounds

    # Without a CRS the airstrips cannot be aligned with the image
    if img_crs is None:
        print(f"Skipping {image_filename}: the image has no CRS.")
        continue

    # buffer() works in CRS units; in a geographic CRS that would be degrees
    if img_crs.is_geographic:
        print(f"WARNING: {image_filename} uses a geographic CRS ({img_crs}); "
              f"buffer_distance={buffer_distance} will be applied in degrees, not metres.")

    # Footprint of the image as a polygon in the image CRS
    img_polygon = box(img_bounds.left, img_bounds.bottom, img_bounds.right, img_bounds.top)

    # Reproject airstrips_gdf to the image's CRS for spatial filtering
    airstrips_reprojected = airstrips_gdf.to_crs(img_crs)

    # Filter airstrips that intersect with the image bounds
    intersecting_airstrips = airstrips_reprojected[airstrips_reprojected.intersects(img_polygon)]

    if intersecting_airstrips.empty:
        print(f"No airstrips intersect with image: {image_filename}")
        continue

    print(f"Found {len(intersecting_airstrips)} airstrips intersecting with {image_filename}")

    # Create buffered geometries for intersecting airstrips
    buffered_geometries = intersecting_airstrips.geometry.buffer(buffer_distance)

    # Prepare shapes for rasterization (geometry, value)
    shapes_to_rasterize = [(mapping(geom), 1) for geom in buffered_geometries]

    # Rasterize the buffered airstrips onto a mask aligned with the image grid:
    # pixels inside a buffer become 1, everything else keeps the fill value 0.
    mask = rasterize(
        shapes=shapes_to_rasterize,
        out_shape=(img_height, img_width),
        transform=img_transform,
        fill=0,
        dtype='uint8',
    )

    # The mask reuses the image's georeferencing but is a single uint8 band
    out_meta = img_meta.copy()
    out_meta.update({
        "count": 1,
        "dtype": "uint8",
        "nodata": None  # Disable nodata
    })

    # Check mask values
    check_mask_values(mask)

    # Name the mask after the year and month at the end of the image filename
    # (expected format: Sentinel_AllBands_Inference_YYYY_MM.tif). Anything that does
    # not end in YYYY_MM.tif keeps its full original name so masks cannot collide.
    date_match = re.search(r'(?:^|_)(\d{4})_(\d{2})\.tif$', image_filename)
    if date_match:
        year, month = date_match.groups()
        output_mask_filename = f"Mask_Buffer{buffer_distance}m_{year}_{month}.tif"
    else:
        print(f"Could not parse year and month from filename: {image_filename}. Saving with a generic name.")
        output_mask_filename = f"Mask_Buffer{buffer_distance}m_{image_filename}"

    # Save the generated mask
    output_raster_path = os.path.join(mask_directory, output_mask_filename)

    with rasterio.open(output_raster_path, "w", **out_meta) as dest:
        dest.write(mask, 1)

    print(f"Saved mask for {image_filename} as {output_raster_path}")

Processing image: Sentinel_AllBands_Inference_2020_03.tif
Found 2 airstrips intersecting with Sentinel_AllBands_Inference_2020_03.tif
The mask contains both 0 and 1.
Saved mask for Sentinel_AllBands_Inference_2020_03.tif as /content/masks/Mask_Buffer20m_2020_03.tif
Processing image: Sentinel_AllBands_Inference_2020_02.tif
Found 3 airstrips intersecting with Sentinel_AllBands_Inference_2020_02.tif
The mask contains both 0 and 1.
Saved mask for Sentinel_AllBands_Inference_2020_02.tif as /content/masks/Mask_Buffer20m_2020_02.tif
Processing image: Sentinel_AllBands_Inference_2020_01.tif
Found 5 airstrips intersecting with Sentinel_AllBands_Inference_2020_01.tif
The mask contains both 0 and 1.
Saved mask for Sentinel_AllBands_Inference_2020_01.tif as /content/masks/Mask_Buffer20m_2020_01.tif
Processing image: Sentinel_AllBands_Inference_2021_02.tif
Found 3 airstrips intersecting with Sentinel_AllBands_Inference_2021_02.tif
The mask contains both 0 and 1.
Saved mask for Sentinel_AllBands_Inf

In [None]:
import os

# Folder holding the images that the mask-generation cell reads
directory_path = '/content/Sentinel_inference/'
files_in_directory = os.listdir(directory_path)

# Show the folder contents, one entry per line (nothing is printed for an empty folder)
if files_in_directory:
    print("\n".join(files_in_directory))

Sentinel_AllBands_Inference_2020_03.tif
Sentinel_AllBands_Inference_2020_02.tif
Sentinel_AllBands_Inference_2020_01.tif
Sentinel_AllBands_Inference_2021_02.tif
Sentinel_AllBands_Inference_2021_01.tif
Sentinel_AllBands_Inference_2021_04.tif
Sentinel_AllBands_Inference_2021_03.tif
Sentinel_AllBands_Inference_2022_01.tif
Sentinel_AllBands_Inference_2022_02.tif
Sentinel_AllBands_Inference_2024_01.tif
Sentinel_AllBands_Inference_2023_01.tif


In [None]:
# os and shutil come from the imports cell at the top of the notebook.

# Define the source directory (where masks are currently saved)
source_directory = '/content/masks'

# Define the destination directory in your Google Drive.
# Edit this path to point at the folder in your own Drive that should receive the masks.
destination_directory = '/content/drive/MyDrive/P2 - Amazon ITU - PESU/Generated_Masks'

# Create the destination directory if it doesn't exist
if os.path.exists(destination_directory):
    print(f"Destination folder already exists: {destination_directory}")
else:
    os.makedirs(destination_directory, exist_ok=True)
    print(f"Destination folder created: {destination_directory}")

# List all mask files in the source directory (sorted so the log order is reproducible)
mask_files = sorted(f for f in os.listdir(source_directory) if f.endswith('.tif'))

# Copy each mask file to the destination directory. A failed copy must not stop the
# remaining files, so file-system errors are reported and collected; anything that
# is not a file-system error is left to surface as a real bug.
failed_copies = []
for mask_filename in mask_files:
    source_path = os.path.join(source_directory, mask_filename)
    destination_path = os.path.join(destination_directory, mask_filename)

    try:
        shutil.copy(source_path, destination_path)
    except OSError as e:
        print(f"Error copying {mask_filename}: {e}")
        failed_copies.append(mask_filename)
    else:
        print(f"Copied {mask_filename} to {destination_directory}")

# Summarise the run so failures are not lost in the per-file log
print(f"Copied {len(mask_files) - len(failed_copies)} of {len(mask_files)} mask files.")
if failed_copies:
    print(f"Files that could not be copied: {', '.join(failed_copies)}")

Destination folder created: /content/drive/MyDrive/P2 - Amazon ITU - PESU/Generated_Masks
Copied Mask_Buffer20m_2023_01.tif to /content/drive/MyDrive/P2 - Amazon ITU - PESU/Generated_Masks
Copied Mask_Buffer20m_2022_02.tif to /content/drive/MyDrive/P2 - Amazon ITU - PESU/Generated_Masks
Copied Mask_Buffer20m_2022_01.tif to /content/drive/MyDrive/P2 - Amazon ITU - PESU/Generated_Masks
Copied Mask_Buffer20m_2021_02.tif to /content/drive/MyDrive/P2 - Amazon ITU - PESU/Generated_Masks
Copied Mask_Buffer20m_2021_03.tif to /content/drive/MyDrive/P2 - Amazon ITU - PESU/Generated_Masks
Copied Mask_Buffer20m_2020_01.tif to /content/drive/MyDrive/P2 - Amazon ITU - PESU/Generated_Masks
Copied Mask_Buffer20m_2020_02.tif to /content/drive/MyDrive/P2 - Amazon ITU - PESU/Generated_Masks
Copied Mask_Buffer20m_2020_03.tif to /content/drive/MyDrive/P2 - Amazon ITU - PESU/Generated_Masks
Copied Mask_Buffer20m_2024_01.tif to /content/drive/MyDrive/P2 - Amazon ITU - PESU/Generated_Masks
Copied Mask_Buffer2