In [None]:
# Fetch the satellite image and ground-truth rasters into Dataset2/.
# `-p` keeps the cell re-runnable when the directory already exists; the URLs are
# quoted so the shell does not treat the `?` in the query string as a glob.
!mkdir -p Dataset2
!wget "https://zenodo.org/records/12750779/files/ImageV2.tif?download=1" -O Dataset2/Image.tif
!wget "https://zenodo.org/records/12750779/files/GTV2.tif?download=1" -O Dataset2/GT.tif



In [1]:
import rasterio
import numpy as np
import os
import geopandas as gpd
from tqdm.notebook import tqdm

In [2]:
def _has_invalid_pixels(patch, nodata):
    """Return True if ``patch`` contains NaN or, when ``nodata`` is set, the nodata value."""
    if np.any(np.isnan(patch)):
        return True
    return nodata is not None and bool(np.any(patch == nodata))


def create_image_patches(
    sat_image_path,
    gt_image_path,
    patch_size=128,
    output_dir="Dataset2",
):
    """Tile a satellite raster and its ground-truth raster into square patches.

    Both rasters are walked in non-overlapping ``patch_size`` x ``patch_size``
    windows, column by column; partial windows at the right/bottom edges are
    dropped. A window is discarded when the satellite patch contains NaN,
    contains the dataset's nodata value, or is entirely zero, or when the
    ground-truth patch contains NaN or its nodata value. Ground-truth patches
    are binarised: pixels equal to 2 become 1 and every other pixel becomes 0.

    The kept patches are stacked along a new leading axis and saved as
    ``Xdata.npy`` (satellite patches) and ``Ydata.npy`` (binary labels) inside
    ``output_dir``, which is created if it does not exist.

    Args:
        sat_image_path: Path of the satellite raster, opened with ``rasterio.open``.
        gt_image_path: Path of the ground-truth raster; must have the same
            width and height as the satellite raster.
        patch_size: Edge length of each square patch, in pixels.
        output_dir: Directory that receives ``Xdata.npy`` and ``Ydata.npy``.

    Returns:
        None. The results are written to disk and a summary line is printed.

    Raises:
        AssertionError: If the two rasters (or two corresponding patches)
            differ in size.
        ValueError: If ``patch_size`` is not positive, or if no window passes
            the validity filters (so there is nothing to save).
    """
    if patch_size <= 0:
        raise ValueError(f"patch_size must be a positive integer, got {patch_size!r}")

    sat_patches = []
    gt_patches = []

    # NOTE: rasterio ignores a `nodata=` keyword when a dataset is opened for
    # reading, so the nodata values below are whatever the files themselves
    # declare (possibly None).
    with rasterio.open(sat_image_path) as sat_image, rasterio.open(
        gt_image_path
    ) as gt_image:
        assert (
            sat_image.width == gt_image.width and sat_image.height == gt_image.height
        ), "Image dimensions do not match between satellite image and ground truth"

        sat_nodata = sat_image.nodata
        gt_nodata = gt_image.nodata
        print(f"Nodata values for sat, gt, ff {sat_nodata,gt_nodata}")

        # Only whole patches are used; any remainder at the edges is ignored.
        n_patches_x = sat_image.width // patch_size
        n_patches_y = sat_image.height // patch_size

        for i in tqdm(range(n_patches_x)):
            for j in range(n_patches_y):
                window = rasterio.windows.Window(
                    i * patch_size, j * patch_size, patch_size, patch_size
                )
                sat_patch = sat_image.read(window=window)
                gt_patch = gt_image.read(window=window)

                assert (
                    sat_patch.shape[1:] == gt_patch.shape[1:]
                ), "Patch dimensions do not match"

                # Defensive: never keep a patch that is not exactly patch_size square.
                if sat_patch.shape[-2:] != (patch_size, patch_size):
                    continue

                # Skip patches with missing data or a completely empty image.
                if _has_invalid_pixels(sat_patch, sat_nodata) or np.all(
                    sat_patch == 0.0
                ):
                    continue
                if _has_invalid_pixels(gt_patch, gt_nodata):
                    continue

                sat_patches.append(sat_patch)
                # Binary mask of class 2, kept in the ground truth's own dtype.
                gt_patches.append((gt_patch == 2.0).astype(gt_patch.dtype))

    if not sat_patches:
        raise ValueError(
            "No valid patches were found; check the input rasters and patch_size"
        )

    # Stack once at the end instead of re-copying the whole array per patch.
    Xdata = np.stack(sat_patches)
    Ydata = np.stack(gt_patches)
    counter = len(sat_patches)

    os.makedirs(output_dir, exist_ok=True)
    np.save(os.path.join(output_dir, "Xdata.npy"), Xdata)
    np.save(os.path.join(output_dir, "Ydata.npy"), Ydata)
    print(f" {counter} Number of patches are created and saved in the directory")


# Run the patch extraction on the rasters stored under Dataset2/
patch_size = 128  # edge length, in pixels, of each square patch
output_dir = "Dataset2"
sat_image_path = "Dataset2/Image.tif"
gt_image_path = "Dataset2/GT.tif"

# Create the output directory up front; this is a no-op when it already exists
os.makedirs(output_dir, exist_ok=True)
create_image_patches(
    sat_image_path=sat_image_path,
    gt_image_path=gt_image_path,
    patch_size=patch_size,
    output_dir=output_dir,
)

Nodata values for sat, gt, ff (None, None)


  0%|          | 0/44 [00:00<?, ?it/s]

 1935 Number of patches are created and saved in the directory


In [3]:
# Reload the satellite-image patch array that create_image_patches saved as Xdata.npy
Xdata = np.load("Dataset2/Xdata.npy")

In [4]:
# Reload the binarised ground-truth patch array that create_image_patches saved as Ydata.npy
Ydata = np.load("Dataset2/Ydata.npy")