This notebook splits the Pabbi satellite image into training and testing rasters (an 80% / 20% split along the image width).
Steps:
- Load and view the Pabbi COG (Cloud Optimized GeoTIFF) using geoai.
- Use rasterio to split the Pabbi COG into training and testing parts.

In [1]:
import geoai

In [2]:
# Local path to the COG (Cloud Optimized GeoTIFF) source image.
image_path = "pabbi_satellite_image_COG.tif"

In [4]:
# Pass the source image path to geoai's raster viewer so the full scene can be
# inspected before it is split.
geoai.view_raster(image_path)

# Part 2
Use rasterio to create the training/testing split.

In [21]:
import rasterio

In [1]:
import rasterio
from rasterio.windows import Window

image_path = "pabbi_satellite_image_COG.tif"
train_path = "train_image.tif"
test_path = "test_image.tif"

# Fraction of the image width (taken from the left edge) that goes to the
# training raster; the remaining columns go to the testing raster.
TRAIN_FRACTION = 0.8

# GeoTIFF creation options shared by both outputs (compression + tiling).
common_updates = {
    'compress': 'lzw',
    'tiled': True,
    'blockxsize': 512,
    'blockysize': 512,
    'interleave': 'band',  # store each band as its own plane
    'predictor': 2,
    'BIGTIFF': 'IF_SAFER'  # allow large TIFFs if needed
}


def write_window_to_geotiff(src, window, out_path, creation_options):
    """Copy one window of an open raster into a new GeoTIFF.

    The output profile is the source profile with width, height and transform
    replaced by those of ``window`` and with ``creation_options`` applied on
    top. Pixels are copied one output block at a time, so memory use is bounded
    by the block size rather than by the size of the window.

    Parameters
    ----------
    src : open rasterio dataset (read mode)
        Raster to copy pixels from.
    window : rasterio.windows.Window
        Region of ``src`` to copy, in pixel coordinates of ``src``.
    out_path : str
        Destination file path; an existing file is overwritten.
    creation_options : dict
        Profile entries (e.g. compression, tiling) applied to the output.
    """
    out_profile = src.profile.copy()
    out_profile.update({
        'width': window.width,
        'height': window.height,
        'transform': src.window_transform(window),
        **creation_options
    })

    with rasterio.open(out_path, 'w', **out_profile) as dst:
        for _, block in dst.block_windows(1):
            # `block` is expressed in output coordinates; shift it by the
            # window origin to address the same pixels in the source.
            src_block = Window(
                window.col_off + block.col_off,
                window.row_off + block.row_off,
                block.width,
                block.height,
            )
            # Read/write all bands of this block in a single call.
            dst.write(src.read(window=src_block), window=block)


with rasterio.open(image_path) as src:
    width = src.width
    height = src.height

    split_width = int(TRAIN_FRACTION * width)
    if not 0 < split_width < width:
        # A zero-width train or test raster cannot be written; fail with a
        # clear message instead of an opaque driver error.
        raise ValueError(
            f"Cannot split a raster of width {width} at column {split_width}; "
            "both parts must be at least one pixel wide."
        )

    # Left part -> train, right part -> test; both span the full height.
    train_window = Window(0, 0, split_width, height)
    test_window = Window(split_width, 0, width - split_width, height)

    write_window_to_geotiff(src, train_window, train_path, common_updates)
    write_window_to_geotiff(src, test_window, test_path, common_updates)


In [2]:
# View the testing raster written by the split cell (test_path), rather than a
# hard-coded file name that this notebook never creates.
geoai.view_raster(test_path)

In [3]:
# View the training raster written by the split cell (train_path), rather than
# a hard-coded file name that this notebook never creates.
geoai.view_raster(train_path)