# 01. Create COG

This script demonstrates creating a COG from an input GeoTiff using the new ["gdal" application](https://gdal.org/en/stable/programs/index.html#gdal-application) via Python.

While most of it is generic, it does make some assumptions about the original GeoTiffs (i.e. some of this is hardcoded to NatureScan).

In [31]:
from pathlib import Path
from osgeo import gdal
gdal.UseExceptions()

from pydantic import BaseModel

class CogConfig(BaseModel):
    """Expected properties of a source GeoTiff plus optional per-band metadata.

    The first three fields are what ``assert_info`` checks an input against;
    ``nodata`` and ``bands`` are also written to the temporary copy made by
    ``create_tmp_tif``.
    """

    # Number of raster bands the input must have.
    num_bands: int = 3
    # Data type name the first band must report (e.g. "Byte", "Float32").
    data_type: str = "Byte"
    # Nodata value: compared with the first band's nodata when the input
    # declares one, and set on the temporary copy.
    nodata: int | float = 255
    # Optional band metadata, one tuple per band in band order:
    # (band name, wavelength in nm, GDAL colour interpretation constant).
    # None means no band metadata is written.
    bands: None | list[tuple[str, int, int]] = None


def assert_is_tif_file(input: Path):
    """Check that ``input`` is an existing file with a ``.tif`` extension.

    The extension is compared case-insensitively, so ``.TIF`` is accepted as
    well. Inputs in this file are discovered with case-insensitive globbing,
    so a case-sensitive check here would reject files the glob had found.

    Args:
        input: Path of the candidate GeoTiff.

    Raises:
        AssertionError: If ``input`` is not a file, or its suffix is not
            ``.tif`` (ignoring case). Raised explicitly rather than through
            ``assert`` so the checks still run under ``python -O``.
    """
    if not input.is_file():
        raise AssertionError("Input should be a file.")
    if input.suffix.lower() != ".tif":
        raise AssertionError("Input should be a .tif file.")


def assert_info(input: Path, config: CogConfig):
    """Validate the raster at ``input`` against the expectations in ``config``.

    Checks, in order: the driver is GTiff, the band count equals
    ``config.num_bands``, the first band's data type equals
    ``config.data_type`` and, only when the first band declares a nodata
    value, that it equals ``config.nodata``.

    Raises:
        AssertionError: On the first expectation that is not met.
    """
    info = gdal.alg.raster.info(input=input).Output()

    # The file must be read by the GeoTiff driver
    driver = info["driverShortName"]
    assert driver == 'GTiff', f"Expected GTiff driver. Found {driver} driver instead."

    # The band count must match the configuration
    bands = info["bands"]
    num_bands = len(bands)
    assert num_bands == config.num_bands, (
        f"Expected {config.num_bands} bands. Found {num_bands} bands instead."
    )

    # The remaining checks look at the first band only
    first_band = bands[0]

    # The data type must match the configuration
    data_type = first_band["type"]
    assert data_type == config.data_type, (
        f"Expected {config.data_type} data type. Found {data_type} data type instead."
    )

    # If there is a nodata value, assert that it is as expected
    if "noDataValue" in first_band:
        nodata = first_band["noDataValue"]
        assert nodata == config.nodata, (
            f"Expected nodata of {config.nodata}. Found {nodata} instead."
        )


# Bands only needed for MS files
def create_tmp_tif(input: Path, config: CogConfig) -> Path:
    """Copy ``input`` to a sibling ``.tmp.tif`` file and prepare that copy.

    The copy gets ``config.nodata`` as its nodata value and has statistics
    computed. When ``config.bands`` is provided, each band additionally gets a
    description, band metadata and a colour interpretation. The source file
    itself is never modified.

    Args:
        input: Path of the source GeoTiff.
        config: Nodata value and optional band metadata to apply.

    Returns:
        Path of the temporary file (``input`` with its suffix replaced by
        ``.tmp.tif``).
    """
    tmp_file = input.with_suffix(".tmp.tif")

    # Copy src to a tmp file not to change anything
    # In the future, we may be copying from a remote src
    gdal.alg.dataset.copy(source=input, destination=tmp_file, overwrite=True)

    # Edit temp file to:
    #   - Add nodata values
    #   - Compute statistics
    gdal.alg.raster.edit(dataset=tmp_file, nodata=config.nodata, stats="YES")

    # If band info is provided add to tmp tif
    if config.bands is not None:
        # Edit the temp file to add some band metadata
        # This can't be done by the new gdal program yet (I think)
        # The context manager closes the dataset even if a band edit raises.
        with gdal.Open(tmp_file, gdal.GA_Update) as ds:
            for i, (name, wl, ci) in enumerate(config.bands, start=1):
                band = ds.GetRasterBand(i)
                band.SetDescription(name)
                band_metadata = {
                    "BAND_NAME": name.lower(),
                    "COMMON_NAME": name.lower(),
                    "WAVELENGTH": str(wl),
                    "UNITS": "nm",
                }
                # SetMetadataItem adds to the band's existing metadata;
                # SetMetadata would replace the whole domain, including any
                # statistics items written by the edit step above.
                for key, value in band_metadata.items():
                    band.SetMetadataItem(key, value)
                band.SetColorInterpretation(ci)

    return tmp_file


def create_cog(input: Path, config: CogConfig):
    """Create a COG next to ``input``, named like it but ending in ``.cog.tif``.

    The input is validated, copied to a temporary GeoTiff that carries the
    nodata value, statistics and optional band metadata from ``config``, and
    that temporary file is then converted to a COG. The temporary file is
    removed afterwards, whether or not the conversion succeeded.

    Args:
        input: Path of the source ``.tif`` file.
        config: Expected input properties and metadata to apply.

    Raises:
        AssertionError: If the input fails validation.
        Exception: Whatever GDAL raises while copying, editing or converting.
    """
    assert_is_tif_file(input)
    assert_info(input, config)
    tmp_path = create_tmp_tif(input, config)

    # Create COG using GDAL
    output = input.with_suffix(".cog.tif")
    try:
        gdal.alg.raster.convert(
            # Convert the prepared temp file, not the untouched original,
            # so the edits made to it actually reach the COG.
            input=tmp_path,
            output=output,
            output_format="COG",        # COG Format
            creation_option={
                "COMPRESS": "ZSTD",     # Lossless compression
                "LEVEL": 9,             # Default amount of compression
                "PREDICTOR": "YES",     # Uses appropriate level for data type
                "BIGTIFF": "IF_SAFER",  # Might need to explicitly use YES if heuristic fails
            },
            overwrite=True,             # Allow overwriting of files
        )
    finally:
        # Delete temp file, also when the conversion failed
        gdal.alg.dataset.delete(filename=tmp_path)

## Running the conversion

This is a crude script for generating a corresponding COG for every GeoTiff within a directory. Unless `force` is set to `True`, it won't recreate COGs that already exist. A corresponding COG is one with the same file name but ending in '.cog.tif' instead of '.tif'.

In [32]:
# When True, every matching GeoTiff is processed even if its .cog.tif exists
force = True
# Maximum number of files to process per cell run; None applies no limit
limit = None
# Directory searched recursively for input GeoTiffs
input_dir = Path("/Volumes/Sammy/terra-luma/stac-input")

## RGB COGs

In [33]:
# RGB inputs: 3 bands of Byte data with 255 as the nodata value
RGB_COG_CONFIG = CogConfig(num_bands=3, data_type="Byte", nodata=255)

# Collect the RGB GeoTiffs, leaving out anything that is itself a COG
tif_files = [
    f
    for f in input_dir.rglob("*rgb.tif", case_sensitive=False)
    if not f.name.endswith('.cog.tif')
]

# Inputs that already have a COG sitting next to them
tif_files_with_cogs = [f for f in tif_files if f.with_suffix('.cog.tif').exists()]

# Work list: everything when `force` is set, otherwise only inputs lacking a COG
if force:
    tif_files_without_cogs = list(tif_files)
else:
    tif_files_without_cogs = [
        f for f in tif_files if not f.with_suffix('.cog.tif').exists()
    ]

# Cap the work list at `limit` entries (no cap when `limit` is None)
tif_files_without_cogs = tif_files_without_cogs[:limit]

print(f"Found {len(tif_files_with_cogs)} GeoTiff files with corresponding COGs.")
print(f"Found {len(tif_files_without_cogs)} GeoTiff files without corresponding COGs.")

total = len(tif_files_without_cogs)

# Convert one file at a time; a failure is reported and the loop carries on
for i, f in enumerate(tif_files_without_cogs):
    print(f"Processing {f.name} ({i + 1} / {total}) ...")
    try:
        create_cog(f, RGB_COG_CONFIG)
    except Exception as e:
        print(f"Error processing: {f.name}")
        print(e)

Found 17 GeoTiff files with corresponding COGs.
Found 17 GeoTiff files without corresponding COGs.
Processing 20241207_SANSSTP002_m3m_50mAGL_ortho_rgb.tif (1 / 17) ...
Processing 20241208_SANSSTP005_m3m_60mAGL_ortho_rgb.tif (2 / 17) ...
Processing 20241210_SANSSTP020_m3m_50mAGL_ortho_rgb.tif (3 / 17) ...
Processing 20241209_SANSSTP010_m3m_50mAGL_ortho_rgb.tif (4 / 17) ...
Processing 20240812_SANSSTP009_m3m_70mAGL_ortho_rgb.tif (5 / 17) ...
Processing 20241208_SANSSTP006_m3m_110mAGL_ortho_rgb.tif (6 / 17) ...
Processing 20241210_SANSSTP014_m3m_50mAGL_ortho_rgb.tif (7 / 17) ...
Processing 20241001_SAAGAW0004_m3m_50mAGL_ortho_RGB.tif (8 / 17) ...
Processing 20241002_SAAASTP0033_m3m_50mAGL_ortho_RGB.tif (9 / 17) ...
Processing 20241002_SAASTP0033_m3m_100mAGL_ortho_RGB.tif (10 / 17) ...
Processing 20241001_SAAGAW0009_m3m_50mAGL_ortho_RGB.tif (11 / 17) ...
Processing 20241002_SAAGAW0007_m3m_50mAGL_ortho_RGB.tif (12 / 17) ...
Processing 20241001_SAAGAW0008_m3m_100mAGL_ortho_RGB.tif (13 / 17) 

## MS COGs

In [34]:
# Multispectral inputs: 4 Float32 bands with -32767.0 as the nodata value.
# Each band entry is (name, wavelength, colour interpretation).
MS_COG_CONFIG = CogConfig(
    num_bands=4,
    data_type="Float32",
    nodata=-32767.0,
    bands=[
        ("Green (G)", 560, gdal.GCI_GreenBand),
        ("Red (R)", 650, gdal.GCI_RedBand),
        ("Red Edge (RE)", 730, gdal.GCI_RedEdgeBand),
        ("Near infrared (NIR)", 860, gdal.GCI_NIRBand),
    ],
)

# Collect the MS GeoTiffs, leaving out anything that is itself a COG
tif_files = [
    f
    for f in input_dir.rglob("*ms.tif", case_sensitive=False)
    if not f.name.endswith('.cog.tif')
]

# Inputs that already have a COG sitting next to them
tif_files_with_cogs = [f for f in tif_files if f.with_suffix('.cog.tif').exists()]

# Work list: everything when `force` is set, otherwise only inputs lacking a COG
if force:
    tif_files_without_cogs = list(tif_files)
else:
    tif_files_without_cogs = [
        f for f in tif_files if not f.with_suffix('.cog.tif').exists()
    ]

# Cap the work list at `limit` entries (no cap when `limit` is None)
tif_files_without_cogs = tif_files_without_cogs[:limit]

print(f"Found {len(tif_files_with_cogs)} GeoTiff files with corresponding COGs.")
print(f"Found {len(tif_files_without_cogs)} GeoTiff files without corresponding COGs.")

total = len(tif_files_without_cogs)

# Convert one file at a time; a failure is reported and the loop carries on
for i, f in enumerate(tif_files_without_cogs):
    print(f"Processing {f.name} ({i + 1} / {total}) ...")
    try:
        create_cog(f, MS_COG_CONFIG)
    except Exception as e:
        print(f"Error processing: {f.name}")
        print(e)

Found 17 GeoTiff files with corresponding COGs.
Found 17 GeoTiff files without corresponding COGs.
Processing 20241207_SANSSTP002_m3m_50mAGL_ortho_ms.tif (1 / 17) ...
Processing 20241208_SANSSTP005_m3m_60mAGL_ortho_ms.tif (2 / 17) ...
Processing 20241210_SANSSTP020_m3m_50mAGL_ortho_ms.tif (3 / 17) ...
Processing 20241209_SANSSTP010_m3m_50mAGL_ortho_ms.tif (4 / 17) ...
Processing 20240812_SANSSTP009_m3m_70mAGL_ortho_ms.tif (5 / 17) ...
Processing 20241208_SANSSTP006_m3m_110mAGL_ortho_ms.tif (6 / 17) ...
Processing 20241210_SANSSTP014_m3m_50mAGL_ortho_ms.tif (7 / 17) ...
Processing 20241001_SAAGAW0004_m3m_50mAGL_ortho_MS.tif (8 / 17) ...
Processing 20241002_SAASTP0033_m3m_100mAGL_ortho_MS.tif (9 / 17) ...
Processing 20241002_SAAASTP0033_m3m_50mAGL_ortho_MS.tif (10 / 17) ...
Processing 20241001_SAAGAW0009_m3m_50mAGL_ortho_MS.tif (11 / 17) ...
Processing 20241002_SAAGAW0007_m3m_50mAGL_ortho_MS.tif (12 / 17) ...
Processing 20241001_SAAGAW0008_m3m_100mAGL_ortho_MS.tif (13 / 17) ...
Processin

## Cleanup

In [30]:
# Clean up any old tmp files
# All '*.tmp.tif' files under input_dir (searched recursively) are collected
# into a list first; each one is then deleted through GDAL and its name printed.
tmp_files = list(input_dir.rglob('*.tmp.tif'))
for tmp_file in tmp_files:
    gdal.alg.dataset.delete(filename=tmp_file)
    print(f"Deleted {tmp_file.name}")