# GeoTiff to COG

This script demonstrates creating a COG from an input GeoTiff using the new ["gdal" application](https://gdal.org/en/stable/programs/index.html#gdal-application) via Python.

While most of it is generic, it does make some assumptions about the original GeoTiffs (i.e. some of this is hardcoded to NatureScan).

Todo
- [ ] Split up RGB and MS
- [ ] Make sure metadata for MS bands is added properly

In [13]:
from pathlib import Path
from osgeo import gdal

def create_cog(input: Path) -> None:
    """Create a Cloud Optimized GeoTiff (COG) alongside a source GeoTiff.

    The source is copied to ``<name>.tmp.tif``, the copy gets a nodata value
    and statistics, and the copy is then converted to ``<name>.cog.tif`` in
    the same directory. The source file itself is never modified.

    Only two layouts are accepted, judged from the first band:
      - 3 bands of type Byte (RGB), default nodata 255
      - 4 bands of type Float32 (MS), default nodata -32767.0
    A nodata value already present on the first band takes precedence over
    the default.

    Args:
        input: Path to an existing file with a '.tif' suffix.

    Raises:
        ValueError: If the path is not a file, does not end in '.tif', is not
            read by the GTiff driver, or does not match one of the two
            supported band layouts.
    """
    # Validate with explicit exceptions rather than `assert`: asserts are
    # stripped under `python -O` and carry no message for the caller to print.
    if not input.is_file():
        raise ValueError(f"Input is not an existing file: {input}")
    if input.suffix != '.tif':
        raise ValueError(f"Input must have a '.tif' suffix: {input}")

    # Temp and output paths live next to the input
    tmp_file = input.with_suffix('.tmp.tif')
    output = input.with_suffix('.cog.tif')

    # Read input file info
    input_info = gdal.alg.raster.info(input=input).Output()

    # Must be a GeoTiff
    driver = input_info['driverShortName']
    if driver != 'GTiff':
        raise ValueError(f"Expected a GTiff dataset, got '{driver}': {input}")

    # Band count decides both the expected data type and the default nodata
    bands = input_info['bands']
    num_bands = len(bands)
    if num_bands == 3:
        expected_type, nodata = 'Byte', 255
    elif num_bands == 4:
        expected_type, nodata = 'Float32', -32767.0
    else:
        raise ValueError(f"Expected 3 or 4 bands, got {num_bands}: {input}")

    # Only the first band is inspected for type and existing nodata
    first_band = bands[0]
    if first_band['type'] != expected_type:
        raise ValueError(
            f"Expected {expected_type} data for {num_bands} bands, "
            f"got '{first_band['type']}': {input}"
        )

    # Prefer a nodata value already set on the source over the default
    nodata = first_band.get('noDataValue', nodata)

    # NOTE(review): a conversion that fails part-way may leave an incomplete
    # '.cog.tif' behind; confirm whether callers need that cleaned up too.
    try:
        # Copy src to a tmp file not to change anything
        # In the future, we may be copying from a remote src
        gdal.alg.dataset.copy(source=input, destination=tmp_file, overwrite=True)

        # Edit temp file to:
        #   - Add nodata values
        #   - Compute statistics
        gdal.alg.raster.edit(dataset=tmp_file, nodata=nodata, stats="YES")

        # Convert to COG with
        #   - ZSTD compression (lossless)
        #   - Predictor = YES (predictor chosen to suit the data type)
        #   - Level = 9 (amount of compression, default for ZSTD)
        gdal.alg.raster.convert(
            input=tmp_file,
            output=output,
            output_format="COG",
            creation_option={
                "COMPRESS": "ZSTD",
                "LEVEL": 9,
                "PREDICTOR": "YES",
                "BIGTIFF": "IF_SAFER",  # Might need to explicitly use YES if heuristic fails
            },
            overwrite=True,
        )
    finally:
        # Remove the temp copy even when a step above fails. Skip when the
        # copy never produced it, so the original error is not masked.
        if tmp_file.exists():
            gdal.alg.dataset.delete(filename=tmp_file)

## Running the conversion

This is a crude script for generating a corresponding COG for every GeoTiff within a directory. It won't recreate COGs that already exist. A corresponding COG is one with the same file name but ending in '.cog.tif' instead of '.tif'.

In [14]:
input_dir = Path("/Volumes/Sammy/terra-luma/stac-input")

# Clean up any old tmp files left behind by earlier runs.
# The listing is materialised first so nothing is deleted while the
# directory tree is still being walked.
tmp_files = list(input_dir.rglob('*.tmp.tif'))
for tmp_file in tmp_files:
    gdal.alg.dataset.delete(filename=tmp_file)
    print(f"Deleted {tmp_file.name}")

# Get Tif files, skipping the COGs this script itself produces
tif_files = [f for f in input_dir.rglob('*.tif') if not f.name.endswith('.cog.tif')]

# Split the sources by whether '<name>.cog.tif' already exists, checking the
# filesystem once per file so the two lists always partition tif_files.
tif_files_with_cogs = []
tif_files_without_cogs = []
for f in tif_files:
    if f.with_suffix('.cog.tif').exists():
        tif_files_with_cogs.append(f)
    else:
        tif_files_without_cogs.append(f)

print(f"Found {len(tif_files_with_cogs)} GeoTiff files with corresponding COGs.")
print(f"Found {len(tif_files_without_cogs)} GeoTiff files without corresponding COGs.")

total = len(tif_files_without_cogs)

for i, f in enumerate(tif_files_without_cogs):
    print(f"Processing {f.name} ({i + 1} / {total}) ...")
    try:
        create_cog(f)
    except Exception as e:
        # Best-effort batch: report the failure and carry on with the next file.
        # Include the exception type, since some exceptions (e.g. a bare
        # assertion failure) have an empty message.
        print(f"Error processing: {f.name}")
        print(f"{type(e).__name__}: {e}")

Found 34 GeoTiff files with corresponding COGs.
Found 0 GeoTiff files without corresponding COGs.
