# Raster pre-processing tools

In [1]:
# Import libraries
import glob
import os
import subprocess
from xml.dom import minidom

import rasterio
from rasterio import uint16
import numpy as np
from osgeo import gdal

### Function to validate a Cloud Optimized GeoTIFF (COG)

In [3]:
def validateCOG(file, script="validate_cloud_optimized_geotiff.py"):
    """Run the COG validation script on a file and print its report.

    Parameters
    ----------
    file : str
        file path of the geotiff to validate
    script : str, optional
        path of the validation script. The default is a bare file name, so it
        is looked up in the current working directory; pass a full path when
        the working directory has been changed (e.g. with os.chdir)

    Returns
    -------
    None
        the combined stdout/stderr of the script is printed
    """
    import sys

    # Pass the arguments as a list so that a path containing spaces reaches
    # the script as one argument (a concatenated command string gets split),
    # and launch the script with the interpreter running this notebook.
    process = subprocess.run([sys.executable, script, file],
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT,
                             universal_newlines=True)
    output = process.stdout
    print(output)

### Functions to convert one or more GeoTIFFs to a COG

In [4]:
def tiledRasterPreprocess(files_list, output_filename, output_dir):
    """Create COG from single or tiled geotiffs using GDAL < v 3.1
    https://gist.github.com/palmerj/ac1e19eb81c986d9634e3a3de7cdfc3d

    Steps: virtual mosaic (.vrt) -> tiled bigtiff -> overviews -> COG,
    followed by a validation of the COG.

    Parameters
    ----------
    files_list : str or list
        file path or list of file paths for input geotiff/s
    output_filename : str
        base name (without extension) for the files that are written
    output_dir : str
        directory that receives "<output_filename>_mosaic.vrt",
        "<output_filename>_bigtiff.tif" and "<output_filename>_cog.tif"

    Returns
    -------
    None
        the files are written to output_dir and the validation report
        is printed
    """
    # Create a virtual mosaic from the input tiles
    output_mosaic = os.path.join(output_dir, output_filename + "_mosaic.vrt")
    vrt_options = gdal.BuildVRTOptions(resampleAlg='cubic')
    vrt = gdal.BuildVRT(output_mosaic, files_list, options=vrt_options)
    vrt = None  # drop the reference so GDAL closes and writes the .vrt

    # Create a bigtiff from the virtual mosaic
    # Translate(destName, srcDS, **kwargs)
    output_bigtiff = os.path.join(
        output_dir, output_filename + "_bigtiff.tif")
    bigtiff_options = gdal.TranslateOptions(format='GTiff',
                                            creationOptions=['BIGTIFF=YES',
                                                             'TILED=YES',
                                                             'COMPRESS=DEFLATE',
                                                             'PREDICTOR=2',
                                                             'NUM_THREADS=ALL_CPUS'])
    bigtiff = gdal.Translate(
        output_bigtiff, output_mosaic, options=bigtiff_options)
    bigtiff = None  # close the bigtiff before it is reopened below

    # Create overviews
    image = gdal.Open(output_bigtiff)
    gdal.SetConfigOption("COMPRESS_OVERVIEW", "DEFLATE")
    image.BuildOverviews("AVERAGE", [2, 4, 8, 16, 32, 64, 128, 256, 512])
    # Close the dataset so the overviews are fully written before the next
    # step copies them (COPY_SRC_OVERVIEWS) into the COG
    image = None

    # Create cloud optimized geotiff
    output_cog = os.path.join(output_dir, output_filename + "_cog.tif")
    cog_options = gdal.TranslateOptions(format='GTiff',
                                        creationOptions=['BIGTIFF=YES',
                                                         'TILED=YES',
                                                         'BLOCKXSIZE=256',
                                                         'BLOCKYSIZE=256',
                                                         'COMPRESS=LZW',
                                                         'COPY_SRC_OVERVIEWS=YES'])
    cog = gdal.Translate(output_cog, output_bigtiff, options=cog_options)
    cog = None

    # Validate the COG geotiff with the shared helper
    validateCOG(output_cog)

In [6]:
def _flag_enabled(flag):
    """Interpret a bool or a 'TRUE'/'FALSE' style string as a boolean."""
    if isinstance(flag, str):
        return flag.strip().upper() == 'TRUE'
    return bool(flag)


def tiledRasterPreprocessV2(files_list, output_filename, output_dir, separate_bands='FALSE', validate_cog='FALSE'):
    """Create COG from single or tiled geotiffs using GDAL v 3.1 +

    Parameters
    ----------
    files_list : str or list
        file path or list of file paths for input geotiff/s
    output_filename : str
        base name (without extension) for "<output_filename>_mosaic.vrt"
        and "<output_filename>_cog.tif"
    output_dir : str
        directory that receives the mosaic and the COG/s
    separate_bands : str or bool
        'TRUE' (any case) or True writes one COG per band, named
        "B<band number>.tif", when the mosaic has more than one band;
        otherwise a single "<output_filename>_cog.tif" is written
    validate_cog : str or bool
        'TRUE' (any case) or True runs validateCOG on every COG written

    Returns
    -------
    None
        the files are written to output_dir
    """
    split_bands = _flag_enabled(separate_bands)
    run_validation = _flag_enabled(validate_cog)

    # Create a virtual mosaic from the input tiles
    output_mosaic = os.path.join(output_dir, output_filename + "_mosaic.vrt")
    vrt_options = gdal.BuildVRTOptions(resampleAlg='cubic')
    vrt = gdal.BuildVRT(output_mosaic, files_list, options=vrt_options)
    vrt = None  # drop the reference so GDAL closes and writes the .vrt

    # Query the number of bands, then release the handle on the mosaic
    src_ds = gdal.Open(output_mosaic)
    band_count = src_ds.RasterCount
    src_ds = None

    # Creation options shared by every COG written below
    cog_creation_args = ["-co", "BLOCKSIZE=1024",
                         "-co", "COMPRESS=DEFLATE",
                         "-co", "RESAMPLING=AVERAGE",
                         "-co", "OVERVIEWS=IGNORE_EXISTING"]

    # Each job is (output path, gdal_translate style argument list)
    if split_bands and band_count > 1:
        # Just save the file as the Band number for SentinelHub COG ingest
        jobs = [
            (os.path.join(output_dir, "B" + str(band) + ".tif"),
             ["-of", "COG", "-b", str(band)] + cog_creation_args
             + ["-a_nodata", "0"])
            for band in range(1, band_count + 1)
        ]
    else:
        jobs = [
            (os.path.join(output_dir, output_filename + "_cog.tif"),
             ["-of", "COG"] + cog_creation_args + ["-co", "BIGTIFF=YES"])
        ]

    for output_cog, translate_args in jobs:
        cog_options = gdal.TranslateOptions(options=translate_args)
        cog = gdal.Translate(output_cog, output_mosaic, options=cog_options)
        cog = None  # close the output so it is complete before validation

        # Validate the COG
        if run_validation:
            validateCOG(output_cog)

### Pre-processing script for KSAT Pleiades data

In [7]:
# Folder holding the mosaic tiles; it becomes the working directory
input_dir = os.path.join(*[
    'Z:/',
    'TS',
    '2022',
    '01_NMA_OML49_land_cover',
    'data',
    'PL-Nigeria',
    '202212-PL-Nigeria-Mosaic Tiles',
])

os.chdir(input_dir)

In [8]:
# Get the list of input tiles in the current directory.
# glob returns its matches in arbitrary order, so sort them to hand the tiles
# to the mosaic step in the same order on every run.
files_list = sorted(glob.glob("*.TIF"))
files_list

['202212-PL-Nigeria-Mosaic_1_1.tif',
 '202212-PL-Nigeria-Mosaic_1_2.tif',
 '202212-PL-Nigeria-Mosaic_1_3.tif',
 '202212-PL-Nigeria-Mosaic_1_4.tif',
 '202212-PL-Nigeria-Mosaic_2_1.tif',
 '202212-PL-Nigeria-Mosaic_2_2.tif',
 '202212-PL-Nigeria-Mosaic_2_3.tif',
 '202212-PL-Nigeria-Mosaic_2_4.tif',
 '202212-PL-Nigeria-Mosaic_2_5.tif',
 '202212-PL-Nigeria-Mosaic_3_1.tif',
 '202212-PL-Nigeria-Mosaic_3_2.tif',
 '202212-PL-Nigeria-Mosaic_3_3.tif',
 '202212-PL-Nigeria-Mosaic_3_4.tif',
 '202212-PL-Nigeria-Mosaic_3_5.tif',
 '202212-PL-Nigeria-Mosaic_4_1.tif',
 '202212-PL-Nigeria-Mosaic_4_2.tif',
 '202212-PL-Nigeria-Mosaic_4_3.tif']

In [9]:
# Base name for the multiband output
output_filename = "202212-PL-Nigeria-Mosaic"

# Results go into an "output" folder inside the input directory
output_dir = os.path.join(input_dir, 'output')

# Create the output folder if it is not there yet
if not os.path.isdir(output_dir):
    os.mkdir(output_dir)
    print("Directory created:" + output_dir)

# Convert the tiles to a COG (bands kept together) and validate the result
tiledRasterPreprocessV2(
    files_list=files_list,
    output_filename=output_filename,
    output_dir=output_dir,
    separate_bands='FALSE',
    validate_cog='TRUE',
)

Directory created:Z:/TS\2022\01_NMA_OML49_land_cover\data\PL-Nigeria\202212-PL-Nigeria-Mosaic Tiles\output
python: can't open file 'validate_cloud_optimized_geotiff.py': [Errno 2] No such file or directory



In [18]:
# Display the current value of output_dir.
# NOTE(review): this cell's execution count (18) is out of sequence and the
# recorded output does not look like the path built by os.path.join earlier
# in the notebook - restart the kernel and run all cells to refresh it.
output_dir

'6070485101'

### Pre-processing script for McKinley Planet Skysat data

In [None]:
# Script for tiled Skysat data

# Directory that holds the input tiles
input_directory = os.path.join(
    "I:\\", "CVX_AMER_IMG", "ETC", "ENVTEC", "satellite", "cemrec_mc_kinley", "2020", "skysat", "source_data")

# Get the list of input tiles
files_list = glob.glob(os.path.join(
    input_directory, "*_analytic_cliptoAOI.tif"))

# Check if there are any files conforming to wild card in the directory
if not files_list:
    # Say what was searched instead of printing a placeholder message
    print('No files matching *_analytic_cliptoAOI.tif in ' + input_directory)
else:
    # Run the pre-processing function
    tiledRasterPreprocessV2(files_list=files_list,
                            output_filename="McKinley_Skysat",
                            output_dir=input_directory,
                            separate_bands='FALSE',
                            validate_cog='FALSE')

In [5]:
# Script for mosaicked Skysat data
input_directory = os.path.join(
    "D:\\", "McKinley", "skysat")

# tiledRasterPreprocessV2 writes "<output_filename>_cog.tif" into output_dir,
# which here is the input folder itself. Leave that file out of the tile list,
# otherwise re-running this cell feeds the previous COG back in as an input.
skysat_output_name = "skysat_mosaic_50cm_cog"
skysat_previous_cog = skysat_output_name + "_cog.tif"

# Get the list of input tiles
files_list = [
    path
    for path in glob.glob(os.path.join(input_directory, "*.tif"))
    if os.path.basename(path).lower() != skysat_previous_cog.lower()
]

# Check if there are any files conforming to wild card in the directory
if not files_list:
    print('No files')
else:
    # Run the pre-processing function
    tiledRasterPreprocessV2(files_list=files_list,
                            output_filename=skysat_output_name,
                            output_dir=input_directory,
                            separate_bands='FALSE',
                            validate_cog='FALSE')

In [8]:
# Check that the mosaicked Skysat output is a valid COG
file = os.path.join("D:\\", "McKinley", "skysat",
                    "skysat_mosaic_50cm_cog_cog.tif")
validateCOG(file)

D:\McKinley\skysat\skysat_mosaic_50cm_cog_cog.tif is a valid cloud optimized GeoTIFF

The size of all IFD headers is 12812 bytes



### Pre-processing script for McKinley orthos

In [None]:
# Input directory (the path contains no wildcard)
input_directory = os.path.join(
    "I:\\", "CVX_AMER_IMG", "ETC", "ENVTEC", "aerial", "cemrec_mckinley", "2020", "01_4band_ortho_tiles_2")

# Without a wildcard, glob yields the directory itself when it exists and
# nothing otherwise; append a wildcard to loop over several directories
dirs = glob.glob(input_directory)

# tiledRasterPreprocessV2 writes "<output_filename>_cog.tif" into output_dir,
# which here is the folder being searched, so that file must not be picked
# up as an input tile when the cell is run again.
ortho_output_name = "McKinley_NM_Ortho"
ortho_previous_cog = ortho_output_name + "_cog.tif"

# Loop through each directory and get the tiffs it contains
# (the loop variable is not called `dir`, which would shadow the builtin)
for tile_dir in dirs:

    print(tile_dir)

    # Get the list of input tiles
    files_list = [
        path
        for path in glob.glob(os.path.join(tile_dir, "*.tif"))
        if os.path.basename(path).lower() != ortho_previous_cog.lower()
    ]

    # Skip directories without any matching file
    if not files_list:
        continue

    # Run the pre-processing function
    tiledRasterPreprocessV2(files_list=files_list,
                            output_filename=ortho_output_name,
                            output_dir=tile_dir,
                            separate_bands='FALSE',
                            validate_cog='FALSE')