In [None]:
#!pip install GDAL

In [None]:
from osgeo import gdal
from osgeo import gdalconst
import glob
import numpy as np
import os

# Identifier of the basin being processed. Later cells use it to build the
# per-basin output folders, the tile filename prefix and the dataset directory.
basin_name = 'SA_84479'

Making the segmentation masks from the binary tiles

In [None]:
# Divisor used below to derive the Warp resolution (xRes = cols / new_size).
new_size = 64 / 0.7

# Source folders, processed in this order: no-channel tiles first, then channel tiles.
folder_paths = ['No_Channel/', 'Channel/']

output_folder_name = "newMaskedTiles64/"
output_folder_name_resized = "newMaskedTilesResized64/"
filename_endings = ["maskedTileNoChannel.tif", "maskedTileChannel.tif"]

output_folder_name_resized_mask = output_folder_name_resized

# Both destination folders must exist before any raster is written into them.
for needed_folder in (output_folder_name, output_folder_name_resized):
    if not os.path.exists(needed_folder):
        os.makedirs(needed_folder)

for class_index, (folder_path, filename_ending) in enumerate(zip(folder_paths, filename_endings)):
    # Tiles from the first folder get an all-zero mask, tiles from the second an all-one mask.
    fill_value = 1 if class_index == 1 else 0

    # Every TIFF in the current source folder
    tiff_files = glob.glob(folder_path + '*.tif')

    for count, input_file in enumerate(tiff_files):
        input_ds = gdal.Open(input_file, gdal.GA_ReadOnly)

        # The mask has the same pixel dimensions as the tile it is derived from.
        cols = input_ds.RasterXSize
        rows = input_ds.RasterYSize

        # Single-band Float32 GeoTIFF holding the constant mask value
        filename = output_folder_name + str(count) + filename_ending
        gtiff_driver = gdal.GetDriverByName("GTiff")
        output_ds = gtiff_driver.Create(filename, cols, rows, 1, gdal.GDT_Float32)
        mask_values = np.full((rows, cols), fill_value, dtype=np.float32)
        output_ds.GetRasterBand(1).WriteArray(mask_values)

        # Copy the georeferencing of the source tile onto the mask.
        output_ds.SetGeoTransform(input_ds.GetGeoTransform())
        output_ds.SetProjection(input_ds.GetProjection())

        # Dropping the references closes both datasets and flushes the mask to disk.
        input_ds = None
        output_ds = None

        # Resample the freshly written mask into the "resized" folder.
        gdal.Warp(
            output_folder_name_resized + str(count) + filename_ending,
            filename,
            xRes=cols / new_size,
            yRes=rows / new_size,
            resampleAlg=gdal.GRA_Max,
        )

Making the resized "Normal" tiles (i.e. saving the optical imagery tiles)

In [None]:
# Resize every optical tile from both source folders.
# `new_size` and `folder_paths` come from the mask-generation cell and are reused as-is.

output_folder_name_normal = "newNormalTilesResized64/"

if not os.path.exists(output_folder_name_normal):
    os.makedirs(output_folder_name_normal)

for i, folder_path in enumerate(folder_paths):
    # create a list of file paths for all TIFF files in the folder
    tiff_files = glob.glob(folder_path + '*.tif')

    # loop through each TIFF file and read its raster size
    for count, input_file in enumerate(tiff_files):
        input_ds = gdal.Open(input_file, gdal.GA_ReadOnly)

        # get the number of rows and columns in the input raster
        cols = input_ds.RasterXSize
        rows = input_ds.RasterYSize
        # close the raster dataset
        input_ds = None

        xres = cols / new_size
        yres = rows / new_size

        # `count` restarts at 0 for every source folder, so the folder index is part
        # of the name; otherwise tiles from the second folder would target the same
        # output paths as tiles from the first one.
        output_filename = output_folder_name_normal + str(i) + "_" + str(count) + ".tif"
        gdal.Warp(output_filename, input_file, xRes=xres, yRes=yres)

Making the mosaic of the original images and the masks

In [None]:
# Mosaic the resized mask tiles and the resized optical tiles into one GeoTIFF each.
input_folders = [output_folder_name_resized_mask, output_folder_name_normal]
output_rasters = ['newMosaicMasked64.tif', 'newMosaicNormal64.tif']

for input_folder, output_raster in zip(input_folders, output_rasters):
    # List all the raster files in the input folder. Sorted so that the order in
    # which tiles are stacked into the VRT is the same on every run.
    raster_files = sorted(
        os.path.join(input_folder, f) for f in os.listdir(input_folder) if f.endswith('.tif')
    )
    if not raster_files:
        raise RuntimeError(f"No .tif tiles found in {input_folder}; nothing to mosaic.")

    # Build a virtual raster from the input rasters
    vrt_options = gdal.BuildVRTOptions(resolution='average', addAlpha=False)
    vrt_ds = gdal.BuildVRT('temp.vrt', raster_files, options=vrt_options)
    if vrt_ds is None:
        raise RuntimeError(f"gdal.BuildVRT failed for the tiles in {input_folder}.")

    # Output raster properties are taken from the virtual raster
    x_size = vrt_ds.RasterXSize
    y_size = vrt_ds.RasterYSize
    n_bands = vrt_ds.RasterCount
    band_type = vrt_ds.GetRasterBand(1).DataType

    # Create the output raster and copy the virtual raster data into it
    driver = gdal.GetDriverByName('GTiff')
    output_ds = driver.Create(output_raster, x_size, y_size, n_bands, band_type)
    output_ds.SetGeoTransform(vrt_ds.GetGeoTransform())

    # Dedicated index name: the band loop must not reuse the outer loop's variable.
    for band_index in range(1, n_bands + 1):
        band = vrt_ds.GetRasterBand(band_index)
        output_band = output_ds.GetRasterBand(band_index)
        output_band.WriteArray(band.ReadAsArray())

    # Set the output raster projection
    output_ds.SetProjection(vrt_ds.GetProjection())

    # Clean up: drop every reference so the files are flushed and closed
    # before the temporary VRT is deleted.
    band = None
    output_band = None
    output_ds = None
    vrt_ds = None
    os.remove('temp.vrt')

Splitting the new mosaic into the "megatiles"

In [None]:
# Source: https://gis.stackexchange.com/questions/221671/splitting-tif-image-into-several-tiles
# The tiles are cut with gdal.Translate(srcWin=...) instead of shelling out to the
# gdal_translate executable: no dependency on the CLI being on PATH, no
# comma-suffixed "-srcwin" arguments, and no breakage on paths containing spaces.

in_path = ''
input_filenames = output_rasters

out_paths = ['BasinwiseSplit/' + basin_name + '/Masks64512/', 'BasinwiseSplit/' + basin_name + '/GroundTruth64512/']
output_filename = basin_name + '_tile_'

tile_size_x = 512
tile_size_y = 512

for input_filename, out_path in zip(input_filenames, out_paths):
    if not os.path.exists(out_path):
        os.makedirs(out_path)

    ds = gdal.Open(in_path + input_filename)
    band = ds.GetRasterBand(1)
    xsize = band.XSize
    ysize = band.YSize

    # Walk the mosaic in tile-sized steps; the pixel offsets become part of the tile name.
    for x_off in range(0, xsize, tile_size_x):
        for y_off in range(0, ysize, tile_size_y):
            tile_path = str(out_path) + str(output_filename) + str(x_off) + "_" + str(y_off) + ".tif"
            tile_ds = gdal.Translate(
                tile_path,
                ds,
                format='GTiff',
                srcWin=[x_off, y_off, tile_size_x, tile_size_y],
            )
            if tile_ds is None:
                print(f"Error writing tile {tile_path}.")
            # Dropping the reference flushes the tile to disk.
            tile_ds = None

    # Close the mosaic before moving on to the next one.
    band = None
    ds = None

Converting the TIFFs into PNGs so that they can be passed to the ResNet

In [None]:
def convert_tiff_to_png(tiff_folder, png_folder):
    """Convert every ``*.tif`` / ``*.tiff`` file in ``tiff_folder`` to a PNG in ``png_folder``.

    Each PNG keeps the base name of its source file. ``png_folder`` is created
    when it does not exist. Files that cannot be opened or converted are
    reported and skipped; the function always ends by printing
    "Conversion completed!" and returns ``None``.
    """
    # Create the output folder if it doesn't exist
    if not os.path.exists(png_folder):
        os.makedirs(png_folder)

    # Get a list of TIFF files in the input folder using glob
    tiff_files = glob.glob(os.path.join(tiff_folder, '*.tif')) + glob.glob(os.path.join(tiff_folder, '*.tiff'))
    if not tiff_files:
        # Make an empty or mistyped input folder visible instead of silently doing nothing.
        print(f"No TIFF files found in {tiff_folder}.")

    # Iterate over each TIFF file and convert it to PNG
    for tiff_file in tiff_files:
        png_file = os.path.splitext(os.path.basename(tiff_file))[0] + '.png'
        png_path = os.path.join(png_folder, png_file)

        # Open the TIFF file
        tiff_ds = gdal.Open(tiff_file)
        if tiff_ds is None:
            print(f"Error opening {tiff_file}. Skipping...")
            continue

        # Convert the TIFF to PNG; a None result means the conversion failed
        png_ds = gdal.Translate(png_path, tiff_ds, format='PNG')
        if png_ds is None:
            print(f"Error converting {tiff_file} to PNG. Skipping...")

        # Drop both references so the PNG is flushed and the TIFF is closed
        png_ds = None
        tiff_ds = None

    print("Conversion completed!")

datasetDirectory = basin_name + "_64512/"

# The split step writes its tiles under BasinwiseSplit/<basin_name>/..., so the
# conversion has to read from those same folders.
tiff_folder = 'BasinwiseSplit/' + basin_name + '/GroundTruth64512'
normal_png_folder = datasetDirectory + "Images"

convert_tiff_to_png(tiff_folder, normal_png_folder)

tiff_folder = 'BasinwiseSplit/' + basin_name + '/Masks64512'
mask_png_folder = datasetDirectory + "Masks"

convert_tiff_to_png(tiff_folder, mask_png_folder)


Conversion completed!
Conversion completed!


Cleaning any empty images (i.e. any that were fully transparent)

In [None]:
from PIL import Image

def is_png_fully_transparent(png_path):
    """Return True when every pixel of the image at ``png_path`` has an alpha value of 0.

    The image is converted to RGBA first, so the check looks at the alpha band
    of that converted image.
    """
    # The context manager closes the file as soon as the pixel data has been
    # converted; callers delete the file right afterwards, which an open
    # handle may prevent on some platforms.
    with Image.open(png_path) as image:
        alpha_band = image.convert("RGBA").getchannel("A")

    # For a single band getextrema() returns (min, max); a maximum of 0 means
    # that no pixel has a non-zero alpha value.
    return alpha_band.getextrema()[1] == 0

normal_png_files = glob.glob(os.path.join(normal_png_folder + "/", '*.png'))

for png in normal_png_files:

    if is_png_fully_transparent(png):
        # os.path.basename works with the path separators of the current platform;
        # splitting on a backslash only yields the file name on Windows.
        filename = os.path.basename(png)
        os.remove(png)

        # Remove the mask tile with the same name, if there is one.
        mask_path = mask_png_folder + "/" + filename
        if os.path.exists(mask_path):
            os.remove(mask_path)

directories = [normal_png_folder+"/",mask_png_folder+"/"]  # Replace with the path to your directory

# Delete the .xml files found next to the PNGs (presumably sidecar metadata
# written during the conversion step - confirm before relying on this).
for directory in directories:
    # Iterate over all files in the directory
    for filename in os.listdir(directory):
        if filename.endswith('.xml'):
            file_path = os.path.join(directory, filename)  # Get the full path of the file
            os.remove(file_path)  # Delete the file