In [1]:
import rasterio
from rasterio.merge import merge
import numpy as np
import os

def memory_efficient_merge(file_list, output_path, chunk_size=1024):
    """
    Merge multiple GeoTIFF files in a memory-efficient way
    
    Parameters:
    file_list: List of paths to input GeoTIFF files
    output_path: Path where the merged GeoTIFF will be saved
    chunk_size: Size of chunks to process at once
    """
    # Get metadata from the first file
    with rasterio.open(file_list[0]) as src:
        out_meta = src.meta.copy()
        nodata = src.nodata if src.nodata is not None else -32767
    
    # Determine bounds of the final raster
    xs = []
    ys = []
    for file_path in file_list:
        with rasterio.open(file_path) as src:
            left, bottom, right, top = src.bounds
            xs.extend([left, right])
            ys.extend([bottom, top])
    
    # Calculate the final dimensions
    ulx, lry = min(xs), min(ys)
    lrx, uly = max(xs), max(ys)
    out_transform = rasterio.transform.from_bounds(ulx, lry, lrx, uly, 
                                                 int((lrx - ulx) / out_meta['transform'][0]),
                                                 int((uly - lry) / -out_meta['transform'][4]))
    
    out_meta.update({
        "driver": "GTiff",
        "height": int((uly - lry) / -out_meta['transform'][4]),
        "width": int((lrx - ulx) / out_meta['transform'][0]),
        "transform": out_transform,
        "nodata": nodata,
        "tiled": True,
        "blockxsize": 256,
        "blockysize": 256,
        "compress": "deflate"
    })
    
    print(f"Creating output file with dimensions: {out_meta['width']}x{out_meta['height']}")
    
    # Create the output file
    with rasterio.open(output_path, 'w', **out_meta) as dest:
        # Process each file
        for i, file_path in enumerate(file_list):
            print(f"Processing file {i+1} of {len(file_list)}: {file_path}")
            with rasterio.open(file_path) as src:
                # Calculate where this file goes in the output
                src_left, src_bottom, src_right, src_top = src.bounds
                dst_window = rasterio.windows.from_bounds(
                    src_left, src_bottom, src_right, src_top, 
                    out_transform
                )
                
                # Process in chunks
                for y in range(0, src.height, chunk_size):
                    for x in range(0, src.width, chunk_size):
                        # Read the chunk
                        window = rasterio.windows.Window(
                            x, y, 
                            min(chunk_size, src.width - x), 
                            min(chunk_size, src.height - y)
                        )
                        chunk_data = src.read(1, window=window)
                        
                        # Calculate output window
                        chunk_transform = src.window_transform(window)
                        chunk_bounds = rasterio.transform.array_bounds(
                            chunk_data.shape[0], chunk_data.shape[1], 
                            chunk_transform
                        )
                        
                        out_window = rasterio.windows.from_bounds(
                            *chunk_bounds, 
                            out_transform
                        )
                        
                        # Write to output file
                        dest.write(chunk_data, 1, window=out_window)
                        
                    # Print progress
                    if y % (chunk_size * 10) == 0:
                        print(f"  Progress: {y/src.height*100:.1f}%")
    
    print(f"Merged files saved to {output_path}")

# Example usage
if __name__ == "__main__":
    files_to_merge = [
        "US_SRTMGL3_region_1.tif",
        "US_SRTMGL3_region_2.tif",
        "US_SRTMGL3_region_3.tif",
        "US_SRTMGL3_region_4.tif",
        "US_SRTMGL3_region_5.tif"
    ]
    
    # Verify all files exist
    missing_files = [f for f in files_to_merge if not os.path.exists(f)]
    if missing_files:
        print(f"Warning: These files are missing: {missing_files}")
        files_to_merge = [f for f in files_to_merge if os.path.exists(f)]
    
    if files_to_merge:
        # You can adjust the chunk size based on your available memory
        # Smaller chunks use less memory but take longer to process
        memory_efficient_merge(files_to_merge, "US_merged.tif", chunk_size=2048)
    else:
        print("No files to merge. Please check file paths.")

Creating output file with dimensions: 70200x29999
Processing file 1 of 5: US_SRTMGL3_region_1.tif
  Progress: 0.0%
Processing file 2 of 5: US_SRTMGL3_region_2.tif
  Progress: 0.0%
Processing file 3 of 5: US_SRTMGL3_region_3.tif
  Progress: 0.0%
Processing file 4 of 5: US_SRTMGL3_region_4.tif
  Progress: 0.0%
Processing file 5 of 5: US_SRTMGL3_region_5.tif
  Progress: 0.0%
Merged files saved to US_merged.tif


In [2]:
import rasterio
from rasterio.windows import Window
import numpy as np

# Open the DEM file
with rasterio.open("US_merged.tif") as src:
    # Define chunk size
    chunk_size = 5000  # Adjust based on your system's memory

    # Initialize variables to track min and max values
    dem_min = np.inf
    dem_max = -np.inf

    # Loop through the DEM in chunks
    for i in range(0, src.height, chunk_size):
        for j in range(0, src.width, chunk_size):
            # Define the window for the current chunk
            window = Window(j, i, min(chunk_size, src.width - j), min(chunk_size, src.height - i))

            # Read the elevation data for the current chunk
            dem_chunk = src.read(1, window=window).astype(np.int16)

            # Check for NoData values
            nodata = src.nodata
            if nodata is not None:
                dem_chunk = np.where(dem_chunk == nodata, np.nan, dem_chunk)

            # Update min and max values
            dem_min = min(dem_min, np.nanmin(dem_chunk))
            dem_max = max(dem_max, np.nanmax(dem_chunk))

    print("DEM shape:", (src.height, src.width))
    print("DEM min value:", dem_min)
    print("DEM max value:", dem_max)

  dem_min = min(dem_min, np.nanmin(dem_chunk))
  dem_max = max(dem_max, np.nanmax(dem_chunk))


DEM shape: (29999, 70200)
DEM min value: -184.0
DEM max value: 4402.0
