# 🧩 GeoTIFF Mosaic Builder (merge tiles → one big GeoTIFF)

This notebook **merges multiple GeoTIFF tiles** (same CRS / resolution / grid alignment) into **one large GeoTIFF**.
It includes safety checks, tiled+compressed output, optional overviews, and optional clipping.

> Set the config in the next cell, then run all cells top‑to‑bottom.


In [None]:
# If needed, install dependencies (uncomment as appropriate)
# %pip install rasterio pandas numpy tqdm


In [1]:
from pathlib import Path

# ========= CONFIG =========
# NOTE: the paths below are absolute and machine-specific; edit them before
# running this notebook anywhere else.
# Tiles are matched recursively under INPUT_DIR using GLOB_PAT, so keep
# OUTPUT_TIF outside INPUT_DIR (or make sure it does not match GLOB_PAT),
# otherwise a re-run would pick up the previous mosaic as an input tile.
INPUT_DIR = Path("/Users/jayceebao/Desktop/tifs")     # directory containing .tif tiles
GLOB_PAT  = "*.tif"                         # change if needed
OUTPUT_TIF = Path("/Users/jayceebao/Desktop/merged.tif")           # output file path

# Mosaic options
# MERGE_METHOD is forwarded unchanged to rasterio.merge.merge(method=...).
# NOTE(review): rasterio's built-in string methods are 'first' | 'last' | 'min' | 'max'
# (newer releases also accept 'sum' and 'count'); 'mean' does not appear to be a
# built-in name — confirm against the installed rasterio version before using it.
MERGE_METHOD = "first"   # 'first' | 'last' | 'min' | 'max' | 'sum'
NODATA = None            # set to a value if your tiles use a specific nodata, e.g., 0 or 255; None = use the first tile's nodata
DTYPE = None             # force dtype (e.g., 'uint8', 'uint16', 'float32'); None = keep from first tile
ADD_OVERVIEWS = True     # build internal overviews for faster display
OVERVIEW_LEVELS = [2, 4, 8, 16]  # decimation factors handed to build_overviews()

# Optional: Clip to a bounding box (in the same CRS as your tiles). Set to None to skip.
# Format: (minx, miny, maxx, maxy)
CLIP_BBOX = None
# CLIP_BBOX = (780000.0, 6695000.0, 790000.0, 6715000.0)
# ========= END CONFIG =========

# Echo the resolved paths so a wrong directory is obvious before any work starts.
print("Input directory:", INPUT_DIR.resolve())
print("Output file:", OUTPUT_TIF.resolve())


Input directory: /Users/jayceebao/Desktop/tifs
Output file: /Users/jayceebao/Desktop/merged.tif


In [2]:
import pandas as pd
import numpy as np
import rasterio
from rasterio.errors import RasterioIOError
from rasterio.coords import BoundingBox
from tqdm import tqdm

def transform_key(transform):
    """Return a hashable grid-alignment signature for an affine transform.

    Two rasters whose pixel grids coincide (same pixel size, same rotation
    terms, and origins that differ by a whole number of pixels) produce the
    same key, so comparing keys is enough to detect misaligned tiles.

    Parameters
    ----------
    transform : object exposing ``a, b, c, d, e, f`` attributes
        Affine coefficients: ``a``/``e`` are the x/y pixel sizes, ``b``/``d``
        the rotation/shear terms and ``c``/``f`` the x/y origin offsets.

    Returns
    -------
    tuple of float
        ``(x_phase, y_phase, a, e, b, d)``, every entry rounded to 9 decimals.
        ``x_phase``/``y_phase`` are the origin offsets modulo the absolute
        pixel size, i.e. the position of the grid within one pixel.
    """
    def mod_residue(offset, step):
        """Phase of ``offset`` within one ``|step|``, rounded to 9 decimals."""
        size = abs(step)
        if size == 0:
            # Degenerate pixel size: there is no grid phase to speak of.
            return 0.0
        residue = float(np.round(offset % size, 9))
        # Floating-point modulo can land a hair below a full step
        # (0.3 % 0.1 -> 0.0999...), which rounds to the step itself.
        # A phase of one full step is the same grid position as phase 0,
        # so wrap it; otherwise aligned tiles would get different keys.
        if residue >= float(np.round(size, 9)):
            return 0.0
        return residue

    return (
        mod_residue(transform.c, transform.a),
        mod_residue(transform.f, transform.e),
        float(np.round(transform.a, 9)),
        float(np.round(transform.e, 9)),
        float(np.round(transform.b, 9)),
        float(np.round(transform.d, 9)),
    )

# Collect every tile under INPUT_DIR (recursively) in a stable, sorted order.
tif_paths = sorted(INPUT_DIR.rglob(GLOB_PAT))
print(f"Found {len(tif_paths)} GeoTIFFs")

# One record per tile: full metadata when the file opens, or just
# path/name/error when rasterio cannot read it.
rows = []
for p in tqdm(tif_paths):
    record = {"path": str(p), "name": p.name}
    try:
        with rasterio.open(p) as src:
            tile_crs = src.crs
            x_res, y_res = src.res
            affine = src.transform
            extent = src.bounds
            n_bands = src.count
            # Built in one go so a failure part-way through never leaves a
            # half-filled record behind.
            metadata = {
                "crs_string": tile_crs.to_string() if tile_crs else None,
                "res_x": float(np.round(x_res, 9)),
                "res_y": float(np.round(y_res, 9)),
                "align_key": transform_key(affine),
                "transform": tuple(
                    float(coef)
                    for coef in (affine.a, affine.b, affine.c, affine.d, affine.e, affine.f)
                ),
                "bounds": (
                    float(extent.left),
                    float(extent.bottom),
                    float(extent.right),
                    float(extent.top),
                ),
                "bands": n_bands,
                "dtype": src.dtypes[0] if n_bands > 0 else None,
            }
    except RasterioIOError as e:
        # Keep going: the unreadable tile is recorded with its error message.
        record["error"] = str(e)
    else:
        record.update(metadata)
    rows.append(record)

df = pd.DataFrame(rows)
display(df.head())

# Tiles that failed to open carry only path/name/error. Surface them here so
# they are not silently buried in the table.
if "error" in df.columns:
    unreadable = df.loc[df["error"].notna(), ["path", "error"]]
    if not unreadable.empty:
        print(f"⚠️ {len(unreadable)} tile(s) could not be read and have no metadata:")
        display(unreadable)

# When no tile was found (or none could be read) the metadata columns do not
# exist at all; fail with an actionable message instead of a bare KeyError.
required_cols = ["crs_string", "res_x", "res_y", "align_key"]
assert all(col in df.columns for col in required_cols), (
    "No readable GeoTIFFs found — check INPUT_DIR and GLOB_PAT."
)

print("Unique CRS:", df["crs_string"].dropna().unique())
print("Unique resolutions:", df[["res_x","res_y"]].dropna().drop_duplicates().values.tolist())

# Basic assertions — feel free to relax if needed
assert df["crs_string"].dropna().nunique() <= 1, "Multiple CRS detected — reproject before merge."
assert df[["res_x","res_y"]].dropna().drop_duplicates().shape[0] <= 1, "Multiple resolutions detected — resample before merge."
# Drop missing keys first: rows for unreadable tiles hold NaN, which astype(str)
# would turn into the string "nan" and count as a second, bogus alignment.
assert df["align_key"].dropna().astype(str).nunique() <= 1, "Grid alignment differs — regrid before merge."


Found 818 GeoTIFFs


100%|██████████| 818/818 [01:17<00:00, 10.59it/s]


Unnamed: 0,path,name,crs_string,res_x,res_y,align_key,transform,bounds,bands,dtype
0,/Users/jayceebao/Desktop/tifs/patch_1375803_L_...,patch_1375803_L_2165_19.tif,"PROJCS[""WGS 84 / UTM zone 55S"",GEOGCS[""WGS 84""...",10.0,10.0,"(0.0, 0.0, 10.0, -10.0, 0.0, 0.0)","(10.0, 0.0, 782100.0, 0.0, -10.0, 6711220.0)","(782100.0, 6706100.0, 787220.0, 6711220.0)",3,uint8
1,/Users/jayceebao/Desktop/tifs/patch_1375804_S_...,patch_1375804_S_2412_377.tif,"PROJCS[""WGS 84 / UTM zone 55S"",GEOGCS[""WGS 84""...",10.0,10.0,"(0.0, 0.0, 10.0, -10.0, 0.0, 0.0)","(10.0, 0.0, 784570.0, 0.0, -10.0, 6707640.0)","(784570.0, 6705080.0, 787130.0, 6707640.0)",3,uint8
2,/Users/jayceebao/Desktop/tifs/patch_1375805_S_...,patch_1375805_S_2262_347.tif,"PROJCS[""WGS 84 / UTM zone 55S"",GEOGCS[""WGS 84""...",10.0,10.0,"(0.0, 0.0, 10.0, -10.0, 0.0, 0.0)","(10.0, 0.0, 783070.0, 0.0, -10.0, 6707940.0)","(783070.0, 6705380.0, 785630.0, 6707940.0)",3,uint8
3,/Users/jayceebao/Desktop/tifs/patch_1375806_L_...,patch_1375806_L_2469_207.tif,"PROJCS[""WGS 84 / UTM zone 55S"",GEOGCS[""WGS 84""...",10.0,10.0,"(0.0, 0.0, 10.0, -10.0, 0.0, 0.0)","(10.0, 0.0, 785140.0, 0.0, -10.0, 6709340.0)","(785140.0, 6704220.0, 790260.0, 6709340.0)",3,uint8
4,/Users/jayceebao/Desktop/tifs/patch_1375807_S_...,patch_1375807_S_2392_0.tif,"PROJCS[""WGS 84 / UTM zone 55S"",GEOGCS[""WGS 84""...",10.0,10.0,"(0.0, 0.0, 10.0, -10.0, 0.0, 0.0)","(10.0, 0.0, 784370.0, 0.0, -10.0, 6711410.0)","(784370.0, 6708850.0, 786930.0, 6711410.0)",3,uint8


Unique CRS: ['PROJCS["WGS 84 / UTM zone 55S",GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",0],PARAMETER["central_meridian",147],PARAMETER["scale_factor",0.9996],PARAMETER["false_easting",500000],PARAMETER["false_northing",10000000],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH]]']
Unique resolutions: [[10.0, 10.0]]


In [3]:
import math

def union_bounds(bounds_list):
    """Return the smallest (minx, miny, maxx, maxy) box enclosing every box given.

    Each item of ``bounds_list`` is indexed as ``(minx, miny, maxx, maxy)``.
    An empty ``bounds_list`` raises ``ValueError``.
    """
    lefts = [box[0] for box in bounds_list]
    bottoms = [box[1] for box in bounds_list]
    rights = [box[2] for box in bounds_list]
    tops = [box[3] for box in bounds_list]
    return (min(lefts), min(bottoms), max(rights), max(tops))

# Only rows for readable tiles hold a bounds tuple; rows for unreadable tiles
# have a missing value in this column and are skipped.
all_bounds = [extent for extent in df["bounds"] if isinstance(extent, (list, tuple))]
mosaic_bounds = None if not all_bounds else union_bounds(all_bounds)

print("Tiles union bounds:", mosaic_bounds)
if CLIP_BBOX is not None:
    print("Clipping bbox:", CLIP_BBOX)


Tiles union bounds: (760450.0, 6677420.0, 790280.0, 6711410.0)


In [5]:
from contextlib import ExitStack

from rasterio.merge import merge

# Fail early with an actionable message rather than an IndexError on srcs[0].
if not tif_paths:
    raise RuntimeError(
        f"No tiles matching {GLOB_PAT!r} found under {INPUT_DIR} — nothing to merge."
    )

# ExitStack guarantees every opened tile is closed again, even when opening a
# later tile or the merge itself raises.
with ExitStack() as stack:
    srcs = [stack.enter_context(rasterio.open(p)) for p in tif_paths]

    # The first tile is the template for the output profile, nodata and dtype.
    profile = srcs[0].profile.copy()
    nodata_val = srcs[0].nodata if NODATA is None else NODATA
    out_dtype = profile.get("dtype", srcs[0].dtypes[0]) if DTYPE is None else DTYPE

    merge_kwargs = {"method": MERGE_METHOD, "nodata": nodata_val}
    if CLIP_BBOX is not None:
        # Optional clip: restrict the mosaic to (minx, miny, maxx, maxy).
        merge_kwargs["bounds"] = CLIP_BBOX
    if DTYPE is not None:
        # Ask merge() for the forced dtype so the in-memory array matches the
        # dtype declared in the output profile below.
        merge_kwargs["dtype"] = out_dtype

    print("Merging... This can take a while for many tiles.")
    # Key point: pass the dataset list as the first positional argument,
    # not via a `datasets=` keyword.
    mosaic, out_transform = merge(srcs, **merge_kwargs)

# The mosaic lives in memory now, so the source tiles are already closed.

# Update profile for output
profile.update({
    "driver": "GTiff",
    "height": mosaic.shape[1],
    "width": mosaic.shape[2],
    "transform": out_transform,
    "dtype": out_dtype,
    "count": mosaic.shape[0],
    "nodata": nodata_val,      # declared at creation time instead of patched in after writing
    "compress": "lzw",
    "tiled": True,
    "blockxsize": 512,
    "blockysize": 512,
    "BIGTIFF": "IF_SAFER",
})

# Make sure the destination folder exists before GDAL tries to create the file.
OUTPUT_TIF.parent.mkdir(parents=True, exist_ok=True)

print("Writing:", OUTPUT_TIF)
with rasterio.open(OUTPUT_TIF, "w", **profile) as dst:
    dst.write(mosaic)

print("✅ Mosaic written.")

Merging... This can take a while for many tiles.
Writing: /Users/jayceebao/Desktop/merged.tif
✅ Mosaic written.


In [6]:
# Optionally add internal overview (reduced-resolution) levels to the mosaic.
if not ADD_OVERVIEWS:
    print("Skip overviews.")
else:
    import rasterio
    print("Building internal overviews...")
    overview_resampling = rasterio.enums.Resampling.nearest
    # "r+" opens the existing file for in-place update.
    with rasterio.open(OUTPUT_TIF, "r+") as dst:
        dst.build_overviews(OVERVIEW_LEVELS, overview_resampling)
        # Record which resampling produced the overviews.
        dst.update_tags(ns='rio_overview', resampling='nearest')
    print("✅ Overviews built.")


Building internal overviews...
✅ Overviews built.


In [7]:
# Quick info on the final file: reopen the written mosaic read-only and print
# its georeferencing and layout as a sanity check.
import rasterio
with rasterio.open(OUTPUT_TIF) as src:
    # Tiles without a CRS are tolerated by the scan step, so the mosaic may
    # have none either; print None instead of failing on `.to_string()`.
    print("Final CRS:", src.crs.to_string() if src.crs else None)
    print("Size (W x H):", src.width, "x", src.height)
    print("Bounds:", src.bounds)
    print("Dtype:", src.dtypes[0])
    print("Bands:", src.count)
    affine = src.transform
    print("Transform:", tuple(map(float, (affine.a, affine.b, affine.c, affine.d, affine.e, affine.f))))


Final CRS: PROJCS["WGS 84 / UTM zone 55S",GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",0],PARAMETER["central_meridian",147],PARAMETER["scale_factor",0.9996],PARAMETER["false_easting",500000],PARAMETER["false_northing",10000000],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH]]
Size (W x H): 2983 x 3399
Bounds: BoundingBox(left=760450.0, bottom=6677420.0, right=790280.0, top=6711410.0)
Dtype: uint8
Bands: 3
Transform: (10.0, 0.0, 760450.0, 0.0, -10.0, 6711410.0)
