In [None]:
# Install rasterio and geospatial packages
!pip install rasterio numpy matplotlib pyproj fiona geopandas contextily

# Rasterio Tutorial - Geospatial Raster Processing

**Rasterio** is the Python library for reading and writing geospatial raster datasets:
- **I/O**: Read/write GeoTIFF, NetCDF, HDF5, and 100+ formats
- **Reprojection**: Transform between coordinate systems (EPSG codes)
- **Resampling**: Change resolution with various interpolation methods
- **Windowing**: Efficient partial reading of large rasters
- **Metadata**: Access georeferencing, CRS, and raster properties

Perfect for: satellite imagery, DEMs, climate data, remote sensing workflows.

In [None]:
import rasterio
import rasterio.warp
import rasterio.enums
from rasterio.transform import from_bounds
from rasterio.crs import CRS
from rasterio.windows import Window
import numpy as np
import matplotlib.pyplot as plt
import os
from pathlib import Path

# Report the library versions in use so that results can be reproduced later
print(f"Rasterio version: {rasterio.__version__}")
print(f"GDAL version: {rasterio.__gdal_version__}")

# Create output directory
# (exist_ok=True keeps this cell re-runnable when the folder already exists)
output_dir = Path('rasterio_outputs')
output_dir.mkdir(exist_ok=True)
print(f"Output directory: {output_dir.absolute()}")

## 🏔️ Creating Synthetic DEM

In [None]:
# Create a synthetic DEM since we don't have real data
# This simulates a realistic mountainous terrain

def create_synthetic_dem(width=1000, height=1000, bounds=(-10, 40, -5, 45), seed=42):
    """Create a synthetic DEM with plausible mountainous topography.

    Parameters
    ----------
    width, height : int
        Number of columns / rows of the output grid.
    bounds : tuple of float
        Geographic extent as (lon_min, lat_min, lon_max, lat_max).
    seed : int, optional
        Seed for the private random generator that produces the noise layer.

    Returns
    -------
    elevation : numpy.ndarray
        float32 array of shape (height, width) with values clipped at 0.
        Row 0 is the northern edge (lat_max), matching the north-up layout
        that ``rasterio.transform.from_bounds`` and ``imshow`` assume.
    bounds : tuple
        The ``bounds`` argument, returned unchanged.
    """
    lon_min, lat_min, lon_max, lat_max = bounds

    # A private generator yields the same noise values as np.random.seed(seed)
    # followed by np.random.randn, without reseeding the global NumPy RNG.
    rng = np.random.RandomState(seed)

    # Coordinate arrays: longitudes run west -> east, latitudes run
    # north -> south so that row 0 of the grid is the top of a north-up raster.
    x = np.linspace(lon_min, lon_max, width)
    y = np.linspace(lat_max, lat_min, height)
    X, Y = np.meshgrid(x, y)

    # Base elevation with large-scale features
    elevation = 500 + 2000 * np.sin(X * 0.5) * np.cos(Y * 0.3)

    # Add mountain ranges (Gaussian bumps centred on fixed lon/lat positions)
    elevation += 1500 * np.exp(-((X + 7.5)**2 + (Y - 42.5)**2) / 2)  # Mountain 1
    elevation += 1200 * np.exp(-((X + 6)**2 + (Y - 43.5)**2) / 1.5)   # Mountain 2
    elevation += 800 * np.exp(-((X + 8)**2 + (Y - 41)**2) / 1)        # Hill

    # Add valleys (negative Gaussian bumps)
    valley1 = -400 * np.exp(-((X + 6.5)**2 + (Y - 42)**2) / 0.5)
    valley2 = -300 * np.exp(-((X + 7)**2 + (Y - 44)**2) / 0.8)
    elevation += valley1 + valley2

    # Add Gaussian noise with a standard deviation of 50
    elevation += 50 * rng.randn(height, width)

    # Add some water bodies: small discs forced to a constant elevation
    water_mask1 = (X + 8.5)**2 + (Y - 41.5)**2 < 0.1
    water_mask2 = (X + 6.2)**2 + (Y - 43.8)**2 < 0.05
    elevation[water_mask1] = 150  # Lake 1
    elevation[water_mask2] = 200  # Lake 2

    # Ensure no negative elevations
    elevation = np.maximum(elevation, 0)

    return elevation.astype(np.float32), bounds

# Build the synthetic terrain on a 1200 x 800 grid
dem_data, bounds = create_synthetic_dem(width=1200, height=800, bounds=(-10, 40, -5, 45))
print(f"Created synthetic DEM: {dem_data.shape}")
print(f"Elevation range: {dem_data.min():.1f} to {dem_data.max():.1f} meters")
print(f"Geographic bounds: {bounds} (lon_min, lat_min, lon_max, lat_max)")

# Affine transform mapping pixel (col, row) indices to geographic coordinates;
# the array shape is (rows, cols), hence shape[1] is the width.
transform = from_bounds(*bounds, width=dem_data.shape[1], height=dem_data.shape[0])
print(f"Pixel resolution: {abs(transform.a):.6f}° x {abs(transform.e):.6f}°")

# Coordinate reference system: EPSG:4326 (WGS84 geographic)
crs = CRS.from_epsg(4326)
print(f"CRS: {crs}")

In [None]:
# Save synthetic DEM as GeoTIFF
original_dem_path = output_dir / 'original_dem.tif'

with rasterio.open(
    original_dem_path, 'w',
    driver='GTiff',
    height=dem_data.shape[0],
    width=dem_data.shape[1],
    count=1,
    dtype=dem_data.dtype,
    crs=crs,
    transform=transform,
    compress='lzw',  # Compression
    tiled=True,      # Tiled format for efficiency
    blockxsize=256,
    blockysize=256
) as dst:
    dst.write(dem_data, 1)
    dst.set_band_description(1, 'Elevation')
    # Band-level tag (first positional argument is the band index)
    dst.update_tags(1, units='meters')
    # Dataset-level tags
    dst.update_tags(source='Synthetic DEM for tutorial', 
                   created_by='Rasterio Tutorial')

print(f"Original DEM saved: {original_dem_path}")
print(f"File size: {os.path.getsize(original_dem_path) / 1024:.1f} KB")

# Visualize the original DEM.
# imshow expects extent=(left, right, bottom, top), whereas `bounds` is ordered
# (lon_min, lat_min, lon_max, lat_max), so the values must be reordered.
dem_extent = (bounds[0], bounds[2], bounds[1], bounds[3])

plt.figure(figsize=(12, 8))
# origin='upper' draws row 0 at the top, the same north-up layout the GeoTIFF uses
plt.imshow(dem_data, cmap='terrain', extent=dem_extent, origin='upper', aspect='equal')
plt.colorbar(label='Elevation (m)')
plt.title('Original Synthetic DEM (WGS84, 4326)')
plt.xlabel('Longitude (°)')
plt.ylabel('Latitude (°)')
plt.grid(True, alpha=0.3)
plt.show()

## 📖 Loading and Exploring DEM

In [None]:
# Re-open the GeoTIFF read-only and report how it is stored and georeferenced
with rasterio.open(original_dem_path) as src:
    print("DEM Properties:")
    print("\n".join([
        f"  Driver: {src.driver}",
        f"  Dimensions: {src.width} x {src.height} pixels",
        f"  Bands: {src.count}",
        f"  Data type: {src.dtypes[0]}",
        f"  CRS: {src.crs}",
        f"  Transform: {src.transform}",
        f"  Bounds: {src.bounds}",
        f"  NoData: {src.nodata}",
    ]))

    # Dataset-level tags first, then the tags attached to band 1
    print("\nMetadata:")
    for key, value in src.tags().items():
        print(f"  {key}: {value}")

    print("\nBand 1 metadata:")
    for key, value in src.tags(1).items():
        print(f"  {key}: {value}")

    # Pull band 1 into memory for the summary statistics
    dem_array = src.read(1)

    print("\nData statistics:")
    print(f"  Min elevation: {dem_array.min():.1f} m")
    print(f"  Max elevation: {dem_array.max():.1f} m")
    print(f"  Mean elevation: {dem_array.mean():.1f} m")
    print(f"  Std deviation: {dem_array.std():.1f} m")

    # Pixel size in degrees, taken from the affine transform
    # (the e coefficient is negative for north-up rasters, hence abs)
    pixel_width = abs(src.transform.a)
    pixel_height = abs(src.transform.e)

    # Rough degree -> metre conversion evaluated at the raster's central latitude:
    # ~111 km per degree of latitude, shrunk by cos(latitude) for longitude
    mid_lat = (src.bounds.bottom + src.bounds.top) / 2
    meters_per_degree_lat = 111000
    meters_per_degree_lon = meters_per_degree_lat * np.cos(np.deg2rad(mid_lat))

    pixel_area_m2 = (pixel_width * meters_per_degree_lon) * (pixel_height * meters_per_degree_lat)

    print("\nSpatial resolution:")
    print(f"  Pixel size: {pixel_width:.6f}° x {pixel_height:.6f}°")
    print(f"  Approximate size: {pixel_width * meters_per_degree_lon:.1f}m x {pixel_height * meters_per_degree_lat:.1f}m")
    print(f"  Pixel area: ~{pixel_area_m2:.0f} m²")

## 🔄 Resampling Resolution

In [None]:
# Resample DEM to lower resolution using different methods
print("Resampling DEM to lower resolution...")

def resample_raster(src_path, dst_path, scale_factor, resampling_method=rasterio.enums.Resampling.bilinear):
    """Resample a raster by a scale factor and write the result to ``dst_path``.

    Parameters
    ----------
    src_path, dst_path : path-like
        Input raster and output raster locations.
    scale_factor : float
        Multiplier applied to the pixel dimensions (0.25 -> a quarter of the
        columns and rows). Must be positive.
    resampling_method : rasterio.enums.Resampling
        Interpolation used while reading at the new shape.

    Returns
    -------
    tuple
        ``(new_width, new_height, new_transform)`` of the written raster.

    Raises
    ------
    ValueError
        If ``scale_factor`` is not positive.
    """
    if scale_factor <= 0:
        raise ValueError(f"scale_factor must be positive, got {scale_factor!r}")

    with rasterio.open(src_path) as src:
        # Clamp to one pixel so a very small scale factor cannot truncate a
        # dimension to 0 (which would divide by zero in the transform below)
        new_width = max(1, int(src.width * scale_factor))
        new_height = max(1, int(src.height * scale_factor))

        # Stretch the pixel size by the actual (post-rounding) ratio so the
        # resampled raster still covers the same geographic extent
        new_transform = src.transform * src.transform.scale(
            (src.width / new_width),
            (src.height / new_height)
        )

        # Reading with out_shape makes rasterio resample while decoding
        data = src.read(
            out_shape=(src.count, new_height, new_width),
            resampling=resampling_method
        )

        # Reuse the source profile, overriding only what changed
        profile = src.profile.copy()
        profile.update({
            'height': new_height,
            'width': new_width,
            'transform': new_transform
        })

        with rasterio.open(dst_path, 'w', **profile) as dst:
            dst.write(data)
            # Record provenance in the dataset tags
            dst.update_tags(resampled_from=str(src_path),
                           scale_factor=scale_factor,
                           resampling_method=resampling_method.name)

    return new_width, new_height, new_transform

# Test different resampling methods
resampling_methods = {
    'nearest': rasterio.enums.Resampling.nearest,
    'bilinear': rasterio.enums.Resampling.bilinear,
    'cubic': rasterio.enums.Resampling.cubic,
    'average': rasterio.enums.Resampling.average
}

scale_factor = 0.25  # Reduce to 25% of original resolution
resampled_paths = {}

for method_name, method in resampling_methods.items():
    dst_path = output_dir / f'dem_resampled_{method_name}.tif'
    # The returned transform gets its own name so that the notebook-level
    # `transform` of the full-resolution DEM is not silently overwritten.
    width, height, _resampled_transform = resample_raster(original_dem_path, dst_path, scale_factor, method)
    resampled_paths[method_name] = dst_path
    
    file_size = os.path.getsize(dst_path) / 1024
    print(f"{method_name:8s}: {width:4d} x {height:3d} pixels, {file_size:5.1f} KB")

print(f"\nOriginal resolution: {dem_data.shape[1]} x {dem_data.shape[0]}")
print(f"Resampled resolution: {width} x {height} ({scale_factor*100:.0f}% of original)")

In [None]:
# Side-by-side view: one panel for the original, one per resampling method
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
axes = axes.flatten()

# Panel 0 shows a 300 x 300 pixel window cut from the centre of the full-resolution DEM
subset_size = 300
center_x, center_y = dem_data.shape[1]//2, dem_data.shape[0]//2
row_window = slice(center_y - subset_size//2, center_y + subset_size//2)
col_window = slice(center_x - subset_size//2, center_x + subset_size//2)
original_subset = dem_data[row_window, col_window]

im0 = axes[0].imshow(original_subset, cmap='terrain')
axes[0].set_title('Original (subset)')
plt.colorbar(im0, ax=axes[0], fraction=0.046, pad=0.04)

# Panels 1..4 show band 1 of each resampled file in full
for i, (method_name, path) in enumerate(resampled_paths.items(), 1):
    with rasterio.open(path) as src:
        resampled_data = src.read(1)

    panel = axes[i]
    im = panel.imshow(resampled_data, cmap='terrain')
    panel.set_title(f'Resampled - {method_name.title()}')
    plt.colorbar(im, ax=panel, fraction=0.046, pad=0.04)

# The 2 x 3 grid has six slots but only five are used; drop the last one
fig.delaxes(axes[5])

plt.tight_layout()
plt.show()

print("Resampling method comparison:")
print("  • Nearest: Preserves original values, blocky appearance")
print("  • Bilinear: Smooth interpolation, good for continuous data")
print("  • Cubic: Smoother than bilinear, may introduce artifacts")
print("  • Average: Good for downsampling, reduces noise")

## 🚫 Setting NoData Based on Elevation

In [None]:
# Set areas below certain elevation to NoData (simulate water masking)
print("Setting water areas (< 300m) to NoData...")

def mask_by_elevation(src_path, dst_path, elevation_threshold, nodata_value=-9999):
    """Write a float32 copy of band 1 in which low pixels are replaced by NoData.

    Every pixel strictly below ``elevation_threshold`` is set to
    ``nodata_value``, and that value is declared as the NoData value of the
    output file.

    Returns
    -------
    tuple
        ``(masked_array, boolean_mask)`` where the mask is True for replaced pixels.
    """
    with rasterio.open(src_path) as src:
        elevation = src.read(1)
        out_profile = src.profile.copy()

        # True wherever the pixel lies below the threshold
        below_threshold = elevation < elevation_threshold

        # astype returns a new float32 array, so the data read above stays untouched
        masked = elevation.astype(np.float32)
        masked[below_threshold] = nodata_value

        # Declare the sentinel so readers of the file treat it as NoData
        out_profile.update(dtype='float32', nodata=nodata_value)

        with rasterio.open(dst_path, 'w', **out_profile) as dst:
            dst.write(masked, 1)
            dst.update_tags(masked_elevation_threshold=elevation_threshold,
                           nodata_value=nodata_value,
                           water_pixels=int(below_threshold.sum()))

    return masked, below_threshold

# Water mask: run the helper on the bilinear-resampled DEM
water_threshold = 300  # meters
masked_dem_path = output_dir / 'dem_water_masked.tif'

masked_data, water_mask = mask_by_elevation(resampled_paths['bilinear'], masked_dem_path, water_threshold)

# Share of the image that was flagged as water
water_pixels = water_mask.sum()
total_pixels = water_mask.size
water_percentage = water_pixels / total_pixels * 100

print(f"Water masking completed:")
print(f"  Threshold: {water_threshold} m")
print(f"  Water pixels: {water_pixels:,} ({water_percentage:.1f}% of image)")
print(f"  Land pixels: {total_pixels - water_pixels:,}")
print(f"  File saved: {masked_dem_path}")

# Snow/ice mask: the mirror case, hiding everything ABOVE a threshold
snow_threshold = 2000  # meters
snow_masked_path = output_dir / 'dem_snow_masked.tif'

with rasterio.open(resampled_paths['bilinear']) as src:
    data = src.read(1)
    profile = src.profile.copy()

    # True for high-elevation pixels
    snow_mask = data > snow_threshold

    # float32 copy with the high pixels replaced by the -9999 sentinel
    data_snow_masked = data.astype(np.float32)
    data_snow_masked[snow_mask] = -9999

    profile.update(dtype='float32', nodata=-9999)

    with rasterio.open(snow_masked_path, 'w', **profile) as dst:
        dst.write(data_snow_masked, 1)
        dst.update_tags(masked_elevation_threshold=f'>{snow_threshold}m',
                       description='High elevation areas masked')

snow_pixels = snow_mask.sum()
print(f"\nSnow/ice masking (>{snow_threshold}m):")
print(f"  Masked pixels: {snow_pixels:,} ({snow_pixels/total_pixels*100:.1f}% of image)")

In [None]:
# Three panels: unmasked DEM, water-masked DEM, snow-masked DEM
fig, axes = plt.subplots(1, 3, figsize=(18, 6))

# Unmasked reference: the bilinear-resampled raster
with rasterio.open(resampled_paths['bilinear']) as src:
    original_resampled = src.read(1)
    bounds_resampled = src.bounds

# Masked arrays make imshow leave the -9999 sentinel pixels blank
masked_display = np.ma.masked_where(masked_data == -9999, masked_data)
snow_display = np.ma.masked_where(data_snow_masked == -9999, data_snow_masked)

panels = [
    (original_resampled, 'Original Resampled DEM'),
    (masked_display, f'Water Masked (<{water_threshold}m = NoData)'),
    (snow_display, f'Snow/Ice Masked (>{snow_threshold}m = NoData)'),
]

for ax, (panel_data, panel_title) in zip(axes, panels):
    im = ax.imshow(panel_data, cmap='terrain')
    ax.set_title(panel_title)
    plt.colorbar(im, ax=ax, fraction=0.046, pad=0.04)

plt.tight_layout()
plt.show()

print("\n🎯 NoData masking applications:")
print("  • Water body removal for land-only analysis")
print("  • Cloud masking in satellite imagery")
print("  • Elevation-based habitat classification")
print("  • Data quality filtering")
print("  • Region of interest extraction")

## 🗺️ Reprojecting to Different EPSG

In [None]:
# Reproject DEM to different coordinate systems
print("Reprojecting DEM to different coordinate systems...")

def reproject_raster(src_path, dst_path, dst_crs,
                     resampling_method=rasterio.enums.Resampling.bilinear):
    """Warp every band of a raster into ``dst_crs`` and write it to ``dst_path``.

    Returns
    -------
    tuple
        ``(width, height, transform)`` of the reprojected raster.
    """
    with rasterio.open(src_path) as src:
        left, bottom, right, top = src.bounds

        # Let rasterio choose an output grid (size + affine) that covers
        # the source extent in the target CRS
        out_transform, out_width, out_height = rasterio.warp.calculate_default_transform(
            src.crs, dst_crs, src.width, src.height,
            left=left, bottom=bottom, right=right, top=top
        )

        # Same profile as the source except for the georeferencing and shape
        out_profile = src.profile.copy()
        out_profile.update(
            crs=dst_crs,
            transform=out_transform,
            width=out_width,
            height=out_height,
        )

        with rasterio.open(dst_path, 'w', **out_profile) as dst:
            # Warp band by band, straight from the source file into the target file
            for band_index in src.indexes:
                rasterio.warp.reproject(
                    source=rasterio.band(src, band_index),
                    destination=rasterio.band(dst, band_index),
                    src_transform=src.transform,
                    src_crs=src.crs,
                    dst_transform=out_transform,
                    dst_crs=dst_crs,
                    resampling=resampling_method
                )

            # Record provenance in the dataset tags
            dst.update_tags(
                reprojected_from=str(src.crs),
                reprojected_to=str(dst_crs),
                original_bounds=str(src.bounds),
                resampling_method=resampling_method.name
            )

    return out_width, out_height, out_transform

# Define target coordinate systems
target_projections = {
    'UTM_30N': CRS.from_epsg(32630),    # UTM Zone 30N (for Western Europe)
    'Web_Mercator': CRS.from_epsg(3857), # Web Mercator (Google/OSM)
    'Lambert_Azimuthal': CRS.from_epsg(3035),  # European grid
    'Albers_Equal_Area': CRS.from_string("+proj=aea +lat_1=40 +lat_2=45 +lat_0=42.5 +lon_0=-7.5 +datum=WGS84")  # Custom
}

reprojected_paths = {}
projection_info = {}

for proj_name, target_crs in target_projections.items():
    dst_path = output_dir / f'dem_reprojected_{proj_name.lower()}.tif'
    
    # Best effort: a projection that fails is reported and skipped so the
    # remaining ones still run.
    try:
        # Dedicated names keep the notebook-level `width`, `height` and
        # `transform` (set by earlier cells and read by the summary) intact.
        proj_width, proj_height, proj_transform = reproject_raster(
            masked_dem_path, dst_path, target_crs
        )
        reprojected_paths[proj_name] = dst_path
        
        # Get bounds in new CRS
        with rasterio.open(dst_path) as dst:
            new_bounds = dst.bounds
        pixel_area = abs(proj_transform.a * proj_transform.e)  # pixel area in CRS units
        
        projection_info[proj_name] = {
            'crs': target_crs,
            'dimensions': (proj_width, proj_height),
            'bounds': new_bounds,
            'pixel_area': pixel_area,
            'file_size': os.path.getsize(dst_path) / 1024
        }
        
        print(f"{proj_name:16s}: {proj_width:4d} x {proj_height:4d} pixels, {pixel_area:10.1f} units², {projection_info[proj_name]['file_size']:5.1f} KB")
        
    except Exception as e:
        print(f"Failed to reproject to {proj_name}: {e}")

print(f"\nProjection comparison:")
print(f"  Original (WGS84): Geographic coordinates (degrees)")
for proj_name, info in projection_info.items():
    # Ask the CRS itself instead of guessing the units from the dictionary key
    units = 'meters' if info['crs'].is_projected else 'degrees'
    print(f"  {proj_name:16s}: Projected coordinates ({units})")

In [None]:
# Visualize reprojected DEMs
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
axes = axes.flatten()

# Only as many rasters as there are panels can be shown
shown = list(reprojected_paths.items())[:len(axes)]

for ax, (proj_name, path) in zip(axes, shown):
    # Dedicated names: the notebook-level `bounds` tuple and `data` array
    # created by earlier cells must not be overwritten here.
    with rasterio.open(path) as src:
        proj_data = src.read(1)
        proj_bounds = src.bounds
        proj_crs = src.crs
        proj_nodata = src.nodata

    # Mask NoData for display (nothing to mask when the file declares none)
    if proj_nodata is not None:
        data_display = np.ma.masked_where(proj_data == proj_nodata, proj_data)
    else:
        data_display = proj_data

    # imshow wants extent=(left, right, bottom, top)
    im = ax.imshow(
        data_display, cmap='terrain',
        extent=[proj_bounds.left, proj_bounds.right, proj_bounds.bottom, proj_bounds.top]
    )
    ax.set_title(f'{proj_name.replace("_", " ")}\n{proj_crs}')

    # Add units to axes
    units = 'degrees' if proj_crs.is_geographic else 'meters'
    ax.set_xlabel(f'X ({units})')
    ax.set_ylabel(f'Y ({units})')

    plt.colorbar(im, ax=ax, fraction=0.046, pad=0.04, label='Elevation (m)')

# Hide panels left empty because a reprojection failed earlier
for ax in axes[len(shown):]:
    ax.set_visible(False)

plt.tight_layout()
plt.show()

print("\n🗺️ Coordinate System Characteristics:")
print("  • Geographic (WGS84): Angular units (degrees); neither conformal nor equal-area when plotted as a grid")
print("  • UTM: Conformal (preserves local shape), low distortion within its zone")
print("  • Web Mercator: Optimized for web mapping, distorts at high latitudes")
print("  • Lambert Azimuthal: Equal-area projection, good for regional analysis")
print("  • Albers Equal Area: Preserves area, good for statistical analysis")

## 💾 Saving to Different Formats

In [None]:
# Save DEM in different formats
print("Converting DEM to different file formats...")

def convert_format(src_path, dst_path, driver, **kwargs):
    """Copy a raster to ``dst_path`` using another GDAL driver.

    Parameters
    ----------
    src_path, dst_path : path-like
        Input raster and output raster locations.
    driver : str
        GDAL driver short name of the output format (e.g. ``'GTiff'``).
    **kwargs
        Extra creation options forwarded to ``rasterio.open``.

    Returns
    -------
    int
        Size of the written file in bytes.
    """
    with rasterio.open(src_path) as src:
        if driver == src.driver:
            # Same format: the source's creation options remain meaningful
            profile = src.profile.copy()
        else:
            # Different format: start from the driver-neutral metadata so that
            # options specific to the source format (tiling, block sizes,
            # compression, interleave) are not forwarded to a driver that
            # does not understand them.
            profile = src.meta.copy()
        profile.update(driver=driver, **kwargs)
        
        with rasterio.open(dst_path, 'w', **profile) as dst:
            dst.write(src.read())
            # Copy the dataset-level tags in one call
            tags = src.tags()
            if tags:
                dst.update_tags(**tags)
    
    return os.path.getsize(dst_path)

# Use the UTM reprojected DEM as source
source_path = reprojected_paths['UTM_30N']

# Different output formats ('ext' is the file extension; every other key is
# passed on to convert_format)
output_formats = {
    'GeoTIFF_LZW': {'driver': 'GTiff', 'compress': 'lzw', 'tiled': True, 'ext': '.tif'},
    'GeoTIFF_DEFLATE': {'driver': 'GTiff', 'compress': 'deflate', 'tiled': True, 'ext': '.tif'},
    'ENVI': {'driver': 'ENVI', 'ext': '.dat'},
    'AAIGrid': {'driver': 'AAIGrid', 'ext': '.asc'},  # ASCII Grid
}

format_results = {}

for format_name, format_config in output_formats.items():
    # Separate the extension from the driver options WITHOUT mutating
    # `output_formats`, so the dictionary stays intact for later inspection.
    ext = format_config['ext']
    driver_options = {key: value for key, value in format_config.items() if key != 'ext'}
    dst_path = output_dir / f'dem_utm_{format_name.lower()}{ext}'
    
    # Best effort: a driver missing from the local GDAL build is reported and skipped
    try:
        file_size = convert_format(source_path, dst_path, **driver_options)
        format_results[format_name] = {
            'path': dst_path,
            'size_kb': file_size / 1024,
            'driver': driver_options['driver']
        }
        print(f"{format_name:16s}: {file_size/1024:7.1f} KB - {dst_path.name}")
        
    except Exception as e:
        print(f"{format_name:16s}: FAILED - {e}")

# Compare file sizes relative to the smallest output
if format_results:
    print(f"\nFile size comparison:")
    sorted_formats = sorted(format_results.items(), key=lambda x: x[1]['size_kb'])
    
    smallest_size = sorted_formats[0][1]['size_kb']
    for format_name, info in sorted_formats:
        ratio = info['size_kb'] / smallest_size
        print(f"  {format_name:16s}: {info['size_kb']:7.1f} KB ({ratio:4.1f}x)")

print(f"\n📁 Format characteristics:")
print(f"  • GeoTIFF: Most common, excellent compression, wide support")
print(f"  • ENVI: Popular in remote sensing, header + binary format")
print(f"  • ASCII Grid: Human-readable, large files, limited metadata")


## 🪟 Advanced: Windowed Reading & Writing

In [None]:
from rasterio.windows import Window
import rasterio
import numpy as np

def process_dem_windows(src_path, dst_path, window_size=256, overlap=32):
    """Compute a slope raster from a DEM, one padded block window at a time.

    The source is traversed along its internal block layout. Each block is
    read with ``overlap`` extra pixels on every side (clipped at the raster
    edge) so gradients at block borders use real neighbours, and only the
    un-padded core is written to the output.

    Parameters
    ----------
    src_path, dst_path : path-like
        Input DEM and output slope raster.
    window_size : int
        Only recorded in the output tags; the windows actually processed are
        the source's block windows. Kept for backward compatibility.
    overlap : int
        Number of padding pixels read around every block.

    Returns
    -------
    int
        Number of windows for which a slope was computed (tiles consisting
        solely of NoData are written as NoData and not counted).

    Notes
    -----
    The slope is the gradient magnitude (rise over run) expressed in
    elevation units per CRS unit. Output pixels are NoData (-9999) where the
    source is NoData or NaN, and where a neighbour needed by the finite
    difference is NoData or NaN.
    """

    def calculate_slope(elev, px_x, px_y):
        """Gradient magnitude of ``elev``; NaN inputs propagate to NaN outputs."""
        # np.gradient needs at least two samples along each axis
        if min(elev.shape) < 2:
            return np.full(elev.shape, np.nan, dtype=np.float64)
        # np.gradient expects dy, dx spacing
        dy, dx = np.gradient(elev, px_y, px_x)
        return np.sqrt(dx**2 + dy**2)

    with rasterio.open(src_path) as src:
        # Output profile: single float32 band with an explicit NoData value
        profile = src.profile.copy()
        profile.update(dtype="float32", nodata=-9999.0, count=1)
        out_nodata = profile["nodata"]
        src_nodata = src.nodata

        # Pixel sizes (account for sign)
        px_x = abs(src.transform.a)
        px_y = abs(src.transform.e)

        with rasterio.open(dst_path, "w", **profile) as dst:
            windows_processed = 0

            # Iterate over the dataset's internal tiling
            # block_windows(bidx) yields ( (block_row, block_col), Window )
            for _, win in src.block_windows(1):
                # Pad the window by `overlap` cells on each side, safely at edges
                col_off = max(0, win.col_off - overlap)
                row_off = max(0, win.row_off - overlap)
                col_end = min(src.width,  win.col_off + win.width  + overlap)
                row_end = min(src.height, win.row_off + win.height + overlap)

                pad_w = Window(
                    col_off=col_off,
                    row_off=row_off,
                    width=col_end - col_off,
                    height=row_end - row_off,
                )

                raw_pad = src.read(1, window=pad_w)

                # Pixels that must not take part in the finite differences
                invalid_pad = np.isnan(raw_pad.astype(np.float64))
                if src_nodata is not None:
                    invalid_pad |= (raw_pad == src_nodata)

                # Pure NoData tiles: write NoData explicitly rather than
                # leaving the block unwritten, then move on
                if invalid_pad.all():
                    empty = np.full((int(win.height), int(win.width)), out_nodata, dtype=np.float32)
                    dst.write(empty, 1, window=win)
                    continue

                # Replace NoData by NaN so the sentinel value (e.g. -9999)
                # never leaks into the gradient as a bogus cliff
                elev_pad = raw_pad.astype(np.float64)
                elev_pad[invalid_pad] = np.nan

                # Compute slope on padded tile
                slope_pad = calculate_slope(elev_pad, px_x, px_y)

                # Crop back to original (un-padded) block window region
                start_row = win.row_off - pad_w.row_off
                start_col = win.col_off - pad_w.col_off
                end_row = start_row + win.height
                end_col = start_col + win.width
                slope_core = slope_pad[start_row:end_row, start_col:end_col]
                invalid_core = invalid_pad[start_row:end_row, start_col:end_col]

                # NoData wherever the source was invalid or the slope could
                # not be computed from valid neighbours
                unusable = invalid_core | ~np.isfinite(slope_core)
                slope_out = np.where(unusable, out_nodata, slope_core).astype(np.float32)

                # Write
                dst.write(slope_out, 1, window=win)
                windows_processed += 1

            # Tags
            dst.update_tags(
                processing_method="windowed_slope_calculation",
                window_size=window_size,
                overlap=overlap,
                windows_processed=windows_processed,
            )

    return windows_processed

# Process DEM to calculate slope using windowed approach
# (the input is the UTM reprojection, so the pixel spacing handed to
# np.gradient is expressed in that grid's units rather than in degrees)
slope_path = output_dir / "dem_slope_windowed.tif"
windows_processed = process_dem_windows(reprojected_paths["UTM_30N"], slope_path)

# Report how many windows were processed and the size of the result on disk
print(f"Windowed processing completed:")
print(f"  Windows processed: {windows_processed}")
print(f"  Output file: {slope_path}")
print(f"  File size: {os.path.getsize(slope_path)/1024:.1f} KB")


## 📊 Summary and Comparison

In [None]:
# Create comprehensive summary of all operations
print("\n" + "="*70)
print("                    RASTERIO PROCESSING SUMMARY")
print("="*70)

# File inventory
print(f"\n📁 Generated Files:")
all_files = list(output_dir.glob('*'))
total_size = 0

for file_path in sorted(all_files):
    if file_path.is_file():
        size_kb = os.path.getsize(file_path) / 1024
        total_size += size_kb
        print(f"  {file_path.name:30s} {size_kb:8.1f} KB")

print(f"  {'─' * 40}")
print(f"  {'Total:':30s} {total_size:8.1f} KB")

# Read the resampled dimensions back from disk: loop variables such as
# `width` / `height` are reused by later cells and may no longer describe
# the resampled raster by the time this summary runs.
with rasterio.open(resampled_paths['bilinear']) as resampled_src:
    resampled_width, resampled_height = resampled_src.width, resampled_src.height

# Formats that were actually written, de-duplicated in insertion order
converted_drivers = list(dict.fromkeys(info['driver'] for info in format_results.values()))
converted_drivers_text = ', '.join(converted_drivers) if converted_drivers else 'none'

# Processing steps summary
print(f"\n⚡ Processing Steps Completed:")
print(f"  ✅ Created synthetic DEM ({dem_data.shape[1]}x{dem_data.shape[0]} pixels)")
print(f"  ✅ Loaded and analyzed raster properties")
print(f"  ✅ Resampled resolution ({len(resampled_paths)} different methods)")
print(f"  ✅ Applied elevation-based masking (water & snow)")
print(f"  ✅ Reprojected to {len(reprojected_paths)} coordinate systems")
print(f"  ✅ Converted to {len(format_results)} different formats")
print(f"  ✅ Performed windowed slope calculation")

# Key transformations
print(f"\n🔄 Key Transformations:")
print(f"  Original: {dem_data.shape[1]}×{dem_data.shape[0]} pixels, WGS84 (EPSG:4326)")
print(f"  Resampled: {resampled_width}×{resampled_height} pixels ({scale_factor*100:.0f}% resolution)")
print(f"  Water masked: {water_pixels:,} pixels set to NoData (<{water_threshold}m)")
print(f"  Reprojected: Multiple coordinate systems (UTM, Web Mercator, etc.)")
print(f"  Formats: {converted_drivers_text}")

# Coordinate systems comparison
print(f"\n🗺️ Coordinate Systems Tested:")
print(f"  WGS84 (4326):     Geographic, degrees, global coverage")
for proj_name, info in projection_info.items():
    dims = info['dimensions']
    print(f"  {proj_name:16s}: {dims[0]:4d}×{dims[1]:4d} pixels, projected coordinates")

# Performance insights
print(f"\n🎯 Key Rasterio Capabilities Demonstrated:")
print(f"  • Comprehensive format support (GDAL ecosystem)")
print(f"  • Efficient I/O with compression and tiling")
print(f"  • Precise coordinate system transformations")
print(f"  • Flexible resampling algorithms")
print(f"  • Memory-efficient windowed processing")
print(f"  • Rich metadata preservation")
print(f"  • NoData handling and masking")
print(f"  • Integration with NumPy ecosystem")

print(f"\n🚀 Advanced Features Used:")
print(f"  • Block-based reading for large rasters")
print(f"  • Windowed processing with overlap handling")
print(f"  • Multiple compression algorithms")
print(f"  • Coordinate reference system validation")
print(f"  • Metadata preservation across transformations")
print(f"  • Efficient reprojection with automatic bounds calculation")

print("\n" + "="*70)

## 🧹 Cleanup

In [None]:
# Optional cleanup
try:
    answer = input("Remove all generated files? (y/N): ")
except (EOFError, NotImplementedError):
    # No interactive stdin available (e.g. a headless "Run All" / nbconvert
    # execution): fall back to the safe default and keep the files.
    answer = ""

cleanup_files = answer.strip().lower().startswith('y')

if cleanup_files:
    import shutil
    if output_dir.exists():
        shutil.rmtree(output_dir)
        print("✅ All output files removed")
else:
    # Measure what is on disk now instead of relying on a total computed by
    # another cell
    remaining_files = [p for p in output_dir.glob('*') if p.is_file()]
    remaining_kb = sum(p.stat().st_size for p in remaining_files) / 1024
    print(f"💾 Files preserved in '{output_dir}/' directory")
    print(f"   Total: {len(remaining_files)} files, {remaining_kb:.1f} KB")

print(f"\n🎉 Rasterio tutorial completed!")

## 📋 Rasterio Quick Reference

In [None]:
from IPython.display import HTML, display
import html

# Cheat-sheet content: (title, code snippet) pairs, one per card.
# The snippets are raw strings that are only displayed, never executed.
SECTIONS = [
  ("Reading Rasters", r"""import rasterio
with rasterio.open('dem.tif') as src:
    data = src.read(1)          # Read band 1
    bounds = src.bounds         # Spatial bounds
    crs = src.crs               # Coordinate system
    transform = src.transform   # Pixel->coord transform"""),

  ("Writing Rasters", r"""profile = src.profile.copy()
with rasterio.open('output.tif', 'w', **profile) as dst:
    dst.write(data, 1)
    dst.update_tags(metadata='value')"""),

  ("Resampling", r"""data = src.read(
    out_shape=(1, new_height, new_width),
    resampling=rasterio.enums.Resampling.bilinear
)"""),

  ("Reprojection", r"""import rasterio.warp
rasterio.warp.reproject(
    source=rasterio.band(src, 1),
    destination=rasterio.band(dst, 1),
    src_crs=src.crs,
    dst_crs='EPSG:3857'
)"""),

  ("Windowed Reading", r"""from rasterio.windows import Window
window = Window(col_off=0, row_off=0, width=512, height=512)
data = src.read(1, window=window)"""),

  ("Coordinate Systems", r"""from rasterio.crs import CRS
crs = CRS.from_epsg(4326)      # WGS84
crs = CRS.from_epsg(3857)      # Web Mercator
crs = CRS.from_string('+proj=utm +zone=30 +datum=WGS84')"""),

  ("Common Operations", r"""# NoData masking
data[data < threshold] = src.nodata

# Format conversion
profile.update(driver='netCDF', compress='deflate')

# Coordinate transformation
lon, lat = rasterio.transform.xy(transform, row, col)
row, col = rasterio.transform.rowcol(transform, lon, lat)"""),
]

# One-line reminders rendered as a list underneath the cards
BEST_PRACTICES = [
  "Use context managers (with statements)",
  "Enable compression and tiling for large rasters",
  "Choose appropriate resampling methods",
  "Validate CRS before reprojection",
  "Handle NoData values properly",
  "Use windowed processing for large files",
  "Preserve metadata across operations",
]

def section_html(title, code):
    """Render one cheat-sheet card: an escaped title, a Copy button and an escaped code block."""
    safe_title = html.escape(title)
    safe_code = html.escape(code)
    # The leading empty entry and the trailing indent keep the surrounding
    # whitespace of the generated markup unchanged.
    markup_lines = [
        '',
        '    <section class="hsec">',
        '      <div class="hsec-head">',
        f'        <h3>🧭 {safe_title}</h3>',
        '        <button class="copy" onclick="navigator.clipboard.writeText(this.parentElement.nextElementSibling.innerText)">Copy</button>',
        '      </div>',
        f'      <pre><code>{safe_code}</code></pre>',
        '    </section>',
        '    ',
    ]
    return "\n".join(markup_lines)

best_list = "".join(f"<li>• {html.escape(x)}</li>" for x in BEST_PRACTICES)

# Every CSS rule is scoped under .h5-wrap (and the custom properties live on
# .h5-wrap rather than :root): notebook outputs share one page, so bare
# selectors such as `pre`, `code`, `.btn` or `.grid` would restyle other cells.
html_block = f"""
<style>
.h5-wrap {{
  --bg: #0b1726;          /* deep night */
  --panel: #0f2236;       /* slate */
  --accent: #00c3a3;      /* teal */
  --accent2: #4ec9f0;     /* sky */
  --text: #eaf2f8;
  --muted: #9bb3c9;
  --code-bg: #0a1a2a;
  font-family: Inter, ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Arial, sans-serif;
  color: var(--text);
  max-width: 980px;
  margin: 10px 0 24px 0;
}}
.h5-wrap .h5-card {{
  border-radius: 16px;
  overflow: hidden;
  box-shadow: 0 10px 30px rgba(0,0,0,.25);
  border: 1px solid rgba(255,255,255,.06);
}}
.h5-wrap .h5-hero {{
  padding: 18px 20px;
  background: linear-gradient(135deg, rgba(0,195,163,.25), rgba(78,201,240,.16)), radial-gradient(1200px 500px at 0% 0%, rgba(0,195,163,.25), transparent 60%), radial-gradient(800px 400px at 100% 0%, rgba(78,201,240,.25), transparent 60%), var(--bg);
  border-bottom: 1px solid rgba(255,255,255,.07);
  display:flex; align-items:center; justify-content:space-between; gap:10px;
}}
.h5-wrap .h5-title {{ margin:0; font-size:22px; letter-spacing:.2px; }}
.h5-wrap .badge {{
  font-size:12px; color:#052; background: linear-gradient(90deg, #77ffd1 0%, #4ec9f0 100%);
  -webkit-background-clip: text; background-clip: text; color: transparent;
  font-weight:700;
}}
.h5-wrap .actions {{ display:flex; gap:8px; }}
.h5-wrap .btn {{
  cursor:pointer; border:1px solid rgba(255,255,255,.18);
  background: rgba(255,255,255,.06); color: var(--text);
  padding:6px 10px; border-radius:10px; font-size:12px;
}}
.h5-wrap .btn:hover {{ background: rgba(255,255,255,.12); }}
.h5-wrap .h5-body {{ background: var(--panel); padding: 16px 18px; }}
.h5-wrap .grid {{
  display:grid; grid-template-columns: repeat(auto-fit, minmax(280px, 1fr)); gap:14px;
}}
.h5-wrap .hsec {{ background: var(--code-bg); border:1px solid rgba(255,255,255,.06); border-radius:12px; overflow:hidden; }}
.h5-wrap .hsec-head {{ display:flex; align-items:center; justify-content:space-between; padding:10px 12px; background: rgba(255,255,255,.03); }}
.h5-wrap .hsec h3 {{ margin:0; font-size:14px; color: var(--accent2); letter-spacing:.3px; }}
.h5-wrap .copy {{ all: unset; cursor:pointer; padding:4px 8px; border-radius:8px; border:1px solid rgba(255,255,255,.15); font-size:12px; color: var(--text); }}
.h5-wrap .copy:hover {{ background: rgba(255,255,255,.10); }}
.h5-wrap pre {{
  margin:0; padding:12px; color:#e6f1ff; line-height:1.35; font-size:12.8px;
  overflow:auto; white-space:pre; tab-size:2;
}}
.h5-wrap code {{ font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, "Liberation Mono", monospace; }}
.h5-wrap .note {{
  margin-top: 14px; color: var(--muted); font-size:13px;
}}
.h5-wrap .kicker {{
  margin-top: 10px; padding:10px 12px; background: rgba(0,195,163,.08);
  border:1px solid rgba(0,195,163,.25); border-radius:12px; font-size:13px;
}}
/* list-style:none because every item already starts with its own bullet character */
.h5-wrap ul.best {{ margin:10px 0 0 0; padding-left: 18px; color: var(--text); list-style: none; }}
</style>

<div class="h5-wrap">
  <div class="h5-card">
    <div class="h5-hero">
      <h2 class="h5-title">🗺️ Rasterio Quick Reference <span class="badge">Geo IO • CRS • Reproject • Windows</span></h2>
      <div class="actions">
        <button class="btn" onclick="(async()=>{{await navigator.clipboard.writeText(document.querySelector('#rasterio-cheat').innerText)}})()">Copy All</button>
      </div>
    </div>

    <div class="h5-body" id="rasterio-cheat">
      <div class="grid">
        {''.join(section_html(t,c) for t,c in SECTIONS)}
      </div>

      <div class="kicker">
        <b>Best Practices</b>
        <ul class="best">
          {best_list}
        </ul>
      </div>

      <div class="note">Rasterio: Python gateway to the raster universe. Mind your CRS, nodata, tiling &amp; compression. 🌍</div>
    </div>
  </div>
</div>
"""

display(HTML(html_block))
