In [2]:
from pathlib import Path
import os
# Project root directory. A user-specific absolute path only works on one
# machine, so it can be overridden with the NYS_WETLANDS_WORKDIR environment
# variable; the original location remains the default.
workdir = Path(
    os.environ.get(
        "NYS_WETLANDS_WORKDIR",
        "/Users/Anthony/Data and Analysis Local/NYS_Wetlands_GHG/",
    )
)
print(workdir)
# Switch into the project root so relative data paths resolve against it.
os.chdir(workdir)
current_working_dir = Path.cwd()
print(f"Current working directory is now: {current_working_dir}")

/Users/Anthony/Data and Analysis Local/NYS_Wetlands_GHG
Current working directory is now: /Users/Anthony/Data and Analysis Local/NYS_Wetlands_GHG


In [3]:
import rasterio
from rasterio import features
import geopandas as gpd
import numpy as np

In [4]:
# === CLASS MAPPING ===
# Wetland class code -> integer label value.
class_mapping = dict([
    ('EMW', 1),  # Emergent Wetland
    ('FSW', 2),  # Forested Wetland
    ('SSW', 3),  # Shrub Scrub Wetland
    ('OWW', 4),  # Open Water Wetland
])

# === FILE PATHS ===
# Reference DEM (input)
dem_path = (
    "Data/TerrainProcessed/HUC_DEMs/cluster_208_huc_041402011002.tif"
)
# Wetland polygons (input)
wetlands_path = (
    "Data/Training_Data/HUC_Extracted_Training_Data/cluster_208_huc_041402011002_NWI.gpkg"
)
# Label raster (output)
output_path = (
    "Data/Training_Data/cluster_208_huc_041402011002_labels.tif"
)

In [5]:
# === LOAD REFERENCE RASTER (for grid alignment) ===
# Only the grid definition (profile, affine transform, shape, CRS) is taken
# from the DEM here; no pixel data is read.
with rasterio.open(dem_path) as src:
    profile = src.profile.copy()
    transform = src.transform
    out_shape = (src.height, src.width)
    raster_crs = src.crs

# === LOAD AND PREPARE WETLANDS ===
wetlands = gpd.read_file(wetlands_path)
print(f"Loaded {len(wetlands)} wetland polygons")

# Polygon coordinates are mapped to pixels purely through `transform`, so the
# polygons must be expressed in the reference raster's CRS. Reproject if not.
if raster_crs is not None:
    if wetlands.crs is None:
        print("WARNING: Wetlands layer has no CRS; assuming it matches the reference raster")
    elif wetlands.crs != raster_crs:
        print(f"Reprojecting wetlands from {wetlands.crs} to {raster_crs}")
        wetlands = wetlands.to_crs(raster_crs)

# Add numeric class values (NaN where MOD_CLASS is not in class_mapping)
wetlands['class_value'] = wetlands['MOD_CLASS'].map(class_mapping)

# Check for any unmapped classes
is_mapped = wetlands['class_value'].notna()
if not is_mapped.all():
    unmapped = wetlands.loc[~is_mapped, 'MOD_CLASS'].unique()
    print(f"WARNING: Unmapped classes found: {unmapped}")
    # A NaN value cannot be burned into an integer raster, so these polygons
    # are left out of the rasterization input instead of failing later.
    print(f"WARNING: Skipping {int((~is_mapped).sum())} polygons with unmapped classes")

# Create list of (geometry, value) tuples for rasterization.
# Values are converted to plain ints because the column is float whenever
# any class was unmapped.
mapped_wetlands = wetlands[is_mapped]
shapes = [
    (geom, int(value))
    for geom, value in zip(mapped_wetlands.geometry, mapped_wetlands['class_value'])
]

Loaded 296 wetland polygons


In [6]:
# === RASTERIZE ===
# Burn each polygon's class value onto the reference grid.
print("Rasterizing polygons...")
rasterize_options = {
    "out_shape": out_shape,
    "transform": transform,
    "fill": 0,  # Background value
    "dtype": np.uint8,
}
label_raster = features.rasterize(shapes, **rasterize_options)

Rasterizing polygons...


In [7]:
# === SAVE OUTPUT ===
# Start from the reference raster's profile and override only what differs
# for a single-band uint8 label raster.
label_overrides = {
    "count": 1,
    "dtype": np.uint8,
    "nodata": 255,  # Use 255 as nodata since 0 is background
}
profile.update(label_overrides)

with rasterio.open(output_path, mode='w', **profile) as dst:
    dst.write(label_raster, indexes=1)

print(f"Saved label raster to: {output_path}")

Saved label raster to: Data/Training_Data/cluster_208_huc_041402011002_labels.tif


In [8]:
# === SUMMARY ===
# Label value -> display name, derived from class_mapping so the summary
# cannot drift from the values that were rasterized. 0 is the background fill.
# Built once here rather than on every loop iteration.
class_names = {0: 'Background'}
class_names.update({value: name for name, value in class_mapping.items()})

print("\nClass distribution (pixel counts):")
unique, counts = np.unique(label_raster, return_counts=True)
total_pixels = label_raster.size
for val, count in zip(unique, counts):
    # Any raster value without a known class is reported as 'Unknown'.
    class_name = class_names.get(int(val), 'Unknown')
    percentage = (count / total_pixels) * 100
    print(f"  {class_name} ({val}): {count:,} pixels ({percentage:.2f}%)")


Class distribution (pixel counts):
  Background (0): 211,366,353 pixels (97.85%)
  EMW (1): 723,642 pixels (0.34%)
  FSW (2): 1,956,625 pixels (0.91%)
  SSW (3): 1,702,629 pixels (0.79%)
  OWW (4): 250,751 pixels (0.12%)
