In [None]:
# !pip install geopandas rasterio shapely scipy scikit-learn scikit-image seaborn matplotlib_scalebar

In [None]:
import sys
import os

# Report which interpreter and environment this kernel is running in.
for _label, _value in (
    ("Python executable:", sys.executable),
    ("Python version:", sys.version),
    ("Environment location:", os.path.dirname(sys.executable)),
):
    print(_label, _value)

In [None]:
from unittest.mock import sentinel

# =============================================================================
# IMPORTS
# =============================================================================
# Core libraries
import numpy as np
import pandas as pd
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')
from pyproj import Transformer
# Geospatial libraries
import geopandas as gpd
import rasterio
from rasterio.warp import reproject, Resampling, calculate_default_transform
from rasterio.transform import from_bounds, rowcol
from rasterio.mask import mask
from rasterio import features
from shapely.geometry import Point, box, shape  # Add 'shape' here
from rasterio.features import shapes  # Add this new line
# Analysis libraries
from scipy.interpolate import griddata
from scipy.ndimage import median_filter, binary_opening, binary_closing
from scipy import stats
from sklearn.neighbors import NearestNeighbors, KernelDensity
from skimage.morphology import disk
from shapely.geometry import shape
from rasterio.features import shapes
import matplotlib.patches as mpatches
# Visualization libraries
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from matplotlib.patches import FancyArrowPatch
from matplotlib.colors import ListedColormap, BoundaryNorm
from matplotlib_scalebar.scalebar import ScaleBar
import seaborn as sns

# Data I/O
from dbfread import DBF

# =============================================================================
# PATHS AND DIRECTORIES
# =============================================================================
# Inputs are read from DATA_DIR; derived rasters are written to RESULTS_DIR.
DATA_DIR, RESULTS_DIR = Path("data"), Path("results")
RESULTS_DIR.mkdir(exist_ok=True)  # no error if the folder already exists
print(DATA_DIR)

# =============================================================================
# COORDINATE REFERENCE SYSTEM
# =============================================================================
# Common projected CRS for every layer (UTM 32N).
TARGET_CRS = "EPSG:32632"

# =============================================================================
# PROCESSING PARAMETERS
# =============================================================================
COHERENCE_THRESHOLD = 0.3  # minimum COHER for a SAOCOM point to be kept
NODATA = -9999             # fill value for raster cells without data
GRID_SIZE = 10             # analysis grid cell size, meters

# =============================================================================
# CORINE LAND COVER CLASSES
# =============================================================================
# Maps three-digit CORINE Land Cover codes to short display labels.
# Entries are grouped by the leading digit of the code.
CORINE_CLASSES = {
    # 1xx - artificial surfaces
    111: 'Continuous urban fabric',
    112: 'Discontinuous urban fabric',
    121: 'Industrial or commercial units',
    122: 'Road and rail networks and associated land',
    123: 'Port areas',
    124: 'Airports',
    131: 'Mineral extraction sites',
    132: 'Dump sites',
    133: 'Construction sites',
    141: 'Green urban areas',
    142: 'Sport and leisure facilities',
    # 2xx - agricultural areas
    211: 'Non-irrigated arable land',
    212: 'Permanently irrigated land',
    213: 'Rice fields',
    221: 'Vineyards',
    222: 'Fruit trees and berry plantations',
    223: 'Olive groves',
    231: 'Pastures',
    241: 'Annual crops associated with permanent crops',
    242: 'Complex cultivation patterns',
    243: 'Agriculture/natural vegetation mix',
    244: 'Agro-forestry areas',
    # 3xx - forests and semi-natural areas
    311: 'Broad-leaved forest',
    312: 'Coniferous forest',
    313: 'Mixed forest',
    321: 'Natural grasslands',
    322: 'Moors and heathland',
    323: 'Sclerophyllous vegetation',
    324: 'Transitional woodland-shrub',
    331: 'Beaches, dunes, sands',
    332: 'Bare rocks',
    333: 'Sparsely vegetated areas',
    334: 'Burnt areas',
    335: 'Glaciers and perpetual snow',
    # 4xx - wetlands
    411: 'Inland marshes',
    412: 'Peat bogs',
    421: 'Salt marshes',
    422: 'Salines',
    423: 'Intertidal flats',
    # 5xx - water bodies
    511: 'Water courses',
    512: 'Water bodies',
    521: 'Coastal lagoons',
    522: 'Estuaries',
    523: 'Sea and ocean',
}

# CORINE_COLORS = {
#     111: (230, 0, 77), 112: (255, 0, 0), 121: (204, 77, 242), 122: (204, 0, 0),
#     123: (230, 204, 204), 124: (230, 204, 230), 131: (166, 0, 204), 132: (166, 77, 0),
#     133: (255, 77, 255), 141: (255, 166, 255), 142: (255, 230, 255),
#     211: (255, 255, 168), 212: (255, 255, 0), 213: (230, 230, 0), 221: (230, 128, 0),
#     222: (242, 166, 77), 223: (230, 166, 0), 231: (230, 230, 77), 241: (255, 230, 166),
#     242: (255, 230, 77), 243: (230, 204, 77), 244: (242, 204, 166),
#     311: (128, 255, 0), 312: (0, 166, 0), 313: (77, 255, 0), 321: (204, 242, 77),
#     322: (166, 255, 128), # Note: Corrected color for 322 (Moors and heathland) as per standard CLC. Your original color for key 27, (166, 230, 77), corresponds to 323 (Sclerophyllous vegetation)
#     323: (166, 230, 77),
#     324: (166, 242, 0), # Note: Corrected color for 324 (Transitional woodland-shrub) as per standard CLC. Your original color for key 29, (0, 204, 0), does not match standard CLC color for this category.
#     331: (230, 230, 230), # Note: Corrected color for 331 (Beaches, dunes, sands) as per standard CLC. Your original color for key 30, (240, 240, 240), is slightly different.
#     332: (204, 204, 204), 333: (204, 255, 204), 334: (0, 0, 0), 335: (166, 230, 204),
#     411: (166, 166, 255), 412: (77, 77, 255), 421: (204, 204, 255), 422: (230, 230, 255),
#     423: (166, 166, 230), 511: (0, 204, 242), 512: (128, 242, 230), 521: (0, 255, 166),
#     522: (166, 255, 230), 523: (230, 242, 255) # Note: Corrected color for 523 (Sea and ocean) as per standard CLC. Your original color for key 44 was (230, 242, 255) which is the correct one.
# }
#
# # Convert RGB (0-255) to matplotlib format (0-1)
# CORINE_COLORS_MPL = {k: (r/255, g/255, b/255) for k, (r, g, b) in CORINE_COLORS.items()}
# Colorblind-friendly palette; it replaces the standard palette that is commented out above.
# Colorblind-friendly palette: CORINE class code -> (R, G, B) with channels in
# 0-255. Each level-1 group (1xx ... 5xx) gets its own hue family, as labelled
# on the section comments below.
CORINE_COLORS_COLORBLIND = {
    # ARTIFICIAL SURFACES (1xx) - Purples/Magentas/Grays
    111: (102, 0, 102),      # Dark purple - Continuous urban
    112: (153, 51, 153),     # Medium purple - Discontinuous urban
    121: (204, 102, 204),    # Light purple - Industrial
    122: (80, 80, 80),       # Dark gray - Roads/rail
    123: (120, 120, 120),    # Medium gray - Ports
    124: (160, 160, 160),    # Light gray - Airports
    131: (255, 0, 255),      # Bright magenta - Mineral extraction
    132: (178, 34, 34),      # Dark red-brown - Dump sites
    133: (255, 150, 180),    # Pink - Construction sites
    141: (120, 200, 120),    # Medium green - Green urban areas
    142: (100, 180, 100),    # Green - Sport/leisure

    # AGRICULTURAL (2xx) - Yellows/Oranges/Browns
    211: (230, 230, 50),     # Strong yellow - Non-irrigated arable
    212: (235, 200, 0),      # Gold yellow - Permanently irrigated
    213: (220, 180, 0),      # Dark gold - Rice fields
    221: (255, 140, 0),      # Dark orange - Vineyards
    222: (255, 165, 79),     # Orange - Fruit trees
    223: (204, 153, 0),      # Olive-brown - Olive groves
    231: (210, 210, 80),     # Medium yellow - Pastures
    241: (200, 170, 100),    # Tan - Annual crops w/ permanent crops
    242: (210, 160, 70),     # Brown - Complex cultivation
    243: (190, 150, 80),     # Medium brown - Agriculture w/ natural vegetation
    244: (179, 143, 0),      # Dark yellow-brown - Agro-forestry

    # FORESTS & SEMI-NATURAL (3xx) - Teals/Cyans/Dark colors
    311: (0, 153, 102),      # Dark teal - Broad-leaved forest
    312: (0, 102, 76),       # Very dark teal - Coniferous forest
    313: (0, 128, 128),      # Medium teal - Mixed forest
    321: (150, 220, 150),    # Light green - Natural grasslands
    322: (102, 204, 153),    # Mint - Moors and heathland
    323: (130, 180, 130),    # Sage - Sclerophyllous vegetation
    324: (51, 153, 102),     # Medium green - Transitional woodland
    331: (210, 180, 140),    # Tan - Beaches/dunes/sands
    332: (140, 140, 140),    # Gray - Bare rocks
    333: (170, 170, 120),    # Khaki - Sparsely vegetated
    334: (40, 40, 40),       # Near black - Burnt areas
    335: (180, 210, 230),    # Light blue - Glaciers/snow

    # WETLANDS (4xx) - Light blues/cyans
    411: (120, 170, 230),    # Sky blue - Inland marshes
    412: (80, 140, 220),     # Medium blue - Peat bogs
    421: (150, 190, 240),    # Light blue - Salt marshes
    422: (140, 170, 210),    # Powder blue - Salines
    423: (100, 160, 210),    # Cyan - Intertidal flats

    # WATER BODIES (5xx) - Dark blues
    511: (0, 102, 204),      # Dark blue - Water courses
    512: (0, 76, 153),       # Very dark blue - Water bodies
    521: (51, 102, 153),     # Medium dark blue - Coastal lagoons
    522: (0, 51, 102),       # Navy - Estuaries
    523: (0, 25, 76)         # Very dark navy - Sea and ocean
}

# Make the colorblind-friendly palette the active one, then derive the
# matplotlib form (each channel scaled from 0-255 to 0-1).
CORINE_COLORS = CORINE_COLORS_COLORBLIND
CORINE_COLORS_MPL = {
    code: (rgb[0] / 255, rgb[1] / 255, rgb[2] / 255)
    for code, rgb in CORINE_COLORS.items()
}
# =============================================================================
# FILE DISCOVERY
# =============================================================================
# Automatically locate data files in subdirectories
# Locate the input files in their per-dataset sub-folders of DATA_DIR.
# Path.glob yields entries in directory order, which is not guaranteed to be
# the same on every filesystem; sorting makes "first match" reproducible.
saocom_files = sorted((DATA_DIR / "saocom_csv").glob("*.csv"))
tinitaly_files = sorted((DATA_DIR / "tinitaly").glob("*.tif"))
copernicus_files = sorted((DATA_DIR / "copernicus").glob("*.tif"))
corine_files = sorted((DATA_DIR / "ground_cover").glob("*.tif"))
sentinel_files = sorted((DATA_DIR / "sentinel_data").glob("*.tif"))
print(corine_files, saocom_files, tinitaly_files, copernicus_files, sentinel_files)

# Use the first match (in sorted order) for each dataset; None when a folder
# has no matching file, so later cells can detect the missing input.
saocom_path = saocom_files[0] if saocom_files else None
tinitaly_path = tinitaly_files[0] if tinitaly_files else None
copernicus_path = copernicus_files[0] if copernicus_files else None
corine_path = corine_files[0] if corine_files else None
sentinel_path = sentinel_files[0] if sentinel_files else None

# Look for the attribute table that sits next to the CORINE raster,
# named "<raster file name>.vat.dbf".
corine_dbf_path = None
if corine_path:
    corine_dbf_candidates = sorted(
        (DATA_DIR / "ground_cover").glob(f"{corine_path.name}.vat.dbf")
    )
    corine_dbf_path = corine_dbf_candidates[0] if corine_dbf_candidates else None



In [None]:
# =============================================================================
# LOAD SAOCOM POINT DATA
# =============================================================================
# Read CSV and standardize columns
# Fail early with a clear message instead of letting read_csv choke on None.
if saocom_path is None:
    raise FileNotFoundError(f"No SAOCOM CSV found in {DATA_DIR / 'saocom_csv'}")

# Read the CSV and impose the expected 11-column layout. Assigning the names
# raises if the file does not have exactly 11 columns.
df = pd.read_csv(saocom_path, sep=',')
df.columns = ['ID', 'SVET', 'LVET', 'LAT', 'LAT2', 'LON', 'LON2', 'HEIGHT', 'HEIGHT_WRT_DEM', 'SIGMA_HEIGHT', 'COHER']

# Coerce the columns used downstream to numbers; unparseable cells become NaN
# and those rows are dropped, together with rows whose LAT2/LON2 is exactly 0.
numeric_cols = ['LAT', 'LON', 'LAT2', 'LON2', 'HEIGHT', 'COHER']
for col in numeric_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')
df = df.dropna(subset=numeric_cols)
df = df[(df['LAT2'] != 0) & (df['LON2'] != 0)]

# LAT2/LON2 become the working coordinates; the first pair is kept as *_old.
# rename() returns a new frame, so the filtered slice is not mutated in place.
df = df.rename(columns={
    'LAT': 'LAT_old',
    'LON': 'LON_old',
    'LAT2': 'LAT',
    'LON2': 'LON'
})

# Apply coherence filter (explicit copy so later column writes are safe)
df_filtered = df[df['COHER'] >= COHERENCE_THRESHOLD].copy()

# Build point geometries in one vectorised call (x = longitude, y = latitude),
# then reproject from geographic coordinates to the target CRS.
geometry = gpd.points_from_xy(df_filtered['LON'], df_filtered['LAT'])
saocom_gdf = gpd.GeoDataFrame(df_filtered, geometry=geometry, crs='EPSG:4326')
saocom_gdf = saocom_gdf.to_crs(TARGET_CRS)
saocom_gdf['x_utm'] = saocom_gdf.geometry.x
saocom_gdf['y_utm'] = saocom_gdf.geometry.y

# =============================================================================
# LOAD REFERENCE DEMS
# =============================================================================
# TINITALY
# Only header metadata is read here; pixel values are loaded later.
with rasterio.open(tinitaly_path) as src:
    tinitaly_crs, tinitaly_res, tinitaly_bounds, tinitaly_nodata = (
        src.crs, src.res, src.bounds, src.nodata
    )

# Copernicus: same four header fields
with rasterio.open(copernicus_path) as src:
    copernicus_crs, copernicus_res, copernicus_bounds, copernicus_nodata = (
        src.crs, src.res, src.bounds, src.nodata
    )
# # =============================================================================
# # LOAD AND REMAP CORINE LAND COVER
# # =============================================================================
# # Load DBF lookup table
# dbf_table = DBF(corine_dbf_path, load=True)
# lookup_df = pd.DataFrame(iter(dbf_table))
#
# # Create mapping dictionaries
# value_to_code = dict(zip(lookup_df['Value'], lookup_df['CODE_18']))
# value_to_label = dict(zip(lookup_df['Value'], lookup_df['LABEL3']))
# code_to_label = dict(zip(lookup_df['CODE_18'], lookup_df['LABEL3']))
#
# # Load original CORINE raster
# with rasterio.open(corine_path) as src:
#     corine_raw = src.read(1)
#     corine_crs = src.crs
#     corine_res = src.res
#     corine_bounds = src.bounds
#     corine_nodata = src.nodata if src.nodata is not None else 255
#     corine_transform = src.transform
#     corine_profile = src.profile
#
# # Remap raster values from Value to CODE_18
# corine_remapped = np.full_like(corine_raw, 0, dtype=np.uint16)
# for value, code in value_to_code.items():
#     corine_remapped[corine_raw == value] = code
# corine_remapped[corine_raw == corine_nodata] = 0  # NoData = 0
#
# # Save remapped CORINE to temporary file
# corine_remapped_path = RESULTS_DIR / "corine_remapped.tif"
# profile_remapped = corine_profile.copy()
# profile_remapped.update(dtype='uint16', nodata=0)
# with rasterio.open(corine_remapped_path, 'w', **profile_remapped) as dst:
#     dst.write(corine_remapped, 1)
#
# # Update corine_path to use remapped version
# corine_path = corine_remapped_path

In [None]:
from sklearn.neighbors import NearestNeighbors

def remove_isolated_knn(gdf, k=5, distance_threshold=100):
    """Drop points whose mean distance to their k nearest neighbours is too large.

    Args:
        gdf: GeoDataFrame of point geometries.
        k: Number of neighbours to average over. When the frame has fewer
            than ``k + 1`` points, all available neighbours are used instead.
        distance_threshold: A point is kept only if its mean neighbour
            distance is strictly below this value (same units as the
            coordinates of ``gdf``).

    Returns:
        The retained rows with a fresh 0..n-1 index. A frame with fewer than
        two points has no neighbours at all, so every point counts as
        isolated and an empty frame is returned.
    """
    # NearestNeighbors raises when asked for more neighbours than there are
    # samples, so clamp the request to what the data can provide.
    n_neighbors = min(k, len(gdf) - 1)
    if n_neighbors < 1:
        return gdf.iloc[0:0].reset_index(drop=True)

    coords = np.column_stack((gdf.geometry.x.to_numpy(), gdf.geometry.y.to_numpy()))

    # +1 because each point is returned as its own nearest neighbour.
    nbrs = NearestNeighbors(n_neighbors=n_neighbors + 1).fit(coords)
    distances, _ = nbrs.kneighbors(coords)

    # Column 0 is the zero distance to the point itself; average the rest.
    avg_distances = distances[:, 1:].mean(axis=1)

    keep = avg_distances < distance_threshold
    return gdf[keep].reset_index(drop=True)


# =============================================================================
# HORIZONTAL DATUM VERIFICATION
# =============================================================================
# Check if datasets need reprojection to target CRS
# Compare parsed CRS objects rather than their string forms: str(crs) only
# equals 'EPSG:32632' when the file stores the CRS with that authority code,
# so an identical CRS that serialises as WKT would be flagged by mistake.
_target_crs = rasterio.crs.CRS.from_string(TARGET_CRS)
tinitaly_needs_reproject = not (tinitaly_crs == _target_crs)
copernicus_needs_reproject = not (copernicus_crs == _target_crs)
# corine_needs_reproject = str(corine_crs) != TARGET_CRS

# =============================================================================
# VERTICAL DATUM VERIFICATION
# =============================================================================
# Extract vertical datum information from CRS WKT
tinitaly_wkt = tinitaly_crs.to_wkt()
copernicus_wkt = copernicus_crs.to_wkt()

# True when the WKT mentions 'EGM2008' or (case-insensitively) 'geoid'.
tinitaly_vertical = 'EGM2008' in tinitaly_wkt or 'geoid' in tinitaly_wkt.lower()
copernicus_vertical = 'EGM2008' in copernicus_wkt or 'geoid' in copernicus_wkt.lower()

# Copernicus GLO-30 uses EGM2008 geoid (documented)
# TINITALY typically uses WGS84 ellipsoid (documented)

# =============================================================================
# CREATE STUDY AREA BOUNDS AND CONVEX HULL
# =============================================================================
# NOTE: the bounding box is taken BEFORE the isolation filter below, so the
# grid later derived from study_bounds still spans any points that
# remove_isolated_knn() drops; the convex hull is built from the points that
# remain AFTER filtering.
study_bounds = saocom_gdf.total_bounds  # [xmin, ymin, xmax, ymax]
study_area_poly = box(*study_bounds)
study_area_gdf = gpd.GeoDataFrame([1], geometry=[study_area_poly], crs=TARGET_CRS)
# Keep only points whose mean distance to their 5 nearest neighbours is below
# 100 (coordinate units of TARGET_CRS).
saocom_gdf = remove_isolated_knn(saocom_gdf, k=5, distance_threshold=100)
# Create convex hull from the remaining SAOCOM points for masking
data_hull = saocom_gdf.unary_union.convex_hull
hull_gdf = gpd.GeoDataFrame(geometry=[data_hull], crs=TARGET_CRS)

# =============================================================================
# DEFINE 10M GRID PARAMETERS
# =============================================================================
# Snap the study bounds outward to whole multiples of GRID_SIZE: lower edges
# are rounded down, upper edges are rounded up.
xmin_grid, ymin_grid = (
    np.floor(_edge / GRID_SIZE) * GRID_SIZE for _edge in study_bounds[:2]
)
xmax_grid, ymax_grid = (
    np.ceil(_edge / GRID_SIZE) * GRID_SIZE for _edge in study_bounds[2:]
)

# Number of cells along each axis, and the affine transform of the grid.
grid_width = int((xmax_grid - xmin_grid) / GRID_SIZE)
grid_height = int((ymax_grid - ymin_grid) / GRID_SIZE)
target_transform = from_bounds(
    xmin_grid, ymin_grid, xmax_grid, ymax_grid, grid_width, grid_height
)

# =============================================================================
# STORE REFERENCE DATASET METADATA
# =============================================================================
# Registry of the two reference DEMs, keyed by dataset name.
# NOTE(review): the vertical_datum strings are hard-coded here and are not
# derived from the WKT checks above - confirm them against the dataset docs.
reference_dems = {
    _name: {
        'path': _path,
        'crs': _crs,
        'needs_reproject': _needs_reproject,
        'vertical_datum': _vertical_datum,
    }
    for _name, _path, _crs, _needs_reproject, _vertical_datum in (
        ('tinitaly_crop', tinitaly_path, tinitaly_crs,
         tinitaly_needs_reproject, 'WGS84 ellipsoid'),
        ('copernicus', copernicus_path, copernicus_crs,
         copernicus_needs_reproject, 'EGM2008 geoid'),
    )
}

In [None]:
# =============================================================================
# RESAMPLE TINITALY TO 10M
# =============================================================================
# GeoTIFF profile shared by every raster written on the analysis grid.
profile = {
    'driver': 'GTiff', 'dtype': 'float32', 'width': grid_width, 'height': grid_height,
    'count': 1, 'crs': TARGET_CRS, 'transform': target_transform,
    'nodata': NODATA, 'compress': 'lzw'
}

# Destination arrays start out entirely NODATA; reproject() fills them.
tinitaly_10m = np.full((grid_height, grid_width), NODATA, dtype=np.float32)
copernicus_10m = np.full((grid_height, grid_width), NODATA, dtype=np.float32)

tinitaly_10m_path = RESULTS_DIR / "tinitaly_10m.tif"
copernicus_10m_path = RESULTS_DIR / "copernicus_10m.tif"

# Both DEMs go through the same steps: warp band 1 onto the target grid with
# cubic resampling, then write the result to disk.
for _dem_path, _dem_grid, _out_path in (
    (tinitaly_path, tinitaly_10m, tinitaly_10m_path),
    (copernicus_path, copernicus_10m, copernicus_10m_path),
):
    with rasterio.open(_dem_path) as src:
        reproject(
            source=rasterio.band(src, 1),
            destination=_dem_grid,
            src_transform=src.transform,
            src_crs=src.crs,
            dst_transform=target_transform,
            dst_crs=TARGET_CRS,
            resampling=Resampling.cubic,
            src_nodata=src.nodata,
            dst_nodata=NODATA
        )
    with rasterio.open(_out_path, 'w', **profile) as dst:
        dst.write(_dem_grid, 1)

# Record where the resampled rasters were written.
reference_dems['tinitaly_crop'].update(resampled_path=tinitaly_10m_path, is_10m=True)
reference_dems['copernicus'].update(resampled_path=copernicus_10m_path, is_10m=True)

In [None]:
# =============================================================================
# CREATE RASTERIZED MASK FROM SAOCOM CONVEX HULL
# =============================================================================
# Rasterize the convex hull polygon to match the 10m grid
# Burn the convex hull onto the analysis grid. all_touched=True marks every
# cell the polygon touches; the result is True inside the hull.
hull_mask = features.rasterize(
    shapes=[data_hull],
    out_shape=(grid_height, grid_width),
    transform=target_transform,
    fill=0,
    all_touched=True,
    dtype=np.uint8
).astype(bool)

# Keep DEM values inside the hull and set everything outside to NODATA.
tinitaly_10m_masked = np.where(hull_mask, tinitaly_10m, np.float32(NODATA))
copernicus_10m_masked = np.where(hull_mask, copernicus_10m, np.float32(NODATA))

tinitaly_masked_path = RESULTS_DIR / "tinitaly_10m_masked.tif"
copernicus_masked_path = RESULTS_DIR / "copernicus_10m_masked.tif"

# Write both masked rasters with the shared grid profile.
for _masked_grid, _masked_path in (
    (tinitaly_10m_masked, tinitaly_masked_path),
    (copernicus_10m_masked, copernicus_masked_path),
):
    with rasterio.open(_masked_path, 'w', **profile) as dst:
        dst.write(_masked_grid, 1)

# Record the masked outputs next to the other per-dataset paths.
reference_dems['tinitaly_crop']['masked_path'] = tinitaly_masked_path
reference_dems['copernicus']['masked_path'] = copernicus_masked_path

# From here on the in-memory DEM arrays are the hull-masked versions.
tinitaly_10m, copernicus_10m = tinitaly_10m_masked, copernicus_10m_masked
# corine_10m = corine_10m_masked

In [None]:
# =============================================================================
# SAMPLE REFERENCE DEMS AT SAOCOM LOCATIONS
# =============================================================================
# Sample TINITALY at each SAOCOM point
def _sample_grid_at_points(grid, rows, cols, inside):
    """Return grid values at (rows, cols) as float64, NaN where unavailable.

    A point gets NaN when it falls outside the grid (``inside`` is False) or
    when the cell it lands in holds NODATA.
    """
    sampled = np.full(rows.shape, np.nan, dtype=np.float64)
    values = grid[rows[inside], cols[inside]].astype(np.float64)
    values[values == NODATA] = np.nan
    sampled[inside] = values
    return sampled


# Grid indices of every SAOCOM point, computed once for both DEMs. Coerced to
# integer arrays so boolean masking and fancy indexing work whatever sequence
# type rowcol returns.
_pt_rows, _pt_cols = rowcol(target_transform,
                            saocom_gdf.geometry.x.values,
                            saocom_gdf.geometry.y.values)
_pt_rows = np.asarray(_pt_rows, dtype=np.intp)
_pt_cols = np.asarray(_pt_cols, dtype=np.intp)
_pt_inside = ((_pt_rows >= 0) & (_pt_rows < grid_height) &
              (_pt_cols >= 0) & (_pt_cols < grid_width))

# Sample TINITALY at each SAOCOM point
tinitaly_heights = _sample_grid_at_points(tinitaly_10m, _pt_rows, _pt_cols, _pt_inside)
saocom_gdf['tinitaly_height'] = tinitaly_heights

# Sample Copernicus at each SAOCOM point
copernicus_heights = _sample_grid_at_points(copernicus_10m, _pt_rows, _pt_cols, _pt_inside)
saocom_gdf['copernicus_height'] = copernicus_heights

# Expose HEIGHT under a clearer name (the original column is kept as well)
saocom_gdf['HEIGHT_RELATIVE'] = saocom_gdf['HEIGHT']

# =============================================================================
# CALIBRATE SAOCOM TO TINITALY (Method 1: Constant Offset)
# =============================================================================
# Filter to stable points: high coherence, valid reference data
# Calibration points: coherence >= 0.8, a valid TINITALY sample, a valid
# relative height, and |relative height| < 1000 to drop extreme outliers.
_tin_coherent = saocom_gdf['COHER'] >= 0.8
_tin_sampled = saocom_gdf['tinitaly_height'].notna()
_tin_has_height = saocom_gdf['HEIGHT_RELATIVE'].notna()
_tin_plausible = saocom_gdf['HEIGHT_RELATIVE'].abs() < 1000
stable_mask_tin = _tin_coherent & _tin_sampled & _tin_has_height & _tin_plausible

stable_points_tin = saocom_gdf.loc[stable_mask_tin].copy()

# The constant offset is the median of (reference - relative); the median
# keeps a few bad points from dominating the estimate.
height_diff_tin = (
    stable_points_tin['tinitaly_height'] - stable_points_tin['HEIGHT_RELATIVE']
)
offset_tinitaly = np.median(height_diff_tin)

print(f"\nTINITALY Calibration:")
print(f"  Stable points used: {len(stable_points_tin):,}")
print(f"  Constant offset: {offset_tinitaly:.3f} m")
print(f"  Offset std dev: {np.std(height_diff_tin):.3f} m")

# Shift every SAOCOM point (not only the stable ones) by the offset.
saocom_gdf['HEIGHT_ABSOLUTE_TIN'] = offset_tinitaly + saocom_gdf['HEIGHT_RELATIVE']

# =============================================================================
# CALIBRATE SAOCOM TO COPERNICUS (Method 1: Constant Offset)
# =============================================================================
# Same selection rule as for TINITALY, using the Copernicus samples.
_cop_coherent = saocom_gdf['COHER'] >= 0.8
_cop_sampled = saocom_gdf['copernicus_height'].notna()
_cop_has_height = saocom_gdf['HEIGHT_RELATIVE'].notna()
_cop_plausible = saocom_gdf['HEIGHT_RELATIVE'].abs() < 1000
stable_mask_cop = _cop_coherent & _cop_sampled & _cop_has_height & _cop_plausible

stable_points_cop = saocom_gdf.loc[stable_mask_cop].copy()

# Median of (reference - relative) over the stable points.
height_diff_cop = (
    stable_points_cop['copernicus_height'] - stable_points_cop['HEIGHT_RELATIVE']
)
offset_copernicus = np.median(height_diff_cop)

print(f"\nCOPERNICUS Calibration:")
print(f"  Stable points used: {len(stable_points_cop):,}")
print(f"  Constant offset: {offset_copernicus:.3f} m")
print(f"  Offset std dev: {np.std(height_diff_cop):.3f} m")

# Shift every SAOCOM point by the Copernicus-derived offset.
saocom_gdf['HEIGHT_ABSOLUTE_COP'] = offset_copernicus + saocom_gdf['HEIGHT_RELATIVE']

# =============================================================================
# VALIDATION: CALCULATE RMSE AT STABLE POINTS
# =============================================================================
# TINITALY validation
# Residual = reference height - calibrated SAOCOM height, evaluated on the
# same stable points that were used to estimate each offset.
_calibrated_tin = stable_points_tin['HEIGHT_RELATIVE'] + offset_tinitaly
residuals_tin = stable_points_tin['tinitaly_height'] - _calibrated_tin
rmse_tin = np.sqrt(np.mean(np.square(residuals_tin)))

_calibrated_cop = stable_points_cop['HEIGHT_RELATIVE'] + offset_copernicus
residuals_cop = stable_points_cop['copernicus_height'] - _calibrated_cop
rmse_cop = np.sqrt(np.mean(np.square(residuals_cop)))

print(f"\nValidation Results:")
print(f"  TINITALY RMSE: {rmse_tin:.3f} m")
print(f"  Copernicus RMSE: {rmse_cop:.3f} m")
# NOTE: this recommendation is a fixed message; it is not derived from the
# RMSE values printed above.
print(f"\nRecommendation: Use HEIGHT_ABSOLUTE_TIN (lower RMSE expected)")

In [None]:
# =============================================================================
# CREATE SAOCOM COVERAGE GRID
# =============================================================================
# Initialize coverage array matching the 10m grid
# Boolean grid on the analysis grid: True where at least one SAOCOM point falls.
saocom_coverage = np.zeros((grid_height, grid_width), dtype=bool)

# Convert SAOCOM points to grid indices. Depending on the rasterio release,
# rowcol may hand back plain lists, which cannot be compared element-wise or
# used as index arrays, so coerce to integer arrays first.
saocom_rows, saocom_cols = rowcol(target_transform,
                                   saocom_gdf.geometry.x.values,
                                   saocom_gdf.geometry.y.values)
saocom_rows = np.asarray(saocom_rows, dtype=np.intp)
saocom_cols = np.asarray(saocom_cols, dtype=np.intp)

# Mark cells with SAOCOM data, ignoring any point that falls off the grid
valid_indices = ((saocom_rows >= 0) & (saocom_rows < grid_height) &
                 (saocom_cols >= 0) & (saocom_cols < grid_width))
saocom_coverage[saocom_rows[valid_indices], saocom_cols[valid_indices]] = True

# =============================================================================
# CALCULATE OVERALL VOID STATISTICS
# =============================================================================
# Study area mask (inside hull, excluding nodata)
# The study area is every cell inside the rasterized hull. DEM nodata is not
# taken into account here.
study_area_mask = hull_mask

# A void is a study-area cell that holds no SAOCOM point.
void_mask = np.logical_and(study_area_mask, np.logical_not(saocom_coverage))

n_total_cells = study_area_mask.sum()
n_occupied_cells = np.logical_and(study_area_mask, saocom_coverage).sum()
n_void_cells = void_mask.sum()

# Guard against an empty study area to avoid dividing by zero.
if n_total_cells > 0:
    void_percentage = 100 * n_void_cells / n_total_cells
else:
    void_percentage = 0
print(void_percentage)


# =============================================================================
# SAVE VOID MASK AS RASTER
# =============================================================================
void_mask_path = RESULTS_DIR / "saocom_void_mask.tif"

# Same grid as the DEM rasters, stored as bytes with 255 as nodata.
profile_void = {**profile, 'dtype': 'uint8', 'nodata': 255}

# Encoding: 1 = void cell, 0 = study-area cell with data, 255 = outside the
# study area. void_mask is tested first because it is a subset of the study area.
void_raster = np.select(
    [void_mask, study_area_mask], [1, 0], default=255
).astype(np.uint8)

with rasterio.open(void_mask_path, 'w', **profile_void) as dst:
    dst.write(void_raster, 1)

In [None]:
# =============================================================================
# LOAD REFERENCE DEM DATA (Already in memory from Cell 4)
# =============================================================================
# Work on copies so the shared in-memory DEM arrays stay untouched.
tinitaly_data, copernicus_data = tinitaly_10m.copy(), copernicus_10m.copy()

# Cell-wise TINITALY - Copernicus. Cells where either DEM is NODATA hold
# meaningless values and are excluded through valid_mask below.
elevation_diff = np.subtract(tinitaly_data, copernicus_data)

# A cell is comparable only when neither DEM is NODATA there.
valid_mask = ~((tinitaly_data == NODATA) | (copernicus_data == NODATA))

# Flattened views of the comparable cells, used for the statistics.
valid_pixels = int(np.count_nonzero(valid_mask))
valid_diffs = elevation_diff[valid_mask]
valid_tinitaly = tinitaly_data[valid_mask]
valid_copernicus = copernicus_data[valid_mask]
print(valid_diffs)
# =============================================================================
# CALCULATE REFERENCE COMPARISON STATISTICS
# =============================================================================
# Summary statistics of the TINITALY - Copernicus differences over the
# comparable cells. Every entry is converted to a built-in Python number.
_median_diff = np.median(valid_diffs)
_abs_diffs = np.abs(valid_diffs)
ref_metrics = {
    'n_pixels': int(valid_pixels),
    'mean_diff': float(np.mean(valid_diffs)),
    'median_diff': float(_median_diff),
    'std_diff': float(np.std(valid_diffs)),
    'rmse': float(np.sqrt(np.mean(valid_diffs**2))),
    'mae': float(np.mean(_abs_diffs)),
    # NMAD: 1.4826 * median absolute deviation from the median
    'nmad': float(1.4826 * np.median(np.abs(valid_diffs - _median_diff))),
    'min_diff': float(np.min(valid_diffs)),
    'max_diff': float(np.max(valid_diffs)),
    'correlation': float(np.corrcoef(valid_tinitaly, valid_copernicus)[0, 1])
}

# =============================================================================
# DEFINE EQUALITY TOLERANCE USING NMAD
# =============================================================================
# Differences no larger than the NMAD are treated as "roughly equal".
equal_tolerance = ref_metrics['nmad']
print(equal_tolerance)
# =============================================================================
# DIRECTIONAL COMPARISON GRIDS WITH EQUALITY BUFFER
# =============================================================================
# Where TINITALY significantly > Copernicus
# Split the comparable cells into three classes by signed difference:
# above +tolerance, below -tolerance, or within the tolerance band.
tinitaly_higher_mask = np.logical_and(valid_mask, elevation_diff > equal_tolerance)
tinitaly_lower_mask = np.logical_and(valid_mask, elevation_diff < -equal_tolerance)
roughly_equal_mask = np.logical_and(valid_mask, np.abs(elevation_diff) <= equal_tolerance)

# One grid per class: the difference where the class applies, NaN elsewhere.
tinitaly_higher_data = elevation_diff.copy()
tinitaly_higher_data[~tinitaly_higher_mask] = np.nan

tinitaly_lower_data = elevation_diff.copy()
tinitaly_lower_data[~tinitaly_lower_mask] = np.nan

roughly_equal_data = elevation_diff.copy()
roughly_equal_data[~roughly_equal_mask] = np.nan

# Cell counts per class
higher_pixels = int(np.sum(tinitaly_higher_mask))
lower_pixels = int(np.sum(tinitaly_lower_mask))
equal_pixels = int(np.sum(roughly_equal_mask))

# Shares of the comparable cells; all zero when there is nothing to compare.
if valid_pixels > 0:
    pct_higher = float(100 * higher_pixels / valid_pixels)
    pct_lower = float(100 * lower_pixels / valid_pixels)
    pct_equal = float(100 * equal_pixels / valid_pixels)
else:
    pct_higher = pct_lower = pct_equal = 0.0

# =============================================================================
# HEIGHT STATISTICS COMPARISON
# =============================================================================
def calculate_height_stats(data, name):
    """Calculate comprehensive height statistics over the non-NaN values.

    Args:
        data: Array-like of heights (NumPy array, pandas values, list, ...).
            NaN entries are ignored.
        name: Label stored under the ``'Dataset'`` key of the result.

    Returns:
        A dict with the keys ``Dataset``, ``Count``, ``Min``, ``Max``,
        ``Mean``, ``Median``, ``Std Dev`` (NumPy default, ddof=0), ``Range``,
        ``Q25``, ``Q75`` and ``IQR``; or ``None`` when no non-NaN value is
        present.
    """
    # asanyarray lets plain sequences be boolean-indexed while leaving
    # ndarray subclasses untouched.
    values = np.asanyarray(data)
    valid_data = values[~np.isnan(values)]

    if len(valid_data) == 0:
        return None

    # Compute each order statistic once and reuse it for the derived fields.
    lowest = np.min(valid_data)
    highest = np.max(valid_data)
    q25, q75 = np.percentile(valid_data, [25, 75])

    return {
        'Dataset': name,
        'Count': len(valid_data),
        'Min': lowest,
        'Max': highest,
        'Mean': np.mean(valid_data),
        'Median': np.median(valid_data),
        'Std Dev': np.std(valid_data),
        'Range': highest - lowest,
        'Q25': q25,
        'Q75': q75,
        'IQR': q75 - q25,
    }

def _print_difference_stats(title, differences, ddof):
    """Print mean, median, standard deviation and RMSE of ``differences``.

    ``ddof`` is forwarded to the standard deviation so that each comparison
    keeps the convention it was reported with.
    """
    values = np.asarray(differences)
    print(f"\n{title}:")
    print(f"  Mean difference: {values.mean():+.3f} m")
    print(f"  Median difference: {np.median(values):+.3f} m")
    print(f"  Std deviation: {values.std(ddof=ddof):.3f} m")
    print(f"  RMSE: {np.sqrt((values**2).mean()):.3f} m")


# Collect statistics
# Full-raster samples (within study area), excluding the NODATA sentinel
tinitaly_valid = tinitaly_10m[tinitaly_10m != NODATA]
copernicus_valid = copernicus_10m[copernicus_10m != NODATA]

height_sources = [
    # SAOCOM relative heights
    ('SAOCOM (Relative)', saocom_gdf['HEIGHT_RELATIVE'].values),
    # Reference DEMs sampled at the SAOCOM points
    ('TINITALY (at SAOCOM pts)', saocom_gdf['tinitaly_height'].values),
    ('Copernicus (at SAOCOM pts)', saocom_gdf['copernicus_height'].values),
    # Reference DEMs over the full grid
    ('TINITALY (Full Grid)', tinitaly_valid),
    ('Copernicus (Full Grid)', copernicus_valid),
]

# calculate_height_stats returns None for a dataset without any valid value;
# such an entry cannot be turned into a table row, so it is left out.
stats_list = []
for source_label, source_values in height_sources:
    source_summary = calculate_height_stats(source_values, source_label)
    if source_summary is not None:
        stats_list.append(source_summary)

# Create DataFrame
stats_df = pd.DataFrame(stats_list)

# Display with formatting
print("\n" + "="*90)
print("HEIGHT STATISTICS SUMMARY (all values in meters)")
print("="*90)
print(stats_df.to_string(index=False, float_format=lambda x: f'{x:.2f}'))
print("="*90)

# Additional comparison: SAOCOM vs Reference DEMs
print("\nDIFFERENCE STATISTICS (SAOCOM Relative - Reference DEM):")
print("-"*90)

# The two point-wise comparisons were reported with pandas' sample standard
# deviation (ddof=1), the raster check with NumPy's population standard
# deviation (ddof=0); both conventions are kept so the printed values stay
# comparable with earlier runs.

# SAOCOM - TINITALY
diff_tin = saocom_gdf['HEIGHT_RELATIVE'] - saocom_gdf['tinitaly_height']
diff_tin_valid = diff_tin.dropna()
_print_difference_stats("SAOCOM - TINITALY", diff_tin_valid, ddof=1)

# SAOCOM - Copernicus
diff_cop = saocom_gdf['HEIGHT_RELATIVE'] - saocom_gdf['copernicus_height']
diff_cop_valid = diff_cop.dropna()
_print_difference_stats("SAOCOM - Copernicus", diff_cop_valid, ddof=1)

# TINITALY - Copernicus (reference comparison)
ref_diff = (tinitaly_10m[valid_mask] - copernicus_10m[valid_mask])
ref_diff_valid = ref_diff[~np.isnan(ref_diff)]
_print_difference_stats("TINITALY - Copernicus (Reference Check)", ref_diff_valid, ddof=0)

In [None]:
# -----------------------------------------------------------------------------
# 3. LOAD DBF LOOKUP TABLE
# -----------------------------------------------------------------------------
dbf_table = DBF(corine_dbf_path, load=True)
lookup_df = pd.DataFrame(iter(dbf_table))

# Create mapping dictionaries (raster value -> class code / label)
value_to_code = dict(zip(lookup_df['Value'], lookup_df['CODE_18']))
value_to_label = dict(zip(lookup_df['Value'], lookup_df['LABEL3']))
code_to_label = dict(zip(lookup_df['CODE_18'], lookup_df['LABEL3']))

# -----------------------------------------------------------------------------
# 4. LOAD, MASK, AND REMAP CORINE (OPTIMIZED)
# -----------------------------------------------------------------------------
with rasterio.open(corine_path) as src:
    # Reproject hull to match CORINE CRS
    hull_corine_crs = hull_gdf.to_crs(src.crs)

    # Crop to study area using convex hull FIRST (faster processing).
    # filled=False keeps the original pixel values and reports the area
    # outside the hull through the array's mask instead.
    corine_raw, crop_transform = mask(src, hull_corine_crs.geometry, crop=True, filled=False)
    corine_raw = corine_raw[0]  # Extract from (1, h, w) to (h, w)

    corine_crs = src.crs
    corine_res = src.res
    corine_nodata = src.nodata if src.nodata is not None else 255
    corine_bounds = src.bounds

# Work on the plain values plus an explicit boolean mask rather than relying
# on masked-array semantics inside comparisons and assignments.
corine_values = np.ma.getdata(corine_raw)
inside_hull = ~np.ma.getmaskarray(corine_raw)

# Remap values: Value -> CODE_18 (on cropped array); 0 marks NoData and
# everything outside the hull.
corine_remapped = np.zeros(corine_values.shape, dtype=np.uint16)
for value, code in value_to_code.items():
    # int() so that a code stored as text in the DBF is written as a number
    corine_remapped[(corine_values == value) & inside_hull] = int(code)
corine_remapped[corine_values == corine_nodata] = 0  # NoData = 0

# Save intermediate remapped version
corine_remapped_path = RESULTS_DIR / "corine_remapped_cropped.tif"
profile_remapped = {
    'driver': 'GTiff', 'dtype': 'uint16',
    'width': corine_remapped.shape[1], 'height': corine_remapped.shape[0],
    'count': 1, 'crs': corine_crs, 'transform': crop_transform,
    'nodata': 0, 'compress': 'lzw'
}
with rasterio.open(corine_remapped_path, 'w', **profile_remapped) as dst:
    dst.write(corine_remapped, 1)

# -----------------------------------------------------------------------------
# 5. RESAMPLE TO 10M GRID
# -----------------------------------------------------------------------------
corine_10m = np.full((grid_height, grid_width), 0, dtype=np.uint16)

with rasterio.open(corine_remapped_path) as src:
    reproject(
        source=rasterio.band(src, 1),
        destination=corine_10m,
        src_transform=src.transform,
        src_crs=src.crs,
        dst_transform=target_transform,
        dst_crs=TARGET_CRS,
        resampling=Resampling.nearest,  # Use nearest for categorical data
        src_nodata=0,
        dst_nodata=0
    )

# Save resampled version
corine_10m_path = RESULTS_DIR / "corine_10m.tif"
profile_10m = {
    'driver': 'GTiff', 'dtype': 'uint16',
    'width': grid_width, 'height': grid_height,
    'count': 1, 'crs': TARGET_CRS, 'transform': target_transform,
    'nodata': 0, 'compress': 'lzw'
}
with rasterio.open(corine_10m_path, 'w', **profile_10m) as dst:
    dst.write(corine_10m, 1)

# -----------------------------------------------------------------------------
# 6. MASK TO HULL
# -----------------------------------------------------------------------------
corine_10m_masked = corine_10m.copy()
corine_10m_masked[~hull_mask] = 0

# Save final masked version
corine_masked_path = RESULTS_DIR / "corine_10m_masked.tif"
with rasterio.open(corine_masked_path, 'w', **profile_10m) as dst:
    dst.write(corine_10m_masked, 1)

# Update working array
corine_10m = corine_10m_masked

# -----------------------------------------------------------------------------
# SUMMARY
# -----------------------------------------------------------------------------
# np.unique returns its result sorted; tolist() yields plain ints so the
# printed list reads the same regardless of the NumPy scalar repr.
unique_codes = np.unique(corine_10m[corine_10m > 0])
print(f"\nCORINE Processing Complete:")
print(f"  Original CRS: {corine_crs}")
print(f"  Unique classes: {len(unique_codes)}")
print(f"  Classes present: {unique_codes.tolist()}")
print(f"  Final resolution: {GRID_SIZE}m")
print(f"  Output path: {corine_masked_path}")
print(saocom_gdf)

In [None]:
# =============================================================================
# SIMPLE SPATIAL OVERLAP VISUALIZATION
# =============================================================================
import matplotlib.pyplot as plt
import rasterio
from rasterio.plot import show
from rasterio.warp import transform_bounds
from matplotlib.patches import Rectangle

fig, ax = plt.subplots(1, 1, figsize=(12, 10), facecolor='white')
ax.set_facecolor('white')

# 1. TINITALY DEM: report its footprint and express it in the target CRS
with rasterio.open(tinitaly_path) as src:
    dem_bounds_target = transform_bounds(src.crs, TARGET_CRS, *src.bounds)

    print(f"TINITALY bounds (original CRS): {src.bounds}")
    print(f"TINITALY bounds (UTM 32N): {dem_bounds_target}")
    print(f"TINITALY CRS: {src.crs}")

    # Decimated read (one tenth of the rows and columns)
    decimated_shape = (int(src.height / 10), int(src.width / 10))
    dem_data = src.read(1, out_shape=decimated_shape)

# The DEM is represented on the map by the outline of its extent
dem_left, dem_bottom, dem_right, dem_top = dem_bounds_target
dem_rect = Rectangle(
    (dem_left, dem_bottom),
    dem_right - dem_left,
    dem_top - dem_bottom,
    linewidth=3, edgecolor='blue', facecolor='none',
    label='TINITALY Extent'
)
ax.add_patch(dem_rect)

# 2. SAOCOM points
saocom_gdf.plot(ax=ax, markersize=1, color='red', alpha=0.5, label='SAOCOM Points')

# 3. Study area (convex hull)
hull_gdf.boundary.plot(ax=ax, color='green', linewidth=2, linestyle='--', label='Study Area Hull')

# Axis labels, title, legend and grid
ax.set_xlabel('UTM Easting (m)', fontsize=11)
ax.set_ylabel('UTM Northing (m)', fontsize=11)
ax.set_title('Spatial Coverage: SAOCOM vs TINITALY DEM', fontweight='bold', fontsize=14)
ax.legend(loc='best', fontsize=10)
ax.grid(True, alpha=0.3, color='gray')

# Extent of the SAOCOM point cloud, computed once and reused below
point_x = saocom_gdf.geometry.x
point_y = saocom_gdf.geometry.y
saocom_xmin, saocom_xmax = point_x.min(), point_x.max()
saocom_ymin, saocom_ymax = point_y.min(), point_y.max()

# Text box listing both extents
info_text = f"""SAOCOM Extent:
X: [{saocom_xmin:.0f}, {saocom_xmax:.0f}]
Y: [{saocom_ymin:.0f}, {saocom_ymax:.0f}]

TINITALY Extent (UTM 32N):
X: [{dem_left:.0f}, {dem_right:.0f}]
Y: [{dem_bottom:.0f}, {dem_top:.0f}]
"""
ax.text(0.02, 0.98, info_text, transform=ax.transAxes,
        fontsize=9, verticalalignment='top', fontfamily='monospace',
        bbox=dict(boxstyle='round', facecolor='white', alpha=0.9, edgecolor='black'))

plt.tight_layout()
plt.show()

# =============================================================================
# CHECK IF EXTENTS OVERLAP
# =============================================================================
# Two intervals overlap unless one lies entirely before or after the other.
print("\n=== OVERLAP CHECK ===")
overlap_x = not (saocom_xmax < dem_left or
                 saocom_xmin > dem_right)
overlap_y = not (saocom_ymax < dem_bottom or
                 saocom_ymin > dem_top)
full_overlap = overlap_x and overlap_y

print(f"X-axis overlap: {overlap_x}")
print(f"Y-axis overlap: {overlap_y}")
print(f"Full overlap: {full_overlap}")

if not full_overlap:
    print("\n⚠️ NO OVERLAP DETECTED - SAOCOM data is outside TINITALY coverage!")
    print("You need a different TINITALY tile that covers this area.")


In [None]:
# =============================================================================
# COMPREHENSIVE REFERENCE DEM COMPARISON VISUALIZATION
# =============================================================================
fig, axes = plt.subplots(4, 2, figsize=(20, 28), facecolor='white')

extent = [xmin_grid, xmax_grid, ymin_grid, ymax_grid]


def _draw_map_panel(ax, data, base_cmap, title, summary, *, cbar_label,
                    summary_fontsize, vmin=None, vmax=None):
    """Draw one raster panel with hull outline, colorbar and statistics box.

    Args:
        ax: Axes to draw on.
        data: 2-D grid shown with ``imshow`` over the module-level ``extent``.
        base_cmap: Colormap to copy; invalid cells are rendered transparent.
        title: Panel title.
        summary: Text placed in the upper-left statistics box.
        cbar_label: Colorbar label.
        summary_fontsize: Font size of the statistics box.
        vmin, vmax: Optional color limits forwarded to ``imshow``.

    Returns:
        The image object created by ``imshow``.
    """
    ax.set_facecolor('white')
    cmap = base_cmap.copy()
    cmap.set_bad(color='white', alpha=0)
    image = ax.imshow(data, cmap=cmap, origin='upper', extent=extent,
                      vmin=vmin, vmax=vmax)
    hull_gdf.boundary.plot(ax=ax, color='darkred', linewidth=2.5, label='Study Area')
    ax.set_title(title, fontweight='bold', fontsize=12, color='black')
    ax.set_xlabel('UTM Easting (m)', color='black')
    ax.set_ylabel('UTM Northing (m)', color='black')
    ax.grid(True, color='black', alpha=0.3, linewidth=0.5)
    ax.tick_params(colors='black')
    cbar = plt.colorbar(image, ax=ax, label=cbar_label, shrink=0.8)
    cbar.ax.yaxis.label.set_color('black')
    cbar.ax.tick_params(colors='black')
    ax.text(0.02, 0.98, summary, transform=ax.transAxes, fontsize=summary_fontsize,
            verticalalignment='top',
            bbox=dict(boxstyle='round', facecolor='white', alpha=0.9, edgecolor='black'))
    return image


def _elevation_summary(elevation):
    """Format min / max / mean / std of an elevation grid for a statistics box."""
    return (f"Min: {np.nanmin(elevation):.1f}m\n"
            f"Max: {np.nanmax(elevation):.1f}m\n"
            f"Mean: {np.nanmean(elevation):.1f}m\n"
            f"Std: {np.nanstd(elevation):.1f}m")


# Plot 1: TINITALY elevation
tinitaly_display = np.ma.masked_where(tinitaly_data == NODATA, tinitaly_data)
_draw_map_panel(axes[0, 0], tinitaly_display, plt.cm.terrain, 'TINITALY Elevation',
                _elevation_summary(tinitaly_display),
                cbar_label='Elevation (m)', summary_fontsize=9)

# Plot 2: Copernicus elevation
copernicus_display = np.ma.masked_where(copernicus_data == NODATA, copernicus_data)
_draw_map_panel(axes[0, 1], copernicus_display, plt.cm.terrain, 'Copernicus Elevation',
                _elevation_summary(copernicus_display),
                cbar_label='Elevation (m)', summary_fontsize=9)

# Plot 3: Difference map (full)
diff_display = np.ma.masked_where(~valid_mask, elevation_diff)
# Symmetric color limits at the 95th percentile of |difference| so that
# outliers do not wash out the map.
diff_limit = np.percentile(np.abs(valid_diffs), 95)
stats3 = f"""Pixels: {valid_pixels:,}
Mean: {ref_metrics['mean_diff']:+.2f}m
RMSE: {ref_metrics['rmse']:.2f}m
NMAD: {ref_metrics['nmad']:.2f}m
MAE: {ref_metrics['mae']:.2f}m
Std: {ref_metrics['std_diff']:.2f}m
Corr: {ref_metrics['correlation']:.3f}
Range: [{ref_metrics['min_diff']:.1f}, {ref_metrics['max_diff']:.1f}]m"""
_draw_map_panel(axes[1, 0], diff_display, plt.cm.coolwarm,
                'Elevation Difference\n(TINITALY - Copernicus)', stats3,
                cbar_label='Difference (m)', summary_fontsize=8,
                vmin=-diff_limit, vmax=diff_limit)

# Plot 4: Statistics summary (text-only panel)
ax = axes[1, 1]
ax.set_facecolor('white')
le68 = np.percentile(np.abs(valid_diffs), 68.27)
le90 = np.percentile(np.abs(valid_diffs), 90)
le95 = np.percentile(np.abs(valid_diffs), 95)
stats_text = f"""REFERENCE DEM COMPARISON

Valid Pixels: {valid_pixels:,}

CRITICAL METRICS:
Mean Error (Bias): {ref_metrics['mean_diff']:+.2f} m
RMSE: {ref_metrics['rmse']:.2f} m
NMAD (robust): {ref_metrics['nmad']:.2f} m
Std Deviation: {ref_metrics['std_diff']:.2f} m

SECONDARY METRICS:
MAE: {ref_metrics['mae']:.2f} m
Correlation: {ref_metrics['correlation']:.4f}
LE68: {le68:.2f} m
LE90: {le90:.2f} m
LE95: {le95:.2f} m
Median: {ref_metrics['median_diff']:+.2f} m

DIRECTIONAL BREAKDOWN:
(Tolerance: ±{equal_tolerance:.2f} m)
TINITALY Higher: {higher_pixels:,} ({pct_higher:.1f}%)
Copernicus Higher: {lower_pixels:,} ({pct_lower:.1f}%)
Roughly Equal: {equal_pixels:,} ({pct_equal:.1f}%)

Range: {ref_metrics['min_diff']:+.1f} to {ref_metrics['max_diff']:+.1f} m
"""
ax.text(0.05, 0.5, stats_text, transform=ax.transAxes,
        fontfamily='monospace', fontsize=9, verticalalignment='center', color='black')
ax.axis('off')
ax.set_title('Summary Statistics', fontweight='bold', fontsize=12, color='black')

# Plot 5: Where TINITALY > Copernicus
higher_vals = tinitaly_higher_data[~np.isnan(tinitaly_higher_data)]
stats5 = f"""Pixels: {higher_pixels:,} ({pct_higher:.1f}%)
Mean: {np.mean(higher_vals):.2f}m
Std: {np.std(higher_vals):.2f}m
RMSE: {np.sqrt(np.mean(higher_vals**2)):.2f}m
Max: {np.max(higher_vals):.2f}m"""
_draw_map_panel(axes[2, 0], tinitaly_higher_data, plt.cm.YlOrRd,
                'TINITALY > Copernicus', stats5,
                cbar_label='Difference (m)', summary_fontsize=8,
                vmin=0, vmax=np.nanmax(tinitaly_higher_data))

# Plot 6: Where TINITALY < Copernicus
lower_vals = tinitaly_lower_data[~np.isnan(tinitaly_lower_data)]
stats6 = f"""Pixels: {lower_pixels:,} ({pct_lower:.1f}%)
Mean: {np.mean(lower_vals):.2f}m
Std: {np.std(lower_vals):.2f}m
RMSE: {np.sqrt(np.mean(lower_vals**2)):.2f}m
Min: {np.min(lower_vals):.2f}m"""
_draw_map_panel(axes[2, 1], tinitaly_lower_data, plt.cm.Blues_r,
                'Copernicus > TINITALY', stats6,
                cbar_label='Difference (m)', summary_fontsize=8,
                vmin=np.nanmin(tinitaly_lower_data), vmax=0)

# Plot 7: Where roughly equal
equal_vals = roughly_equal_data[~np.isnan(roughly_equal_data)]
stats7 = f"""Pixels: {equal_pixels:,} ({pct_equal:.1f}%)
Mean: {np.mean(equal_vals):.2f}m
Std: {np.std(equal_vals):.2f}m
RMSE: {np.sqrt(np.mean(equal_vals**2)):.2f}m
MAE: {np.mean(np.abs(equal_vals)):.2f}m"""
_draw_map_panel(axes[3, 0], roughly_equal_data, plt.cm.Greens,
                f'Roughly Equal (±{equal_tolerance:.2f}m)', stats7,
                cbar_label='Difference (m)', summary_fontsize=8,
                vmin=-equal_tolerance, vmax=equal_tolerance)

# Plot 8: Histogram of differences
ax = axes[3, 1]
ax.set_facecolor('white')

n, bins, patches = ax.hist(valid_diffs, bins=25, alpha=0.7,
                           color='steelblue', edgecolor='black')

# Reference lines: zero, mean bias and the +/- tolerance band
ax.axvline(0, color='red', linestyle='--', linewidth=2, label='Zero')
ax.axvline(ref_metrics['mean_diff'], color='green', linestyle='-', linewidth=2,
           label=f'Mean: {ref_metrics["mean_diff"]:+.2f}m')
ax.axvline(equal_tolerance, color='orange', linestyle='--', linewidth=1.5,
           label=f'±{equal_tolerance:.2f}m')
ax.axvline(-equal_tolerance, color='orange', linestyle='--', linewidth=1.5)

# Clamp the x axis to the data extent plus 2% padding
x_min = float(valid_diffs.min())
x_max = float(valid_diffs.max())
x_padding = (x_max - x_min) * 0.02
ax.set_xlim(x_min - x_padding, x_max + x_padding)

# Labels and styling; log-scaled counts keep the sparse tails visible
ax.set_xlabel('Elevation Difference (m)', color='black', fontsize=11)
ax.set_ylabel('Frequency', color='black', fontsize=11)
ax.set_title('Difference Distribution', fontweight='bold', fontsize=12, color='black')
ax.tick_params(colors='black')
ax.set_yscale('log')
ax.legend(loc='upper right', fontsize=9)
ax.grid(True, alpha=0.3, color='black', linewidth=0.5)
for spine in ax.spines.values():
    spine.set_edgecolor('black')

# Finish this figure before any further figure is created, so that the layout
# adjustment applies to the 8-panel grid itself.
plt.tight_layout()
plt.show()

# =============================================================================
# 1. PREPARE DIFFERENCE DATA
# =============================================================================
# Residual columns: calibrated SAOCOM height minus the matching reference DEM
for residual_col, saocom_col, reference_col in (
    ('diff_tinitaly', 'HEIGHT_ABSOLUTE_TIN', 'tinitaly_height'),
    ('diff_copernicus', 'HEIGHT_ABSOLUTE_COP', 'copernicus_height'),
):
    saocom_gdf[residual_col] = saocom_gdf[saocom_col] - saocom_gdf[reference_col]

# Bin the coherence values only when the column does not exist yet
if 'coherence_bin' not in saocom_gdf.columns:
    coherence_bins = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    # One "lower-upper" label per pair of neighbouring bin edges
    coherence_labels = [
        f"{lower:.1f}-{upper:.1f}"
        for lower, upper in zip(coherence_bins[:-1], coherence_bins[1:])
    ]
    saocom_gdf['coherence_bin'] = pd.cut(
        saocom_gdf['COHER'],
        bins=coherence_bins,
        labels=coherence_labels,
        include_lowest=True,
    )

# =============================================================================
# 2. BASIC VIOLIN PLOTS - ALL REFERENCE COMPARISONS
# =============================================================================
fig, ax = plt.subplots(1, 1, figsize=(12, 8), facecolor='white')

# Prepare data
plot_data = pd.DataFrame({
    'SAOCOM - TINITALY': saocom_gdf['diff_tinitaly'],
    'SAOCOM - Copernicus': saocom_gdf['diff_copernicus']
})
violin_columns = ['SAOCOM - TINITALY', 'SAOCOM - Copernicus']
violin_series = [plot_data[column].dropna() for column in violin_columns]

# Create violin plot (one violin per comparison, at x positions 1 and 2)
parts = ax.violinplot(violin_series,
                      positions=[1, 2],
                      showmeans=True,
                      showmedians=True,
                      showextrema=True)

# Pad the value axis by 5% of the plotted data range. The x axis only carries
# the violin positions, so limits derived from elevation differences must not
# be applied to it.
y_low = min(series.min() for series in violin_series)
y_high = max(series.max() for series in violin_series)
y_padding = (y_high - y_low) * 0.05
ax.set_ylim(y_low - y_padding, y_high + y_padding)

# Color the violins
colors = ['#4472C4', '#ED7D31']
for pc, color in zip(parts['bodies'], colors):
    pc.set_facecolor(color)
    pc.set_alpha(0.7)
    pc.set_edgecolor('black')

# Style other elements
for partname in ('cbars', 'cmins', 'cmaxes', 'cmedians', 'cmeans'):
    if partname in parts:
        parts[partname].set_edgecolor('black')
        parts[partname].set_linewidth(1.5)

ax.axhline(y=0, color='red', linestyle='--', linewidth=1.5, alpha=0.7, label='Zero')
ax.set_xticks([1, 2])
ax.set_xticklabels(['SAOCOM -\nTINITALY', 'SAOCOM -\nCopernicus'], fontsize=11)
ax.set_ylabel('Elevation Difference (m)', fontsize=12)
ax.set_title('Distribution of Elevation Differences', fontweight='bold', fontsize=14)
ax.grid(axis='y', linestyle='--', alpha=0.3)
ax.legend()

# Add statistics next to each violin
for position, series in enumerate(violin_series, 1):
    stats_text = f"n={len(series):,}\nμ={series.mean():.2f}m\nσ={series.std():.2f}m"
    ax.text(position, series.max()*0.9, stats_text, ha='center', fontsize=9,
            bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

plt.tight_layout()
plt.show()

# =============================================================================
# 3. VIOLIN PLOTS BY COHERENCE BINS
# =============================================================================
fig, axes = plt.subplots(1, 2, figsize=(16, 7), facecolor='white')

# Take the bin order from the column itself so the tick labels always match
# the plotted bins, whether or not the bins were created in this cell.
bin_column = saocom_gdf['coherence_bin']
if isinstance(bin_column.dtype, pd.CategoricalDtype):
    bin_order = list(bin_column.cat.categories)
else:
    bin_order = sorted(bin_column.dropna().unique())

for ax, (col, title) in zip(axes, [('diff_tinitaly', 'SAOCOM - TINITALY'),
                                   ('diff_copernicus', 'SAOCOM - Copernicus')]):
    # Keep only rows that have both a coherence bin and a residual
    plot_df = saocom_gdf[saocom_gdf['coherence_bin'].notna() &
                         saocom_gdf[col].notna()].copy()
    if plot_df.empty:
        continue

    # Passing the order explicitly ties violin positions to bin_order
    sns.violinplot(x='coherence_bin', y=col, data=plot_df, order=bin_order,
                   inner='quartile', palette='viridis', ax=ax)

    # Value axis: data range plus 5% padding
    y_min = plot_df[col].min()
    y_max = plot_df[col].max()
    padding = (y_max - y_min) * 0.05
    ax.set_ylim(y_min - padding, y_max + padding)
    ax.axhline(y=0, color='red', linestyle='--', linewidth=1.5, alpha=0.7)
    ax.set_xlabel('Coherence Bin', fontsize=11)
    ax.set_ylabel('Difference (m)', fontsize=11)
    ax.set_title(f'{title} by Coherence', fontweight='bold', fontsize=12)
    ax.grid(axis='y', linestyle='--', alpha=0.3)

    # Add sample counts to the tick labels
    bin_counts = plot_df['coherence_bin'].value_counts().sort_index()
    labels = [f"{label}\n(n={bin_counts.get(label, 0):,})"
              for label in bin_order]
    ax.set_xticklabels(labels, rotation=45, ha='right', fontsize=9)

plt.tight_layout()
plt.show()

# =============================================================================
# 4. SUMMARY STATISTICS TABLE
# =============================================================================
# Residual column -> label shown in the table
comparison_names = {
    'diff_tinitaly': 'SAOCOM - TINITALY',
    'diff_copernicus': 'SAOCOM - Copernicus',
}


def _summarise_residuals(column, label):
    """Build one pre-formatted table row for the non-missing residuals in ``column``."""
    residuals = saocom_gdf[column].dropna()
    rmse = np.sqrt((residuals**2).mean())
    return {
        'Comparison': label,
        'N Points': f"{len(residuals):,}",
        'Mean': f"{residuals.mean():+.2f} m",
        'Median': f"{residuals.median():+.2f} m",
        'Std Dev': f"{residuals.std():.2f} m",
        'RMSE': f"{rmse:.2f} m",
        'Range': f"[{residuals.min():.1f}, {residuals.max():.1f}] m",
    }


summary_stats = [
    _summarise_residuals(column, label)
    for column, label in comparison_names.items()
]
summary_df = pd.DataFrame(summary_stats)

banner = "=" * 80
print("\n" + banner)
print("VIOLIN PLOT SUMMARY STATISTICS")
print(banner)
print(summary_df.to_string(index=False))
print(banner)




In [None]:
# =============================================================================
# PRE-REQUISITE: CALCULATE HEIGHT RESIDUALS
# =============================================================================
# Residual = Calibrated SAOCOM Height - Reference DEM Height
saocom_gdf['diff_tinitaly'] = saocom_gdf['HEIGHT_ABSOLUTE_TIN'] - saocom_gdf['tinitaly_height']

# =============================================================================
# 1. SPATIAL SAMPLE CORINE LAND COVER AT SAOCOM POINTS
# =============================================================================
# Relies on target_transform, grid_height, grid_width and corine_10m from the
# earlier grid-definition and CORINE-processing cells.

# Convert all point coordinates to grid indices in a single call instead of
# one rowcol() call per row. atleast_1d covers rasterio versions that return
# scalars for a single point.
sample_rows, sample_cols = rowcol(
    target_transform,
    saocom_gdf.geometry.x.to_numpy(),
    saocom_gdf.geometry.y.to_numpy(),
)
sample_rows = np.atleast_1d(np.asarray(sample_rows)).astype(np.intp)
sample_cols = np.atleast_1d(np.asarray(sample_cols)).astype(np.intp)

# Points that fall on the grid; everything else keeps code 0
inside_grid = (
    (sample_rows >= 0) & (sample_rows < grid_height) &
    (sample_cols >= 0) & (sample_cols < grid_width)
)

# 0 represents NoData / outside study area
corine_codes = np.zeros(len(saocom_gdf), dtype=corine_10m.dtype)
corine_codes[inside_grid] = corine_10m[sample_rows[inside_grid], sample_cols[inside_grid]]
# 255 is treated as NoData as well and folded into 0
corine_codes[corine_codes == 255] = 0

saocom_gdf['corine_code'] = corine_codes

# Filter out points outside the valid CORINE area (where code is 0)
saocom_lc_analysis = saocom_gdf[saocom_gdf['corine_code'] != 0].copy()

# =============================================================================
# 2. CALCULATE ROBUST STATISTICS BY LAND COVER CLASS
# =============================================================================

def nmad(data):
    """Normalized Median Absolute Deviation (robust measure of spread).

    Computed as ``1.4826 * median(|x - median(x)|)``. NaN entries are ignored
    so the result is consistent with the NaN-skipping 'count', 'median',
    'mean' and 'std' aggregations it is reported next to.

    Parameters
    ----------
    data : array-like of numbers
        Sample values (list, ndarray or pandas Series).

    Returns
    -------
    float
        The NMAD of the non-NaN values, or NaN when there are none.
    """
    values = np.asarray(data, dtype=float)
    values = values[~np.isnan(values)]
    if values.size == 0:
        return np.nan
    return 1.4826 * np.median(np.abs(values - np.median(values)))

# Per-class statistics of the recommended residual:
# SAOCOM (calibrated to TINITALY) - TINITALY DEM.
lc_height_stats = (
    saocom_lc_analysis.groupby('corine_code')['diff_tinitaly']
    .agg(['count', 'median', 'mean', 'std', nmad])
    .reset_index()
    .rename(columns={
        'count': 'N_Points',
        'median': 'Median_Diff_m',
        'mean': 'Mean_Diff_m',
        'std': 'Std_Dev_m',
        'nmad': 'NMAD_m',  # agg() names the column after the function
    })
)

# Add land cover label for interpretability
lc_height_stats['LC_Label'] = lc_height_stats['corine_code'].map(CORINE_CLASSES)

# Keep classes with at least MIN_SAMPLES points, ordered alphabetically by label
MIN_SAMPLES = 50
lc_height_stats_filtered = (
    lc_height_stats[lc_height_stats['N_Points'] >= MIN_SAMPLES]
    .sort_values('LC_Label', ascending=True)
)

# =============================================================================
# 3. DISPLAY RESULTS
# =============================================================================

print("\n" + "="*100)
# The filter above is inclusive, so report it as ">=".
print(f"HEIGHT RESIDUAL STATISTICS by CORINE Land Cover (N >= {MIN_SAMPLES})")
print("(Residual = Calibrated SAOCOM Height - TINITALY Reference DEM)")
print("="*100)

display_cols = ['corine_code', 'LC_Label', 'N_Points', 'Median_Diff_m', 'NMAD_m', 'Mean_Diff_m', 'Std_Dev_m']

# Every statistic column has its own formatter; float_format is only the
# fallback for any other float column.
print(lc_height_stats_filtered[display_cols].to_string(
    index=False,
    float_format='{:.2f}'.format,
    formatters={'N_Points': '{:,}'.format,
                'Median_Diff_m': '{:+.2f} m'.format,
                'NMAD_m': '{:.2f} m'.format,
                'Mean_Diff_m': '{:+.2f} m'.format,
                'Std_Dev_m': '{:.2f} m'.format}
))

print("="*100)

# Store the filtered results for later use in plotting/reporting
lc_height_stats_final = lc_height_stats_filtered.copy()

# =============================================================================
# 1. SETUP AND DEFINITIONS (from previous cells)
# =============================================================================
# Names expected from earlier cells:
# corine_10m (CLC raster on the analysis grid), study_area_mask (boolean mask
# of the study area), void_mask (study-area cells without SAOCOM data),
# GRID_SIZE (cell size in metres), CORINE_CLASSES (LC code lookup)

# Recalculate global void stats for context
n_total_cells = np.sum(study_area_mask)
n_void_cells = np.sum(void_mask)
void_percentage_global = 100 * n_void_cells / n_total_cells if n_total_cells > 0 else 0

# Unique CORINE codes inside the study area, without the two NoData markers
# (0 and 255) that the sampling and map cells also exclude.
unique_lc_codes = np.unique(corine_10m[study_area_mask])
unique_lc_codes = unique_lc_codes[(unique_lc_codes != 0) & (unique_lc_codes != 255)]

# =============================================================================
# 2. VOID ANALYSIS BY LAND COVER CLASS
# =============================================================================
void_stats_by_lc = []
cell_area_km2 = (GRID_SIZE / 1000.0) ** 2  # area of one grid cell in km^2

for lc_code in unique_lc_codes:
    # Cells of this class inside the study area. Every code was observed
    # inside the mask, so the count is always at least 1.
    lc_mask = study_area_mask & (corine_10m == lc_code)
    total_lc_cells = np.sum(lc_mask)

    # Void cells within this land cover
    void_lc_cells = np.sum(lc_mask & void_mask)

    # METRIC 1: share of this land cover class that is void (coverage performance)
    pct_of_lc_that_is_void = 100 * void_lc_cells / total_lc_cells

    # METRIC 2: share of all voids that falls in this class (contribution)
    pct_of_total_voids = 100 * void_lc_cells / n_void_cells if n_void_cells > 0 else 0

    void_stats_by_lc.append({
        'corine_code': lc_code,
        'label': CORINE_CLASSES.get(lc_code, f'Unknown_{lc_code}'),
        'total_cells': total_lc_cells,
        'void_cells': void_lc_cells,
        'Area_km2': total_lc_cells * cell_area_km2,
        'Pct_LC_is_Void': pct_of_lc_that_is_void,
        'Pct_of_Total_Voids': pct_of_total_voids,
    })

# Explicit columns keep the frame usable downstream even when no class was found.
void_stats_columns = ['corine_code', 'label', 'total_cells', 'void_cells',
                      'Area_km2', 'Pct_LC_is_Void', 'Pct_of_Total_Voids']
void_stats_df = pd.DataFrame(void_stats_by_lc, columns=void_stats_columns)

# =============================================================================
# 3. DISPLAY RESULTS
# =============================================================================
# Keep land cover classes covering at least MIN_AREA_KM2, ranked by the share
# of the class that is void (highest first).
MIN_AREA_KM2 = 1.0
void_stats_filtered = (
    void_stats_df[void_stats_df['Area_km2'] >= MIN_AREA_KM2]
    .copy()
    .sort_values('Pct_LC_is_Void', ascending=False)
)

print("\n" + "="*120)
# The filter above is inclusive, so report it as ">=".
print(f"VOID ANALYSIS by CORINE Land Cover (Area >= {MIN_AREA_KM2:.1f} km²)")
print(f"Overall Void Percentage (Study Area): {void_percentage_global:.2f}%")
print("="*120)

display_cols = ['corine_code', 'label', 'Area_km2', 'void_cells', 'Pct_LC_is_Void', 'Pct_of_Total_Voids']

# Print the filtered table
print(void_stats_filtered[display_cols].to_string(
    index=False,
    float_format=lambda x: f'{x:.2f}',
    formatters={'Area_km2': '{:.1f} km²'.format,
                'void_cells': '{:,}'.format,
                'Pct_LC_is_Void': '{:.2f} %'.format,
                'Pct_of_Total_Voids': '{:.2f} %'.format}
))

print("="*120)

# Store the filtered results for later use in plotting/reporting
lc_void_stats_final = void_stats_filtered.copy()

In [None]:


# =============================================================================
# 1. PREPARE DATA FOR PLOTTING
# =============================================================================
# Use the full saocom_lc_analysis DataFrame which contains both the residuals
# ('diff_tinitaly') and the sampled land cover codes ('corine_code').

# Define major land cover groups for better visualization (CLC Level 1)
def get_clc_level1(code):
    """Return the CLC Level 1 group name for a three-digit CLC code.

    Codes in [100, 600) are assigned to one of five groups by their hundreds
    band; any other value yields 'Other'.
    """
    level1_bands = (
        (100, '1. Artificial Surfaces'),
        (200, '2. Agricultural Areas'),
        (300, '3. Forest & Semi-Natural Areas'),
        (400, '4. Wetlands'),
        (500, '5. Water Bodies'),
    )
    for lower_bound, group_name in level1_bands:
        if lower_bound <= code < lower_bound + 100:
            return group_name
    return 'Other'

# Add Level 1 categories and readable labels to the analysis DataFrame
saocom_lc_analysis['LC_Level_1'] = saocom_lc_analysis['corine_code'].apply(get_clc_level1)
saocom_lc_analysis['LC_Label'] = saocom_lc_analysis['corine_code'].map(CORINE_CLASSES)

# Keep only the Level 3 classes that passed the minimum-sample filter
common_codes = lc_height_stats_final['corine_code'].unique()
plot_df_L3 = saocom_lc_analysis[saocom_lc_analysis['corine_code'].isin(common_codes)].copy()

# Trim the extreme 0.5 % on each tail (keeps the central 99 %) for plot scaling
q_low, q_high = plot_df_L3['diff_tinitaly'].quantile([0.005, 0.995])
# .copy() so the column assignment below writes to an independent frame
# rather than to a slice of plot_df_L3.
plot_df_L3_filtered = plot_df_L3[plot_df_L3['diff_tinitaly'].between(q_low, q_high)].copy()

# Category order for plotting. Despite its name, nmad_order is the list of
# class labels in reverse alphabetical order; it is not ranked by NMAD.
nmad_order = lc_height_stats_final.sort_values('LC_Label', ascending=False)['LC_Label'].tolist()
plot_df_L3_filtered['LC_Label'] = pd.Categorical(
    plot_df_L3_filtered['LC_Label'],
    categories=nmad_order,
    ordered=True
)



In [None]:
# =============================================================================
# SENTINEL-2 RGB PREPARATION
# =============================================================================

# File discovery. The glob order is not guaranteed, so sort to make the choice
# of "first file" reproducible between runs and machines.
sentinel_files = sorted((DATA_DIR / "sentinel_data").glob("*.tif"))
if not sentinel_files:
    raise FileNotFoundError("No Sentinel files found in sentinel_data directory")


def _find_sentinel_band(files, band_id, colour):
    """Return the first file whose name contains band_id, or colour (case-insensitive); None if absent."""
    return next((f for f in files if band_id in f.name or colour in f.name.lower()), None)


# Inspect the first file: either it is a multi-band image, or each band lives
# in its own file.
with rasterio.open(sentinel_files[0]) as src:
    sentinel_count = src.count

    if sentinel_count >= 3:
        # Multi-band file.
        # assumes bands 1, 2, 3 are stored as red, green, blue — TODO confirm
        sentinel_r = src.read(1)
        sentinel_g = src.read(2)
        sentinel_b = src.read(3)
        sentinel_transform_orig = src.transform
        sentinel_crs = src.crs

if sentinel_count < 3:
    # Single-band files: locate R, G, B by band id or colour name.
    r_file = _find_sentinel_band(sentinel_files, 'B04', 'red')
    g_file = _find_sentinel_band(sentinel_files, 'B03', 'green')
    b_file = _find_sentinel_band(sentinel_files, 'B02', 'blue')

    if not all([r_file, g_file, b_file]):
        raise FileNotFoundError("Could not find RGB bands in Sentinel files")

    # Georeferencing is taken from the red band and reused for all three.
    with rasterio.open(r_file) as r_src:
        sentinel_r = r_src.read(1)
        sentinel_transform_orig = r_src.transform
        sentinel_crs = r_src.crs
    with rasterio.open(g_file) as g_src:
        sentinel_g = g_src.read(1)
    with rasterio.open(b_file) as b_src:
        sentinel_b = b_src.read(1)

# Bring each band onto the analysis grid with bilinear resampling
target_shape = (grid_height, grid_width)
sentinel_r_10m = np.zeros(target_shape, dtype=np.float32)
sentinel_g_10m = np.zeros(target_shape, dtype=np.float32)
sentinel_b_10m = np.zeros(target_shape, dtype=np.float32)

source_bands = (sentinel_r, sentinel_g, sentinel_b)
resampled_bands = (sentinel_r_10m, sentinel_g_10m, sentinel_b_10m)

# reproject() fills each destination array in place
for band_src, band_dst in zip(source_bands, resampled_bands):
    reproject(
        source=band_src,
        destination=band_dst,
        src_crs=sentinel_crs,
        src_transform=sentinel_transform_orig,
        dst_crs=TARGET_CRS,
        dst_transform=target_transform,
        resampling=Resampling.bilinear,
    )

# Combine into a (rows, cols, 3) RGB array
sentinel_rgb = np.dstack(resampled_bands)

# Zero out everything outside the study area
sentinel_rgb[~hull_mask] = 0

# Normalize to 0-1 for display (2-98 percentile stretch for contrast).
# A band with no positive pixels inside the hull, or with no spread between
# the two percentiles, is left as all zeros instead of raising or dividing
# by zero.
sentinel_rgb_norm = np.zeros_like(sentinel_rgb, dtype=np.float32)
for i in range(3):
    band = sentinel_rgb[:, :, i]
    valid_pixels = band[hull_mask]

    # The stretch limits come from positive pixels only, so that is the
    # subset that must be non-empty.
    positive_pixels = valid_pixels[valid_pixels > 0]
    if positive_pixels.size == 0:
        continue

    p2, p98 = np.percentile(positive_pixels, [2, 98])
    if p98 <= p2:
        continue

    band_norm = np.clip((band - p2) / (p98 - p2), 0, 1)
    sentinel_rgb_norm[:, :, i] = band_norm

print(f"\nSentinel-2 RGB Prepared:")
print(f"  Shape: {sentinel_rgb_norm.shape}")
print(f"  Resolution: {GRID_SIZE}m")
print(f"  Value range: [{sentinel_rgb_norm.min():.3f}, {sentinel_rgb_norm.max():.3f}]")

In [None]:
# =============================================================================
# 2. GENERATE VIOLIN PLOT (Level 3 - Detailed Performance)
# =============================================================================
plt.figure(figsize=(14, 8), facecolor='white')

# LC_Label is categorical, so seaborn lays the violins out in category order
# regardless of how the rows are sorted. Pass the order explicitly and reuse
# the same list for the annotations so every label sits on its own violin.
violin_order = nmad_order[::-1]

# Violins show the distribution shape; inner quartile lines give robust statistics
sns.violinplot(
    x='diff_tinitaly',
    y='LC_Label',
    data=plot_df_L3_filtered,
    order=violin_order,
    inner='quartile',  # Show quartiles
    palette='Spectral_r',
    orient='h',
    linewidth=1.0,
    cut=0  # Don't extend beyond data points
)

# Add a vertical line at zero error
plt.axvline(0, color='red', linestyle='--', linewidth=1.5, alpha=0.7)

# Set labels and title
plt.title(
    'Distribution of SAOCOM Height Residuals by Land Cover (CLC Level 3)',
    fontweight='bold',
    fontsize=18
)
plt.xlabel('Height Residual (Calibrated SAOCOM - TINITALY DEM) [m]', fontsize=14)
plt.ylabel('CORINE Land Cover Class (Ordered by Category)', fontsize=14)
plt.grid(axis='x', linestyle='--', alpha=0.5)

# Increase tick label sizes
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)

# Add NMAD and Median labels. The row variable is deliberately not called
# `stats`, which would shadow the imported scipy.stats module.
for i, label in enumerate(violin_order):
    class_stats = lc_height_stats_final[lc_height_stats_final['LC_Label'] == label].iloc[0]
    nmad_m = class_stats['NMAD_m']
    median_diff = class_stats['Median_Diff_m']
    # Right-align the annotation at the upper trimming quantile
    plt.text(q_high, i,
             f'Med: {median_diff:+.2f}m | NMAD: {nmad_m:.2f}m',
             verticalalignment='center',
             horizontalalignment='right',
             fontsize=11,
             bbox=dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0.7)
            )

plt.tight_layout()
plt.show()
print("Violin Plot for Level 3 Land Cover classes generated successfully.")

# =============================================================================
# COPERNICUS STATISTICS AND VIOLIN PLOT (Level 3)
# =============================================================================

# 1. Calculate statistics for Copernicus residuals by land cover
lc_height_stats_cop = (
    saocom_lc_analysis.groupby('corine_code')['diff_copernicus']
    .agg(['count', 'median', 'mean', 'std', nmad])
    .reset_index()
    .rename(columns={
        'count': 'N_Points',
        'median': 'Median_Diff_m',
        'mean': 'Mean_Diff_m',
        'std': 'Std_Dev_m',
        'nmad': 'NMAD_m',
    })
)

# Add land cover labels
lc_height_stats_cop['LC_Label'] = lc_height_stats_cop['corine_code'].map(CORINE_CLASSES)

# Keep classes with at least MIN_SAMPLES points, ordered alphabetically by label
MIN_SAMPLES = 50
lc_height_stats_cop_filtered = (
    lc_height_stats_cop[lc_height_stats_cop['N_Points'] >= MIN_SAMPLES]
    .sort_values('LC_Label', ascending=True)
)

# 2. Prepare plot data
common_codes_cop = lc_height_stats_cop_filtered['corine_code'].unique()
plot_df_cop = saocom_lc_analysis[saocom_lc_analysis['corine_code'].isin(common_codes_cop)].copy()

# Trim the extreme 0.5 % on each tail. .copy() so the column assignment below
# writes to an independent frame rather than to a slice of plot_df_cop.
q_low_cop, q_high_cop = plot_df_cop['diff_copernicus'].quantile([0.005, 0.995])
plot_df_cop_filtered = plot_df_cop[plot_df_cop['diff_copernicus'].between(q_low_cop, q_high_cop)].copy()

# Category order: class labels in reverse alphabetical order (not NMAD-ranked,
# despite the variable name).
nmad_order_cop = lc_height_stats_cop_filtered.sort_values('LC_Label', ascending=False)['LC_Label'].tolist()
plot_df_cop_filtered['LC_Label'] = pd.Categorical(
    plot_df_cop_filtered['LC_Label'],
    categories=nmad_order_cop,
    ordered=True
)

# 3. Generate violin plot
plt.figure(figsize=(14, 8), facecolor='white')

# LC_Label is categorical, so seaborn ignores row order and uses the category
# order. Pass the order explicitly and reuse it for the annotations so labels
# and violins cannot drift apart.
violin_order_cop = nmad_order_cop[::-1]

sns.violinplot(
    x='diff_copernicus',
    y='LC_Label',
    data=plot_df_cop_filtered,
    order=violin_order_cop,
    inner='quartile',
    palette='Spectral_r',
    orient='h',
    linewidth=1.0,
    cut=0
)

# Add zero line
plt.axvline(0, color='red', linestyle='--', linewidth=1.5, alpha=0.7)

# Labels and title
plt.title(
    'Distribution of SAOCOM Height Residuals by Land Cover (CLC Level 3)',
    fontweight='bold',
    fontsize=14
)
plt.xlabel('Height Residual (Calibrated SAOCOM - Copernicus DEM) [m]', fontsize=11)
plt.ylabel('CORINE Land Cover Class (Ordered by Category)', fontsize=11)
plt.grid(axis='x', linestyle='--', alpha=0.5)

# Add NMAD and Median labels. The row variable is deliberately not called
# `stats`, which would shadow the imported scipy.stats module.
for i, label in enumerate(violin_order_cop):
    class_stats = lc_height_stats_cop_filtered[lc_height_stats_cop_filtered['LC_Label'] == label].iloc[0]
    nmad_m = class_stats['NMAD_m']
    median_diff = class_stats['Median_Diff_m']
    plt.text(q_high_cop, i,
             f'Med: {median_diff:+.2f}m | NMAD: {nmad_m:.2f}m',
             verticalalignment='center',
             horizontalalignment='right',
             fontsize=9,
             bbox=dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0.7))

plt.tight_layout()
plt.show()
print("Violin Plot for Copernicus comparison generated successfully.")

# =============================================================================
# 3. GENERATE BOX PLOT (Level 1 - Broad Comparison)
# =============================================================================
box_fig, box_ax = plt.subplots(figsize=(10, 6), facecolor='white')

# One box per Level 1 category; fliers are hidden because the extreme tails
# were already trimmed from the data.
sns.boxplot(
    data=plot_df_L3_filtered,
    x='LC_Level_1',
    y='diff_tinitaly',
    palette='Set2',
    linewidth=1.0,
    showfliers=False,
    ax=box_ax,
)

# Zero-error reference line
box_ax.axhline(0, color='red', linestyle='--', linewidth=1.5, alpha=0.7)

# Title, axis labels, tick rotation and grid
box_ax.set_title(
    'SAOCOM Height Residuals by Land Cover (CLC Level 1)',
    fontweight='bold',
    fontsize=14,
)
box_ax.set_xlabel('CORINE Land Cover Category (Level 1)', fontsize=11)
box_ax.set_ylabel('Height Residual (m)', fontsize=11)
plt.setp(box_ax.get_xticklabels(), rotation=15, ha='right')
box_ax.grid(axis='y', linestyle='--', alpha=0.5)

box_fig.tight_layout()
plt.show()
print("Box Plot for Level 1 Land Cover categories generated successfully.")

# =============================================================================
# CORINE LAND COVER VISUALIZATION
# =============================================================================
fig, ax = plt.subplots(figsize=(14, 10), facecolor='white')
ax.set_facecolor('white')

# Get extent
extent = [xmin_grid, xmax_grid, ymin_grid, ymax_grid]

# Mask NoData (0 and 255) so those cells are drawn transparent
corine_display = np.ma.masked_where((corine_10m == 0) | (corine_10m == 255), corine_10m)

# Classes actually present in the raster (np.unique returns them sorted)
unique_codes = np.unique(corine_display.compressed())

# One colour per present class; classes without a configured colour fall back
# to grey in BOTH the map and the legend.
UNKNOWN_CLASS_COLOR = (0.5, 0.5, 0.5)
colors_list = [CORINE_COLORS_MPL.get(code, UNKNOWN_CLASS_COLOR) for code in unique_codes]
cmap = ListedColormap(colors_list)
# Bin edges sit half a unit below each code, plus one closing edge above the last
norm = BoundaryNorm(boundaries=np.append(unique_codes, unique_codes[-1]+1) - 0.5,
                    ncolors=len(unique_codes))

# Plot CORINE
im = ax.imshow(corine_display, cmap=cmap, norm=norm, origin='upper', extent=extent)

# Add study area boundary
hull_gdf.boundary.plot(ax=ax, color='black', linewidth=2, label='Study Area')

# Labels and title
ax.set_xlabel('UTM Easting (m)', fontsize=14, color='black')
ax.set_ylabel('UTM Northing (m)', fontsize=14, color='black')
ax.set_title('CORINE Land Cover 2018', fontweight='bold', fontsize=18, color='black')
ax.tick_params(colors='black', labelsize=12)
ax.grid(True, alpha=0.3, linewidth=0.5, color='black')

# Legend with only the present classes, using the same colour lookup as the map
legend_elements = [plt.Rectangle((0,0),1,1,
                                 facecolor=CORINE_COLORS_MPL.get(code, UNKNOWN_CLASS_COLOR),
                                 edgecolor='black', linewidth=0.5,
                                 label=f"{code}: {CORINE_CLASSES.get(code, 'Unknown')}")
                   for code in unique_codes]

ax.legend(handles=legend_elements, loc='center left', bbox_to_anchor=(1, 0.5),
          fontsize=13, frameon=True, fancybox=False, edgecolor='black')

# Add scale bar (1 axis unit = 1 m)
scalebar = ScaleBar(1, location='lower right', box_alpha=0.8, color='black')
ax.add_artist(scalebar)

# Statistics box; cell area is derived from GRID_SIZE rather than hard-coded
total_area_km2 = np.sum(study_area_mask) * (GRID_SIZE / 1000.0) ** 2
stats_text = f"Study Area: {total_area_km2:.2f} km²\nClasses: {len(unique_codes)}"
ax.text(0.02, 0.98, stats_text, transform=ax.transAxes, fontsize=12,
        verticalalignment='top', bbox=dict(boxstyle='round', facecolor='white',
        alpha=0.9, edgecolor='black'))

plt.tight_layout()
plt.show()

print(f"\nCORINE Land Cover Map:")
print(f"  Total classes present: {len(unique_codes)}")
print(f"  Study area: {total_area_km2:.2f} km²")

In [None]:

# =============================================================================
# ESTABLISH CONSISTENT ORDERING (use TINITALY as reference)
# =============================================================================
# Classes that passed the sample-size filter in BOTH comparisons
tinitaly_codes = set(lc_height_stats_final['corine_code'])
copernicus_codes = set(lc_height_stats_cop_filtered['corine_code'])
common_codes_both = tinitaly_codes.intersection(copernicus_codes)
common_codes_sorted = sorted(common_codes_both)  # numeric order of corine_code

# Labels in that same code order, shared by every synchronized plot
consistent_labels = [CORINE_CLASSES[code] for code in common_codes_sorted]

# =============================================================================
# SYNCHRONIZED X-AXIS FOR VIOLIN PLOTS (Land Cover Residuals)
# =============================================================================
# The shared limits are derived up front from both trimmed residual columns,
# so they are available before any of the synchronized plots is drawn.
residual_columns = (
    plot_df_L3_filtered['diff_tinitaly'],
    plot_df_cop_filtered['diff_copernicus'],
)
combined_min = min(column.min() for column in residual_columns)
combined_max = max(column.max() for column in residual_columns)

# Pad the range by 5% on each side
x_range = combined_max - combined_min
x_padding = x_range * 0.05
global_xlim = (combined_min - x_padding, combined_max + x_padding)

print(f"Synchronized x-axis range: [{global_xlim[0]:.2f}, {global_xlim[1]:.2f}] m")
# =============================================================================
# SHARED PLOT DATA (classes common to both comparisons, consistent ordering)
# =============================================================================
# Only the data preparation lives here; the figures are drawn by the
# synchronized-limit sections below. No figure is created in this section, so
# no blank canvas is left open for the next plt.show().

# TINITALY residuals restricted to the common classes
plot_df_L3_common = plot_df_L3_filtered[plot_df_L3_filtered['corine_code'].isin(common_codes_both)].copy()
plot_df_L3_common['LC_Label'] = pd.Categorical(
    plot_df_L3_common['LC_Label'],
    categories=consistent_labels,
    ordered=True
)

# Copernicus residuals restricted to the same classes, same label order
plot_df_cop_common = plot_df_cop_filtered[plot_df_cop_filtered['corine_code'].isin(common_codes_both)].copy()
plot_df_cop_common['LC_Label'] = pd.Categorical(
    plot_df_cop_common['LC_Label'],
    categories=consistent_labels,
    ordered=True
)
# =============================================================================
# TINITALY VIOLIN PLOT - Apply global limits
# =============================================================================
plt.figure(figsize=(14, 8), facecolor='white')

sns.violinplot(
    x='diff_tinitaly',
    y='LC_Label',
    data=plot_df_L3_common,
    inner='quartile',
    palette='Spectral_r',
    orient='h',
    linewidth=1.0,
    cut=0,
    order=consistent_labels
)

# Apply the x-limits shared with the Copernicus plot
plt.xlim(global_xlim)

plt.axvline(0, color='red', linestyle='--', linewidth=1.5, alpha=0.7)
plt.title('Distribution of SAOCOM Height Residuals by Land Cover (CLC Level 3)',
          fontweight='bold', fontsize=18)
plt.xlabel('Height Residual (Calibrated SAOCOM - TINITALY DEM) [m]', fontsize=14)
plt.ylabel('CORINE Land Cover Class', fontsize=14)
plt.grid(axis='x', linestyle='--', alpha=0.5)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)

# Anchor the annotations 2% of the axis span inside the right edge. Scaling
# the upper limit itself (limit * 0.98) would depend on where zero lies and
# would push the text outside the axes for a non-positive limit.
annotation_x = global_xlim[1] - 0.02 * (global_xlim[1] - global_xlim[0])

# Median / NMAD label per class, in the same order as the violins. The row
# variable is deliberately not called `stats`, which would shadow the imported
# scipy.stats module.
for i, label in enumerate(consistent_labels):
    class_stats = lc_height_stats_final[lc_height_stats_final['LC_Label'] == label].iloc[0]
    plt.text(annotation_x, i,
             f'Med: {class_stats["Median_Diff_m"]:+.2f}m | NMAD: {class_stats["NMAD_m"]:.2f}m',
             verticalalignment='center', horizontalalignment='right', fontsize=11,
             bbox=dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0.7))

plt.tight_layout()
plt.show()

# =============================================================================
# COPERNICUS VIOLIN PLOT - Apply SAME global limits
# =============================================================================
plt.figure(figsize=(14, 8), facecolor='white')

sns.violinplot(
    x='diff_copernicus',
    y='LC_Label',
    data=plot_df_cop_common,
    inner='quartile',
    palette='Spectral_r',
    orient='h',
    linewidth=1.0,
    cut=0,
    order=consistent_labels
)

# Apply the x-limits shared with the TINITALY plot
plt.xlim(global_xlim)

plt.axvline(0, color='red', linestyle='--', linewidth=1.5, alpha=0.7)
plt.title('Distribution of SAOCOM Height Residuals by Land Cover (CLC Level 3)',
          fontweight='bold', fontsize=14)
plt.xlabel('Height Residual (Calibrated SAOCOM - Copernicus DEM) [m]', fontsize=11)
plt.ylabel('CORINE Land Cover Class', fontsize=11)
plt.grid(axis='x', linestyle='--', alpha=0.5)

# Anchor the annotations 2% of the axis span inside the right edge. Scaling
# the upper limit itself (limit * 0.98) would depend on where zero lies and
# would push the text outside the axes for a non-positive limit.
annotation_x = global_xlim[1] - 0.02 * (global_xlim[1] - global_xlim[0])

# Median / NMAD label per class, in the same order as the violins. The row
# variable is deliberately not called `stats`, which would shadow the imported
# scipy.stats module.
for i, label in enumerate(consistent_labels):
    class_stats = lc_height_stats_cop_filtered[lc_height_stats_cop_filtered['LC_Label'] == label].iloc[0]
    plt.text(annotation_x, i,
             f'Med: {class_stats["Median_Diff_m"]:+.2f}m | NMAD: {class_stats["NMAD_m"]:.2f}m',
             verticalalignment='center', horizontalalignment='right', fontsize=9,
             bbox=dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0.7))

plt.tight_layout()
plt.show()

# =============================================================================
# ALTERNATIVE: Side-by-side comparison with shared axis
# =============================================================================
fig, axes = plt.subplots(1, 2, figsize=(22, 10), facecolor='white', sharey=True)

# (residual column, data, panel title, y-axis label) for the left and right panel
panel_specs = [
    ('diff_tinitaly', plot_df_L3_common, 'SAOCOM - TINITALY', 'Land Cover Class'),
    ('diff_copernicus', plot_df_cop_common, 'SAOCOM - Copernicus', ''),
]

for ax, (residual_column, panel_data, panel_title, y_label) in zip(axes, panel_specs):
    sns.violinplot(
        data=panel_data,
        x=residual_column,
        y='LC_Label',
        order=consistent_labels,
        inner='quartile',
        palette='Spectral_r',
        orient='h',
        linewidth=1.0,
        cut=0,
        ax=ax,
    )
    ax.set_xlim(global_xlim)
    ax.axvline(0, color='red', linestyle='--', linewidth=1.5, alpha=0.7)
    ax.set_title(panel_title, fontweight='bold', fontsize=14)
    ax.set_xlabel('Height Residual [m]', fontsize=12)
    if y_label:
        ax.set_ylabel(y_label, fontsize=12)
    else:
        # The right panel shares the y axis, so its duplicate label is blanked
        ax.set_ylabel('')
    ax.grid(axis='x', linestyle='--', alpha=0.5)

fig.suptitle('Height Residual Comparison by Land Cover',
             fontweight='bold', fontsize=16, y=0.995)
plt.tight_layout()
plt.show()
# =============================================================================
# CORINE LAND COVER VISUALIZATION
# =============================================================================
fig, ax = plt.subplots(figsize=(14, 10), facecolor='white')
ax.set_facecolor('white')

# Get extent
extent = [xmin_grid, xmax_grid, ymin_grid, ymax_grid]

# Mask NoData (0 and 255) so those cells are drawn transparent
corine_display = np.ma.masked_where((corine_10m == 0) | (corine_10m == 255), corine_10m)

# Classes actually present in the raster (np.unique returns them sorted)
unique_codes = np.unique(corine_display.compressed())

# One colour per present class; classes without a configured colour fall back
# to grey in BOTH the map and the legend.
UNKNOWN_CLASS_COLOR = (0.5, 0.5, 0.5)
colors_list = [CORINE_COLORS_MPL.get(code, UNKNOWN_CLASS_COLOR) for code in unique_codes]
cmap = ListedColormap(colors_list)
# Bin edges sit half a unit below each code, plus one closing edge above the last
norm = BoundaryNorm(boundaries=np.append(unique_codes, unique_codes[-1]+1) - 0.5,
                    ncolors=len(unique_codes))

# Plot CORINE
im = ax.imshow(corine_display, cmap=cmap, norm=norm, origin='upper', extent=extent)

# Add study area boundary
hull_gdf.boundary.plot(ax=ax, color='black', linewidth=2, label='Study Area')

# Labels and title
ax.set_xlabel('UTM Easting (m)', fontsize=14, color='black')
ax.set_ylabel('UTM Northing (m)', fontsize=14, color='black')
ax.set_title('CORINE Land Cover 2018', fontweight='bold', fontsize=18, color='black')
ax.tick_params(colors='black', labelsize=12)
ax.grid(True, alpha=0.3, linewidth=0.5, color='black')

# Legend with only the present classes, using the same colour lookup as the map
legend_elements = [plt.Rectangle((0,0),1,1,
                                 facecolor=CORINE_COLORS_MPL.get(code, UNKNOWN_CLASS_COLOR),
                                 edgecolor='black', linewidth=0.5,
                                 label=f"{code}: {CORINE_CLASSES.get(code, 'Unknown')}")
                   for code in unique_codes]

ax.legend(handles=legend_elements, loc='center left', bbox_to_anchor=(1, 0.5),
          fontsize=13, frameon=True, fancybox=False, edgecolor='black')

# Add scale bar (1 axis unit = 1 m)
scalebar = ScaleBar(1, location='lower right', box_alpha=0.8, color='black')
ax.add_artist(scalebar)

# Statistics box; cell area is derived from GRID_SIZE rather than hard-coded
total_area_km2 = np.sum(study_area_mask) * (GRID_SIZE / 1000.0) ** 2
stats_text = f"Study Area: {total_area_km2:.2f} km²\nClasses: {len(unique_codes)}"
ax.text(0.02, 0.98, stats_text, transform=ax.transAxes, fontsize=12,
        verticalalignment='top', bbox=dict(boxstyle='round', facecolor='white',
        alpha=0.9, edgecolor='black'))

plt.tight_layout()
plt.show()

print(f"\nCORINE Land Cover Map:")
print(f"  Total classes present: {len(unique_codes)}")
print(f"  Study area: {total_area_km2:.2f} km²")

In [None]:
# =============================================================================
# INDIVIDUAL CLASS OVERLAY MAPS (COLORBLIND-FRIENDLY)
# =============================================================================
# For every land cover code present in corine_10m, draw one figure: the
# Sentinel RGB image as backdrop, the class footprint as a hatched,
# semi-transparent overlay with a double outline, and the study-area hull.
# Each figure is saved to RESULTS_DIR as landcover_<code>_<name>.png.
from matplotlib.patches import Patch

# Get unique classes present in data (np.unique returns them in ascending order)
unique_classes = np.unique(corine_10m[corine_10m > 0])

# Loop invariants: the percentage denominator and the image extent are the
# same for every class, so compute them once.
n_classified_cells = np.sum(corine_10m > 0)
overlay_extent = [xmin_grid, xmax_grid, ymin_grid, ymax_grid]

# Create one map per class
for lc_code in unique_classes:
    fig, ax = plt.subplots(1, 1, figsize=(14, 10), facecolor='white')
    ax.set_facecolor('white')

    # Display Sentinel RGB as background (slight transparency helps the overlay show)
    ax.imshow(sentinel_rgb_norm, extent=overlay_extent, origin='upper', alpha=0.7)

    # Colour for this land cover class; grey when the code has no entry
    fill_color = tuple(c/255 for c in CORINE_COLORS.get(lc_code, (128, 128, 128)))
    # GeoPandas treats a list-like colour whose length equals the number of
    # geometries as one value per geometry, so pass an unambiguous hex string.
    fill_hex = mcolors.to_hex(fill_color)

    # Create mask for this land cover class
    lc_mask = (corine_10m == lc_code)

    # Vectorize the mask; only cells inside the mask are polygonised
    mask_shapes = shapes(lc_mask.astype(np.uint8), mask=lc_mask, transform=target_transform)
    polys = [shape(geom) for geom, val in mask_shapes if val == 1]
    polys = [poly for poly in polys if poly.is_valid]

    if polys:
        # Plot through a GeoSeries so interior rings are respected: filling
        # only poly.exterior would paint enclaves of other classes as if they
        # belonged to this class. It also draws all patches as one collection
        # instead of three artists per polygon.
        class_polys = gpd.GeoSeries(polys)

        # Fill with class-specific color + hatching for visibility
        class_polys.plot(ax=ax, facecolor=fill_hex, edgecolor='none',
                         alpha=0.4, hatch='///', linewidth=0)

        # Bold black outline with a thinner class-coloured line on top;
        # .boundary includes the rings around holes as well.
        outlines = class_polys.boundary
        outlines.plot(ax=ax, color='black', linewidth=2.5, alpha=0.9)
        outlines.plot(ax=ax, color=fill_hex, linewidth=1.5, alpha=1.0)

    # Add study area boundary (red dashes over a wider black dashed line)
    hull_gdf.boundary.plot(ax=ax, color='black', linewidth=3, linestyle='--', alpha=0.8)
    hull_gdf.boundary.plot(ax=ax, color='red', linewidth=1.5, linestyle='--', alpha=1.0)

    # Calculate statistics: cell count -> km², and share of all classified cells
    lc_count = np.sum(lc_mask)
    area_km2 = lc_count * (GRID_SIZE**2) / 1e6
    pct_area = 100 * lc_count / n_classified_cells

    # Title with statistics
    class_name = CORINE_CLASSES.get(lc_code, f'Class {lc_code}')
    ax.set_title(f'Land Cover: {class_name}\n'
                 f'Code {lc_code} | Area: {area_km2:.1f} km² ({pct_area:.1f}%)',
                 fontweight='bold', fontsize=13, pad=15)

    ax.set_xlabel('UTM Easting (m)', fontsize=11)
    ax.set_ylabel('UTM Northing (m)', fontsize=11)
    ax.grid(True, alpha=0.3, color='gray', linewidth=0.5)

    # Legend with hatching
    legend_elements = [
        Patch(facecolor=fill_color, edgecolor='black', linewidth=2,
              alpha=0.4, hatch='///', label=class_name),
        Patch(facecolor='none', edgecolor='red', linestyle='--',
              linewidth=2, label='Study Area')
    ]
    ax.legend(handles=legend_elements, loc='upper right', fontsize=10,
              frameon=True, fancybox=False, edgecolor='black')

    plt.tight_layout()

    # Save under a filesystem-friendly version of the class name
    safe_name = class_name.replace(' ', '_').replace(',', '').replace('/', '_')
    filename = f'landcover_{lc_code}_{safe_name}.png'
    plt.savefig(RESULTS_DIR / filename, dpi=300, bbox_inches='tight', facecolor='white')
    plt.show()
    plt.close()

    print(f"Saved: {filename}")

print(f"\nGenerated {len(unique_classes)} individual land cover overlay maps")

In [None]:
# =============================================================================
# SAOCOM VS REFERENCE DEM COMPARISON (TINITALY AND COPERNICUS)
# =============================================================================
# Both comparisons run through the same helpers so they cannot drift apart:
# identical filtering, identical metrics, and plain Python int/float results
# for every count and percentage.


def _compare_heights(gdf, height_col, ref_col, elevation_range, min_coherence):
    """Select comparable points and summarise SAOCOM-minus-reference differences.

    A point is kept when both heights are present, the SAOCOM height lies
    inside ``elevation_range`` (inclusive on both ends) and its ``COHER``
    value is at least ``min_coherence``.

    Returns:
        (mask, valid, diff, metrics): the boolean row mask over ``gdf``, the
        selected rows, the difference array ``height_col - ref_col`` and a
        dict of summary metrics.

    Raises:
        ValueError: if no point passes the filter. The min/max reductions
            would otherwise fail with a less helpful message.
    """
    lowest, highest = elevation_range
    mask = (
        (gdf[height_col].notna()) &
        (gdf[ref_col].notna()) &
        (gdf[height_col] >= lowest) &
        (gdf[height_col] <= highest) &
        (gdf['COHER'] >= min_coherence)
    )
    valid = gdf[mask]
    diff = (valid[height_col] - valid[ref_col]).values
    if len(diff) == 0:
        raise ValueError(
            f"No valid points to compare {height_col} against {ref_col}"
        )

    median_diff = np.median(diff)
    metrics = {
        'n_points': int(len(diff)),
        'mean_diff': float(np.mean(diff)),
        'median_diff': float(median_diff),
        'std_diff': float(np.std(diff)),
        'rmse': float(np.sqrt(np.mean(diff**2))),
        'mae': float(np.mean(np.abs(diff))),
        # Normalised median absolute deviation (1.4826 * MAD)
        'nmad': float(1.4826 * np.median(np.abs(diff - median_diff))),
        'min_diff': float(np.min(diff)),
        'max_diff': float(np.max(diff)),
        'correlation': float(np.corrcoef(valid[height_col].values,
                                         valid[ref_col].values)[0, 1])
    }
    return mask, valid, diff, metrics


def _split_by_tolerance(diff, tolerance):
    """Return boolean masks (higher, lower, equal) for ``diff`` against ±tolerance."""
    return diff > tolerance, diff < -tolerance, np.abs(diff) <= tolerance


def _percent_of(count, total):
    """Return ``count`` as a percentage of ``total`` (plain float)."""
    return float(100 * count / total)


# Filter for valid comparisons with elevation range check
valid_elevation_range = (50, 850)
# NOTE(review): the gridded comparison (create_difference_grid) filters on
# COHERENCE_THRESHOLD instead of this literal -- confirm the two are meant
# to be the same value.
min_comparison_coherence = 0.5

# -----------------------------------------------------------------------------
# SAOCOM vs TINITALY
# -----------------------------------------------------------------------------
(saocom_tinitaly_mask,
 saocom_tinitaly_valid,
 saocom_tinitaly_diff,
 saocom_tinitaly_metrics) = _compare_heights(
    saocom_gdf, 'HEIGHT_ABSOLUTE_TIN', 'tinitaly_height',
    valid_elevation_range, min_comparison_coherence
)

# Differences within one NMAD of zero count as "equal"
saocom_tinitaly_tolerance = float(saocom_tinitaly_metrics['nmad'])
(saocom_tinitaly_higher_mask,
 saocom_tinitaly_lower_mask,
 saocom_tinitaly_equal_mask) = _split_by_tolerance(
    saocom_tinitaly_diff, saocom_tinitaly_tolerance
)

saocom_tinitaly_higher_count = int(np.sum(saocom_tinitaly_higher_mask))
saocom_tinitaly_lower_count = int(np.sum(saocom_tinitaly_lower_mask))
saocom_tinitaly_equal_count = int(np.sum(saocom_tinitaly_equal_mask))

saocom_tinitaly_pct_higher = _percent_of(saocom_tinitaly_higher_count, len(saocom_tinitaly_diff))
saocom_tinitaly_pct_lower = _percent_of(saocom_tinitaly_lower_count, len(saocom_tinitaly_diff))
saocom_tinitaly_pct_equal = _percent_of(saocom_tinitaly_equal_count, len(saocom_tinitaly_diff))

# -----------------------------------------------------------------------------
# SAOCOM vs Copernicus
# -----------------------------------------------------------------------------
(saocom_copernicus_mask,
 saocom_copernicus_valid,
 saocom_copernicus_diff,
 saocom_copernicus_metrics) = _compare_heights(
    saocom_gdf, 'HEIGHT_ABSOLUTE_COP', 'copernicus_height',
    valid_elevation_range, min_comparison_coherence
)

saocom_copernicus_tolerance = float(saocom_copernicus_metrics['nmad'])
(saocom_copernicus_higher_mask,
 saocom_copernicus_lower_mask,
 saocom_copernicus_equal_mask) = _split_by_tolerance(
    saocom_copernicus_diff, saocom_copernicus_tolerance
)

saocom_copernicus_higher_count = int(np.sum(saocom_copernicus_higher_mask))
saocom_copernicus_lower_count = int(np.sum(saocom_copernicus_lower_mask))
saocom_copernicus_equal_count = int(np.sum(saocom_copernicus_equal_mask))

saocom_copernicus_pct_higher = _percent_of(saocom_copernicus_higher_count, len(saocom_copernicus_diff))
saocom_copernicus_pct_lower = _percent_of(saocom_copernicus_lower_count, len(saocom_copernicus_diff))
saocom_copernicus_pct_equal = _percent_of(saocom_copernicus_equal_count, len(saocom_copernicus_diff))




In [None]:
# =============================================================================
# SAOCOM VS REFERENCE DEMs - GRIDDED COMPARISON ANALYSIS
# =============================================================================
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import griddata

# =============================================================================
# 1. PREPARE SAOCOM DIFFERENCE GRIDS
# =============================================================================

def create_difference_grid(saocom_gdf, height_col, ref_col, grid_shape, transform,
                           area_mask=None):
    """Rasterise SAOCOM-minus-reference height differences onto the analysis grid.

    Points are kept when both height columns are present and ``COHER`` is at
    least ``COHERENCE_THRESHOLD``. Every grid cell inside ``area_mask`` then
    receives the difference of its nearest kept point (nearest-neighbour
    gridding, so cells are filled regardless of how far away that point is).

    Args:
        saocom_gdf: point GeoDataFrame with ``height_col``, ``ref_col`` and
            ``COHER`` columns.
        height_col: name of the SAOCOM height column.
        ref_col: name of the reference DEM height column.
        grid_shape: ``(rows, cols)`` of the output grid.
        transform: affine transform of the grid; only its mapping of the
            grid corners is used, so an unrotated (north-up) grid is assumed.
        area_mask: boolean array of shape ``grid_shape``; cells where it is
            False stay NaN. Defaults to the module-level ``hull_mask``.

    Returns:
        (diff_grid, valid_points): the float grid (NaN outside the mask or
        when there are no valid points) and a copy of the kept rows with an
        added ``'diff'`` column.
    """
    if area_mask is None:
        area_mask = hull_mask
    inside = np.asarray(area_mask, dtype=bool)

    # Filter valid points
    valid_mask = (
        saocom_gdf[height_col].notna() &
        saocom_gdf[ref_col].notna() &
        (saocom_gdf['COHER'] >= COHERENCE_THRESHOLD)
    )
    valid_points = saocom_gdf[valid_mask].copy()

    # Calculate difference
    valid_points['diff'] = valid_points[height_col] - valid_points[ref_col]

    grid_height, grid_width = grid_shape
    diff_grid = np.full((grid_height, grid_width), np.nan)

    # Nothing to interpolate from (or into): return the all-NaN grid instead
    # of letting griddata fail on empty input.
    if len(valid_points) == 0 or not inside.any():
        return diff_grid, valid_points

    # Get coordinates
    x = valid_points.geometry.x.values
    y = valid_points.geometry.y.values
    diff = valid_points['diff'].values

    # Cell-centre coordinates. Sampling np.linspace over the outer edges with
    # one sample per cell would stretch the spacing to extent / (n - 1) and
    # shift cells by up to half a pixel relative to the raster.
    x_min, y_max = transform * (0, 0)
    x_max, y_min = transform * (grid_width, grid_height)
    pixel_width = (x_max - x_min) / grid_width
    pixel_height = (y_min - y_max) / grid_height  # negative when rows run north to south
    xi = x_min + (np.arange(grid_width) + 0.5) * pixel_width
    yi = y_max + (np.arange(grid_height) + 0.5) * pixel_height
    xi_grid, yi_grid = np.meshgrid(xi, yi)

    # Grid the differences using nearest neighbour, only for cells inside the
    # mask (cells outside would be reset to NaN anyway). fill_value is not
    # passed because griddata ignores it for method='nearest'.
    diff_grid[inside] = griddata((x, y), diff,
                                 (xi_grid[inside], yi_grid[inside]),
                                 method='nearest')

    return diff_grid, valid_points

# Rasterise the point differences for both reference DEMs onto the shared
# analysis grid. Each call returns the gridded differences and the points
# that were used (with their 'diff' column).
print("Gridding SAOCOM - TINITALY differences...")
diff_grid_tin, points_tin = create_difference_grid(
    saocom_gdf,
    height_col='HEIGHT_ABSOLUTE_TIN',
    ref_col='tinitaly_height',
    grid_shape=(grid_height, grid_width),
    transform=target_transform,
)

print("Gridding SAOCOM - Copernicus differences...")
diff_grid_cop, points_cop = create_difference_grid(
    saocom_gdf,
    height_col='HEIGHT_ABSOLUTE_COP',
    ref_col='copernicus_height',
    grid_shape=(grid_height, grid_width),
    transform=target_transform,
)

# =============================================================================
# 2. CALCULATE DIRECTIONAL MASKS
# =============================================================================

def create_directional_masks(diff_grid, tolerance):
    """Split a difference grid into higher / lower / equal layers.

    A cell is "higher" when its value exceeds ``tolerance``, "lower" when it
    is below ``-tolerance`` and "equal" when its absolute value is within
    ``tolerance``. NaN cells belong to no category.

    Returns:
        (higher_data, lower_data, equal_data, stats): three arrays shaped
        like ``diff_grid`` holding the original values for their category
        and NaN elsewhere, plus a dict with the cell counts (``n_total``,
        ``n_higher``, ``n_lower``, ``n_equal``) and the matching percentages
        of all non-NaN cells (0 when there are none).
    """
    has_value = ~np.isnan(diff_grid)
    selections = (
        has_value & (diff_grid > tolerance),
        has_value & (diff_grid < -tolerance),
        has_value & (np.abs(diff_grid) <= tolerance),
    )

    def _only(selection):
        # Copy of diff_grid with everything outside `selection` blanked to NaN
        layer = np.full_like(diff_grid, np.nan)
        layer[selection] = diff_grid[selection]
        return layer

    higher_data, lower_data, equal_data = (_only(sel) for sel in selections)

    n_total = np.sum(has_value)
    n_higher, n_lower, n_equal = (np.sum(sel) for sel in selections)

    def _share(count):
        # Percentage of valid cells; 0 when the grid has no valid cell
        if n_total > 0:
            return 100 * count / n_total
        return 0

    summary = {
        'n_total': n_total,
        'n_higher': n_higher,
        'n_lower': n_lower,
        'n_equal': n_equal,
        'pct_higher': _share(n_higher),
        'pct_lower': _share(n_lower),
        'pct_equal': _share(n_equal),
    }

    return higher_data, lower_data, equal_data, summary

# The "equal" band of each comparison is ± the NMAD of its point differences
tolerance_tin = saocom_tinitaly_metrics['nmad']
tolerance_cop = saocom_copernicus_metrics['nmad']

# SAOCOM vs TINITALY: higher / lower / equal layers plus cell statistics
tin_higher, tin_lower, tin_equal, tin_stats = create_directional_masks(diff_grid_tin, tolerance_tin)

# SAOCOM vs Copernicus: same split against the Copernicus difference grid
cop_higher, cop_lower, cop_equal, cop_stats = create_directional_masks(diff_grid_cop, tolerance_cop)

# =============================================================================
# 3. VISUALIZATION - SAOCOM VS TINITALY (ROW 1) AND COPERNICUS (ROW 2)
# =============================================================================
# Each row shows: the full difference grid, the cells where SAOCOM is higher
# than the reference, and the cells where the reference is higher than SAOCOM.
# The six panels share the helpers below instead of six copies of the same code.


def _add_stats_box(ax, text):
    """Write ``text`` in a white rounded box in the top-left corner of ``ax``."""
    ax.text(0.02, 0.98, text, transform=ax.transAxes, fontsize=9,
            verticalalignment='top', bbox=dict(boxstyle='round',
            facecolor='white', alpha=0.9, edgecolor='black'))


def _draw_difference_panel(ax, data, base_cmap, vmin, vmax, title, extent):
    """Draw one difference raster with hull outline, axis labels and colorbar."""
    ax.set_facecolor('white')
    cmap = base_cmap.copy()
    cmap.set_bad(color='white', alpha=0)  # NaN cells are drawn fully transparent
    image = ax.imshow(np.ma.masked_invalid(data), cmap=cmap, origin='upper',
                      extent=extent, vmin=vmin, vmax=vmax)
    hull_gdf.boundary.plot(ax=ax, color='black', linewidth=2)
    ax.set_title(title, fontweight='bold', fontsize=12)
    ax.set_xlabel('UTM Easting (m)')
    ax.set_ylabel('UTM Northing (m)')
    ax.grid(True, alpha=0.3)
    plt.colorbar(image, ax=ax, label='Difference (m)', shrink=0.8)


def _draw_one_sided_panel(ax, data, n_cells, title, extent, *, positive):
    """Draw the 'higher' (``positive=True``) or 'lower' layer of a comparison.

    The colour scale runs from 0 to the most extreme value of the layer. When
    the layer has no values at all, a placeholder limit of ±1 is used so the
    colour limits are never NaN, and no statistics box is drawn.
    """
    values = data[~np.isnan(data)]
    if positive:
        base_cmap, extreme_label = plt.cm.YlOrRd, 'Max'
        extreme = np.max(values) if values.size else 1.0
        vmin, vmax = 0, extreme
    else:
        base_cmap, extreme_label = plt.cm.Blues_r, 'Min'
        extreme = np.min(values) if values.size else -1.0
        vmin, vmax = extreme, 0

    _draw_difference_panel(ax, data, base_cmap, vmin, vmax, title, extent)

    if values.size:
        _add_stats_box(ax, (f"Points: {n_cells:,}\n"
                            f"Mean: {np.mean(values):.2f}m\n"
                            f"Std: {np.std(values):.2f}m\n"
                            f"{extreme_label}: {extreme:.2f}m"))


def _plot_comparison_row(row_axes, ref_name, diff_grid, points, metrics,
                         tolerance, higher, lower, cell_stats, extent):
    """Fill one row of three panels for the comparison against ``ref_name``.

    NOTE(review): the 'Points:' figures in the boxes come from ``cell_stats``,
    i.e. they count grid cells of the gridded differences, not SAOCOM points.
    """
    # Symmetric colour limits from the 95th percentile of the absolute point
    # differences; fall back to ±1 when there are no points.
    abs_diff = np.abs(points['diff'])
    diff_limit = np.percentile(abs_diff, 95) if len(abs_diff) else 1.0

    # Panel 1: full difference map
    _draw_difference_panel(row_axes[0], diff_grid, plt.cm.coolwarm,
                           -diff_limit, diff_limit,
                           f'SAOCOM - {ref_name}\nFull Difference', extent)
    _add_stats_box(row_axes[0], (f"Points: {cell_stats['n_total']:,}\n"
                                 f"Mean: {metrics['mean_diff']:+.2f}m\n"
                                 f"RMSE: {metrics['rmse']:.2f}m\n"
                                 f"NMAD: {metrics['nmad']:.2f}m\n"
                                 f"Tolerance: ±{tolerance:.2f}m"))

    # Panel 2: SAOCOM higher than the reference
    _draw_one_sided_panel(row_axes[1], higher, cell_stats['n_higher'],
                          f'SAOCOM > {ref_name}\n({cell_stats["pct_higher"]:.1f}%)',
                          extent, positive=True)

    # Panel 3: reference higher than SAOCOM
    _draw_one_sided_panel(row_axes[2], lower, cell_stats['n_lower'],
                          f'{ref_name} > SAOCOM\n({cell_stats["pct_lower"]:.1f}%)',
                          extent, positive=False)


fig, axes = plt.subplots(2, 3, figsize=(20, 14), facecolor='white')
extent = [xmin_grid, xmax_grid, ymin_grid, ymax_grid]

# Row 1: SAOCOM vs TINITALY
_plot_comparison_row(axes[0], 'TINITALY', diff_grid_tin, points_tin,
                     saocom_tinitaly_metrics, tolerance_tin,
                     tin_higher, tin_lower, tin_stats, extent)

# Row 2: SAOCOM vs Copernicus
_plot_comparison_row(axes[1], 'Copernicus', diff_grid_cop, points_cop,
                     saocom_copernicus_metrics, tolerance_cop,
                     cop_higher, cop_lower, cop_stats, extent)

plt.tight_layout()
plt.savefig(RESULTS_DIR / 'saocom_comparison_directional.png',
            dpi=300, bbox_inches='tight', facecolor='white')
plt.show()

# =============================================================================
# 4. SUMMARY STATISTICS TABLE
# =============================================================================
# One row per comparison: point-based metrics (count, mean, RMSE, NMAD) next
# to the higher / lower / equal percentages of the gridded differences.
print("\n" + "="*100)
print("SAOCOM HEIGHT COMPARISON SUMMARY")
print("="*100)

summary_data = []
# The per-comparison dict is deliberately not called `stats`: as a
# module-level loop variable that name would rebind the `scipy.stats` import
# to a plain dict for every statement that runs afterwards.
for name, metrics, direction_stats, tolerance in [
    ('SAOCOM - TINITALY', saocom_tinitaly_metrics, tin_stats, tolerance_tin),
    ('SAOCOM - Copernicus', saocom_copernicus_metrics, cop_stats, tolerance_cop)
]:
    summary_data.append({
        'Comparison': name,
        'N_Points': f"{metrics['n_points']:,}",
        'Mean_Diff': f"{metrics['mean_diff']:+.2f} m",
        'RMSE': f"{metrics['rmse']:.2f} m",
        'NMAD': f"{metrics['nmad']:.2f} m",
        'Tolerance': f"±{tolerance:.2f} m",
        'Higher': f"{direction_stats['pct_higher']:.1f}%",
        'Lower': f"{direction_stats['pct_lower']:.1f}%",
        'Equal': f"{direction_stats['pct_equal']:.1f}%"
    })

summary_df = pd.DataFrame(summary_data)
print(summary_df.to_string(index=False))
print("="*100)

print("\nGridded comparison maps saved to:", RESULTS_DIR / 'saocom_comparison_directional.png')
# =============================================================================
# SAOCOM HEIGHT RESIDUAL DISTRIBUTIONS
# =============================================================================
# Two log-scaled histograms of the point differences (SAOCOM minus reference),
# one per reference DEM, each with zero / mean / ±tolerance reference lines
# and a statistics box. The figure is saved to RESULTS_DIR.


def _plot_residual_histogram(ax, diff, metrics, tolerance, title):
    """Draw one residual histogram panel.

    Args:
        ax: target axes.
        diff: 1-D array of height differences in metres.
        metrics: dict providing 'mean_diff', 'n_points', 'rmse', 'nmad'
            and 'std_diff'.
        tolerance: half-width of the "equal" band, drawn as ± lines.
        title: panel title.
    """
    ax.set_facecolor('white')

    # Create histogram
    ax.hist(diff, bins=10, alpha=0.7, color='steelblue', edgecolor='black')

    # Add reference lines (only the positive tolerance line carries a label,
    # so the legend shows a single ± entry)
    ax.axvline(0, color='red', linestyle='--', linewidth=2, label='Zero')
    ax.axvline(metrics['mean_diff'], color='green', linestyle='-',
               linewidth=2, label=f'Mean: {metrics["mean_diff"]:+.2f}m')
    ax.axvline(tolerance, color='orange', linestyle='--',
               linewidth=1.5, label=f'±{tolerance:.2f}m')
    ax.axvline(-tolerance, color='orange', linestyle='--', linewidth=1.5)

    # Set axis limits to data extent plus 2 % padding. Skipped when all
    # residuals are identical: identical left/right limits would be degenerate.
    x_min = float(diff.min())
    x_max = float(diff.max())
    x_padding = (x_max - x_min) * 0.02
    if x_max > x_min:
        ax.set_xlim(x_min - x_padding, x_max + x_padding)

    # Labels and styling
    ax.set_xlabel('Elevation Difference (m)', color='black', fontsize=12)
    ax.set_ylabel('Frequency', color='black', fontsize=12)
    ax.set_title(title, fontweight='bold', fontsize=13, color='black')
    ax.tick_params(colors='black', labelsize=11)
    ax.set_yscale('log')
    ax.legend(loc='upper right', fontsize=10)
    ax.grid(True, alpha=0.3, color='black', linewidth=0.5)
    for spine in ax.spines.values():
        spine.set_edgecolor('black')

    # Add statistics box
    stats_text = (f"n = {metrics['n_points']:,}\n"
                  f"RMSE = {metrics['rmse']:.2f} m\n"
                  f"NMAD = {metrics['nmad']:.2f} m\n"
                  f"Std Dev = {metrics['std_diff']:.2f} m")
    ax.text(0.98, 0.97, stats_text, transform=ax.transAxes, fontsize=9,
            verticalalignment='top', horizontalalignment='right',
            bbox=dict(boxstyle='round', facecolor='white', alpha=0.9,
                     edgecolor='black'))


fig, axes = plt.subplots(1, 2, figsize=(18, 7), facecolor='white')

# Plot 1: SAOCOM vs TINITALY Distribution
_plot_residual_histogram(axes[0], saocom_tinitaly_diff, saocom_tinitaly_metrics,
                         saocom_tinitaly_tolerance,
                         'SAOCOM - TINITALY Distribution')

# Plot 2: SAOCOM vs Copernicus Distribution
_plot_residual_histogram(axes[1], saocom_copernicus_diff, saocom_copernicus_metrics,
                         saocom_copernicus_tolerance,
                         'SAOCOM - Copernicus Distribution')

plt.tight_layout()
plt.savefig(RESULTS_DIR / 'saocom_residual_distributions.png',
            dpi=300, bbox_inches='tight', facecolor='white')
plt.show()

print("\nSaved: saocom_residual_distributions.png")

# =============================================================================
# SAOCOM HEIGHT OUTLIER DETECTION AND VISUALIZATION
# =============================================================================
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

# =============================================================================
# 1. IDENTIFY OUTLIERS USING MULTIPLE METHODS
# =============================================================================
# Three detectors vote on every valid SAOCOM height; a point is an outlier
# when at least two of them flag it.

# Filter for valid SAOCOM heights
valid_saocom = saocom_gdf[saocom_gdf['HEIGHT_ABSOLUTE_TIN'].notna()].copy()

heights = valid_saocom['HEIGHT_ABSOLUTE_TIN'].values
if heights.size == 0:
    # Percentiles and the later percentage of outliers are undefined without data
    raise ValueError("No valid HEIGHT_ABSOLUTE_TIN values for outlier detection")

# Method 1: IQR Method (robust)
q1 = np.percentile(heights, 25)
q3 = np.percentile(heights, 75)
iqr = q3 - q1
lower_bound_iqr = q1 - 3 * iqr  # 3*IQR for extreme outliers
upper_bound_iqr = q3 + 3 * iqr

outliers_iqr = (heights < lower_bound_iqr) | (heights > upper_bound_iqr)

# Method 2: Z-score Method (assuming normal distribution)
z_scores = np.abs(stats.zscore(heights))
outliers_zscore = z_scores > 3  # 3 standard deviations

# Method 3: Modified Z-score (NMAD-based, more robust)
median_height = np.median(heights)
mad = np.median(np.abs(heights - median_height))
if mad > 0:
    modified_z_scores = 0.6745 * (heights - median_height) / mad
else:
    # MAD is zero when more than half of the heights equal the median.
    # Dividing by it would give ±inf/NaN scores and flag every point that
    # differs from the median, so this detector abstains instead.
    modified_z_scores = np.zeros(heights.shape, dtype=float)
outliers_nmad = np.abs(modified_z_scores) > 3.5

# Combine methods (point is outlier if flagged by at least 2 methods)
outlier_votes = outliers_iqr.astype(int) + outliers_zscore.astype(int) + outliers_nmad.astype(int)
is_outlier = outlier_votes >= 2

# Add outlier flag to GeoDataFrame
valid_saocom['is_outlier'] = is_outlier

# Separate normal and outlier points
normal_points = valid_saocom[~valid_saocom['is_outlier']]
outlier_points = valid_saocom[valid_saocom['is_outlier']]

# =============================================================================
# 2. CALCULATE OUTLIER STATISTICS
# =============================================================================
# Collect counts, per-group height statistics and the detection thresholds in
# one dict. Outlier statistics are NaN when no point was flagged.
n_total = len(valid_saocom)
n_outliers = len(outlier_points)
pct_outliers = 100 * n_outliers / n_total

outlier_stats = {
    'n_total': n_total,
    'n_outliers': n_outliers,
    'pct_outliers': pct_outliers,
}

# Same four summary statistics for both groups, in the order normal -> outlier
height_groups = (
    ('normal', normal_points['HEIGHT_ABSOLUTE_TIN'], True),
    ('outlier', outlier_points['HEIGHT_ABSOLUTE_TIN'], n_outliers > 0),
)
for group_name, group_heights, has_data in height_groups:
    for stat_name in ('mean', 'std', 'min', 'max'):
        if has_data:
            stat_value = getattr(group_heights, stat_name)()
        else:
            stat_value = np.nan
        outlier_stats[f'{group_name}_{stat_name}'] = stat_value

outlier_stats['iqr_bounds'] = (lower_bound_iqr, upper_bound_iqr)
outlier_stats['median'] = median_height
outlier_stats['q1'] = q1
outlier_stats['q3'] = q3

print(outlier_stats)
# =============================================================================
# 3. VISUALIZATION - SPATIAL MAP WITH OUTLIERS
# =============================================================================
# 2 x 2 figure; this part creates it and fills the top-left panel with the
# point locations, outliers drawn on top of the normal points.
fig, axes = plt.subplots(2, 2, figsize=(20, 18), facecolor='white')

extent = [xmin_grid, xmax_grid, ymin_grid, ymax_grid]

# Plot 1: Spatial Distribution with Outliers Highlighted
ax = axes[0, 0]
ax.set_facecolor('white')

# Optional Sentinel RGB backdrop (currently disabled):
# ax.imshow(sentinel_rgb_norm, extent=extent, origin='upper', alpha=0.5)

# Normal points: small and semi-transparent so they form the background
normal_label = f'Normal Points (n={len(normal_points):,})'
normal_points.plot(ax=ax, markersize=0.5, color='#2E86AB', alpha=0.4,
                   label=normal_label)

# Outlier points: larger, bright and outlined so they stand out
if n_outliers > 0:
    outlier_label = f'Outliers (n={n_outliers:,})'
    outlier_points.plot(ax=ax, markersize=8, color='#E63946', alpha=0.9,
                        edgecolors='black', linewidth=0.3,
                        label=outlier_label)

# Study area boundary
hull_gdf.boundary.plot(ax=ax, color='black', linewidth=2, linestyle='--')

ax.set_title('SAOCOM Height Outliers - Spatial Distribution',
             fontweight='bold', fontsize=13, color='black')
ax.set_xlabel('UTM Easting (m)', fontsize=11, color='black')
ax.set_ylabel('UTM Northing (m)', fontsize=11, color='black')
ax.grid(True, alpha=0.3, color='black', linewidth=0.5)
ax.legend(loc='upper right', fontsize=10)

# Statistics box: counts plus the thresholds used by the IQR detector
stats_text = "\n".join([
    f"Total Points: {n_total:,}",
    f"Outliers: {n_outliers:,} ({pct_outliers:.2f}%)",
    "",
    "Detection Criteria:",
    f"- IQR bounds: [{lower_bound_iqr:.1f}, {upper_bound_iqr:.1f}] m",
    f"- Median: {median_height:.1f} m",
    f"- Q1-Q3: [{q1:.1f}, {q3:.1f}] m",
])

ax.text(0.02, 0.98, stats_text, transform=ax.transAxes, fontsize=9,
        verticalalignment='top', fontfamily='monospace',
        bbox=dict(boxstyle='round', facecolor='white', alpha=0.9,
                 edgecolor='black'))

# Plot 2: Height Distribution with Outlier Thresholds
ax = axes[0, 1]
ax.set_facecolor('white')

# Histogram of the non-outlier heights, with the outliers overlaid when present
ax.hist(normal_points['HEIGHT_ABSOLUTE_TIN'], bins=50, alpha=0.6,
        color='#2E86AB', edgecolor='black', label='Normal Points')
if n_outliers > 0:
    ax.hist(outlier_points['HEIGHT_ABSOLUTE_TIN'], bins=20, alpha=0.8,
            color='#E63946', edgecolor='black', label='Outliers')

# Threshold lines: the two 3*IQR bounds (dashed orange) and the median (solid green)
threshold_lines = (
    (lower_bound_iqr, 'orange', '--', f'Lower Bound: {lower_bound_iqr:.1f}m'),
    (upper_bound_iqr, 'orange', '--', f'Upper Bound: {upper_bound_iqr:.1f}m'),
    (median_height, 'green', '-', f'Median: {median_height:.1f}m'),
)
for line_x, line_color, line_style, line_label in threshold_lines:
    ax.axvline(line_x, color=line_color, linestyle=line_style, linewidth=2,
               label=line_label)

ax.set_title('Height Distribution with Outlier Thresholds',
             fontweight='bold', fontsize=13, color='black')
ax.set_xlabel('Height (m)', fontsize=11, color='black')
ax.set_ylabel('Frequency', fontsize=11, color='black')
ax.tick_params(colors='black')
ax.grid(True, alpha=0.3, color='black', linewidth=0.5)
ax.legend(loc='upper right', fontsize=9)

# Plot 3: Box Plot Comparison
# Side-by-side box plots of the normal and (when present) outlier heights,
# with a statistics box summarising both groups.
ax = axes[1, 0]
ax.set_facecolor('white')

box_data = [normal_points['HEIGHT_ABSOLUTE_TIN'].dropna()]
box_labels = ['Normal\nPoints']

if n_outliers > 0:
    box_data.append(outlier_points['HEIGHT_ABSOLUTE_TIN'].dropna())
    box_labels.append('Outlier\nPoints')

# Tick labels are set on the axis afterwards rather than through boxplot's
# `labels` keyword, which is deprecated since Matplotlib 3.9 (renamed to
# `tick_labels`); set_xticklabels works on both older and newer versions.
bp = ax.boxplot(box_data, patch_artist=True,
                showmeans=True, meanline=True)
ax.set_xticklabels(box_labels)

# Color the boxes (zip stops at the number of boxes actually drawn)
for box_patch, box_color in zip(bp['boxes'], ('#2E86AB', '#E63946')):
    box_patch.set_facecolor(box_color)
    box_patch.set_alpha(0.6)

ax.set_ylabel('Height (m)', fontsize=11, color='black')
ax.set_title('Height Distribution Comparison',
             fontweight='bold', fontsize=13, color='black')
ax.grid(True, alpha=0.3, color='black', linewidth=0.5, axis='y')
ax.tick_params(colors='black')

# Add statistics
stats_text = f"""Normal Points:
Mean: {outlier_stats['normal_mean']:.1f} m
Std: {outlier_stats['normal_std']:.1f} m
Range: [{outlier_stats['normal_min']:.1f}, {outlier_stats['normal_max']:.1f}]"""

if n_outliers > 0:
    stats_text += f"""

Outliers:
Mean: {outlier_stats['outlier_mean']:.1f} m
Std: {outlier_stats['outlier_std']:.1f} m
Range: [{outlier_stats['outlier_min']:.1f}, {outlier_stats['outlier_max']:.1f}]"""

ax.text(0.02, 0.98, stats_text, transform=ax.transAxes, fontsize=9,
        verticalalignment='top', fontfamily='monospace',
        bbox=dict(boxstyle='round', facecolor='white', alpha=0.9,
                 edgecolor='black'))

# Plot 4: Outlier Statistics Summary Table
# A text-only panel (axes hidden) listing how many points each detector
# flagged, the dataset totals and the per-group height statistics; the
# finished figure is then written to RESULTS_DIR.
ax = axes[1, 1]
ax.axis('off')

summary_lines = [
    "SAOCOM HEIGHT OUTLIER ANALYSIS SUMMARY",
    "",
    "DETECTION METHODS:",
    f"- IQR Method (3×IQR): {np.sum(outliers_iqr):,} flagged",
    f"- Z-score Method (|z| > 3): {np.sum(outliers_zscore):,} flagged",
    f"- Modified Z-score (NMAD): {np.sum(outliers_nmad):,} flagged",
    f"- Combined (≥2 methods): {n_outliers:,} outliers",
    "",
    "DATASET STATISTICS:",
    f"Total Points: {n_total:,}",
    f"Outlier Points: {n_outliers:,} ({pct_outliers:.2f}%)",
    f"Normal Points: {len(normal_points):,} ({100-pct_outliers:.2f}%)",
    "",
    "NORMAL POINT STATISTICS:",
    f"Mean Height: {outlier_stats['normal_mean']:.2f} m",
    f"Std Deviation: {outlier_stats['normal_std']:.2f} m",
    f"Median: {median_height:.2f} m",
    f"Q1-Q3: [{q1:.2f}, {q3:.2f}] m",
    f"IQR: {iqr:.2f} m",
    f"Range: [{outlier_stats['normal_min']:.1f}, {outlier_stats['normal_max']:.1f}] m",
]

# The outlier section is only added when at least one point was flagged
if n_outliers > 0:
    summary_lines += [
        "",
        "OUTLIER STATISTICS:",
        f"Mean Height: {outlier_stats['outlier_mean']:.2f} m",
        f"Std Deviation: {outlier_stats['outlier_std']:.2f} m",
        f"Range: [{outlier_stats['outlier_min']:.1f}, {outlier_stats['outlier_max']:.1f}] m",
        "",
        "OUTLIER BOUNDS (3×IQR):",
        f"Lower Bound: {lower_bound_iqr:.2f} m",
        f"Upper Bound: {upper_bound_iqr:.2f} m",
    ]

summary_text = "\n".join(summary_lines)

ax.text(0.1, 0.5, summary_text, transform=ax.transAxes, fontsize=10,
        verticalalignment='center', fontfamily='monospace',
        bbox=dict(boxstyle='round', facecolor='white', alpha=1.0,
                 edgecolor='black', linewidth=2))

plt.tight_layout()
plt.savefig(RESULTS_DIR / 'saocom_height_outliers.png',
            dpi=300, bbox_inches='tight', facecolor='white')
# =============================================================================
# COMPARISON: ALL POINTS vs OUTLIERS REMOVED
# =============================================================================
print("\n" + "="*80)
print("IMPACT OF OUTLIER REMOVAL ON SAOCOM HEIGHT STATISTICS")
print("="*80)

# Median and IQR are computed separately for each population so that the
# "Outliers Removed" column and the "Change" column report measured values
# (previously the all-points median/IQR were reused and the change was
# hard-coded to 0.00).
# NOTE(review): assumes `heights` holds the HEIGHT_ABSOLUTE_TIN values that
# `normal_points` was filtered from -- confirm against the detection cell.
normal_heights = normal_points['HEIGHT_ABSOLUTE_TIN'].dropna()

all_median = np.median(heights)
all_iqr = np.percentile(heights, 75) - np.percentile(heights, 25)
all_range = heights.max() - heights.min()

normal_median = normal_heights.median()
normal_iqr = normal_heights.quantile(0.75) - normal_heights.quantile(0.25)
normal_range = outlier_stats['normal_max'] - outlier_stats['normal_min']

comparison_data = {
    'Metric': ['Count', 'Mean (m)', 'Median (m)', 'Std Dev (m)',
               'Min (m)', 'Max (m)', 'Range (m)', 'IQR (m)'],
    'All Points': [
        f"{n_total:,}",
        f"{heights.mean():.2f}",
        f"{all_median:.2f}",
        f"{heights.std():.2f}",
        f"{heights.min():.2f}",
        f"{heights.max():.2f}",
        f"{all_range:.2f}",
        f"{all_iqr:.2f}",
    ],
    'Outliers Removed': [
        f"{len(normal_points):,}",
        f"{outlier_stats['normal_mean']:.2f}",
        f"{normal_median:.2f}",
        f"{outlier_stats['normal_std']:.2f}",
        f"{outlier_stats['normal_min']:.2f}",
        f"{outlier_stats['normal_max']:.2f}",
        f"{normal_range:.2f}",
        f"{normal_iqr:.2f}",
    ],
    # Every numeric change is printed with an explicit sign so increases and
    # decreases read the same way down the column.
    'Change': [
        f"-{n_outliers:,}",
        f"{outlier_stats['normal_mean'] - heights.mean():+.2f}",
        f"{normal_median - all_median:+.2f}",
        f"{outlier_stats['normal_std'] - heights.std():+.2f}",
        f"{outlier_stats['normal_min'] - heights.min():+.2f}",
        f"{outlier_stats['normal_max'] - heights.max():+.2f}",
        f"{normal_range - all_range:+.2f}",
        f"{normal_iqr - all_iqr:+.2f}",
    ],
}

comparison_df = pd.DataFrame(comparison_data)
print(comparison_df.to_string(index=False))
print("="*80)
# Display the outlier figure that was saved just before this section.
plt.show()

# =============================================================================
# PRINT SUMMARY
# =============================================================================
# Console recap of the outlier detection; the report is assembled as a list of
# lines and emitted with a single print call.
report_lines = [
    "\n" + "="*70,
    "SAOCOM HEIGHT OUTLIER DETECTION SUMMARY",
    "="*70,
    f"Total Points: {n_total:,}",
    f"Outliers Detected: {n_outliers:,} ({pct_outliers:.2f}%)",
    f"Normal Points: {len(normal_points):,} ({100-pct_outliers:.2f}%)",
    f"\nOutlier Detection Bounds (3×IQR):",
    f"  Lower: {lower_bound_iqr:.2f} m",
    f"  Upper: {upper_bound_iqr:.2f} m",
    "="*70,
    "\nSaved: saocom_height_outliers.png",
]
print("\n".join(report_lines))
# =============================================================================
# SCATTER PLOT COMPARISONS - POINT DENSITY
# =============================================================================
def _plot_height_scatter(ax, x, y, *, xlabel, ylabel, title, metrics, n_points):
    """Draw one height-vs-height scatter panel with a 1:1 line and a stats box.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    x, y : height values for the horizontal / vertical axis; both must expose
        ``.min()`` and ``.max()`` (pandas Series or NumPy arrays).
    xlabel, ylabel, title : axis labels and panel title.
    metrics : mapping providing 'mean_diff', 'rmse', 'mae', 'std_diff', 'nmad'
        and 'correlation'.
    n_points : sample count shown in the stats box (passed separately because
        the metric dicts store it under different keys).
    """
    ax.set_facecolor('white')
    ax.scatter(x, y, s=1, alpha=0.3, c='steelblue', edgecolors='none')

    # The 1:1 reference line spans the joint range of both datasets.
    lo = min(x.min(), y.min())
    hi = max(x.max(), y.max())
    ax.plot([lo, hi], [lo, hi], 'r--', linewidth=2, label='1:1 line')

    ax.set_xlabel(xlabel, color='black', fontsize=11)
    ax.set_ylabel(ylabel, color='black', fontsize=11)
    ax.set_title(title, fontweight='bold', fontsize=12, color='black')
    ax.grid(True, color='black', alpha=0.3, linewidth=0.5)
    ax.tick_params(colors='black')
    ax.legend()

    stats_text = f"""n = {n_points:,}
Bias (ME) = {metrics['mean_diff']:.2f} m
RMSE = {metrics['rmse']:.2f} m
MAE = {metrics['mae']:.2f} m
Std Dev = {metrics['std_diff']:.2f} m
NMAD = {metrics['nmad']:.2f} m
Correlation (r) = {metrics['correlation']:.3f}"""
    ax.text(0.02, 0.98, stats_text, transform=ax.transAxes, fontsize=9, verticalalignment='top',
            fontfamily='monospace', bbox=dict(boxstyle='round', facecolor='white', alpha=0.9, edgecolor='black'))
    for spine in ax.spines.values():
        spine.set_edgecolor('black')


fig, axes = plt.subplots(2, 2, figsize=(18, 16), facecolor='white')

# Plot 1: SAOCOM vs TINITALY (rows with a missing height in either column are dropped)
valid_st = saocom_tinitaly_valid[['HEIGHT_ABSOLUTE_TIN', 'tinitaly_height']].dropna()
_plot_height_scatter(
    axes[0, 0],
    valid_st['tinitaly_height'],
    valid_st['HEIGHT_ABSOLUTE_TIN'],
    xlabel='TINITALY Height (m)',
    ylabel='SAOCOM Height (m)',
    title='SAOCOM vs TINITALY',
    metrics=saocom_tinitaly_metrics,
    n_points=saocom_tinitaly_metrics['n_points'],
)

# Plot 2: SAOCOM vs Copernicus
valid_sc = saocom_copernicus_valid[['HEIGHT_ABSOLUTE_COP', 'copernicus_height']].dropna()
_plot_height_scatter(
    axes[0, 1],
    valid_sc['copernicus_height'],
    valid_sc['HEIGHT_ABSOLUTE_COP'],
    xlabel='Copernicus Height (m)',
    ylabel='SAOCOM Height (m)',
    title='SAOCOM vs Copernicus',
    metrics=saocom_copernicus_metrics,
    n_points=saocom_copernicus_metrics['n_points'],
)

# Plot 3: TINITALY (y) vs Copernicus (x); the reference metrics store the
# sample count under 'n_pixels' rather than 'n_points'.
_plot_height_scatter(
    axes[1, 0],
    valid_copernicus,
    valid_tinitaly,
    xlabel='Copernicus Height (m)',
    ylabel='TINITALY Height (m)',
    title='TINITALY vs Copernicus',
    metrics=ref_metrics,
    n_points=ref_metrics['n_pixels'],
)

# Hide unused subplot
axes[1, 1].axis('off')

plt.tight_layout()
plt.show()

In [None]:
# =============================================================================
# VOID ZONES vs LAND COVER ANALYSIS
# =============================================================================

# =============================================================================
# 1. CALCULATE VOID STATISTICS BY LAND COVER
# =============================================================================
# For every land cover code (> 0) found in corine_10m, count its cells inside
# the study area and how many of those are void, then tabulate the result.
void_lc_stats = []

# Loop invariant: total number of void cells (denominator of the contribution
# percentage). Computed once instead of once per land cover class.
total_void_cells = np.sum(void_mask)

for lc_code in np.unique(corine_10m[corine_10m > 0]):
    lc_mask = (corine_10m == lc_code) & study_area_mask

    total_lc_cells = np.sum(lc_mask)
    void_lc_cells = np.sum(lc_mask & void_mask)

    # Classes that do not occur inside the study area are skipped.
    if total_lc_cells == 0:
        continue

    pct_lc_is_void = 100 * void_lc_cells / total_lc_cells
    # Without any void cells the share is reported as 0 instead of 0/0 (NaN).
    pct_of_total_voids = (100 * void_lc_cells / total_void_cells
                          if total_void_cells > 0 else 0.0)

    void_lc_stats.append({
        'LC_Code': lc_code,
        'LC_Name': CORINE_CLASSES.get(lc_code, f'Unknown_{lc_code}'),
        'Total_Cells': total_lc_cells,
        'Void_Cells': void_lc_cells,
        'Area_km2': total_lc_cells * (GRID_SIZE**2) / 1e6,
        'Void_Area_km2': void_lc_cells * (GRID_SIZE**2) / 1e6,
        'Pct_LC_is_Void': pct_lc_is_void,
        'Pct_of_Total_Voids': pct_of_total_voids
    })

# Explicit column names keep the frame well-formed (and sortable) even when no
# land cover class produced a row.
void_lc_columns = ['LC_Code', 'LC_Name', 'Total_Cells', 'Void_Cells',
                   'Area_km2', 'Void_Area_km2',
                   'Pct_LC_is_Void', 'Pct_of_Total_Voids']
void_lc_df = pd.DataFrame(void_lc_stats, columns=void_lc_columns).sort_values(
    'Pct_LC_is_Void', ascending=False)

# Display table
print(f"\n{'='*120}")
print("VOID ZONES BY LAND COVER CLASS")
print(f"{'='*120}")
print(void_lc_df[['LC_Code', 'LC_Name', 'Area_km2', 'Void_Area_km2', 'Pct_LC_is_Void', 'Pct_of_Total_Voids']].to_string(index=False))
print(f"{'='*120}\n")

# =============================================================================
# 2. MAP: VOID ZONES WITH LAND COVER OVERLAY
# =============================================================================
# Shows the land cover classes only where SAOCOM has no data (void_mask), on
# top of a faded Sentinel RGB image.
fig, ax = plt.subplots(1, 1, figsize=(16, 12), facecolor='white')
ax.set_facecolor('white')

extent = [xmin_grid, xmax_grid, ymin_grid, ymax_grid]

# Sentinel background
ax.imshow(sentinel_rgb_norm, extent=extent, origin='upper', alpha=0.6)

# Land cover in void zones only: non-void cells are zeroed and then masked out.
lc_in_voids = corine_10m.copy()
lc_in_voids[~void_mask] = 0
lc_display = np.ma.masked_where(lc_in_voids == 0, lc_in_voids)

void_codes = np.unique(lc_display.compressed())

# Gray fallback for codes missing from the colour table; shared by the raster
# colormap and the legend so an unknown code cannot raise KeyError in one of
# them while rendering fine in the other.
unknown_color = (0.5, 0.5, 0.5)

if len(void_codes) > 0:
    # One discrete colour per code; bin edges sit half a unit below each code.
    # NOTE(review): `cmap` and `norm` are notebook-level names that a later
    # cell in this file also reads when drawing `corine_display`; running this
    # cell rebinds them to the void-only classes.
    colors_list = [CORINE_COLORS_MPL.get(code, unknown_color) for code in void_codes]
    cmap = ListedColormap(colors_list)
    norm = BoundaryNorm(boundaries=np.append(void_codes, void_codes[-1]+1) - 0.5,
                        ncolors=len(void_codes))

    im = ax.imshow(lc_display, cmap=cmap, norm=norm, origin='upper',
                  extent=extent, alpha=0.7)

# Study area boundary
hull_gdf.boundary.plot(ax=ax, color='black', linewidth=2.5, linestyle='--')

# Statistics box (cell counts converted to km² via the grid cell size)
void_area = np.sum(void_mask) * (GRID_SIZE**2) / 1e6
total_area = np.sum(study_area_mask) * (GRID_SIZE**2) / 1e6
pct_void = 100 * np.sum(void_mask) / np.sum(study_area_mask)

stats_text = f"""Void Area: {void_area:.2f} km²
Total Area: {total_area:.2f} km²
Void %: {pct_void:.1f}%
LC Classes: {len(void_codes)}"""

ax.text(0.02, 0.98, stats_text, transform=ax.transAxes, fontsize=11,
        verticalalignment='top', bbox=dict(boxstyle='round', facecolor='white',
        alpha=0.9, edgecolor='black'))

# Legend: one swatch per land cover code present in the void zones
if len(void_codes) > 0:
    legend_elements = [mpatches.Rectangle((0,0),1,1,
                                         facecolor=CORINE_COLORS_MPL.get(code, unknown_color),
                                         edgecolor='black', linewidth=0.5,
                                         label=f"{code}: {CORINE_CLASSES.get(code, 'Unknown')}")
                      for code in sorted(void_codes)]

    ax.legend(handles=legend_elements, loc='center left', bbox_to_anchor=(1, 0.5),
             fontsize=9, frameon=True, fancybox=False, edgecolor='black',
             title='Land Cover in Void Zones')

ax.set_title('Land Cover Distribution in SAOCOM Void Zones',
             fontweight='bold', fontsize=14, pad=15)
ax.set_xlabel('UTM Easting (m)', fontsize=11)
ax.set_ylabel('UTM Northing (m)', fontsize=11)
ax.grid(True, alpha=0.3, color='gray', linewidth=0.5)

plt.tight_layout()
plt.savefig(RESULTS_DIR / 'voids_by_landcover_map.png', dpi=300, bbox_inches='tight', facecolor='white')
plt.show()

print("Saved: voids_by_landcover_map.png")

# =============================================================================
# 3. BAR CHART: WHICH LAND COVERS HAVE THE MOST VOIDS
# =============================================================================
# Two horizontal bar charts side by side, each limited to the 15 largest
# entries and coloured with the CORINE colour of the class (gray if unknown).
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 8), facecolor='white')

# Chart 1: % of each land cover that is void
top_pct = void_lc_df.nlargest(15, 'Pct_LC_is_Void')
pct_positions = range(len(top_pct))
bars1 = ax1.barh(pct_positions, top_pct['Pct_LC_is_Void'])

for bar, code in zip(bars1, top_pct['LC_Code']):
    bar.set_color(CORINE_COLORS_MPL.get(code, (0.5, 0.5, 0.5)))
    bar.set_edgecolor('black')
    bar.set_linewidth(0.5)

pct_tick_labels = [f"{code}: {name[:35]}"
                   for code, name in zip(top_pct['LC_Code'], top_pct['LC_Name'])]
ax1.set_yticks(pct_positions)
ax1.set_yticklabels(pct_tick_labels, fontsize=9)
ax1.set_xlabel('% of Land Cover Class that is Void', fontsize=11)
ax1.set_title('Land Covers with Highest Void Percentage\n(Worst Coverage Performance)',
              fontweight='bold', fontsize=12)
ax1.grid(axis='x', alpha=0.3, linestyle='--')
# Vertical marker at the overall void percentage for comparison.
ax1.axvline(pct_void, color='red', linestyle='--', linewidth=2,
            label=f'Overall Void Rate: {pct_void:.1f}%')
ax1.legend()

# Chart 2: Contribution to total voids
top_contrib = void_lc_df.nlargest(15, 'Pct_of_Total_Voids')
contrib_positions = range(len(top_contrib))
bars2 = ax2.barh(contrib_positions, top_contrib['Pct_of_Total_Voids'])

for bar, code in zip(bars2, top_contrib['LC_Code']):
    bar.set_color(CORINE_COLORS_MPL.get(code, (0.5, 0.5, 0.5)))
    bar.set_edgecolor('black')
    bar.set_linewidth(0.5)

contrib_tick_labels = [f"{code}: {name[:35]}"
                       for code, name in zip(top_contrib['LC_Code'], top_contrib['LC_Name'])]
ax2.set_yticks(contrib_positions)
ax2.set_yticklabels(contrib_tick_labels, fontsize=9)
ax2.set_xlabel('% of Total Void Area', fontsize=11)
ax2.set_title('Land Covers Contributing Most to Total Voids\n(Largest Void Areas)',
              fontweight='bold', fontsize=12)
ax2.grid(axis='x', alpha=0.3, linestyle='--')

plt.tight_layout()
plt.savefig(RESULTS_DIR / 'voids_by_landcover_charts.png', dpi=300, bbox_inches='tight', facecolor='white')
plt.show()

print("Saved: voids_by_landcover_charts.png")
print("\nVoid analysis complete!")

In [None]:
# =============================================================================
# VOID ZONES vs LAND COVER - "SWISS CHEESE" VISUALIZATION
# =============================================================================

# =============================================================================
# LAND COVER WITH VOIDS - OUTLINED POLYGONS VERSION
# =============================================================================
# Land cover is drawn only where SAOCOM has coverage; void zones stay white.

fig, ax = plt.subplots(1, 1, figsize=(16, 12), facecolor='white')
ax.set_facecolor('white')

extent = [xmin_grid, xmax_grid, ymin_grid, ymax_grid]
unknown_color = (0.5, 0.5, 0.5)  # fallback for codes missing from the colour table

# Get unique land cover classes that have at least one covered (non-void) cell
display_codes = np.unique(corine_10m[(corine_10m > 0) & ~void_mask])

# Faint raster backdrop. Its colormap is built here from the codes actually
# present in the raster instead of reading the notebook-level `cmap` / `norm`,
# which the void-map cell earlier in this file rebinds to the void-only
# classes (that would colour this backdrop with the wrong class bins).
# NOTE(review): assumes `corine_display` holds CORINE class codes with no-data
# cells masked or <= 0 -- confirm where it is defined.
backdrop = np.ma.masked_invalid(corine_display)
backdrop = np.ma.masked_where(backdrop <= 0, backdrop)
backdrop_codes = np.unique(backdrop.compressed())

if len(backdrop_codes) > 0:
    backdrop_cmap = ListedColormap(
        [CORINE_COLORS_MPL.get(code, unknown_color) for code in backdrop_codes])
    backdrop_edges = np.append(backdrop_codes, backdrop_codes[-1] + 1).astype(float) - 0.5
    backdrop_norm = BoundaryNorm(boundaries=backdrop_edges, ncolors=len(backdrop_codes))
    im = ax.imshow(backdrop, cmap=backdrop_cmap, norm=backdrop_norm,
                   origin='upper', extent=extent, alpha=0.2)

print("Vectorizing land cover polygons (this may take a moment)...")

# Process each land cover class
for lc_code in sorted(display_codes):
    # Create mask: this land cover AND has SAOCOM coverage (not void)
    lc_with_coverage = (corine_10m == lc_code) & (~void_mask)

    # Vectorize to get boundaries (only cells inside the mask are polygonised)
    mask_shapes = shapes(lc_with_coverage.astype(np.uint8),
                        mask=lc_with_coverage,
                        transform=target_transform)

    # Convert to shapely polygons, keeping only valid geometries
    polys = [shape(geom) for geom, val in mask_shapes if val == 1]
    polys = [poly for poly in polys if poly.is_valid]
    if not polys:
        continue

    # Get color for this land cover
    fill_color = CORINE_COLORS_MPL.get(lc_code, unknown_color)

    # Draw the whole class as one collection through GeoPandas. Unlike filling
    # `poly.exterior` alone, this respects interior rings, so void pockets
    # enclosed by a coverage polygon remain unfilled (white).
    class_polys = gpd.GeoSeries(polys)

    # Very faint fill
    class_polys.plot(ax=ax, facecolor=fill_color, edgecolor='none',
                     alpha=0.15, zorder=1)

    # Colored outline (exterior and hole boundaries)
    class_polys.boundary.plot(ax=ax, color=fill_color, linewidth=1.5,
                              alpha=0.9, zorder=2)

# Study area boundary (on top)
hull_gdf.boundary.plot(ax=ax, color='black', linewidth=2.5, linestyle='--', zorder=3)

# Statistics (cell counts converted to km² via the grid cell size)
void_area = np.sum(void_mask) * (GRID_SIZE**2) / 1e6
total_area = np.sum(study_area_mask) * (GRID_SIZE**2) / 1e6
pct_void = 100 * np.sum(void_mask) / np.sum(study_area_mask)

stats_text = f"""Void Area: {void_area:.2f} km²
Coverage Area: {total_area - void_area:.2f} km²
Void %: {pct_void:.1f}%
Coverage %: {100 - pct_void:.1f}%"""

ax.text(0.02, 0.98, stats_text, transform=ax.transAxes, fontsize=13,
        verticalalignment='top', bbox=dict(boxstyle='round', facecolor='white',
        alpha=0.9, edgecolor='black'), zorder=4)

# Legend: one swatch per covered land cover class plus a white "void" entry
legend_elements = []
for code in sorted(display_codes):
    color = CORINE_COLORS_MPL.get(code, unknown_color)
    legend_elements.append(mpatches.Rectangle((0,0),1,1,
                                             facecolor=color,
                                             edgecolor=color,
                                             alpha=0.3,
                                             linewidth=2,
                                             label=f"{code}: {CORINE_CLASSES.get(code, 'Unknown')}"))

legend_elements.append(mpatches.Rectangle((0,0),1,1,
                                         facecolor='white',
                                         edgecolor='gray',
                                         linewidth=1,
                                         label='VOID (No SAOCOM Data)'))

ax.legend(handles=legend_elements, loc='center left', bbox_to_anchor=(1, 0.5),
         fontsize=13, frameon=True, fancybox=False, edgecolor='black',
         title='Land Cover Classes')

ax.set_title('CORINE Land Cover with SAOCOM Void Zones\n(White areas = No coverage)',
             fontweight='bold', fontsize=14, pad=15)
ax.set_xlabel('UTM Easting (m)', fontsize=11)
ax.set_ylabel('UTM Northing (m)', fontsize=11)
ax.set_xlim(extent[0], extent[1])
ax.set_ylim(extent[2], extent[3])
ax.grid(True, alpha=0.3, color='gray', linewidth=0.5)

plt.tight_layout()
plt.savefig(RESULTS_DIR / 'landcover_with_voids_outlined.png', dpi=300, bbox_inches='tight', facecolor='white')
plt.show()

print("Saved: landcover_with_voids_outlined.png")

# =============================================================================
# 3. BAR CHART WITH REFERENCE LINES
# =============================================================================
# Same two top-15 bar charts as the plain version, but the left chart gets a
# fixed 0-105 % axis with vertical guide lines instead of the overall-rate line.
# NOTE(review): this writes 'voids_by_landcover_charts.png', the same file name
# the plain bar-chart section of this file saves to -- confirm the overwrite is
# intended.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 8), facecolor='white')

# Chart 1: % of each land cover that is void
top_pct = void_lc_df.nlargest(15, 'Pct_LC_is_Void')
pct_positions = range(len(top_pct))
bars1 = ax1.barh(pct_positions, top_pct['Pct_LC_is_Void'])

for bar, code in zip(bars1, top_pct['LC_Code']):
    bar.set_color(CORINE_COLORS_MPL.get(code, (0.5, 0.5, 0.5)))
    bar.set_edgecolor('black')
    bar.set_linewidth(0.5)

pct_tick_labels = [f"{code}: {name[:35]}"
                   for code, name in zip(top_pct['LC_Code'], top_pct['LC_Name'])]
ax1.set_yticks(pct_positions)
ax1.set_yticklabels(pct_tick_labels, fontsize=9)
ax1.set_xlabel('% of Land Cover Class that is Void', fontsize=11)
ax1.set_title('Land Covers with Highest Void Percentage\n(Worst Coverage Performance)',
              fontweight='bold', fontsize=12)
ax1.grid(axis='x', alpha=0.3, linestyle='--')

# Add reference lines: solid at 100 %, dashed and labelled at every 20 %
ax1.axvline(100, color='black', linestyle='-', linewidth=2, label='100% (Total Void)', zorder=3)
for pct in (20, 40, 60, 80):
    ax1.axvline(pct, color='gray', linestyle='--', linewidth=1.5, alpha=0.7, zorder=3)
    ax1.text(pct, -0.5, f'{pct}%', ha='center', fontsize=9, color='gray')

ax1.set_xlim(0, 105)
# The legend is intentionally not drawn on this chart.

# Chart 2: Contribution to total voids
top_contrib = void_lc_df.nlargest(15, 'Pct_of_Total_Voids')
contrib_positions = range(len(top_contrib))
bars2 = ax2.barh(contrib_positions, top_contrib['Pct_of_Total_Voids'])

for bar, code in zip(bars2, top_contrib['LC_Code']):
    bar.set_color(CORINE_COLORS_MPL.get(code, (0.5, 0.5, 0.5)))
    bar.set_edgecolor('black')
    bar.set_linewidth(0.5)

contrib_tick_labels = [f"{code}: {name[:35]}"
                       for code, name in zip(top_contrib['LC_Code'], top_contrib['LC_Name'])]
ax2.set_yticks(contrib_positions)
ax2.set_yticklabels(contrib_tick_labels, fontsize=9)
ax2.set_xlabel('% of Total Void Area', fontsize=11)
ax2.set_title('Land Covers Contributing Most to Total Voids\n(Largest Void Areas)',
              fontweight='bold', fontsize=12)
ax2.grid(axis='x', alpha=0.3, linestyle='--')

plt.tight_layout()
plt.savefig(RESULTS_DIR / 'voids_by_landcover_charts.png', dpi=300, bbox_inches='tight', facecolor='white')
plt.show()

print("Saved: voids_by_landcover_charts.png")
print("\nVoid analysis complete!")

In [None]:
# =============================================================================
# INDIVIDUAL LAND COVER MAPS WITH VOID VISUALIZATION
# Combines outlined polygons with void zone identification
# =============================================================================
# One figure per land cover class: areas with SAOCOM coverage are drawn in the
# class colour, areas of the same class without coverage (voids) in white with
# a red dashed outline. Every figure is saved to RESULTS_DIR.

from matplotlib.patches import Patch
from shapely.geometry import shape
from rasterio.features import shapes
import numpy as np
import matplotlib.pyplot as plt

# Get unique classes present in data
unique_classes = np.unique(corine_10m[corine_10m > 0])

# Loop invariant: number of classified cells, the denominator of each class's
# share. Computed once instead of twice per class.
# NOTE(review): the title calls this share "% of study area", but the
# denominator counts every classified cell of corine_10m, not study_area_mask
# -- confirm which is intended.
classified_pixels = np.sum(corine_10m > 0)

print(f"Processing {len(unique_classes)} land cover classes...")

# Create one map per class with void visualization
for lc_code in sorted(unique_classes):
    fig, ax = plt.subplots(1, 1, figsize=(14, 10), facecolor='white')
    ax.set_facecolor('white')

    # Display Sentinel RGB as background
    ax.imshow(sentinel_rgb_norm, extent=[xmin_grid, xmax_grid, ymin_grid, ymax_grid],
              origin='upper', alpha=0.4)  # Lower alpha for better overlay visibility

    # Get color for this land cover class (CORINE_COLORS stores 0-255 RGB)
    fill_color = tuple(c/255 for c in CORINE_COLORS.get(lc_code, (128, 128, 128)))

    # Create masks
    lc_mask = (corine_10m == lc_code)  # All areas of this land cover
    lc_with_coverage = lc_mask & (~void_mask)  # Areas WITH SAOCOM coverage
    lc_void = lc_mask & void_mask  # Areas WITHOUT SAOCOM coverage (voids)

    # Polygons are drawn as GeoPandas collections rather than by filling
    # `poly.exterior` one at a time: exterior-only fills ignore interior rings,
    # so a void polygon surrounding a coverage island painted white over that
    # island (and vice versa). GeoSeries plotting keeps the holes open.

    # =======================
    # 1. Draw COVERAGE areas (with data)
    # =======================
    if np.any(lc_with_coverage):
        # Vectorize coverage areas
        coverage_shapes = shapes(lc_with_coverage.astype(np.uint8),
                               mask=lc_with_coverage,
                               transform=target_transform)
        coverage_polys = [shape(geom) for geom, val in coverage_shapes if val == 1]
        coverage_polys = [poly for poly in coverage_polys if poly.is_valid]

        if coverage_polys:
            coverage_geoms = gpd.GeoSeries(coverage_polys)

            # Fill with semi-transparent color and hatching
            coverage_geoms.plot(ax=ax, facecolor=fill_color, edgecolor='none',
                                alpha=0.35, hatch='///', linewidth=0)

            # Black outline for definition
            coverage_geoms.boundary.plot(ax=ax, color='black', linewidth=2.0, alpha=0.9)

            # Colored inner outline
            coverage_geoms.boundary.plot(ax=ax, color=fill_color, linewidth=1.2, alpha=1.0)

    # =======================
    # 2. Draw VOID areas (no data)
    # =======================
    if np.any(lc_void):
        # Vectorize void areas
        void_shapes = shapes(lc_void.astype(np.uint8),
                           mask=lc_void,
                           transform=target_transform)
        void_polys = [shape(geom) for geom, val in void_shapes if val == 1]
        void_polys = [poly for poly in void_polys if poly.is_valid]

        if void_polys:
            void_geoms = gpd.GeoSeries(void_polys)

            # White fill with dots for voids
            void_geoms.plot(ax=ax, facecolor='white', edgecolor='none',
                            alpha=0.8, hatch='....', linewidth=0)

            # Red dashed outline for void areas
            void_geoms.boundary.plot(ax=ax, color='red', linewidth=1.5,
                                     linestyle='--', alpha=0.8)

            # Gray secondary outline
            void_geoms.boundary.plot(ax=ax, color='gray', linewidth=0.8,
                                     linestyle='--', alpha=0.6)

    # Add study area boundary
    hull_gdf.boundary.plot(ax=ax, color='black', linewidth=3,
                           linestyle='-', alpha=0.8)

    # =======================
    # Calculate statistics
    # =======================
    lc_total_pixels = np.sum(lc_mask)
    lc_coverage_pixels = np.sum(lc_with_coverage)
    lc_void_pixels = np.sum(lc_void)

    # Cell counts converted to km² via the grid cell size
    total_area_km2 = lc_total_pixels * (GRID_SIZE**2) / 1e6
    coverage_area_km2 = lc_coverage_pixels * (GRID_SIZE**2) / 1e6
    void_area_km2 = lc_void_pixels * (GRID_SIZE**2) / 1e6

    pct_total = 100 * lc_total_pixels / classified_pixels if classified_pixels > 0 else 0
    pct_coverage = 100 * lc_coverage_pixels / lc_total_pixels if lc_total_pixels > 0 else 0
    pct_void = 100 * lc_void_pixels / lc_total_pixels if lc_total_pixels > 0 else 0

    # Title with comprehensive statistics
    class_name = CORINE_CLASSES.get(lc_code, f'Class {lc_code}')
    ax.set_title(f'Land Cover: {class_name} (Code {lc_code})\n'
                 f'Total: {total_area_km2:.1f} km² ({pct_total:.1f}% of study area) | '
                 f'Coverage: {coverage_area_km2:.1f} km² ({pct_coverage:.1f}%) | '
                 f'Void: {void_area_km2:.1f} km² ({pct_void:.1f}%)',
                 fontweight='bold', fontsize=12, pad=15)

    ax.set_xlabel('UTM Easting (m)', fontsize=11)
    ax.set_ylabel('UTM Northing (m)', fontsize=11)
    ax.grid(True, alpha=0.3, color='gray', linewidth=0.5)

    # Statistics box
    stats_text = f"""Coverage: {pct_coverage:.1f}%
Void: {pct_void:.1f}%
Area w/ data: {coverage_area_km2:.1f} km²
Area w/o data: {void_area_km2:.1f} km²"""

    ax.text(0.02, 0.98, stats_text, transform=ax.transAxes, fontsize=10,
            verticalalignment='top',
            bbox=dict(boxstyle='round', facecolor='white', alpha=0.9,
                     edgecolor='black', linewidth=1))

    # Legend (proxy patches mirroring the three drawing styles used above)
    legend_elements = [
        Patch(facecolor=fill_color, edgecolor='black', linewidth=2,
              alpha=0.35, hatch='///', label=f'{class_name} (Coverage)'),
        Patch(facecolor='white', edgecolor='red', linewidth=1.5,
              alpha=0.8, hatch='....', linestyle='--',
              label=f'{class_name} (Void/No Data)'),
        Patch(facecolor='none', edgecolor='black', linewidth=3,
              label='Study Area Boundary')
    ]

    ax.legend(handles=legend_elements, loc='upper right', fontsize=10,
              frameon=True, fancybox=False, edgecolor='black',
              title='Legend', title_fontsize=10)

    plt.tight_layout()

    # Save with descriptive filename (spaces and slashes become underscores,
    # commas are dropped)
    safe_name = class_name.replace(' ', '_').replace(',', '').replace('/', '_')
    filename = f'landcover_{lc_code}_{safe_name}_coverage_void.png'
    plt.savefig(RESULTS_DIR / filename, dpi=300, bbox_inches='tight',
                facecolor='white')
    plt.show()
    # Close the figure so one open figure per class does not accumulate.
    plt.close()

    print(f"Saved: {filename}")
    print(f"  - Total area: {total_area_km2:.1f} km²")
    print(f"  - Coverage: {pct_coverage:.1f}% ({coverage_area_km2:.1f} km²)")
    print(f"  - Void: {pct_void:.1f}% ({void_area_km2:.1f} km²)\n")

print(f"\n✓ Generated {len(unique_classes)} land cover maps with coverage/void visualization")
print(f"All maps saved to: {RESULTS_DIR}")

# =============================================================================
# SUMMARY TABLE
# =============================================================================
# Console table with one row per land cover class, listing the share of its
# cells with and without SAOCOM coverage; rows are ordered by void share.
print("\n" + "="*70)
print("LAND COVER COVERAGE SUMMARY")
print("="*70)

summary_data = []
for lc_code in sorted(unique_classes):
    lc_mask = (corine_10m == lc_code)
    lc_total = np.sum(lc_mask)
    lc_coverage = np.sum(lc_mask & (~void_mask))
    lc_void = np.sum(lc_mask & void_mask)

    # Guard against classes without any cells before dividing.
    if not lc_total > 0:
        continue

    pct_coverage = 100 * lc_coverage / lc_total
    pct_void = 100 * lc_void / lc_total

    summary_row = {
        'Code': lc_code,
        'Name': CORINE_CLASSES.get(lc_code, 'Unknown')[:30],
        'Coverage%': pct_coverage,
        'Void%': pct_void,
        'Total_km2': lc_total * (GRID_SIZE**2) / 1e6
    }
    summary_data.append(summary_row)

# Sort by void percentage (worst coverage first); list.sort is stable, so ties
# keep their original relative order just like sorted() does.
summary_sorted = list(summary_data)
summary_sorted.sort(key=lambda entry: entry['Void%'], reverse=True)

print(f"{'Code':<6} {'Name':<32} {'Coverage%':<12} {'Void%':<10} {'Total km²':<10}")
print("-"*70)
for item in summary_sorted:
    row_left = f"{item['Code']:<6} {item['Name']:<32} {item['Coverage%']:>10.1f}% "
    row_right = f"{item['Void%']:>8.1f}% {item['Total_km2']:>9.1f}"
    print(row_left + row_right)