# CSB-FOSS: Validation and Comparison

Compare CSB-FOSS output with the existing official CSB to validate the implementation.

In [None]:
import sys
from pathlib import Path
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
from shapely.ops import unary_union

# Put the src/ folder that sits beside the working directory's parent at the
# front of the import path, so modules stored there win over installed ones.
sys.path.insert(0, str(Path.cwd().parent.joinpath('src')))

print("Ready for validation!")

## 1. Load Data

In [None]:
# Input locations - point these at your own files before running.
FOSS_BASELINE = Path("../output/baseline_test/csb_baseline_test.gpkg")
FOSS_EXPERIMENTAL = Path("../output/experimental_test/experimental_cdl_watershed_10m.gpkg")
OFFICIAL_CSB = Path("/path/to/official/csb.gpkg")  # Update this!

# Read every FOSS result that is present on disk, keyed by its run label.
# Missing files are skipped silently so the notebook works with a subset.
foss_outputs = {}
for label, source in (('baseline', FOSS_BASELINE), ('experimental', FOSS_EXPERIMENTAL)):
    if not source.exists():
        continue
    foss_outputs[label] = gpd.read_file(source)
    print(f"Loaded {label}: {len(foss_outputs[label])} polygons")

# The official CSB is optional; it stays None when the file is absent and
# the later cells check for that before using it.
official_csb = None
if not OFFICIAL_CSB.exists():
    print("Official CSB not found - skipping comparison with official")
else:
    official_csb = gpd.read_file(OFFICIAL_CSB)
    print(f"Loaded official CSB: {len(official_csb)} polygons")

## 2. Calculate Metrics

In [None]:
def calculate_metrics(gdf, name="Dataset"):
    """Calculate standard summary metrics for a CSB polygon dataset.

    Args:
        gdf: Object exposing ``geometry.area`` and ``geometry.length`` as
            per-polygon series and supporting ``len()`` (a GeoDataFrame in
            this notebook).
        name: Label stored under the ``'name'`` key of the result.

    Returns:
        dict with the keys ``name``, ``polygon_count``, ``total_area_ha``,
        ``mean_area_ha``, ``median_area_ha``, ``min_area_ha``,
        ``max_area_ha``, ``mean_roughness`` and ``total_perimeter_km``.

    NOTE(review): areas are divided by 10000 and lengths by 1000, so the
    ``_ha`` / ``_km`` values are only meaningful if the dataset's CRS uses
    metres - confirm against the input files.
    """
    # Compute the per-polygon series once; each attribute access recomputes
    # the measure for every geometry.
    area = gdf.geometry.area
    perimeter = gdf.geometry.length
    area_ha = area / 10000

    # Roughness is perimeter / sqrt(area). A zero-area geometry with a
    # non-zero perimeter would yield inf and turn the whole mean into inf,
    # so such geometries are left out of the roughness average.
    has_area = area > 0
    roughness = perimeter[has_area] / np.sqrt(area[has_area])

    return {
        'name': name,
        'polygon_count': len(gdf),
        'total_area_ha': area_ha.sum(),
        'mean_area_ha': area_ha.mean(),
        'median_area_ha': area_ha.median(),
        'min_area_ha': area_ha.min(),
        'max_area_ha': area_ha.max(),
        'mean_roughness': roughness.mean(),
        'total_perimeter_km': perimeter.sum() / 1000,
    }

import pandas as pd

# Calculate metrics for each loaded dataset: FOSS runs first, then the
# official CSB when it is available.
all_metrics = [
    calculate_metrics(gdf, f"FOSS {name}") for name, gdf in foss_outputs.items()
]
if official_csb is not None:
    all_metrics.append(calculate_metrics(official_csb, "Official CSB"))

# Display as table. With no rows the frame has no 'name' column and
# set_index('name') would raise KeyError, so that case is handled explicitly.
if all_metrics:
    metrics_df = pd.DataFrame(all_metrics).set_index('name')
else:
    print("No datasets loaded - no metrics to display")
    metrics_df = pd.DataFrame()

metrics_df

## 3. Visual Comparison

In [None]:
# Side-by-side comparison: one panel per loaded dataset.
# Each entry is (title, layer, fill colour, outline colour).
panels = [
    (f"FOSS {name}", gdf, 'lightblue', 'blue') for name, gdf in foss_outputs.items()
]
if official_csb is not None:
    panels.append(("Official CSB", official_csb, 'lightgreen', 'green'))

n_plots = len(panels)

if n_plots == 0:
    # plt.subplots cannot build a grid with zero columns, so skip plotting
    # instead of failing when none of the input files were found.
    print("No datasets loaded - nothing to plot")
else:
    # squeeze=False always returns a 2-D array of axes, so the single-panel
    # case needs no special handling.
    fig, axes = plt.subplots(1, n_plots, figsize=(6 * n_plots, 6), squeeze=False)

    for ax, (title, layer, face, edge) in zip(axes[0], panels):
        layer.plot(ax=ax, facecolor=face, edgecolor=edge, linewidth=0.5)
        ax.set_title(f"{title}\n({len(layer)} polygons)")
        ax.axis('equal')

    plt.tight_layout()
    plt.show()

## 4. Area Distribution Comparison

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))

# The x-axis is limited to 0-100 ha below, so bin that window directly:
# 50 bins of 2 ha each, shared by every dataset. Letting hist() pick 50 bins
# over each dataset's full range would give every dataset different edges
# and, with a few very large polygons, squeeze the visible window into one
# or two bars.
# Note: with explicit edges, polygons larger than 100 ha are not counted and
# density is normalised over the polygons inside the window.
area_bins = np.linspace(0, 100, 51)

for name, gdf in foss_outputs.items():
    areas = gdf.geometry.area / 10000  # hectares
    ax.hist(areas, bins=area_bins, alpha=0.5, label=f"FOSS {name}", density=True)

if official_csb is not None:
    areas = official_csb.geometry.area / 10000
    ax.hist(areas, bins=area_bins, alpha=0.5, label="Official CSB", density=True)

ax.set_xlabel("Area (hectares)")
ax.set_ylabel("Density")
ax.set_title("Polygon Area Distribution")
ax.legend()
ax.set_xlim(0, 100)  # Focus on smaller polygons

plt.tight_layout()
plt.show()

## 5. Intersection Over Union (IoU)

In [None]:
def calculate_iou(gdf1, gdf2):
    """Calculate the Intersection over Union (IoU) of two polygon datasets.

    Each dataset is dissolved into a single geometry; the result is the area
    shared by both divided by the area covered by either.

    Args:
        gdf1: First polygon dataset (GeoDataFrame).
        gdf2: Second polygon dataset (GeoDataFrame). It is reprojected to the
            CRS of ``gdf1`` when both declare a CRS and the two differ.

    Returns:
        float: IoU value. ``0.0`` is returned when the combined footprint has
        no area (for example, both datasets are empty), since the ratio is
        undefined in that case.
    """
    # Overlaying geometries expressed in different coordinate systems gives
    # a meaningless result, so bring the second dataset into the first one's
    # CRS before comparing.
    if gdf1.crs is not None and gdf2.crs is not None and gdf1.crs != gdf2.crs:
        gdf2 = gdf2.to_crs(gdf1.crs)

    union1 = unary_union(gdf1.geometry)
    union2 = unary_union(gdf2.geometry)

    intersection = union1.intersection(union2)
    union = union1.union(union2)

    # Guard the division: an empty combined footprint has zero area.
    if union.area == 0:
        return 0.0

    return intersection.area / union.area

# Report the overlap score for every dataset pairing that is available.
print("Intersection over Union (IoU):")

# FOSS baseline against FOSS experimental, only when both runs were loaded.
if {'baseline', 'experimental'} <= foss_outputs.keys():
    score = calculate_iou(foss_outputs['baseline'], foss_outputs['experimental'])
    print(f"  Baseline vs Experimental: {score:.4f}")

# Every FOSS run against the official CSB, when the latter was loaded.
runs_to_check = foss_outputs.items() if official_csb is not None else ()
for label, layer in runs_to_check:
    score = calculate_iou(layer, official_csb)
    print(f"  FOSS {label} vs Official: {score:.4f}")

## 6. Boundary Roughness Comparison

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))

# Every loaded dataset is drawn the same way, so collect them first.
roughness_layers = [(f"FOSS {name}", gdf) for name, gdf in foss_outputs.items()]
if official_csb is not None:
    roughness_layers.append(("Official CSB", official_csb))

for label, layer in roughness_layers:
    area = layer.geometry.area
    # Zero-area geometries would produce inf (or NaN) roughness, and
    # non-finite values make hist()'s automatic bin-range detection fail,
    # so only geometries with a positive area are plotted.
    has_area = area > 0
    roughness = layer.geometry.length[has_area] / np.sqrt(area[has_area])
    ax.hist(roughness, bins=50, alpha=0.5, label=label, density=True)

ax.set_xlabel("Roughness (perimeter / sqrt(area))")
ax.set_ylabel("Density")
ax.set_title("Boundary Roughness Distribution")
ax.legend()

plt.tight_layout()
plt.show()

## 7. Summary Report

In [None]:
# Text summary of every loaded dataset, framed by a 60-character rule.
banner = "=" * 60

print(banner)
print("CSB-FOSS VALIDATION SUMMARY")
print(banner)

# One section per FOSS run: count, total and mean size, mean roughness.
for label, layer in foss_outputs.items():
    print(f"\n{label.upper()}:")
    print(f"  Polygons: {len(layer):,}")
    areas = layer.geometry.area
    print(f"  Total area: {areas.sum() / 10000:,.1f} ha")
    print(f"  Mean polygon size: {areas.mean() / 10000:.2f} ha")
    mean_roughness = (layer.geometry.length / np.sqrt(areas)).mean()
    print(f"  Mean roughness: {mean_roughness:.2f}")

# The official CSB section reports only the count and the total area.
if official_csb is not None:
    official_area_ha = official_csb.geometry.area.sum() / 10000
    print("\nOFFICIAL CSB:")
    print(f"  Polygons: {len(official_csb):,}")
    print(f"  Total area: {official_area_ha:,.1f} ha")

print("\n" + banner)