In [None]:
# Install geospatial vector packages
!pip install geopandas fiona shapely matplotlib contextily rasterio scikit-image

# Vector Geospatial Tutorial - From Arrays to Shapefiles

**The Vector Geospatial Stack:**
- **Fiona**: Low-level vector I/O (read/write shapefiles, GeoJSON, etc.)
- **Shapely**: Geometric operations (polygons, intersections, buffers)
- **GeoPandas**: High-level spatial data analysis ("pandas for GIS")

Perfect for: converting raster patterns to vectors, spatial analysis, GIS workflows.

In [None]:
import numpy as np
import shapely
import matplotlib.pyplot as plt
import geopandas as gpd
import fiona
from shapely.geometry import Point, Polygon, MultiPolygon
from shapely.ops import unary_union
import rasterio
from rasterio.features import shapes
from rasterio.transform import from_bounds
from skimage import measure
import pandas as pd
import os
from pathlib import Path
import contextily as ctx
import warnings
warnings.filterwarnings('ignore')

# Report which versions of the core vector libraries are in use
for label, module in (("GeoPandas", gpd), ("Fiona", fiona), ("Shapely", shapely)):
    print(f"{label} version: {module.__version__}")

# Folder that receives every file generated by this notebook
output_dir = Path('vector_outputs')
output_dir.mkdir(exist_ok=True)
print(f"Output directory: {output_dir.absolute()}")

## 🎨 Creating 2D Pattern Array

In [None]:
# Create interesting 2D pattern for polygon extraction
def create_pattern_array(width=800, height=600, bounds=(-10, 40, -5, 45)):
    """Create a synthetic 2D raster with blob-like features for polygon extraction.

    The raster is the sum of three Gaussian "islands", two narrow Gaussian
    "ridges", the positive part of a two-frequency wave field and seeded
    exponential speckle. It is then Gaussian-smoothed and rescaled to 0-1.

    Parameters
    ----------
    width, height : int
        Number of columns and rows of the output array.
    bounds : tuple of float
        Geographic extent as (lon_min, lat_min, lon_max, lat_max).

    Returns
    -------
    pattern : numpy.ndarray
        Array of shape (height, width) with values in [0, 1]. Row 0 is the
        northern edge (lat_max), so the array lines up with a north-up
        transform such as ``from_bounds(*bounds, width, height)``.
    bounds : tuple
        The ``bounds`` argument, returned unchanged.
    """
    from scipy import ndimage

    # Seeds NumPy's global RNG so the speckle layer is reproducible
    np.random.seed(42)

    lon_min, lat_min, lon_max, lat_max = bounds

    # Coordinate arrays. Latitude runs from lat_max down to lat_min because
    # raster row 0 is the top (north) edge; building it south-to-north would
    # place every feature at a mirrored latitude once a north-up transform
    # is applied to the array.
    x = np.linspace(lon_min, lon_max, width)
    y = np.linspace(lat_max, lat_min, height)
    X, Y = np.meshgrid(x, y)

    # Base pattern with multiple scales
    pattern = np.zeros_like(X)

    # Large circular features ("islands"): (centre lon, centre lat, width parameter)
    islands = [
        (-8.5, 42.5, 0.3),
        (-6.5, 43.8, 0.2),
        (-7.2, 41.2, 0.4),
    ]
    for center_lon, center_lat, spread in islands:
        pattern += np.exp(-((X - center_lon)**2 + (Y - center_lat)**2) / spread)

    # Linear features ("rivers" or "ridges")
    # NOTE(review): ridge1 peaks where X = -8 and Y = 2*X + 90 = 74, which is
    # outside the default latitude range, so it contributes ~0 -- confirm the
    # intended offset.
    ridge1 = 0.8 * np.exp(-((X + 8)**2 + (Y - 2*X - 90)**2) / 0.05)
    ridge2 = 0.6 * np.exp(-((Y - 43)**2 + (X + 7.5)**2) / 0.02)
    pattern += ridge1 + ridge2

    # Complex wave pattern; only the positive lobes are kept
    waves = 0.4 * np.sin(X * 3) * np.cos(Y * 2) + 0.3 * np.sin(X * 8) * np.sin(Y * 5)
    pattern += np.maximum(waves, 0)

    # Random speckle pattern
    pattern += 0.5 * np.random.exponential(0.3, (height, width))

    # Smooth boundaries (sigma is in pixels)
    pattern = ndimage.gaussian_filter(pattern, sigma=2)

    # Normalize to 0-1 range; a perfectly flat field would otherwise divide by zero
    value_range = pattern.max() - pattern.min()
    if value_range > 0:
        pattern = (pattern - pattern.min()) / value_range
    else:
        pattern = np.zeros_like(pattern)

    return pattern, bounds

# Generate pattern
pattern_data, bounds = create_pattern_array(800, 600, bounds=(-10, 40, -5, 45))
print(f"Pattern array created: {pattern_data.shape}")
print(f"Value range: {pattern_data.min():.3f} to {pattern_data.max():.3f}")
print(f"Geographic bounds: {bounds} (lon_min, lat_min, lon_max, lat_max)")

# Create geospatial transform (from_bounds takes west, south, east, north, width, height)
transform = from_bounds(*bounds, pattern_data.shape[1], pattern_data.shape[0])
print(f"Pixel size: {abs(transform.a):.6f}° × {abs(transform.e):.6f}°")

# Matplotlib's `extent` is (left, right, bottom, top), while `bounds` is
# (lon_min, lat_min, lon_max, lat_max). Passing `bounds` directly would stretch
# the image over the wrong coordinate range, so reorder it first.
plot_extent = (bounds[0], bounds[2], bounds[1], bounds[3])

# Visualize the pattern
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# Full pattern
im1 = axes[0].imshow(pattern_data, cmap='viridis', extent=plot_extent, aspect='equal')
axes[0].set_title('2D Pattern Array (Full Range)')
axes[0].set_xlabel('Longitude (°)')
axes[0].set_ylabel('Latitude (°)')
plt.colorbar(im1, ax=axes[0], label='Pattern Value')

# Threshold visualization (>0.7)
threshold = 0.7
binary_pattern = pattern_data > threshold
im2 = axes[1].imshow(binary_pattern, cmap='RdYlBu_r', extent=plot_extent, aspect='equal')
axes[1].set_title(f'Thresholded Pattern (Values > {threshold})')
axes[1].set_xlabel('Longitude (°)')
axes[1].set_ylabel('Latitude (°)')
plt.colorbar(im2, ax=axes[1], label='Above Threshold')

plt.tight_layout()
plt.show()

# Share of the raster that will be turned into polygons
high_value_pixels = np.sum(binary_pattern)
total_pixels = pattern_data.size
print(f"\nPixels above threshold ({threshold}): {high_value_pixels:,} ({high_value_pixels/total_pixels*100:.1f}%)")

## 🔍 Converting Array to Polygons

In [None]:
# Convert thresholded array to polygons using rasterio.features
print("Converting raster pattern to vector polygons...")

def array_to_polygons(array, transform, threshold=0.7, min_area=0.01):
    """Convert a 2D array to polygons using rasterio.features.shapes.

    Parameters
    ----------
    array : numpy.ndarray
        2D raster; pixels with ``array > threshold`` form the foreground.
    transform : affine.Affine
        Pixel-to-coordinate transform handed to ``shapes``.
    threshold : float
        Values strictly above this are polygonized.
    min_area : float
        Polygons whose area (in squared coordinate units) is not greater than
        this are discarded as noise.

    Returns
    -------
    polygons : list of shapely.geometry.Polygon
    areas : list of float
        Area of each returned polygon, in the same order.
    """
    # Create binary mask (shapes() needs an integer raster, not bool)
    binary_mask = (array > threshold).astype(np.uint8)

    polygons = []
    areas = []

    # shapes() yields one (GeoJSON-like geometry, pixel value) pair per connected region
    for geom, value in shapes(binary_mask, transform=transform):
        if value != 1:  # Only high-value areas
            continue

        # The first ring is the exterior; any further rings are holes.
        # Keeping them avoids over-stating the area of ring-shaped regions.
        rings = geom['coordinates']
        poly = Polygon(rings[0], rings[1:] or None)
        area = poly.area

        # Filter by minimum area to remove noise
        if area > min_area:
            polygons.append(poly)
            areas.append(area)

    return polygons, areas

# Convert to polygons
polygons, areas = array_to_polygons(pattern_data, transform, threshold=0.7, min_area=0.005)

print(f"Extracted {len(polygons)} polygons from pattern")
if areas:
    print(f"Area statistics:")
    print(f"  Total area: {sum(areas):.3f} square degrees")
    print(f"  Largest polygon: {max(areas):.3f} square degrees")
    print(f"  Smallest polygon: {min(areas):.3f} square degrees")
    print(f"  Mean area: {np.mean(areas):.3f} square degrees")
else:
    # max()/min() raise on an empty list; give an actionable hint instead
    print("  No polygons passed the area filter - lower `threshold` or `min_area` and re-run")

# Alternative method using scikit-image contours
def array_to_polygons_contour(array, transform, threshold=0.7, min_area=0.01):
    """Convert an array to polygons by tracing iso-contours at ``threshold``.

    Parameters
    ----------
    array : numpy.ndarray
        2D raster to contour.
    transform : affine.Affine
        Pixel-to-coordinate transform used to georeference contour vertices.
    threshold : float
        Level at which contours are traced.
    min_area : float
        Polygons whose area is not greater than this are discarded.

    Returns
    -------
    list of shapely.geometry.Polygon
        Valid polygons built from the contours. Contours that cannot be turned
        into a valid polygon are skipped.
    """
    polygons_contour = []

    for contour in measure.find_contours(array, threshold):
        # A polygon needs at least three vertices
        if len(contour) < 3:
            continue

        # Contour vertices are (row, col) array positions. Convert the whole
        # contour in one call instead of one Python-level call per vertex.
        lons, lats = rasterio.transform.xy(transform, contour[:, 0], contour[:, 1])

        try:
            poly = Polygon(list(zip(lons, lats)))
            keep = poly.is_valid and poly.area > min_area
        except Exception:
            # Best effort: skip contours that cannot form a polygon, without
            # swallowing KeyboardInterrupt/SystemExit as a bare `except:` would
            continue

        if keep:
            polygons_contour.append(poly)

    return polygons_contour

# Compare methods
# Use the same min_area as the rasterio extraction (0.005); the function default
# of 0.01 would filter the two results differently and skew the comparison.
polygons_contour = array_to_polygons_contour(pattern_data, transform, threshold=0.7, min_area=0.005)
print(f"\nComparison of extraction methods:")
print(f"  Rasterio.features method: {len(polygons)} polygons")
print(f"  Scikit-image contours:    {len(polygons_contour)} polygons")

# Use the rasterio method (typically more accurate for raster data)
selected_polygons = polygons
print(f"\nUsing rasterio.features method with {len(selected_polygons)} polygons")

## 📄 Creating Shapefile with Fiona

In [None]:
# Create shapefile using Fiona (low-level approach)
print("Creating shapefile with Fiona...")

def create_shapefile_fiona(polygons, output_path, crs='EPSG:4326'):
    """Write polygons and derived attributes to an ESRI Shapefile using Fiona.

    Parameters
    ----------
    polygons : iterable of shapely.geometry.Polygon
        Geometries to write; interior rings (holes) are preserved.
    output_path : str or pathlib.Path
        Destination ``.shp`` path.
    crs : str
        Coordinate reference system passed to ``fiona.open``.

    Returns
    -------
    int
        Number of polygons passed in (one feature is written per polygon).
    """
    # Define schema.
    # Shapefile (DBF) field names are limited to 10 characters, so 'compactness'
    # is stored as 'compactnes' -- the rest of this notebook reads it by that name.
    schema = {
        'geometry': 'Polygon',
        'properties': {
            'id': 'int',
            'area': 'float',
            'perimeter': 'float',
            'compactness': 'float',
            'category': 'str'
        }
    }

    # Create shapefile
    with fiona.open(output_path, 'w', driver='ESRI Shapefile',
                   schema=schema, crs=crs) as shp:

        for i, poly in enumerate(polygons):
            # Calculate properties
            area = poly.area
            perimeter = poly.length
            # Isoperimetric quotient 4*pi*A / P^2 (1.0 for a circle)
            compactness = 4 * np.pi * area / (perimeter**2) if perimeter > 0 else 0

            # Classify by size
            if area > 0.1:
                category = 'Large'
            elif area > 0.05:
                category = 'Medium'
            else:
                category = 'Small'

            # Exterior ring first, then every interior ring, so holes are not
            # silently filled in when the feature is written
            rings = [list(poly.exterior.coords)]
            rings.extend(list(ring.coords) for ring in poly.interiors)

            # Create feature
            feature = {
                'geometry': {
                    'type': 'Polygon',
                    'coordinates': rings
                },
                'properties': {
                    'id': i + 1,
                    'area': round(area, 6),
                    'perimeter': round(perimeter, 6),
                    'compactness': round(compactness, 4),
                    'category': category
                }
            }

            shp.write(feature)

    return len(polygons)

# Create shapefile
shapefile_path = output_dir / 'pattern_polygons.shp'
features_written = create_shapefile_fiona(selected_polygons, shapefile_path)

print(f"Shapefile created: {shapefile_path}")
print(f"Features written: {features_written}")

# List all shapefile components.
# Restrict the listing to shapefile sidecar suffixes: a plain
# 'pattern_polygons.*' glob also matches the .geojson/.kml/.gpkg exports left
# in the folder by an earlier run of this notebook.
shapefile_suffixes = {'.shp', '.shx', '.dbf', '.prj', '.cpg'}
shapefile_files = [
    path for path in output_dir.glob('pattern_polygons.*')
    if path.suffix in shapefile_suffixes
]
total_size = 0
print(f"\nShapefile components:")
for file_path in sorted(shapefile_files):
    size_kb = os.path.getsize(file_path) / 1024
    total_size += size_kb
    print(f"  {file_path.name:20s} {size_kb:6.1f} KB")
print(f"  {'Total:':20s} {total_size:6.1f} KB")

# Verify shapefile by reading it back
print(f"\nVerifying shapefile:")
with fiona.open(shapefile_path) as shp:
    print(f"  Driver: {shp.driver}")
    print(f"  CRS: {shp.crs}")
    print(f"  Schema: {shp.schema}")
    print(f"  Feature count: {len(shp)}")
    print(f"  Bounds: {shp.bounds}")

## 🐼 GeoPandas Analysis & Operations

In [None]:
# Load shapefile with GeoPandas for analysis
print("Loading shapefile with GeoPandas...")

# Read the shapefile back into a GeoDataFrame
gdf = gpd.read_file(shapefile_path)

# Structural overview of the loaded frame
first_geometry_type = gdf.geometry.geom_type.iloc[0]
print("GeoPandas GeoDataFrame loaded:")
print(f"  Shape: {gdf.shape}")
print(f"  CRS: {gdf.crs}")
print(f"  Columns: {list(gdf.columns)}")
print(f"  Geometry type: {first_geometry_type}")

# Display first few rows
print("\nFirst 5 features:")
print(gdf.head())

# Basic statistics of the stored 'area' attribute
print("\nSpatial statistics:")
area_values = gdf['area']
area_summary = (
    ("Total area", area_values.sum()),
    ("Mean area", area_values.mean()),
    ("Area std", area_values.std()),
)
for stat_label, stat_value in area_summary:
    print(f"  {stat_label}: {stat_value:.4f} square degrees")
# 'compactnes' is the 10-character DBF spelling of the 'compactness' field
print(f"  Mean compactness: {gdf['compactnes'].mean():.3f} (1.0 = perfect circle)")

# Category analysis: feature count and summed area per size class
print("\nCategory distribution:")
category_counts = gdf['category'].value_counts()
for category, count in category_counts.items():
    in_category = gdf['category'] == category
    total_area = gdf.loc[in_category, 'area'].sum()
    print(f"  {category:6s}: {count:3d} features, {total_area:.4f} total area")

In [None]:
# GeoPandas spatial operations
print("Performing GeoPandas spatial operations...")

# 1. Buffer operations
print("\n1. Creating buffers...")
buffer_distance = 0.1  # degrees
gdf['geometry_buffered'] = gdf.geometry.buffer(buffer_distance)

# Calculate buffer areas and how much each polygon grew
gdf['buffer_area'] = gdf['geometry_buffered'].area
gdf['buffer_expansion'] = gdf['buffer_area'] / gdf['area']

print(f"  Buffer distance: {buffer_distance}°")
print(f"  Mean area expansion: {gdf['buffer_expansion'].mean():.2f}x")

# 2. Centroid calculation
print("\n2. Calculating centroids...")
gdf['centroid'] = gdf.geometry.centroid
gdf['centroid_x'] = gdf['centroid'].x
gdf['centroid_y'] = gdf['centroid'].y

print(f"  Centroid longitude range: {gdf['centroid_x'].min():.3f} to {gdf['centroid_x'].max():.3f}")
print(f"  Centroid latitude range: {gdf['centroid_y'].min():.3f} to {gdf['centroid_y'].max():.3f}")

# 3. Spatial relationships
print("\n3. Analyzing spatial relationships...")

# Find the largest polygon for spatial queries
largest_poly_idx = gdf['area'].idxmax()
largest_poly = gdf.loc[largest_poly_idx]

print(f"  Largest polygon: ID {largest_poly['id']}, area {largest_poly['area']:.4f}")

# Find polygons lying entirely within a 0.5° buffer of the largest polygon
# (the largest polygon itself satisfies this and is included in the count)
largest_buffered = largest_poly.geometry.buffer(0.5)
within_buffer = gdf.geometry.within(largest_buffered)
nearby_polygons = gdf[within_buffer]

print(f"  Polygons within 0.5° of largest: {len(nearby_polygons)}")

# Calculate centroid-to-centroid distances to the largest polygon
largest_centroid = largest_poly.geometry.centroid
gdf['dist_to_largest'] = gdf['centroid'].distance(largest_centroid)

print(f"  Mean distance to largest polygon: {gdf['dist_to_largest'].mean():.3f}°")
print(f"  Max distance to largest polygon: {gdf['dist_to_largest'].max():.3f}°")

# 4. Spatial joins and aggregation
print("\n4. Spatial aggregation...")

# Create a coarse grid for aggregation
grid_size = 1.0  # degrees
lon_min, lat_min, lon_max, lat_max = gdf.total_bounds

# Create grid polygons (closed square rings, one per cell)
grid_polygons = []
grid_ids = []
grid_id = 0

for lon in np.arange(lon_min, lon_max, grid_size):
    for lat in np.arange(lat_min, lat_max, grid_size):
        grid_poly = Polygon([
            (lon, lat),
            (lon + grid_size, lat),
            (lon + grid_size, lat + grid_size),
            (lon, lat + grid_size),
            (lon, lat)
        ])
        grid_polygons.append(grid_poly)
        grid_ids.append(grid_id)
        grid_id += 1

# Create grid GeoDataFrame
grid_gdf = gpd.GeoDataFrame({
    'grid_id': grid_ids,
    'geometry': grid_polygons
}, crs=gdf.crs)

# Spatial join: assign each polygon to the grid cell containing its centroid.
# Joining the polygons themselves with predicate='within' would leave every
# polygon that straddles a grid line unassigned, and the groupby below would
# then drop it silently.
centroid_points = gpd.GeoDataFrame(
    gdf[['id', 'area', 'compactnes']],
    geometry=gdf.geometry.centroid,
    crs=gdf.crs,
)
joined = gpd.sjoin(centroid_points, grid_gdf, how='left', predicate='within')

# Aggregate by grid cell
grid_stats = joined.groupby('grid_id').agg({
    'area': ['count', 'sum', 'mean'],
    'compactnes': 'mean'
}).round(4)

print(f"  Created {len(grid_gdf)} grid cells of {grid_size}° × {grid_size}°")
print(f"  Grid cells with polygons: {len(grid_stats)}")
print(f"  Max polygons per cell: {grid_stats[('area', 'count')].max()}")

In [None]:
# Advanced GeoPandas operations
print("Advanced GeoPandas operations...")

# 5. Dissolve operation (union polygons by category)
print("\n5. Dissolving polygons by category...")
dissolved = gdf.dissolve(by='category', aggfunc={
    'area': 'sum',
    'perimeter': 'sum',
    'id': 'count'
})

# After aggregation the 'id' column holds the number of merged polygons
dissolved = dissolved.rename(columns={'id': 'polygon_count'})
print("Dissolved polygons by category:")
print(dissolved[['area', 'perimeter', 'polygon_count']])

# 6. Overlay operations
print("\n6. Overlay operations...")

# Create a circular area of interest
center_point = Point(-7.5, 42.5)
aoi_circle = center_point.buffer(1.0)  # 1 degree radius
aoi_gdf = gpd.GeoDataFrame([1], geometry=[aoi_circle], crs=gdf.crs, columns=['aoi_id'])

# Intersection with area of interest
intersection = gpd.overlay(gdf, aoi_gdf, how='intersection')
print(f"  Original polygons: {len(gdf)}")
print(f"  Polygons intersecting AOI: {len(intersection)}")
print(f"  Total intersection area: {intersection.geometry.area.sum():.4f}°²")

# 7. Convex hull operations
print("\n7. Convex hulls...")
gdf['convex_hull'] = gdf.geometry.convex_hull
gdf['convex_area'] = gdf['convex_hull'].area
# Ratio of polygon area to hull area: 1.0 means the polygon is already convex
gdf['convexity'] = gdf['area'] / gdf['convex_area']

print(f"  Mean convexity: {gdf['convexity'].mean():.3f} (1.0 = already convex)")
print(f"  Most complex polygon convexity: {gdf['convexity'].min():.3f}")

# 8. Nearest neighbor analysis
print("\n8. Nearest neighbor distances...")
from shapely.ops import nearest_points

# Distance from each polygon to its closest other polygon.
# One vectorised GeoSeries.distance call per polygon replaces the nested
# iterrows() loops, which built a full row Series for every pair.
polygon_geoms = gdf.geometry
nearest_distances = []
for idx, geom in polygon_geoms.items():
    other_geoms = polygon_geoms.drop(idx)
    if len(other_geoms):
        nearest_distances.append(other_geoms.distance(geom).min())
    else:
        # A single polygon has no neighbour
        nearest_distances.append(np.nan)

gdf['nearest_neighbor_dist'] = nearest_distances

print(f"  Mean nearest neighbor distance: {gdf['nearest_neighbor_dist'].mean():.4f}°")
print(f"  Min nearest neighbor distance: {gdf['nearest_neighbor_dist'].min():.4f}°")
print(f"  Max nearest neighbor distance: {gdf['nearest_neighbor_dist'].max():.4f}°")

# 9. Export enhanced dataset
print("\n9. Exporting enhanced dataset...")
# Only one geometry column can be written, so the helper geometry columns
# (buffer, centroid, hull) are left out of the export.
enhanced_cols = ['id', 'area', 'perimeter', 'compactnes', 'category', 
                'centroid_x', 'centroid_y', 'dist_to_largest', 'convexity', 
                'nearest_neighbor_dist', 'geometry']

enhanced_gdf = gdf[enhanced_cols].copy()
enhanced_shapefile = output_dir / 'pattern_polygons_enhanced.shp'
enhanced_gdf.to_file(enhanced_shapefile)

print(f"  Enhanced shapefile saved: {enhanced_shapefile}")
print(f"  Columns: {len(enhanced_cols)}")
print(f"  Features: {len(enhanced_gdf)}")

## 📊 Visualization & Mapping

In [None]:
# Comprehensive visualization of results
fig, axes = plt.subplots(2, 3, figsize=(20, 14))

# 1. Original pattern with extracted polygons
# imshow's `extent` is (left, right, bottom, top); `bounds` is stored as
# (lon_min, lat_min, lon_max, lat_max), so it has to be reordered or the raster
# will not line up with the polygon overlay.
raster_extent = (bounds[0], bounds[2], bounds[1], bounds[3])
axes[0,0].imshow(pattern_data, cmap='viridis', extent=raster_extent, aspect='equal', alpha=0.7)
gdf.plot(ax=axes[0,0], facecolor='none', edgecolor='red', linewidth=1)
axes[0,0].set_title('Original Pattern + Extracted Polygons')
axes[0,0].set_xlabel('Longitude (°)')
axes[0,0].set_ylabel('Latitude (°)')

# 2. Polygons colored by category
gdf.plot(column='category', ax=axes[0,1], cmap='Set1', legend=True)
axes[0,1].set_title('Polygons by Size Category')
axes[0,1].set_xlabel('Longitude (°)')
axes[0,1].set_ylabel('Latitude (°)')

# 3. Polygons colored by area
gdf.plot(column='area', ax=axes[0,2], cmap='plasma', legend=True)
axes[0,2].set_title('Polygons by Area')
axes[0,2].set_xlabel('Longitude (°)')
axes[0,2].set_ylabel('Latitude (°)')

# 4. Buffered polygons (drawn first so the originals sit on top)
gdf.set_geometry('geometry_buffered').plot(ax=axes[1,0], alpha=0.5, color='lightblue')
gdf.plot(ax=axes[1,0], color='darkblue', alpha=0.8)
axes[1,0].set_title('Original + Buffered Polygons')
axes[1,0].set_xlabel('Longitude (°)')
axes[1,0].set_ylabel('Latitude (°)')

# 5. Centroids with distance to largest
gdf.plot(ax=axes[1,1], color='lightgray', alpha=0.5)
gdf.set_geometry('centroid').plot(column='dist_to_largest', ax=axes[1,1], 
                                 cmap='coolwarm', legend=True, markersize=50)
# Highlight largest polygon
gdf.loc[[largest_poly_idx]].plot(ax=axes[1,1], color='red', edgecolor='black', linewidth=2)
axes[1,1].set_title('Distance to Largest Polygon')
axes[1,1].set_xlabel('Longitude (°)')
axes[1,1].set_ylabel('Latitude (°)')

# 6. Dissolved polygons by category
dissolved.plot(column='polygon_count', ax=axes[1,2], cmap='viridis', 
              legend=True, alpha=0.8, edgecolor='black')
axes[1,2].set_title('Dissolved Polygons by Category')
axes[1,2].set_xlabel('Longitude (°)')
axes[1,2].set_ylabel('Latitude (°)')

plt.tight_layout()
plt.show()

# Additional analysis plots
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# Area distribution histogram
axes[0,0].hist(gdf['area'], bins=20, alpha=0.7, color='skyblue', edgecolor='black')
axes[0,0].set_xlabel('Area (square degrees)')
axes[0,0].set_ylabel('Frequency')
axes[0,0].set_title('Polygon Area Distribution')
axes[0,0].grid(True, alpha=0.3)

# Compactness vs Area scatter
scatter = axes[0,1].scatter(gdf['area'], gdf['compactnes'], 
                           c=gdf['perimeter'], cmap='plasma', alpha=0.7)
axes[0,1].set_xlabel('Area (square degrees)')
axes[0,1].set_ylabel('Compactness')
axes[0,1].set_title('Area vs Compactness (colored by perimeter)')
plt.colorbar(scatter, ax=axes[0,1], label='Perimeter')
axes[0,1].grid(True, alpha=0.3)

# Nearest neighbor distances
axes[1,0].hist(gdf['nearest_neighbor_dist'], bins=15, alpha=0.7, 
              color='orange', edgecolor='black')
axes[1,0].set_xlabel('Nearest Neighbor Distance (degrees)')
axes[1,0].set_ylabel('Frequency')
axes[1,0].set_title('Nearest Neighbor Distance Distribution')
axes[1,0].grid(True, alpha=0.3)

# Category statistics
# Take the category list once so data and tick labels are guaranteed to match.
category_names = list(gdf['category'].unique())
category_areas = [gdf.loc[gdf['category'] == cat, 'area'].values for cat in category_names]
# boxplot's `labels=` keyword was renamed `tick_labels` in Matplotlib 3.9;
# setting the tick labels on the axis works on both older and newer releases.
axes[1,1].boxplot(category_areas)
axes[1,1].set_xticklabels(category_names)
axes[1,1].set_ylabel('Area (square degrees)')
axes[1,1].set_title('Area Distribution by Category')
axes[1,1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 🔄 Format Conversions & Export

In [None]:
# Export to different vector formats
print("Exporting to different vector formats...")

# Fiona's driver table does not enable KML by default, so a KML export through
# the Fiona engine is rejected unless the driver is switched on first.
# This has no effect when GeoPandas writes through a different I/O engine.
fiona.supported_drivers['KML'] = 'rw'

export_formats = {
    'GeoJSON': {'driver': 'GeoJSON', 'ext': '.geojson'},
    'KML': {'driver': 'KML', 'ext': '.kml'},
    'GeoPackage': {'driver': 'GPKG', 'ext': '.gpkg'},
    'PostGIS_SQL': {'driver': 'PostgreSQL', 'ext': '.sql'}  # SQL dump
}

# Formats that need external infrastructure and are therefore not attempted
skipped_formats = {'PostGIS_SQL'}  # requires a PostGIS setup

export_results = {}

# Prepare simplified dataset for export (single geometry column, core attributes)
export_gdf = gdf[['id', 'area', 'perimeter', 'compactnes', 'category', 'geometry']].copy()

for format_name, format_config in export_formats.items():
    if format_name in skipped_formats:
        continue

    output_path = output_dir / f'pattern_polygons{format_config["ext"]}'
    try:
        export_gdf.to_file(output_path, driver=format_config['driver'])
        file_size = os.path.getsize(output_path) / 1024
    except Exception as e:
        # Driver availability depends on the local GDAL build; report and carry on
        print(f"{format_name:12s}: FAILED - {e}")
        continue

    export_results[format_name] = {
        'path': output_path,
        'size_kb': file_size,
        'driver': format_config['driver']
    }

    print(f"{format_name:12s}: {file_size:7.1f} KB - {output_path.name}")

# Create summary statistics export
print("\nExporting summary statistics...")

# Summary table.
# Every value is converted to a built-in Python type. NumPy integers (such as
# value_counts() results) are not JSON-serialisable, and relying on
# `default=str` would write them to the file as strings ("12" instead of 12).
extent_min_lon, extent_min_lat, extent_max_lon, extent_max_lat = gdf.total_bounds
summary_stats = {
    'total_polygons': len(gdf),
    'total_area': float(gdf['area'].sum()),
    'mean_area': float(gdf['area'].mean()),
    'largest_polygon_area': float(gdf['area'].max()),
    'smallest_polygon_area': float(gdf['area'].min()),
    'mean_compactness': float(gdf['compactnes'].mean()),
    'category_distribution': {
        str(category_name): int(category_count)
        for category_name, category_count in gdf['category'].value_counts().items()
    },
    'mean_nearest_neighbor_dist': float(gdf['nearest_neighbor_dist'].mean()),
    'spatial_extent': {
        'lon_min': float(extent_min_lon),
        'lon_max': float(extent_max_lon),
        'lat_min': float(extent_min_lat),
        'lat_max': float(extent_max_lat)
    }
}

# Export to JSON
import json
with open(output_dir / 'polygon_analysis_summary.json', 'w') as f:
    json.dump(summary_stats, f, indent=2)

# Export detailed statistics to CSV (attributes only, no geometry)
detailed_stats = gdf[['id', 'area', 'perimeter', 'compactnes', 'category', 
                     'centroid_x', 'centroid_y', 'dist_to_largest', 'convexity',
                     'nearest_neighbor_dist']].copy()
detailed_stats.to_csv(output_dir / 'polygon_detailed_stats.csv', index=False)

print(f"  Summary JSON: polygon_analysis_summary.json")
print(f"  Detailed CSV: polygon_detailed_stats.csv")

# Format comparison: file sizes relative to the smallest successful export
if export_results:
    print(f"\nFormat size comparison:")
    sorted_formats = sorted(export_results.items(), key=lambda x: x[1]['size_kb'])
    smallest_size = sorted_formats[0][1]['size_kb']
    
    for format_name, info in sorted_formats:
        ratio = info['size_kb'] / smallest_size
        print(f"  {format_name:12s}: {info['size_kb']:7.1f} KB ({ratio:4.1f}x)")

## 📊 Final Summary

In [None]:
# Comprehensive workflow summary
print("\n" + "="*70)
print("            VECTOR GEOSPATIAL WORKFLOW SUMMARY")
print("="*70)

# File inventory: group everything in the output folder by file type
print(f"\n📁 Generated Files:")
all_files = list(output_dir.glob('*'))
total_size = 0

file_categories = {
    'Shapefiles': [],
    'Other Formats': [],
    'Analysis': []
}

for file_path in sorted(all_files):
    if file_path.is_file():
        size_kb = os.path.getsize(file_path) / 1024
        total_size += size_kb
        
        if file_path.suffix in ['.shp', '.shx', '.dbf', '.prj', '.cpg']:
            file_categories['Shapefiles'].append((file_path.name, size_kb))
        elif file_path.suffix in ['.geojson', '.kml', '.gpkg']:
            file_categories['Other Formats'].append((file_path.name, size_kb))
        else:
            file_categories['Analysis'].append((file_path.name, size_kb))

for category, files in file_categories.items():
    if files:
        print(f"\n  {category}:")
        for name, size in files:
            print(f"    {name:35s} {size:6.1f} KB")

print(f"\n  {'Total:':37s} {total_size:6.1f} KB")

# Workflow steps
# The threshold is read from the `threshold` variable rather than repeated as a
# literal, so the summary cannot drift from the value used for the pixel count.
print(f"\n⚡ Workflow Steps Completed:")
print(f"  ✅ Created 2D pattern array ({pattern_data.shape[1]}×{pattern_data.shape[0]} pixels)")
print(f"  ✅ Applied threshold (>{threshold}) and extracted {len(selected_polygons)} polygons")
print(f"  ✅ Created shapefile with Fiona (low-level vector I/O)")
print(f"  ✅ Loaded data with GeoPandas for spatial analysis")
print(f"  ✅ Performed {8} different spatial operations")
print(f"  ✅ Exported to {len(export_results)} different vector formats")
print(f"  ✅ Generated comprehensive visualizations and statistics")

# Key transformations
# List only the formats whose export actually succeeded; a hard-coded list would
# claim e.g. KML even when that driver was unavailable.
written_formats = ', '.join(['Shapefile'] + list(export_results))
print(f"\n🔄 Key Transformations:")
print(f"  Raster → Vector: {pattern_data.size:,} pixels → {len(selected_polygons)} polygons")
print(f"  Threshold applied: Values > {threshold} ({high_value_pixels/total_pixels*100:.1f}% of pixels)")
print(f"  Spatial analysis: {len(enhanced_cols)} attributes per polygon")
print(f"  Format diversity: {written_formats}")

# Spatial statistics
print(f"\n📊 Spatial Analysis Results:")
print(f"  Total polygon area: {gdf['area'].sum():.4f} square degrees")
print(f"  Largest polygon: {gdf['area'].max():.4f} square degrees")
print(f"  Mean compactness: {gdf['compactnes'].mean():.3f} (circle = 1.0)")
print(f"  Category breakdown: {dict(gdf['category'].value_counts())}")
print(f"  Mean nearest neighbor: {gdf['nearest_neighbor_dist'].mean():.4f}°")
print(f"  Spatial extent: {gdf.total_bounds}")

# Library capabilities demonstrated (heading -> bullet points)
capabilities = {
    "Fiona (Low-level I/O)": [
        "Direct shapefile creation with custom schema",
        "Metadata and attribute handling",
        "Format validation and verification",
    ],
    "Shapely (Geometry Operations)": [
        "Polygon creation from coordinates",
        "Buffer operations and geometric calculations",
        "Convex hulls and spatial relationships",
        "Distance calculations and validity checks",
    ],
    "GeoPandas (High-level Analysis)": [
        "Spatial joins and overlay operations",
        "Dissolve and aggregation by attributes",
        "Centroid calculation and coordinate extraction",
        "Multi-format export capabilities",
        "Statistical analysis and visualization",
    ],
}

print("\n🚀 Library Capabilities Demonstrated:")
for library, features in capabilities.items():
    print(f"\n  {library}:")
    for feature in features:
        print(f"    • {feature}")

# Typical real-world uses of the raster-to-vector workflow
applications = [
    "Satellite imagery segmentation → vector features",
    "Environmental monitoring (water bodies, deforestation)",
    "Urban planning (building footprints, land use)",
    "Ecological studies (habitat mapping, species distribution)",
    "Climate analysis (precipitation zones, temperature regions)",
]

print("\n🎯 Practical Applications:")
for application in applications:
    print(f"  • {application}")

print("\n" + "="*70)

## 🧹 Cleanup

In [None]:
# Optional cleanup
# input() raises when no stdin is attached (e.g. the notebook is executed
# non-interactively); treat that as "keep the files" instead of failing.
try:
    cleanup_answer = input("Remove all generated files? (y/N): ")
except (EOFError, NotImplementedError):
    cleanup_answer = ""
cleanup_files = cleanup_answer.strip().lower().startswith('y')

if cleanup_files:
    import shutil
    if output_dir.exists():
        shutil.rmtree(output_dir)
        print("✅ All output files removed")
else:
    # Re-scan the folder so the numbers reflect what is on disk now, counting
    # regular files only
    kept_files = [path for path in output_dir.glob('*') if path.is_file()]
    kept_size_kb = sum(path.stat().st_size for path in kept_files) / 1024
    print(f"💾 Files preserved in '{output_dir}/' directory")
    print(f"   Total: {len(kept_files)} files, {kept_size_kb:.1f} KB")
    print(f"   Key files: pattern_polygons.shp, pattern_polygons_enhanced.shp")
    # Formats that were exported successfully, with their file extensions
    exported_formats = ', '.join(
        f"{name} ({export_formats[name]['ext']})" for name in export_results
    )
    if exported_formats:
        print(f"   Formats: {exported_formats}")

print(f"\n🎉 Vector geospatial tutorial completed!")

## 📋 Quick Reference

In [None]:
from IPython.display import HTML, display
import html

# Cheat-sheet content: (card title, code snippet) pairs rendered as one card each.
# The snippets are raw strings and are shown verbatim (HTML-escaped at render time).
SECTIONS = [
  ("Fiona — Low-level I/O", r"""import fiona
# Write Shapefile
with fiona.open('file.shp', 'w', driver='ESRI Shapefile',
                schema=schema, crs='EPSG:4326') as shp:
    shp.write({'geometry': geom_dict, 'properties': props})

# Read features
with fiona.open('file.shp') as shp:
    for feature in shp:
        geom  = feature['geometry']
        props = feature['properties']"""),

  ("Shapely — Geometry Ops", r"""from shapely.geometry import Point, Polygon, MultiPolygon
poly  = Polygon([(x1, y1), (x2, y2), ...])
buff  = poly.buffer(distance)
area  = poly.area
cent  = poly.centroid
inter = poly1.intersection(poly2)
inside = poly1.within(poly2)"""),

  ("GeoPandas — High-level Analysis", r"""import geopandas as gpd
gdf = gpd.read_file('file.shp')     # Read
gdf.to_file('output.geojson')       # Write

gdf.plot(column='attr', cmap='viridis')  # Quick viz
buff = gdf.buffer(100)                   # Buffer all
cent = gdf.centroid                      # Centroids
diss = gdf.dissolve(by='attr')           # Dissolve

# Spatial ops
joined  = gpd.sjoin(gdf1, gdf2)                 # Spatial join
overlay = gpd.overlay(gdf1, gdf2, how='intersection')"""),

  # shape() is used instead of Polygon(coords[0]) so interior rings (holes) are kept
  ("Raster → Vector (polygonize)", r"""from rasterio.features import shapes
from rasterio.transform import from_bounds
from shapely.geometry import shape

transform = from_bounds(*bounds, width, height)
for geom, value in shapes(binary_array, transform=transform):
    if value == 1:
        polygon = shape(geom)   # keeps holes"""),

  # gdf.bounds is a four-column DataFrame, so it cannot be assigned to a single column
  ("Common Spatial Analysis", r"""gdf['area']     = gdf.geometry.area
gdf['length']   = gdf.geometry.length
gdf['centroid'] = gdf.geometry.centroid
bounds_df       = gdf.bounds               # bbox per row (DataFrame)
dist_series     = gdf.distance(point)      # distance to point
within_mask     = gdf.within(polygon)      # point-in-polygon"""),

  ("Export Formats", r"""gdf.to_file('output.shp')                        # Shapefile
gdf.to_file('output.geojson', driver='GeoJSON')
gdf.to_file('output.kml',     driver='KML')
gdf.to_file('output.gpkg',    driver='GPKG')"""),
]

# One-line recommendations rendered as a list below the snippet cards
BEST_PRACTICES = [
  "Pick the right CRS for measurement vs web maps (project to meters for areas/lengths).",
  "Validate & fix geometries (e.g., .buffer(0) or shapely.make_valid) before overlays.",
  "Handle MultiPolygons / MultiLines explicitly when summarizing.",
  "Choose formats wisely (GeoPackage for rich, Shapefile for legacy, GeoJSON for web).",
  "Preserve metadata and set driver/CRS on write.",
]

def section_html(title, code):
    """Render one cheat-sheet card (title bar with a Copy button plus code) as HTML.

    Both ``title`` and ``code`` are HTML-escaped before being inserted.
    Returns the markup as a string that starts with a newline and ends with
    four spaces of indentation.
    """
    safe_title = html.escape(title)
    safe_code = html.escape(code)
    # The button copies the text of the element that follows the card header,
    # i.e. the <pre> block holding the snippet
    copy_js = "navigator.clipboard.writeText(this.parentElement.nextElementSibling.innerText)"
    markup_lines = [
        "",
        '    <section class="vsec">',
        '      <div class="vsec-head">',
        f"        <h3>🧭 {safe_title}</h3>",
        f'        <button class="copy" onclick="{copy_js}">Copy</button>',
        "      </div>",
        f"      <pre><code>{safe_code}</code></pre>",
        "    </section>",
        "    ",
    ]
    return "\n".join(markup_lines)

# List items for the "Best Practices" box (text is HTML-escaped)
best_list = "".join(f"<li>• {html.escape(x)}</li>" for x in BEST_PRACTICES)

# Full cheat-sheet markup.
# Every CSS rule is scoped under `.v-wrap` and the colour variables are declared
# on `.v-wrap` instead of `:root`: styles emitted in a notebook output apply to
# the whole page, so bare `pre`, `code`, `.btn`, `.badge` or `.grid` selectors
# would restyle other cells and the notebook UI.
# Literal CSS braces are doubled because this is an f-string.
html_block = f"""
<style>
.v-wrap {{
  --bg: #0b132b;      /* deep night */
  --panel: #0f1f3a;   /* marine slate */
  --accent: #22d3ee;  /* cyan */
  --accent2: #38bdf8; /* sky */
  --text: #e6f1ff;
  --muted: #9fb3c8;
  --code-bg: #0a1a33;
}}
.v-wrap {{
  font-family: Inter, ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Arial, sans-serif;
  color: var(--text);
  max-width: 980px;
  margin: 10px 0 24px 0;
}}
.v-wrap .v-card {{
  border-radius: 16px; overflow: hidden;
  box-shadow: 0 10px 30px rgba(0,0,0,.25);
  border: 1px solid rgba(255,255,255,.06);
}}
.v-wrap .v-hero {{
  padding: 18px 20px;
  background:
    linear-gradient(135deg, rgba(34,211,238,.18), rgba(56,189,248,.12)),
    radial-gradient(1000px 400px at 0% 0%, rgba(34,211,238,.18), transparent 60%),
    radial-gradient(800px 400px at 100% 0%, rgba(56,189,248,.18), transparent 60%),
    var(--bg);
  border-bottom: 1px solid rgba(255,255,255,.07);
  display:flex; align-items:center; justify-content:space-between; gap:10px;
}}
.v-wrap .v-title {{ margin:0; font-size:22px; letter-spacing:.2px; }}
.v-wrap .badge {{
  font-size:12px; color:#052;
  background: linear-gradient(90deg, #22d3ee 0%, #38bdf8 100%);
  -webkit-background-clip: text; background-clip: text; color: transparent;
  font-weight:700;
}}
.v-wrap .actions {{ display:flex; gap:8px; }}
.v-wrap .btn {{
  cursor:pointer; border:1px solid rgba(255,255,255,.18);
  background: rgba(255,255,255,.06); color: var(--text);
  padding:6px 10px; border-radius:10px; font-size:12px;
}}
.v-wrap .btn:hover {{ background: rgba(255,255,255,.12); }}

.v-wrap .v-body {{ background: var(--panel); padding: 16px 18px; }}
.v-wrap .grid {{
  display:grid; grid-template-columns: repeat(auto-fit, minmax(280px, 1fr)); gap:14px;
}}
.v-wrap .vsec {{ background: var(--code-bg); border:1px solid rgba(255,255,255,.06); border-radius:12px; overflow:hidden; }}
.v-wrap .vsec-head {{ display:flex; align-items:center; justify-content:space-between; padding:10px 12px; background: rgba(255,255,255,.03); }}
.v-wrap .vsec h3 {{ margin:0; font-size:14px; color: var(--accent2); letter-spacing:.3px; }}
.v-wrap .copy {{ all: unset; cursor:pointer; padding:4px 8px; border-radius:8px; border:1px solid rgba(255,255,255,.15); font-size:12px; color: var(--text); }}
.v-wrap .copy:hover {{ background: rgba(255,255,255,.10); }}
.v-wrap pre {{
  margin:0; padding:12px; color:#e6f1ff; line-height:1.35; font-size:12.8px;
  overflow:auto; white-space:pre; tab-size:2;
}}
.v-wrap code {{ font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, "Liberation Mono", monospace; }}
.v-wrap .note {{ margin-top: 14px; color: var(--muted); font-size:13px; }}
.v-wrap .kicker {{
  margin-top: 14px; padding:10px 12px; background: rgba(56,189,248,.08);
  border:1px solid rgba(56,189,248,.25); border-radius:12px; font-size:13px;
}}
/* items carry their own "•" character, so the native marker is switched off */
.v-wrap ul.best {{ margin:10px 0 0 0; padding-left: 18px; color: var(--text); list-style: none; }}
</style>

<div class="v-wrap">
  <div class="v-card">
    <div class="v-hero">
      <h2 class="v-title">📍 Vector Geospatial Quick Reference
        <span class="badge">Fiona • Shapely • GeoPandas</span>
      </h2>
      <div class="actions">
        <button class="btn" onclick="(async()=>{{await navigator.clipboard.writeText(document.querySelector('#vector-cheat').innerText)}})()">Copy All</button>
      </div>
    </div>

    <div class="v-body" id="vector-cheat">
      <div class="grid">
        {''.join(section_html(t,c) for t,c in SECTIONS)}
      </div>

      <div class="kicker">
        <b>Best Practices</b>
        <ul class="best">
          {best_list}
        </ul>
      </div>

      <div class="note">Master the vector stack: read/write with <b>Fiona</b>, compute with <b>Shapely</b>, analyze &amp; plot with <b>GeoPandas</b>. Don’t forget CRS &amp; geometry validity. 🗺️</div>
    </div>
  </div>
</div>
"""

display(HTML(html_block))
