# PMU Disturbance Analysis - Spatial & Network Analysis

This notebook covers geographic clustering of PMU locations (DBSCAN), spatial statistics (Moran's I spatial autocorrelation), and topology analysis of a proximity network.

In [None]:
import sys
sys.path.append('..')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import folium
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

from src import spatial, visualizations as viz
import config

# Apply the seaborn theme named in the project configuration so that every
# figure drawn later in the notebook shares the same look.
sns.set_style(style=config.PLOT_SETTINGS['style'])
print("Libraries loaded!")

## 1. Load Data & Validate Coordinates

In [None]:
# Load the cleaned disturbance records and the PMU table exported earlier.
merged_df = pd.read_parquet(config.CLEANED_DATA)
pmu_data_path = Path(config.OUTPUT_DIR) / 'data' / 'pmu_data.csv'
pmu_df = pd.read_csv(pmu_data_path)

# Coordinate columns are located by a case-insensitive substring match on the
# column name; the first match wins, and the conventional names are used as a
# fallback when nothing matches.
lat_matches = [name for name in pmu_df.columns if 'lat' in name.lower()]
lon_matches = [name for name in pmu_df.columns if 'lon' in name.lower()]
lat_col = (lat_matches or ['Latitude'])[0]
lon_col = (lon_matches or ['Longitude'])[0]

# Run the project's coordinate validation and print each reported entry.
validation = spatial.validate_coordinates(pmu_df, lat_col, lon_col)
print("Coordinate Validation:")
for check_name, check_value in validation.items():
    print(f"  {check_name}: {check_value}")

## 2. Geographic Clustering (DBSCAN)

In [None]:
# Perform DBSCAN clustering on the PMU coordinates, using the eps and
# min_samples values defined in the project configuration.
pmu_clustered = spatial.perform_dbscan_clustering(
    pmu_df, lat_col, lon_col,
    eps=config.DBSCAN_EPS,
    min_samples=config.DBSCAN_MIN_SAMPLES
)

# Negative labels are handled as "not in any cluster" elsewhere in this
# notebook (they are drawn gray on the map), so they must not be counted as a
# cluster. Report them separately instead of folding them into nunique().
cluster_labels = pmu_clustered['Cluster']
num_clusters = cluster_labels[cluster_labels >= 0].nunique()
num_unclustered = int((cluster_labels < 0).sum())

print(f"Clusters found: {num_clusters}")
print(f"Unclustered (noise) points: {num_unclustered}")
print(cluster_labels.value_counts())

## 3. Interactive Map with Folium

In [None]:
# Create an interactive map of PMU locations, coloured by cluster label.
# Rows missing a coordinate or a cluster label are dropped before plotting.
valid_data = None
if validation['valid_coordinates'] > 0:
    valid_data = pmu_clustered[[lat_col, lon_col, 'Cluster']].dropna()

# Guard against an empty frame as well: the mean of no rows is NaN, which is
# not a usable map centre.
if valid_data is not None and not valid_data.empty:
    center_lat = valid_data[lat_col].mean()
    center_lon = valid_data[lon_col].mean()

    m = folium.Map(location=[center_lat, center_lon], zoom_start=10)

    # CircleMarker colours are passed through as CSS colours, so every entry
    # must be a valid CSS colour name. 'lightred' is only a folium Icon colour
    # and is replaced here by 'lightcoral'.
    colors = ['red', 'blue', 'green', 'purple', 'orange', 'darkred',
              'lightcoral', 'beige', 'darkblue', 'darkgreen']
    for lat, lon, label in zip(valid_data[lat_col], valid_data[lon_col], valid_data['Cluster']):
        # Negative labels (points in no cluster) are normalised to -1 and drawn gray;
        # real clusters cycle through the palette.
        cluster = int(label) if label >= 0 else -1
        color = colors[cluster % len(colors)] if cluster >= 0 else 'gray'
        folium.CircleMarker(
            location=[lat, lon],
            radius=5,
            popup=f"Cluster: {cluster}",
            color=color,
            fill=True
        ).add_to(m)

    map_path = Path(config.FIGURE_DIR) / 'interactive' / '04_01_pmu_locations_map.html'
    # Saving fails if the target directory is missing, so create it first.
    map_path.parent.mkdir(parents=True, exist_ok=True)
    m.save(str(map_path))
    print(f"Map saved to: {map_path}")
    display(m)
else:
    print("No valid coordinates for mapping")

## 4. Spatial Autocorrelation (Moran's I)

In [None]:
# Count disturbance records per section, then attach the counts to the PMU
# table. The left join keeps every PMU row; sections with no matching
# disturbance records get a count of 0.
section_counts = (
    merged_df
    .groupby('SectionID')
    .size()
    .reset_index(name='DisturbanceCount')
)
pmu_with_counts = pd.merge(pmu_df, section_counts, how='left', on='SectionID')
pmu_with_counts['DisturbanceCount'] = pmu_with_counts['DisturbanceCount'].fillna(value=0)

# Moran's I on the per-section disturbance counts, via the project helper.
morans_result = spatial.calculate_morans_i(
    pmu_with_counts,
    value_col='DisturbanceCount',
    lat_col=lat_col, lon_col=lon_col,
    threshold_distance=1.0,
)

print("Moran's I Spatial Autocorrelation Test:")
if 'error' in morans_result:
    # The helper signals failure through an 'error' entry in its result.
    print(f"  Error: {morans_result['error']}")
else:
    for stat_name, stat_value in morans_result.items():
        print(f"  {stat_name}: {stat_value}")

## 5. Network Analysis

In [None]:
# Build the proximity network from the PMU coordinates, with one node per
# SectionID, using the project helper.
G = spatial.build_proximity_network(
    pmu_df, lat_col, lon_col,
    id_col='SectionID',
    threshold_distance=1.0
)

n_nodes = G.number_of_nodes()
n_edges = G.number_of_edges()

# Density = 2E / (N * (N - 1)). With fewer than two nodes the denominator is
# zero, so report 0.0 instead of raising ZeroDivisionError.
# NOTE(review): this is the undirected-graph formula - confirm G is undirected.
possible_pairs = n_nodes * (n_nodes - 1)
density = 2 * n_edges / possible_pairs if possible_pairs > 0 else 0.0

print("Network Statistics:")
print(f"  Nodes: {n_nodes}")
print(f"  Edges: {n_edges}")
print(f"  Density: {density:.4f}")

# Calculate centrality and show the first ten rows of the returned table.
centrality = spatial.calculate_network_centrality(G)
print("\nTop 10 nodes by betweenness centrality:")
display(centrality.head(10))

## 6. Save Results

In [None]:
# Save a one-row-per-metric summary of the spatial analysis.

# Negative cluster labels mark points that belong to no cluster (this notebook
# draws them gray on the map), so they are excluded from the cluster count.
cluster_labels = pmu_clustered['Cluster']
num_clusters = cluster_labels[cluster_labels >= 0].nunique()

spatial_results = pd.DataFrame({
    'Metric': ['Num_Clusters', 'Valid_Coordinates', 'Morans_I', 'Network_Nodes', 'Network_Edges'],
    'Value': [
        num_clusters,
        validation['valid_coordinates'],
        # NaN when the Moran's I result has no 'Morans_I' entry.
        morans_result.get('Morans_I', np.nan),
        G.number_of_nodes(),
        G.number_of_edges(),
    ]
})

# Make sure the destination directory exists before writing the CSV.
results_path = Path(config.SPATIAL_RESULTS)
results_path.parent.mkdir(parents=True, exist_ok=True)
spatial_results.to_csv(results_path, index=False)
print(f"Spatial results saved to: {config.SPATIAL_RESULTS}")

## Summary

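This notebook completed the spatial and network analysis: coordinate validation, DBSCAN clustering, an interactive Folium map, Moran's I spatial autocorrelation, and proximity-network centrality. The summary metrics were saved to the path given by `config.SPATIAL_RESULTS`.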

**Next**: Notebook 05 (Predictive Modeling)