# Moran's I Spatial Autocorrelation Analysis

**Sprint 8 - Phase 2**: Advanced Geospatial Analysis

## Objectives
1. Calculate Global Moran's I for spatial autocorrelation
2. Calculate Local Moran's I (LISA) for cluster detection
3. Identify spatial cluster types (HH, LL, LH, HL)
4. Visualize LISA clusters on an interactive map

In [None]:
import json
from pathlib import Path
import pandas as pd
import numpy as np
import geopandas as gpd
from libpysal.weights import KNN
from esda.moran import Moran, Moran_Local
import matplotlib.pyplot as plt
import seaborn as sns
import folium

# Input data location and output locations used by the cells below.
DATA_DIR = Path('../../data')
FIG_DIR = Path('figures')
MAP_DIR = Path('maps')

# Figures are saved into FIG_DIR and the interactive map into MAP_DIR; neither
# plt.savefig nor folium's Map.save creates missing parent directories, so make
# sure both exist before any analysis runs. DATA_DIR is an input location and is
# deliberately NOT created: a missing data directory should fail loudly on read.
for _out_dir in (FIG_DIR, MAP_DIR):
    _out_dir.mkdir(parents=True, exist_ok=True)
print('✅ Imports complete')

In [None]:
# Read the event geometries, then reproject a copy to EPSG:5070 for the
# neighbour search (the unprojected `gdf` is kept for attributes and mapping).
events_path = DATA_DIR / 'geospatial_events.parquet'
gdf = gpd.read_parquet(events_path)
gdf_proj = gdf.to_crs('EPSG:5070')
print(f'✅ Loaded {len(gdf):,} events')

# Spatial weights: each event is linked to its 8 nearest neighbours, computed
# on the projected geometries.
w = KNN.from_dataframe(gdf_proj, k=8)
# 'r' = row-standardised weights (each row of the weights matrix sums to 1).
w.transform = 'r'
print('✅ Spatial weights created')

## Global Moran's I

In [None]:
# Global Moran's I of the 'inj_tot_f' column under the KNN weights `w`.
# 999 permutations are run, but the figures reported below are the
# normal-approximation ones (VI_norm / z_norm / p_norm).
moran_global = Moran(gdf['inj_tot_f'].values, w, permutations=999)

print("\n=== Global Moran's I ===")
# (label, value, format spec) for each reported statistic, in display order.
report_rows = [
    ("Moran's I", moran_global.I, '.4f'),
    ("Expected I", moran_global.EI, '.4f'),
    ("Variance", moran_global.VI_norm, '.6f'),
    ("Z-score", moran_global.z_norm, '.4f'),
    ("P-value", moran_global.p_norm, '.4f'),
]
for label, value, spec in report_rows:
    print(f"{label}: {value:{spec}}")

# Sign of I gives the direction; p_norm < 0.05 decides significance.
if moran_global.I > 0:
    direction = 'Positive'
else:
    direction = 'Negative'
if moran_global.p_norm < 0.05:
    verdict = 'Significant'
else:
    verdict = 'Not Significant'
print(f"\nInterpretation: {direction} spatial autocorrelation")
print(f"Significance: {verdict} at α=0.05")

## Local Moran's I (LISA)

In [None]:
# Calculate LISA (Local Moran's I) for every event, using the same variable
# and spatial weights as the global statistic.
lisa = Moran_Local(gdf['inj_tot_f'].values, w, permutations=999)

# Add to GeoDataFrame: local statistic, quadrant code, permutation-based
# pseudo p-value, and a significance flag at the 0.05 level.
gdf['lisa_I'] = lisa.Is
gdf['lisa_q'] = lisa.q
gdf['lisa_p'] = lisa.p_sim
gdf['lisa_significant'] = gdf['lisa_p'] < 0.05

# Classify cluster types. Quadrant codes 1-4 map to HH/LH/LL/HL; key 0 is the
# label used for observations that are not significant.
cluster_names = {1: 'HH (High-High)', 2: 'LH (Low-High)', 3: 'LL (Low-Low)', 4: 'HL (High-Low)', 0: 'Not Significant'}
# Vectorised labelling: translate every quadrant code in one pass, then
# overwrite the non-significant rows. This replaces a row-wise
# DataFrame.apply, which calls a Python lambda once per event.
gdf['lisa_cluster'] = (
    gdf['lisa_q']
    .map(cluster_names)
    .where(gdf['lisa_significant'], cluster_names[0])
)

# Count clusters (value_counts orders the labels by descending frequency).
lisa_counts = gdf['lisa_cluster'].value_counts()
print('\n=== LISA Cluster Counts ===')
print(lisa_counts)

## Visualizations

In [None]:
# Moran scatterplot of the global statistic, saved to FIG_DIR.
from splot.esda import moran_scatterplot

fig, ax = plt.subplots(figsize=(10, 8))
moran_scatterplot(moran_global, ax=ax)

# Put the computed I value in the title so the figure is self-describing.
scatter_title = f"Moran's I Scatterplot (I={moran_global.I:.4f})"
ax.set_title(scatter_title, fontsize=14, fontweight='bold')

fig.tight_layout()
scatter_path = FIG_DIR / 'morans_i_scatterplot.png'
fig.savefig(scatter_path, dpi=150, bbox_inches='tight')
plt.show()
print('✅ Saved: morans_i_scatterplot.png')

In [None]:
# LISA cluster distribution (horizontal bar chart of cluster counts).
#
# `lisa_counts` comes from value_counts(), so its rows are ordered by
# frequency, not by cluster type. Colours are therefore looked up by cluster
# label rather than passed as a fixed positional list; otherwise the same
# cluster type could be drawn in a different colour from run to run, and a
# missing category would shift every colour after it.
bar_palette = {
    'HH (High-High)': 'red',
    'LH (Low-High)': 'orange',
    'LL (Low-Low)': 'blue',
    'HL (High-Low)': 'purple',
    'Not Significant': 'gray',
}
# Unknown labels fall back to gray instead of raising.
bar_colors = [bar_palette.get(label, 'gray') for label in lisa_counts.index]

fig, ax = plt.subplots(figsize=(10, 6))
lisa_counts.plot(kind='barh', ax=ax, color=bar_colors, edgecolor='black')
ax.set_title('LISA Cluster Type Distribution', fontsize=14, fontweight='bold')
ax.set_xlabel('Count', fontsize=12)
ax.set_ylabel('Cluster Type', fontsize=12)
# barh draws the first row at the bottom; invert so the largest count is on top.
ax.invert_yaxis()
# Annotate each bar with its count just past the bar end.
for i, v in enumerate(lisa_counts.values):
    ax.text(v, i, f' {v:,}', va='center')
plt.tight_layout()
plt.savefig(FIG_DIR / 'lisa_cluster_distribution.png', dpi=150, bbox_inches='tight')
plt.show()
print('✅ Saved: lisa_cluster_distribution.png')

## Interactive LISA Map

In [None]:
# Base map with a fixed initial centre and zoom level.
m = folium.Map(location=[39.8283, -98.5795], zoom_start=4)

# Marker colour for each cluster label.
color_map = {
    'HH (High-High)': 'red',
    'LL (Low-Low)': 'blue',
    'LH (Low-High)': 'pink',
    'HL (High-Low)': 'lightblue',
    'Not Significant': 'gray'
}

# Only significant events are drawn; cap at 5000 markers (fixed seed so the
# same subset is drawn on every run).
significant = gdf.loc[gdf['lisa_significant']].copy()
if len(significant) > 5000:
    significant = significant.sample(n=5000, random_state=42)

for _, event in significant.iterrows():
    marker_color = color_map[event['lisa_cluster']]
    popup_html = f"""<b>{event['lisa_cluster']}</b><br>
                  Event: {event['ev_id']}<br>
                  Fatalities: {event['inj_tot_f']}<br>
                  LISA I: {event['lisa_I']:.4f}<br>
                  P-value: {event['lisa_p']:.4f}"""
    marker = folium.CircleMarker(
        location=[event['dec_latitude'], event['dec_longitude']],
        radius=4,
        color=marker_color,
        fill=True,
        fillColor=marker_color,
        fillOpacity=0.7,
        popup=popup_html
    )
    marker.add_to(m)

# Fixed-position legend. NOTE: the counts come from `lisa_counts`, i.e. the
# full dataset, not the (possibly down-sampled) set of markers on the map.
legend_html = f'''<div style="position: fixed; bottom: 50px; right: 50px; width: 250px; 
                background-color: white; border:2px solid grey; z-index:9999; 
                font-size:14px; padding: 10px">
                <p><b>LISA Clusters</b></p>
                <p style="color:red">● HH (High-High): {lisa_counts.get("HH (High-High)", 0):,}</p>
                <p style="color:blue">● LL (Low-Low): {lisa_counts.get("LL (Low-Low)", 0):,}</p>
                <p style="color:pink">● LH (Low-High): {lisa_counts.get("LH (Low-High)", 0):,}</p>
                <p style="color:lightblue">● HL (High-Low): {lisa_counts.get("HL (High-Low)", 0):,}</p>
                </div>'''
legend_element = folium.Element(legend_html)
m.get_root().html.add_child(legend_element)

map_path = MAP_DIR / 'lisa_clusters.html'
m.save(str(map_path))
print('✅ Saved: maps/lisa_clusters.html')
# Bare expression so the notebook renders the map inline.
m

## Save Results

In [None]:
# Export the per-event LISA results as GeoJSON (identifier, location,
# analysed variable, LISA outputs and geometry only).
output_columns = [
    'ev_id', 'ev_state', 'dec_latitude', 'dec_longitude', 'inj_tot_f',
    'lisa_I', 'lisa_q', 'lisa_p', 'lisa_cluster', 'geometry',
]
gdf_output = gdf[output_columns].copy()
gdf_output.to_file(DATA_DIR / 'lisa_clusters.geojson', driver='GeoJSON')
print('✅ Saved: lisa_clusters.geojson')

# Summary statistics. Values are cast to built-in float/bool so that the
# json module can serialise them.
if moran_global.I > 0:
    interpretation = 'Positive spatial autocorrelation'
else:
    interpretation = 'Negative spatial autocorrelation'

global_summary = {
    'I': float(moran_global.I),
    'expected_I': float(moran_global.EI),
    'z_score': float(moran_global.z_norm),
    'p_value': float(moran_global.p_norm),
    'interpretation': interpretation,
    'significant': bool(moran_global.p_norm < 0.05),
}
stats = {
    'global_morans_i': global_summary,
    'lisa_clusters': lisa_counts.to_dict(),
}

results_path = DATA_DIR / 'morans_i_results.json'
with open(results_path, 'w') as f:
    f.write(json.dumps(stats, indent=2))
print('✅ Saved: morans_i_results.json')
print("\n✅ Moran's I Analysis Complete")