# PyEhsa Demo - São Paulo Grid Analysis


In [None]:
import sys
import os
sys.path.append(os.path.join(os.path.dirname(''), '..', 'src'))

import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import Point
from datetime import datetime, timedelta

from pyehsa.emerging_hotspot_analysis import EmergingHotspotAnalysis
from pyehsa.ehsa_plotting import EhsaPlotting

np.random.seed(42)
print("Libraries imported successfully")


In [None]:
# Create synthetic data - 5x5 grid in central São Paulo
center_lat, center_lon = -23.5489, -46.6388
step = 0.01

data = []
for i in range(5):
    for j in range(5):
        lat = center_lat + (i - 2) * step
        lon = center_lon + (j - 2) * step
        location_id = f'SP_{i}_{j}'
        
        for month in range(6):
            time_period = datetime(2024, 1, 1) + timedelta(days=30*month)
            
            value = np.random.poisson(10)
            
            # Emerging hotspot in upper right corner
            if i >= 3 and j >= 3 and month >= 2:
                value += (month - 1) * 8
            
            value += np.random.normal(0, 2)
            value = max(0, value)
            
            data.append({
                'location_id': location_id,
                'time_period': time_period,
                'value': value,
                'geometry': Point(lon, lat)
            })

gdf = gpd.GeoDataFrame(data, geometry='geometry', crs='EPSG:4326')
print(f"Dataset: {len(gdf)} observations, {gdf['location_id'].nunique()} locations")


Dataset: 150 observações, 25 locais


In [11]:
gdf.head()

Unnamed: 0,location_id,time_period,value,geometry
0,SP_0_0,2024-01-01,11.531693,POINT (-46.6588 -23.5689)
1,SP_0_0,2024-01-31,10.531726,POINT (-46.6588 -23.5689)
2,SP_0_0,2024-03-01,13.073165,POINT (-46.6588 -23.5689)
3,SP_0_0,2024-03-31,7.06854,POINT (-46.6588 -23.5689)
4,SP_0_0,2024-04-30,6.974338,POINT (-46.6588 -23.5689)


In [None]:
# Run EHSA analysis
results = EmergingHotspotAnalysis.emerging_hotspot_analysis(
    gdf,
    region_id_field='location_id',
    time_period_field='time_period', 
    value='value',
    k=1,
    nsim=99
)


2025-10-09 10:09:09 - INFO - 🚀 Starting Emerging Hotspot Analysis
2025-10-09 10:09:09 - INFO - 📊 Input DataFrame shape: (150, 4)
2025-10-09 10:09:09 - INFO - 🎯 Analysis parameters:
2025-10-09 10:09:09 - INFO -    - Region ID field: location_id
2025-10-09 10:09:09 - INFO -    - Time period field: time_period
2025-10-09 10:09:09 - INFO -    - Value field: value
2025-10-09 10:09:09 - INFO -    - Random seed: 77
2025-10-09 10:09:09 - INFO -    - Time lags (k): 1
2025-10-09 10:09:09 - INFO -    - Simulations (nsim): 99
2025-10-09 10:09:09 - INFO - 📈 Data overview:
2025-10-09 10:09:09 - INFO -    - Total rows: 150
2025-10-09 10:09:09 - INFO -    - Unique regions: 25
2025-10-09 10:09:09 - INFO -    - Unique time periods: 6
2025-10-09 10:09:09 - INFO -    - Value range: [0.043, 43.752]
2025-10-09 10:09:09 - INFO -    - Missing values in value: 0
2025-10-09 10:09:09 - INFO - 🔧 Step 1: Validating and cleaning input data...
2025-10-09 10:09:09 - INFO - ✅ Data validation completed
2025-10-09 10:09

Geometries are already Shapely objects, creating GeoDataFrame...
Setting CRS...
Creating complete spacetime cube:
  - 25 locations
  - 6 time periods
  - 150 total combinations
  - Original data: 150 rows
  - Complete cube: 150 rows
  - Missing combinations filled with NAs: 0
Spacetime cube dimensions:
  - 25 locations
  - 6 time periods
  - 150 total observations
Gi* range: [-4.417, 6.473]
Significant observations (p<=0.01): 18/150
Significant observations (p<=0.05): 42/150
DEBUG - Sample of borderline cases:
  Obs 0: Gi*=1.326, p=0.4500, sig=False
  Obs 10: Gi*=1.845, p=0.5000, sig=False
  Obs 20: Gi*=0.948, p=0.4500, sig=False
  Obs 30: Gi*=1.527, p=0.0500, sig=False
  Obs 40: Gi*=1.719, p=0.0600, sig=False
DEBUG - Neighbor permutation test:
  Original neighbors: [[6, 5, 1, 0], [6, 0, 7, 5, 2, 1], [6, 1, 7, 8, 3, 2]]
  Permuted neighbors: [[9, 1, 3, 6], [6, 7, 9, 2, 0, 5], [3, 7, 4, 9, 1, 5]]
  Neighbor structure changed: True


2025-10-09 10:09:09 - INFO - ✅ Gi* statistics calculated in 0.24s using spacetime method
2025-10-09 10:09:09 - INFO -    - Output shape: (150, 7)
2025-10-09 10:09:09 - INFO -    - Used 99 simulations for p-values
2025-10-09 10:09:09 - INFO - 🎯 Step 5: Performing emerging hotspot classification...
2025-10-09 10:09:09 - INFO - ✅ EHSA classification completed in 0.01s
2025-10-09 10:09:09 - INFO -    - Results shape: (25, 8)
2025-10-09 10:09:09 - INFO - 📋 ANALYSIS RESULTS SUMMARY
2025-10-09 10:09:09 - INFO - ⏱️  Total execution time: 0:00:00.270737
2025-10-09 10:09:09 - INFO - 🏷️  Classification distribution:
2025-10-09 10:09:09 - INFO -    - no pattern detected: 15 regions (60.0%)
2025-10-09 10:09:09 - INFO -    - sporadic hotspot: 5 regions (20.0%)
2025-10-09 10:09:09 - INFO -    - consecutive hotspot: 3 regions (12.0%)
2025-10-09 10:09:09 - INFO -    - oscilating hotspot: 1 regions (4.0%)
2025-10-09 10:09:09 - INFO -    - new hotspot: 1 regions (4.0%)
2025-10-09 10:09:09 - INFO - 📊 Mann

Total regions to process: 25
   No pattern #1: SP_0_0, Gi* range [-2.00, 1.33], 0/10 significant
   No pattern #2: SP_0_1, Gi* range [-3.07, 1.46], 0/10 significant
   No pattern #3: SP_0_2, Gi* range [-2.72, 1.76], 0/10 significant
✓ SP_1_1: sporadic hotspot
✓ SP_1_2: sporadic hotspot
✓ SP_2_2: sporadic hotspot
✓ SP_2_3: sporadic hotspot
✓ SP_3_2: sporadic hotspot
✓ SP_3_3: oscilating hotspot
✓ SP_3_4: consecutive hotspot
✓ SP_4_2: new hotspot
✓ SP_4_3: consecutive hotspot
✓ SP_4_4: consecutive hotspot
Total results processed: 25


In [None]:
# Show results
print("Patterns identified:")
print(results['classification'].value_counts())
print("\nFirst results:")
results.head()


Padrões identificados:
classification
no pattern detected    15
sporadic hotspot        5
consecutive hotspot     3
oscilating hotspot      1
new hotspot             1
Name: count, dtype: int64

Primeiros resultados:


Unnamed: 0,location_id,classification,classification_details,mann_kendall_details,spatial_context_summary,location_data,tau,p_value
0,SP_0_0,no pattern detected,{'reason': 'Conditions for specific hotspot/co...,"{'inputs': {'time_periods_count': 6, 'gi_star_...","{'neighbors_config': ['SP_1_1', 'SP_1_0', 'SP_...","[{'time_period': 2024-01-01 00:00:00, 'value':...",-0.866667,0.024171
1,SP_0_1,no pattern detected,{'reason': 'Conditions for specific hotspot/co...,"{'inputs': {'time_periods_count': 6, 'gi_star_...","{'neighbors_config': ['SP_1_1', 'SP_0_0', 'SP_...","[{'time_period': 2024-01-01 00:00:00, 'value':...",-0.333333,0.45237
2,SP_0_2,no pattern detected,{'reason': 'Conditions for specific hotspot/co...,"{'inputs': {'time_periods_count': 6, 'gi_star_...","{'neighbors_config': ['SP_1_1', 'SP_0_1', 'SP_...","[{'time_period': 2024-01-01 00:00:00, 'value':...",-0.333333,0.45237
3,SP_0_3,no pattern detected,{'reason': 'Conditions for specific hotspot/co...,"{'inputs': {'time_periods_count': 6, 'gi_star_...","{'neighbors_config': ['SP_1_4', 'SP_0_4', 'SP_...","[{'time_period': 2024-01-01 00:00:00, 'value':...",-0.466667,0.259656
4,SP_0_4,no pattern detected,{'reason': 'Conditions for specific hotspot/co...,"{'inputs': {'time_periods_count': 6, 'gi_star_...","{'neighbors_config': ['SP_1_4', 'SP_1_3', 'SP_...","[{'time_period': 2024-01-01 00:00:00, 'value':...",-0.866667,0.024171


In [None]:
# Create visualization using EhsaPlotting
locations = gdf[['location_id', 'geometry']].drop_duplicates()
viz_data = results.merge(locations, left_on=results.columns[0], right_on='location_id')

plotter = EhsaPlotting()
map_viz = plotter.plot_ehsa_map(viz_data, 'classification')

map_viz
