# PyEhsa Demo - São Paulo


In [1]:
import sys
import os
sys.path.append(os.path.join(os.path.dirname(''), '..', 'src'))

import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import Point
from datetime import datetime, timedelta

from pyehsa.emerging_hotspot_analysis import EmergingHotspotAnalysis

np.random.seed(42)


In [3]:
# Criar dados sintéticos - Grid 5x5 no centro de SP
center_lat, center_lon = -23.5489, -46.6388
step = 0.01

data = []
for i in range(5):
    for j in range(5):
        lat = center_lat + (i - 2) * step
        lon = center_lon + (j - 2) * step
        location_id = f'SP_{i}_{j}'
        
        for month in range(6):
            time_period = datetime(2024, 1, 1) + timedelta(days=30*month)
            
            value = np.random.poisson(10)
            
            # Hotspot emergente no canto superior direito
            if i >= 3 and j >= 3 and month >= 2:
                value += (month - 1) * 8
            
            value += np.random.normal(0, 2)
            value = max(0, value)
            
            data.append({
                'location_id': location_id,
                'time_period': time_period,
                'value': value,
                'geometry': Point(lon, lat)
            })

gdf = gpd.GeoDataFrame(data, geometry='geometry', crs='EPSG:4326')
print(f"Dataset: {len(gdf)} observações, {gdf['location_id'].nunique()} locais")


Dataset: 150 observações, 25 locais


In [4]:
gdf.head()

Unnamed: 0,location_id,time_period,value,geometry
0,SP_0_0,2024-01-01,11.153114,POINT (-46.6588 -23.5689)
1,SP_0_0,2024-01-31,10.6225,POINT (-46.6588 -23.5689)
2,SP_0_0,2024-03-01,1.787107,POINT (-46.6588 -23.5689)
3,SP_0_0,2024-03-31,8.406927,POINT (-46.6588 -23.5689)
4,SP_0_0,2024-04-30,10.374283,POINT (-46.6588 -23.5689)


In [5]:
# Executar análise EHSA
results = EmergingHotspotAnalysis.emerging_hotspot_analysis(
    gdf,
    region_id_field='location_id',
    time_period_field='time_period', 
    value='value',
    k=1,
    nsim=99
)


2025-10-09 10:04:31 - INFO - 🚀 Starting Emerging Hotspot Analysis
2025-10-09 10:04:31 - INFO - 📊 Input DataFrame shape: (150, 4)
2025-10-09 10:04:31 - INFO - 🎯 Analysis parameters:
2025-10-09 10:04:31 - INFO -    - Region ID field: location_id
2025-10-09 10:04:31 - INFO -    - Time period field: time_period
2025-10-09 10:04:31 - INFO -    - Value field: value
2025-10-09 10:04:31 - INFO -    - Random seed: 77
2025-10-09 10:04:31 - INFO -    - Time lags (k): 1
2025-10-09 10:04:31 - INFO -    - Simulations (nsim): 99
2025-10-09 10:04:31 - INFO - 📈 Data overview:
2025-10-09 10:04:31 - INFO -    - Total rows: 150
2025-10-09 10:04:31 - INFO -    - Unique regions: 25
2025-10-09 10:04:31 - INFO -    - Unique time periods: 6
2025-10-09 10:04:31 - INFO -    - Value range: [0.976, 50.745]
2025-10-09 10:04:31 - INFO -    - Missing values in value: 0
2025-10-09 10:04:31 - INFO - 🔧 Step 1: Validating and cleaning input data...
2025-10-09 10:04:31 - INFO - ✅ Data validation completed
2025-10-09 10:04

Geometries are already Shapely objects, creating GeoDataFrame...
Setting CRS...
Creating complete spacetime cube:
  - 25 locations
  - 6 time periods
  - 150 total combinations
  - Original data: 150 rows
  - Complete cube: 150 rows
  - Missing combinations filled with NAs: 0
Spacetime cube dimensions:
  - 25 locations
  - 6 time periods
  - 150 total observations
Gi* range: [-5.976, 6.604]
Significant observations (p<=0.01): 23/150
Significant observations (p<=0.05): 44/150
DEBUG - Sample of borderline cases:
  Obs 0: Gi*=0.187, p=0.2800, sig=False
  Obs 10: Gi*=0.602, p=0.1900, sig=False
  Obs 20: Gi*=0.197, p=0.1900, sig=False
  Obs 30: Gi*=-1.530, p=0.0900, sig=False
  Obs 40: Gi*=-1.047, p=0.1700, sig=False
DEBUG - Neighbor permutation test:
  Original neighbors: [[6, 5, 1, 0], [6, 0, 7, 5, 2, 1], [6, 1, 7, 8, 3, 2]]
  Permuted neighbors: [[9, 1, 3, 6], [6, 7, 9, 2, 0, 5], [3, 7, 4, 9, 1, 5]]
  Neighbor structure changed: True


2025-10-09 10:04:32 - INFO -    - Output shape: (150, 7)
2025-10-09 10:04:32 - INFO -    - Used 99 simulations for p-values
2025-10-09 10:04:32 - INFO - 🎯 Step 5: Performing emerging hotspot classification...
2025-10-09 10:04:32 - INFO - ✅ EHSA classification completed in 0.01s
2025-10-09 10:04:32 - INFO -    - Results shape: (25, 8)
2025-10-09 10:04:32 - INFO - 📋 ANALYSIS RESULTS SUMMARY
2025-10-09 10:04:32 - INFO - ⏱️  Total execution time: 0:00:00.322230
2025-10-09 10:04:32 - INFO - 🏷️  Classification distribution:
2025-10-09 10:04:32 - INFO -    - no pattern detected: 15 regions (60.0%)
2025-10-09 10:04:32 - INFO -    - consecutive hotspot: 5 regions (20.0%)
2025-10-09 10:04:32 - INFO -    - sporadic coldspot: 4 regions (16.0%)
2025-10-09 10:04:32 - INFO -    - sporadic hotspot: 1 regions (4.0%)
2025-10-09 10:04:32 - INFO - 📊 Mann-Kendall Tau statistics:
2025-10-09 10:04:32 - INFO -    - Mean: -0.0240
2025-10-09 10:04:32 - INFO -    - Std: 0.5288
2025-10-09 10:04:32 - INFO -    - R

Total regions to process: 25
   No pattern #1: SP_0_0, Gi* range [-2.02, 0.19], 0/10 significant
   No pattern #2: SP_0_1, Gi* range [-3.22, -1.16], 0/10 significant
✓ SP_0_2: sporadic coldspot
   No pattern #3: SP_0_3, Gi* range [-3.29, -1.14], 0/10 significant
✓ SP_1_1: sporadic coldspot
✓ SP_1_2: sporadic coldspot
✓ SP_1_3: sporadic coldspot
✓ SP_2_4: sporadic hotspot
✓ SP_3_3: consecutive hotspot
✓ SP_3_4: consecutive hotspot
✓ SP_4_2: consecutive hotspot
✓ SP_4_3: consecutive hotspot
✓ SP_4_4: consecutive hotspot
Total results processed: 25


In [None]:
# Mostrar resultados
print("Padrões identificados:")
print(results['classification'].value_counts())
print("\nPrimeiros resultados:")
results.head()


Padrões identificados:
classification
no pattern detected    15
consecutive hotspot     5
sporadic coldspot       4
sporadic hotspot        1
Name: count, dtype: int64

Primeiros resultados:


KeyError: "['region_id'] not in index"

In [None]:
# Criar mapa
import folium

locations = gdf[['location_id', 'geometry']].drop_duplicates()
viz_data = results.merge(locations, left_on=results.columns[0], right_on='location_id')

m = folium.Map(location=[center_lat, center_lon], zoom_start=13)

color_map = {
    'no pattern detected': 'gray',
    'new hotspot': 'red',
    'consecutive hotspot': 'darkred',
    'sporadic hotspot': 'orange'
}

for _, row in viz_data.iterrows():
    color = color_map.get(row['classification'], 'gray')
    
    folium.CircleMarker(
        location=[row['geometry'].y, row['geometry'].x],
        radius=8,
        popup=f"{row[results.columns[0]]}: {row['classification']}",
        color='black',
        fillColor=color,
        fillOpacity=0.7
    ).add_to(m)

m


KeyError: 'region_id'