In [1]:
# 04 - Proposta Analítica (Demo) — Detecção de Picos (Bioma e UF)

import os
import sys
import numpy as np
import pandas as pd
from pathlib import Path

# Make the project root and its src/ folder importable from this notebook.
# The notebook is expected to run from a sub-folder of the project, so the
# project root is taken to be the parent of the current working directory.
BASE_DIR = Path.cwd().parent
SRC_DIR = BASE_DIR / "src"
for extra_path in (str(BASE_DIR), str(SRC_DIR)):
    if extra_path not in sys.path:
        sys.path.append(extra_path)

from src.plotting import save_fig

# Kept as plain strings (not Path objects) so existing string-based uses keep working.
INTERIM_PATH = str(BASE_DIR.joinpath("data", "interim", "focos_2019_2024.csv"))
FIG_DIR = str(BASE_DIR.joinpath("docs", "figures"))

# Load the interim dataset
df = pd.read_csv(INTERIM_PATH, low_memory=False)

# When the file has no `data` column, derive it from the `data_pas` column
# (calendar date only, time of day discarded).
missing_date_column = "data" not in df.columns
if missing_date_column and "data_pas" in df.columns:
    df["data"] = pd.to_datetime(df["data_pas"]).dt.date

# Função auxiliar: escore robusto (MAD)
def robust_z(x: pd.Series) -> pd.Series:
    """Return the MAD-based robust z-score of every value in ``x``.

    Each value is centred on the median of ``x`` and scaled by the median
    absolute deviation (MAD). The 0.6745 factor is the usual "modified
    z-score" constant (approximately the 0.75 quantile of the standard
    normal), which puts the score on a scale comparable to a classic z-score.

    Args:
        x: Numeric series to score.

    Returns:
        A series aligned with ``x`` holding the robust z-scores. When the MAD
        is zero (at least half of the values equal the median) a scale of 1.0
        is used instead, so the result stays finite.
    """
    center = np.median(x)
    deviations = x - center
    scale = np.median(np.abs(deviations))
    if not scale:
        # Zero MAD: fall back to a unit scale to avoid dividing by zero.
        scale = 1.0
    return 0.6745 * deviations / scale

# Pipeline per level (bioma/estado):
#   daily fire counts -> 7-day moving average -> robust z-score -> anomaly flag -> ranking.
JANELA_DIAS = 7  # moving-average window, in calendar days
LIMIAR_Z = 3     # a day is flagged as an anomaly when its robust z-score exceeds this

niveis = [nivel for nivel in ["bioma", "estado"] if nivel in df.columns]

# Normalised timestamps guarantee chronological ordering and allow building a
# daily calendar, whether `data` was loaded as text or as datetime.date objects.
# NOTE(review): pd.to_datetime raises on unparseable values — confirm `data` is clean.
dias = pd.to_datetime(df["data"]).dt.normalize() if niveis else None

resultados = []
ranks = []
for nivel in niveis:
    contagem = df.groupby([dias, nivel]).size()
    if contagem.empty:
        # No valid (date, level) pairs: nothing to rank for this level.
        continue

    # Days without any fire produce no row in the groupby result. Without
    # them, a 7-row rolling window would span more than 7 calendar days and
    # zero-fire days would never enter the median/MAD baseline. Reindex onto
    # the full calendar x category grid, filling the gaps with 0.
    datas_observadas = contagem.index.get_level_values("data")
    calendario = pd.date_range(datas_observadas.min(), datas_observadas.max(), freq="D")
    grade = pd.MultiIndex.from_product(
        [calendario, contagem.index.unique(level=nivel)],
        names=["data", nivel],
    )
    daily = contagem.reindex(grade, fill_value=0).reset_index(name="focos")

    # The grid is date-major and ascending, so rows inside each group are
    # already in chronological order for the rolling window.
    daily["focos_mm7"] = daily.groupby(nivel)["focos"].transform(
        lambda s: s.rolling(JANELA_DIAS, min_periods=1).mean()
    )
    daily["z"] = daily.groupby(nivel)["focos_mm7"].transform(robust_z)
    daily["anomalia"] = daily["z"] > LIMIAR_Z
    resultados.append(daily.assign(nivel=nivel))

    # Ranking: most anomalous days first, total fire count as tie-breaker.
    rk = (
        daily.groupby(nivel)
        .agg(anomalias=("anomalia", "sum"), focos=("focos", "sum"))
        .reset_index()
        .sort_values(["anomalias", "focos"], ascending=[False, False])
    )
    rk.insert(0, "nivel", nivel)
    ranks.append(rk)

# Empty frame (no `nivel` column) when neither level could be processed.
rank = pd.concat(ranks, ignore_index=True) if ranks else pd.DataFrame()
rank.head()


In [2]:
# Export the ranking and the anomaly bar charts (tolerates a ranking with no columns).
# Relies on `rank`, `BASE_DIR`, `FIG_DIR` and `save_fig` from the previous cell.
import matplotlib.pyplot as plt

rank_path = BASE_DIR / "data" / "interim" / "ranking_anomalias.csv"

if "nivel" in rank.columns:
    rank.to_csv(rank_path, index=False)

    # Horizontal bars: top 10 per bioma and per UF, only for levels that exist.
    # `rank` is already sorted within each level, so head(10) is the top 10.
    for nivel in ["bioma", "estado"]:
        if nivel not in rank.columns:
            continue
        sub = rank[rank["nivel"] == nivel].head(10)
        if sub.empty:
            continue
        fig, ax = plt.subplots(figsize=(8, 4))
        ax.barh(sub[nivel].astype(str), sub["anomalias"])
        ax.set_title(f"Top 10 anomalias — {nivel}")
        ax.set_xlabel("Dias com anomalia")
        ax.invert_yaxis()  # largest bar at the top
        save_fig(fig, str(FIG_DIR), f"rank_anomalias_{nivel}.png")
        plt.close(fig)  # release the figure; it is only needed on disk
else:
    # Nothing to export: the ranking has no columns, so writing it would only
    # produce a CSV without usable content. Skip the file and say so.
    print(f"Ranking vazio: nenhum arquivo exportado para {rank_path}")
