In [1]:
# 03 - AED Temporal e Espacial (Bioma e UF)

import os
import sys
import pandas as pd
from pathlib import Path

# Make the project root and its src/ folder importable so that
# `from src.plotting import ...` resolves when the notebook runs from a sub-folder.
BASE_DIR = Path.cwd().parent
SRC_DIR = BASE_DIR / "src"
# Append each location only if it is not already on sys.path.
sys.path.extend(
    location
    for location in (str(BASE_DIR), str(SRC_DIR))
    if location not in sys.path
)

from src.plotting import plot_series_grouped, save_fig

# Input table and figure output folder, both resolved from the project root.
# They are kept as plain strings, as in the rest of the notebook.
INTERIM_PATH = str(BASE_DIR / "data" / "interim" / "focos_2019_2024.csv")
FIG_DIR = str(BASE_DIR / "docs" / "figures")

# Load the interim table; low_memory=False makes pandas read the file in one
# pass instead of chunk-wise dtype inference.
df = pd.read_csv(filepath_or_buffer=INTERIM_PATH, low_memory=False)

# Build the unified datetime column "data_dt" from the first temporal column
# that exists, in priority order: data -> data_pas -> month.
# Values that cannot be parsed become NaT (errors="coerce").
if "data" in df.columns:
    df["data_dt"] = pd.to_datetime(df["data"], errors="coerce")
elif "data_pas" in df.columns:
    df["data_dt"] = pd.to_datetime(df["data_pas"], errors="coerce")
elif "month" in df.columns:
    # "YYYY-MM" labels are anchored to the FIRST day of their month.
    df["data_dt"] = pd.to_datetime(df["month"].astype(str) + "-01", errors="coerce")
else:
    # No known temporal column: stop here with a clear message. A bare
    # `df.head()` in the middle of a cell is neither displayed nor an abort,
    # and the run would otherwise fail later with an unrelated KeyError.
    raise RuntimeError(
        "Nenhuma coluna temporal conhecida (data, data_pas, month); "
        f"colunas disponíveis: {list(df.columns)[:20]}"
    )

# Count rows per calendar date and biome / state, then hand each table to
# plot_series_grouped together with the output folder and file name.
if "bioma" in df.columns:
    bioma_ts = (
        df.groupby([df["data_dt"].dt.date, "bioma"])
        .size()
        .rename("focos")
        .reset_index()
    )
    plot_series_grouped(bioma_ts, "data_dt", "focos", "bioma", FIG_DIR, "series_bioma.png")

if "estado" in df.columns:
    uf_ts = (
        df.groupby([df["data_dt"].dt.date, "estado"])
        .size()
        .rename("focos")
        .reset_index()
    )
    plot_series_grouped(uf_ts, "data_dt", "focos", "estado", FIG_DIR, "series_uf.png")

# Cell output: preview the biome series when it exists, otherwise the raw table.
preview = bioma_ts if "bioma" in df.columns else df
preview.head()


Unnamed: 0,month,Amazônia,Caatinga,Cerrado,Mata Atlântica,Pampa,Pantanal,nan,ACRE,ALAGOAS,...,Vaupés,Vichada,Wanica,Yaracuy,Zamora Chinchipe,Zulia,Ñeembucú,day,focos,data_dt
0,2019-01,1419.0,433.0,1213.0,608.0,20.0,337.0,8427.0,,,...,,,,,,,,,,2019-01-01
1,2019-02,1368.0,73.0,574.0,607.0,32.0,211.0,15019.0,,,...,,,,,,,,,,2019-02-01
2,2019-03,3383.0,58.0,936.0,661.0,82.0,93.0,14349.0,,,...,,,,,,,,,,2019-03-01
3,2019-04,1702.0,28.0,753.0,276.0,50.0,33.0,10269.0,,,...,,,,,,,,,,2019-04-01
4,2019-05,854.0,59.0,1719.0,240.0,23.0,68.0,3200.0,,,...,,,,,,,,,,2019-05-01


In [2]:
# Monthly seasonality (biome and state) based on the unified data_dt column.
from pandas import to_datetime
# plt.close() is used below; import pyplot here so the cell also runs on a
# fresh kernel (Restart & Run All) instead of raising NameError.
import matplotlib.pyplot as plt
from src.plotting import save_fig

if "data_dt" not in df.columns:
    raise RuntimeError("Coluna temporal unificada 'data_dt' ausente")

# "YYYY-MM" label per row. Rows whose date could not be parsed (NaT) are left
# missing instead of becoming the literal string "NaT", so groupby drops them
# rather than plotting a bogus extra month.
_data_dt = to_datetime(df["data_dt"])
df["mes"] = _data_dt.dt.to_period("M").astype(str).where(_data_dt.notna())

if "bioma" in df.columns:
    # Rows per (month, biome) -> one column per biome; absent pairs count as 0.
    bioma_month = df.groupby(["mes", "bioma"]).size().reset_index(name="focos")
    pivot = bioma_month.pivot(index="mes", columns="bioma", values="focos").fillna(0)
    fig = pivot.plot(figsize=(10, 5), title="Focos por mês — Biomas").get_figure()
    save_fig(fig, str(FIG_DIR), "focos_mes_bioma.png")
    plt.close(fig)

if "estado" in df.columns:
    # Same aggregation per state, smoothed with a 3-row (month) moving average.
    uf_month = df.groupby(["mes", "estado"]).size().reset_index(name="focos")
    pivot = uf_month.pivot(index="mes", columns="estado", values="focos").fillna(0)
    fig = pivot.rolling(3, min_periods=1).mean().plot(figsize=(12, 6), title="Focos por mês — UFs (MM3)").get_figure()
    save_fig(fig, str(FIG_DIR), "focos_mes_uf.png")
    plt.close(fig)


In [3]:
# State series: keep only the 8 states with the most rows so the chart stays readable.
# NOTE(review): this writes 'series_uf.png', the same file name used by the first
# cell's plot_series_grouped call — confirm the overwrite is intended.
from src.plotting import save_fig
import matplotlib.pyplot as plt

if 'estado' in df.columns:
    agg_uf = df.groupby('estado').size().sort_values(ascending=False)
    top_ufs = agg_uf.head(8).index.tolist()
    # Take the date key from the filtered frame itself, so the grouping does not
    # depend on pandas re-aligning a key built from the unfiltered frame.
    df_top = df[df['estado'].isin(top_ufs)]
    uf_top = df_top.groupby([df_top['data_dt'].dt.date, 'estado']).size().reset_index(name='focos')
    pivot = uf_top.pivot(index='data_dt', columns='estado', values='focos').fillna(0)
    # rolling(7) counts rows, not days: insert calendar days that have no rows at
    # all (as zeros) so the window really spans 7 days, as the title states.
    pivot.index = pd.to_datetime(pivot.index)
    pivot = pivot.asfreq('D', fill_value=0)
    fig, ax = plt.subplots(figsize=(12,6))
    pivot.rolling(7, min_periods=1).mean().plot(ax=ax, linewidth=2.0)
    ax.set_title('Série temporal — Top 8 UFs (média móvel 7d)')
    ax.set_xlabel('Data')
    ax.set_ylabel('Focos')
    ax.grid(True, linestyle='--', alpha=0.4)
    # Legend sits outside the axes, to the right, so it does not cover the lines.
    ax.legend(title='UF', loc='center left', bbox_to_anchor=(1.02, 0.5), frameon=False)
    save_fig(fig, str(FIG_DIR), 'series_uf.png')
    plt.close(fig)


In [4]:
# Caso os dados estejam em formato "wide" (colunas por Bioma/UF), gerar séries
import numpy as np
import matplotlib.pyplot as plt
from src.plotting import save_fig

# Wide-format fallback: the table has one count column per biome / state instead
# of "bioma" / "estado" columns. Both series share the helpers below.
import unicodedata


def _normalize_name(label):
    """Return ``label`` upper-cased with combining accent marks removed."""
    decomposed = unicodedata.normalize("NFKD", str(label))
    return "".join(ch for ch in decomposed if not unicodedata.combining(ch)).upper()


def _wide_to_monthly(frame, value_cols, var_name):
    """Melt wide per-category columns into long form and pivot them by month.

    Parameters
    ----------
    frame : DataFrame with ``value_cols`` plus a "month" and/or "data_dt" column.
    value_cols : list of column names, one per category.
    var_name : name of the category column in the long table.

    Returns
    -------
    (long_table, monthly) : the melted table (columns: month, ``var_name``,
        focos, data_plot) and the month-start-indexed pivot with one column per
        category, missing cells filled with 0.
    """
    has_month_label = "month" in frame.columns
    if has_month_label:
        base = frame[["month"] + value_cols]
    else:
        base = frame[["data_dt"] + value_cols].rename(columns={"data_dt": "month"})
    long_table = base.melt(
        id_vars=["month"], value_vars=value_cols, var_name=var_name, value_name="focos"
    ).dropna()
    if has_month_label:
        # "YYYY-MM" label -> first day of that month.
        long_table["data_plot"] = pd.to_datetime(
            long_table["month"].astype(str) + "-01", errors="coerce"
        )
    else:
        # Already a date: appending "-01" would yield an unparseable string and
        # an all-NaT axis, so truncate to the month start instead.
        long_table["data_plot"] = (
            pd.to_datetime(long_table["month"], errors="coerce")
            .dt.to_period("M")
            .dt.to_timestamp()
        )
    # Sum within each (month, category): several dates may fall in the same month.
    monthly = long_table.pivot_table(
        index="data_plot", columns=var_name, values="focos", aggfunc="sum"
    ).fillna(0)
    if not monthly.empty:
        # Insert months with no rows as zeros so a 3-row window is a 3-month window.
        monthly = monthly.asfreq("MS", fill_value=0)
    return long_table, monthly


def _plot_monthly_mm3(monthly, title, figsize, file_name, legend=True):
    """Plot the 3-month moving average of ``monthly``, save it under FIG_DIR and return (fig, ax)."""
    fig, ax = plt.subplots(figsize=figsize)
    monthly.rolling(3, min_periods=1).mean().plot(ax=ax, legend=legend)
    ax.set_title(title)
    ax.set_xlabel('Data')
    ax.set_ylabel('Focos')
    save_fig(fig, str(FIG_DIR), file_name)
    plt.close(fig)
    return fig, ax


# Biome series from wide columns
bioma_cols = [c for c in ["Amazônia","Caatinga","Cerrado","Mata Atlântica","Pampa","Pantanal"] if c in df.columns]
if bioma_cols and ("month" in df.columns or "data_dt" in df.columns):
    long_bioma, pivot = _wide_to_monthly(df, bioma_cols, 'bioma')
    fig, ax = _plot_monthly_mm3(
        pivot, 'Séries mensais de focos — Biomas (MM3)', (10,5), 'series_bioma.png'
    )

# State (UF) series from wide columns
ufs_br = [
    'ACRE','ALAGOAS','AMAPA','AMAZONAS','BAHIA','CEARA','DISTRITO FEDERAL','ESPIRITO SANTO',
    'GOIAS','MARANHAO','MATO GROSSO','MATO GROSSO DO SUL','MINAS GERAIS','PARA','PARAIBA',
    'PARANA','PERNAMBUCO','PIAUI','RIO DE JANEIRO','RIO GRANDE DO NORTE','RIO GRANDE DO SUL',
    'RONDONIA','RORAIMA','SANTA CATARINA','SAO PAULO','SERGIPE','TOCANTINS'
]
# The reference list is unaccented, so compare accent- and case-insensitively;
# otherwise columns such as "PARÁ" or "SÃO PAULO" would be skipped.
# NOTE(review): the match is case-insensitive, so a non-Brazilian region with the
# same name (e.g. another country's "Amazonas") would be picked up too — confirm.
uf_cols = [c for c in df.columns if _normalize_name(c) in ufs_br]
if uf_cols and ("month" in df.columns or "data_dt" in df.columns):
    long_uf, pivot = _wide_to_monthly(df, uf_cols, 'estado')
    # The legend is suppressed for this chart (one line per state column).
    fig, ax = _plot_monthly_mm3(
        pivot, 'Séries mensais de focos — UFs (MM3)', (12,6), 'series_uf.png', legend=False
    )

