## 0. Pré-requisitos para funcionamento das respectivas partes

### Importações de bibliotecas

In [4]:
import os, sys, gc, importlib
from datetime import date
from pathlib import Path
from ipywidgets import (
    Dropdown, Button, Output, VBox, BoundedIntText, HBox,
    Accordion, HTML, DatePicker, Layout, Label, Text, Checkbox
)
from IPython.display import display
import pandas as pd
import geobr
import matplotlib.pyplot as plt

from pysus.online_data import IBGE
from pysus import SIH
import geopandas as gpd
from typing import List, Tuple

# Identify the project root automatically
def get_project_root(marker='analise-temporal-municipios'):
    """Locate the project root by walking up from the current working directory.

    Args:
        marker: Name of the directory that identifies the project root.

    Returns:
        The nearest directory -- the working directory itself or one of its
        ancestors -- whose name equals ``marker``. When no such directory
        exists, the working directory is returned unchanged, so a caller that
        ``chdir``s into the result is never moved to the filesystem root.
    """
    start = Path.cwd()
    for candidate in (start, *start.parents):
        if candidate.name == marker:
            return candidate
    # No ancestor matched: staying put is safer than handing back the
    # filesystem root, which is where an exhausted upward walk would end.
    return start

# Resolve the project root once; every step below depends on it.
ROOT = get_project_root()
print(f"Project Root: {ROOT}")

# Add the root to sys.path so that imports from 'src' work
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

# Change the working directory to the root so relative paths (data/...) resolve
os.chdir(ROOT)
print(f"Working Directory defined to: {os.getcwd()}")

# Project-local import; placed after the sys.path adjustment above on purpose.
from src.merradownload.merra_scraping import baixar_merra

Project Root: /home/guilherme-rodrigues/Documentos/repos/PROJETO CAIPORA/analise-temporal-municipios
Working Directory defined to: /home/guilherme-rodrigues/Documentos/repos/PROJETO CAIPORA/analise-temporal-municipios


## 01 - Baixar shapefiles

In [1]:

# Paths are relative to the project root
municipios_coord_df = pd.read_csv("data/utils/municipios_coord.csv", sep=";")
# Sorted distinct values of the "uf" column (used as the state options)
estados = sorted(municipios_coord_df["uf"].unique())
# Map each "uf" value to the sorted list of its distinct "municipio" values
municipios_por_estado = {
    uf: sorted(municipios_coord_df[municipios_coord_df["uf"] == uf]["municipio"].unique())
    for uf in estados
}

# Widgets: state selector, municipality selector (starts with the first
# state's list), the action button and the output area.
estados_dd = Dropdown(options=estados, description='Estado:')
municipios_dd = Dropdown(options=municipios_por_estado[estados[0]], description='Munic√≠pio:')
baixar_btn = Button(description="Baixar e Plotar Shapefile")
output_mapa = Output()

def atualiza_municipios(change):
    """Repopulate the municipality dropdown for the newly selected state.

    Args:
        change: Trait-change mapping delivered by ``observe``; only its
            ``'new'`` entry (the selected state) is read.
    """
    municipios_dd.options = municipios_por_estado[change['new']]

# Re-run the handler whenever the state dropdown's value changes.
estados_dd.observe(atualiza_municipios, names='value')

def baixar_plotar_shapefile(b):
    """Button callback: download, save and plot the selected municipality.

    Reads the selected state and municipality from the dropdowns, resolves the
    municipality code with ``geobr.lookup_muni``, downloads the geometry with
    ``geobr.read_municipality`` and writes it as an ESRI Shapefile under
    ``data/shapefiles/<uf>-<municipio>/``. It then tries to export the
    matching row(s) of ``data/utils/municipios_ibge.csv`` next to the
    shapefile and finally plots the geometry. Messages and the figure are
    emitted inside ``output_mapa``; failures are printed, not raised.

    Args:
        b: The clicked button (not used).
    """
    uf = estados_dd.value
    municipio = municipios_dd.value

    with output_mapa:
        output_mapa.clear_output()
        print(f"Baixando shapefile para {municipio}, {uf}...")
        try:
            # Resolve the municipality code, keeping only rows of the chosen state.
            candidatos = geobr.lookup_muni(name_muni=municipio)
            candidatos = candidatos[candidatos['abbrev_state'] == uf]
            if candidatos.empty:
                print(f"Munic√≠pio {municipio}-{uf} n√£o encontrado no geobr.")
                return

            code_muni = candidatos.iloc[0]['code_muni']
            gdf = geobr.read_municipality(code_muni=code_muni)
            print("Download conclu√≠do.")

            # Persist the geometry; spaces in the name become underscores.
            nome_arquivo = municipio.replace(' ', '_')
            save_dir = f"data/shapefiles/{uf}-{nome_arquivo}/"
            os.makedirs(save_dir, exist_ok=True)

            filepath = os.path.join(save_dir, f"{uf}_{nome_arquivo}.shp")
            gdf.to_file(filepath, driver='ESRI Shapefile')
            print(f"Shapefile salvo em: {filepath}")

            # Export the IBGE row(s). Best effort: an error here is reported
            # but does not prevent the plot below.
            try:
                ibge_df = pd.read_csv("data/utils/municipios_ibge.csv")
                # Match by code first (compared as float), then by name.
                mesmo_codigo = ibge_df['codigo_ibge'] == float(code_muni)
                city_ibge_info = ibge_df[mesmo_codigo]
                if city_ibge_info.empty:
                    mesmo_nome = ibge_df['Nome_Munic√≠pio'] == municipio
                    city_ibge_info = ibge_df[mesmo_nome]

                if city_ibge_info.empty:
                    print("Aviso: N√£o foi poss√≠vel encontrar informa√ß√µes no arquivo municipios_ibge.csv")
                else:
                    csv_path = os.path.join(save_dir, f"{uf}_{nome_arquivo}_ibge.csv")
                    city_ibge_info.to_csv(csv_path, index=False)
                    print(f"CSV IBGE salvo em: {csv_path}")
            except Exception as e:
                print(f"Erro ao salvar CSV IBGE: {e}")

            # Draw the municipality outline.
            _fig, ax = plt.subplots(1, 1, figsize=(10, 10))
            gdf.plot(ax=ax, facecolor='#add8e6', edgecolor='black')
            ax.set_title(f"Mapa de {municipio} - {uf}")
            ax.set_xlabel("Longitude")
            ax.set_ylabel("Latitude")
            plt.grid(True)
            plt.show()

        except Exception as e:
            print(f"Erro ao processar: {e}")

# Run the download/plot callback when the button is clicked
baixar_btn.on_click(baixar_plotar_shapefile)

# Render the controls stacked vertically, with the output area last
display(VBox([estados_dd, municipios_dd, baixar_btn, output_mapa]))

NameError: name 'pd' is not defined

## 02 - Criar arquivo final

⚠️ **Aviso — Períodos longos (acima de 1 ano)**

Alguns crawlers (TROPOMI, MERRA-2, MODIS, ERA5) podem enfrentar **limites de requisição, timeouts ou alto consumo de disco/RAM** ao baixar dados para períodos superiores a 1 ano.

**Recomendação atual:** para períodos longos (ex.: 5 anos), use o modo **Seletivo** e execute em lotes separados:
1. INMET + DataSUS + CETESB (rápido)
2. ERA5 + OMI (moderado)
3. MERRA-2, MODIS, TROPOMI (pesado — executar separadamente)

> Melhorias de resiliência (retry automático, checkpoint/resume, chunking por ano) serão implementadas em versões futuras.

In [None]:
# Ensure the project root is on sys.path (this helper is self-contained)
def _ensure_project_root(marker='analise-temporal-municipios'):
    """Put the project root on ``sys.path`` and make it the working directory.

    The root is the nearest directory -- the current working directory or one
    of its ancestors -- whose name equals ``marker``.

    Args:
        marker: Directory name that identifies the project root.

    Returns:
        None. When the root is found, it is inserted at the front of
        ``sys.path`` (if absent) and becomes the working directory. When it
        is not found, a ``RuntimeWarning`` is issued and neither ``sys.path``
        nor the working directory is modified.
    """
    # Imported locally so the helper also works on a fresh kernel where the
    # notebook's import cell has not been executed yet.
    import os
    import sys
    import warnings
    from pathlib import Path

    start = Path.cwd()
    root = next((p for p in (start, *start.parents) if p.name == marker), None)
    if root is None:
        # An exhausted upward walk would otherwise end at the filesystem
        # root; switching to it would break every relative data path.
        warnings.warn(
            f"Project root '{marker}' not found above {start}; "
            "sys.path and the working directory were left unchanged.",
            RuntimeWarning,
            stacklevel=2,
        )
        return

    root_str = str(root)
    if root_str not in sys.path:
        sys.path.insert(0, root_str)
    os.chdir(root)  # make sure the cwd is the project root
_ensure_project_root()

# Force reload to pick up any code changes without restarting the kernel
import src.baixar_dados.download_all as _dal_mod
importlib.reload(_dal_mod)
from src.baixar_dados.download_all import download_all, build_shapefile_catalog, SUPPORTED_DISEASES

# -- Friendly labels for the disease dropdown (grouped by category) --
# Keys are the disease identifiers; values are the texts shown in the UI.
DISEASE_LABELS = {
    # -- Respiratory --
    "ivas": "ü´Å IVAS ‚Äì Infec√ß√µes Vias A√©reas Sup. (J00-J06)",
    "influenza": "ü´Å Influenza / Gripe (J09-J11)",
    "pneumonia": "ü´Å Pneumonia (J12-J18)",
    "infec_vias_aereas_inf": "ü´Å Infec√ß√µes Vias A√©reas Inf. (J20-J22)",
    "rinite_sinusite": "ü´Å Rinite Al√©rgica / Sinusite Cr√¥nica (J30, J32)",
    "dpoc": "ü´Å DPOC / Bronquiectasia (J40, J44, J47)",
    "asma": "ü´Å Asma (J45-J46)",
    # -- Cardiovascular --
    "hipertensao": "‚ù§Ô∏è Hipertens√£o Arterial (I10, I11, I15)",
    "doenca_isquemica": "‚ù§Ô∏è Doen√ßa Isqu√™mica do Cora√ß√£o (I20-I22, I24-I25)",
    "embolia_pulmonar": "‚ù§Ô∏è Embolia Pulmonar (I26)",
    "arritmias": "‚ù§Ô∏è Arritmias Card√≠acas (I45, I47-I49)",
    "insuficiencia_cardiaca": "‚ù§Ô∏è Insufici√™ncia Card√≠aca (I50)",
    # -- Cerebrovascular --
    "avc": "üß† AVC ‚Äì Acidente Vascular Cerebral (I60-I64, G45)",
}

# Display order in the dropdown (same visual grouping as the labels above)
DISEASE_DISPLAY_ORDER = [
    "ivas", "influenza", "pneumonia", "infec_vias_aereas_inf",
    "rinite_sinusite", "dpoc", "asma",
    "hipertensao", "doenca_isquemica", "embolia_pulmonar",
    "arritmias", "insuficiencia_cardiaca",
    "avc",
]

# -- Crawler information (for the "Seletivo" mode) --
# CRAWLER_ORDER fixes the checkbox order; CRAWLER_INFO holds each checkbox label.
CRAWLER_ORDER = ["cetesb", "inmet", "era5", "merra2", "tropomi", "modis", "omi", "datasus", "indices"]
CRAWLER_INFO = {
    "cetesb":   "CETESB ‚Äì Qualidade do ar (SP)",
    "inmet":    "INMET ‚Äì Esta√ß√µes meteorol√≥gicas",
    "era5":     "ERA5 ‚Äì Rean√°lise clim√°tica (ECMWF)",
    "merra2":   "MERRA-2 ‚Äì Rean√°lise NASA (aeross√≥is)",
    "tropomi":  "TROPOMI ‚Äì NO‚ÇÇ, SO‚ÇÇ, CO, O‚ÇÉ (Sentinel-5P)",
    "modis":    "MODIS ‚Äì AOD (aerossol, Terra/Aqua)",
    "omi":      "OMI ‚Äì Oz√¥nio, NO‚ÇÇ, SO‚ÇÇ (Aura)",
    "datasus":  "DataSUS ‚Äì Interna√ß√µes hospitalares (SIH)",
    "indices":  "√çndices Calculados (depende do INMET)",
}

# -- Schema modes mapping (UI label -> value passed to download_all) --
SCHEMA_MODES = {
    "Template (CSV de refer√™ncia)": "reference",
    "Completo (todas as colunas)": "all",
    "Seletivo (escolher crawlers)": "seletivo",
}

def make_download_all_widget(
    *,
    shapefiles_dir="data/shapefiles",
    default_output_dir="data/output",
    default_cache_dir="data/cache",
    default_final_csv="data/output/final/final_by_ibge_date.csv",
    default_disease="asma",
    default_schema_csv="data/utils/schema_template.csv",
    log_level="INFO",
):
    """Build and display the ipywidgets UI that drives ``download_all``.

    The UI offers three schema modes (see ``SCHEMA_MODES``) and, in the
    "seletivo" mode, one checkbox per crawler listed in ``CRAWLER_ORDER``.

    Args:
        shapefiles_dir: Directory passed to ``build_shapefile_catalog`` and,
            when every municipality is selected, to ``download_all``.
        default_output_dir: Initial value of the output-directory field.
        default_cache_dir: Initial value of the cache-directory field.
        default_final_csv: Initial value of the final-CSV field.
        default_disease: Disease preselected in the dropdown. If it is not
            among the dropdown options, the first option is used instead.
        default_schema_csv: Initial value of the schema-CSV field.
        log_level: Forwarded unchanged to ``download_all``.

    Returns:
        None. The assembled widget tree is rendered with ``display``.
    """

    # 1. Load the shapefile catalog
    catalog = build_shapefile_catalog(shapefiles_dir=shapefiles_dir)
    options = [("Todos os munic√≠pios (todos os shapefiles)", "__ALL__")]
    for _, row in catalog.iterrows():
        cod = str(row.get("codibge") or "").strip()
        name = str(row.get("shapefile_nome") or "").strip()
        path = str(row.get("shapefile_path") or "").strip()
        label = f"{cod} ‚Äî {name}" if cod else name
        options.append((label, path))

    # 2. Widgets

    # Disease (dropdown with friendly labels)
    disease_options = [
        (DISEASE_LABELS.get(d, d), d) for d in DISEASE_DISPLAY_ORDER if d in SUPPORTED_DISEASES
    ]
    # The initial value must be one of the option values, otherwise Dropdown
    # rejects it. Pick the requested default only when it is really offered.
    disease_values = [value for _, value in disease_options]
    if default_disease in disease_values:
        initial_disease = default_disease
    elif disease_values:
        initial_disease = disease_values[0]
    else:
        initial_disease = None
    disease_dd = Dropdown(
        options=disease_options,
        value=initial_disease,
        description="Doen√ßa:",
        style={'description_width': 'initial'},
        layout=Layout(width='450px'),
    )

    # Municipality
    target_dd = Dropdown(
        options=options,
        value="__ALL__",
        description="Munic√≠pio:",
        style={'description_width': 'initial'},
    )

    # DatePicker (calendar)
    start_picker = DatePicker(description="In√≠cio", value=date(2023, 1, 1))
    end_picker = DatePicker(description="Fim", value=date(2023, 1, 31))

    # Workers
    workers_input = BoundedIntText(
        value=3, min=1, max=32, step=1,
        description="Workers:",
        style={'description_width': 'initial'},
        layout=Layout(width='150px'),
    )
    workers_warning = HTML(
        value="<span style='color:orange; font-size:0.9em;'>‚ö†Ô∏è Acima de 3 workers exige PC potente (muita RAM/CPU).</span>",
        layout=Layout(margin='5px 0 0 10px'),
    )

    # Schema mode
    schema_labels = list(SCHEMA_MODES.keys())
    schema_dd = Dropdown(
        options=schema_labels,
        value=schema_labels[0],  # Template by default
        description="Schema:",
        # `tooltip` replaces `description_tooltip`, deprecated in ipywidgets 8.
        tooltip="Define quais colunas estar√£o no CSV final.",
        style={'description_width': 'initial'},
        layout=Layout(width='380px'),
    )
    schema_help = HTML(
        value=(
            "<div style='font-size:0.85em; color:#555; margin-left:10px;'>"
            "<b>Schema:</b> Define a estrutura do arquivo final.<br>"
            "‚Ä¢ <i>Template</i>: Segue o padr√£o do arquivo modelo (recomendado).<br>"
            "‚Ä¢ <i>Completo</i>: Inclui todas as colunas baixadas.<br>"
            "‚Ä¢ <i>Seletivo</i>: Baixa apenas os crawlers selecionados abaixo."
            "</div>"
        ),
    )

    # Crawler checkboxes ("seletivo" mode), all ticked initially
    crawler_checkboxes = {
        key: Checkbox(
            value=True,
            description=CRAWLER_INFO.get(key, key),
            indent=False,
            layout=Layout(width='auto'),
        )
        for key in CRAWLER_ORDER
    }

    crawler_box = VBox(
        [HTML("<b>Selecione os crawlers a executar:</b>")] + list(crawler_checkboxes.values()),
        layout=Layout(border='1px solid #ccc', padding='8px', margin='5px 0'),
    )
    crawler_box.layout.display = 'none'  # Hidden initially

    def _on_schema_change(change):
        # Show the crawler checkboxes only while the "seletivo" mode is chosen.
        mode = SCHEMA_MODES.get(change['new'], '')
        crawler_box.layout.display = 'block' if mode == 'seletivo' else 'none'

    schema_dd.observe(_on_schema_change, names='value')

    # Paths (advanced)
    output_dir_txt = Text(value=str(default_output_dir), description="Sa√≠da:")
    cache_dir_txt = Text(value=str(default_cache_dir), description="Cache:")
    final_csv_txt = Text(value=str(default_final_csv), description="Final CSV:")
    schema_csv_txt = Text(value=str(default_schema_csv), description="Schema CSV:")

    run_btn = Button(
        description="Executar Download", button_style="success",
        icon="download", layout=Layout(width='100%', margin='20px 0'),
    )
    out = Output()

    # 3. Callback

    def _on_run(_):
        # Validate the form, call download_all and report the result in `out`.
        out.clear_output()
        run_btn.disabled = True
        run_btn.description = "Executando... (Aguarde)"

        with out:
            try:
                gc.collect()
                print("‚ôªÔ∏è Mem√≥ria limpa. Iniciando nova execu√ß√£o...")

                if not start_picker.value or not end_picker.value:
                    print("‚ö†Ô∏è Selecione as datas de In√≠cio e Fim.")
                    return

                # Reject an inverted range before starting any download.
                if start_picker.value > end_picker.value:
                    print("Intervalo incorreto: a data final antecede a data inicial.")
                    return

                start_str = start_picker.value.isoformat()
                end_str = end_picker.value.isoformat()
                selected = target_dd.value
                schema_mode = SCHEMA_MODES[schema_dd.value]

                # Build the list of sources for the "seletivo" mode
                sources = None
                if schema_mode == "seletivo":
                    sources = [k for k, cb in crawler_checkboxes.items() if cb.value]
                    if not sources:
                        print("‚ö†Ô∏è Selecione ao menos um crawler no modo Seletivo.")
                        return
                    print(f"üîß Modo Seletivo ‚Äî crawlers: {', '.join(sources)}")

                print(f"üöÄ Configura√ß√£o validada.")
                print(f"üìÖ Per√≠odo: {start_str} a {end_str}")
                print(f"ü¶† Doen√ßa: {disease_dd.value}")
                print(f"‚öôÔ∏è Config: Workers={workers_input.value} | Schema={schema_mode}")

                if workers_input.value > 3:
                    print("‚ö†Ô∏è Aten√ß√£o: Alto n√∫mero de workers selecionado. Monitorando uso de recursos...")

                kwargs = {
                    "start": start_str,
                    "end": end_str,
                    "disease": disease_dd.value,
                    "output_dir": output_dir_txt.value,
                    "cache_dir": cache_dir_txt.value,
                    "final_csv": final_csv_txt.value,
                    "final_schema": schema_mode,
                    "schema_csv": schema_csv_txt.value or None,
                    "log_level": log_level,
                    "max_workers": workers_input.value,
                    "sources": sources,
                }

                # "__ALL__" processes the whole directory; otherwise the
                # dropdown value is the path of a single shapefile.
                if selected == "__ALL__":
                    df = download_all(shapefiles_dir=shapefiles_dir, **kwargs)
                else:
                    df = download_all(shapefile=selected, **kwargs)

                print("‚úÖ Processo finalizado com sucesso!")
                # Guard against a None result before touching DataFrame attributes.
                if df is not None and not df.empty:
                    print(f"üìä Dados gerados: {len(df)} linhas, {len(df.columns)} colunas.")
                    display(df.tail())
                else:
                    print("‚ö†Ô∏è Nenhum dado retornado (verifique logs ou disponibilidade).")

            except Exception as e:
                # UI boundary: report the failure and keep the widget usable.
                print(f"‚ùå Erro na execu√ß√£o: {e}")
                import traceback
                traceback.print_exc()
                print("\nüîÑ O processo falhou. Voc√™ pode ajustar as configura√ß√µes e tentar novamente.")
            finally:
                # Also reached on the early returns above, so the button is
                # always re-enabled.
                run_btn.disabled = False
                run_btn.description = "Executar Download"

    run_btn.on_click(_on_run)

    # 4. Layout
    ui = VBox([
        HTML("<h3>Configura√ß√£o do Download (Orquestrador)</h3>"),
        HBox([disease_dd, target_dd]),
        HBox([start_picker, end_picker]),
        HBox([workers_input, workers_warning]),
        HBox([schema_dd, schema_help]),
        crawler_box,
        Accordion(children=[
            VBox([output_dir_txt, cache_dir_txt, final_csv_txt, schema_csv_txt])
        ], titles=('Op√ß√µes Avan√ßadas (Caminhos)',)),
        run_btn,
        out,
    ])
    display(ui)

# Build and display the widget
make_download_all_widget(shapefiles_dir="data/shapefiles")

  schema_dd = Dropdown(


VBox(children=(HTML(value='<h3>Configura√ß√£o do Download (Orquestrador)</h3>'), HBox(children=(Dropdown(descrip‚Ä¶

2026-02-16 02:24:18,628 INFO src.baixar_dados.download_all - [DATASUS] Iniciando para 3548500...
RDSP2301.parquet: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 738k/738k [00:39<00:00, 18.5kB/s]
