In [4]:
import os
import zipfile
from pathlib import Path
import requests

In [5]:
# Download the yearly INMET historical-data archives into the raw weather
# folder. Each archive is streamed to disk in 8 KiB chunks so it is never
# held in memory as a whole.
os.makedirs("datalake/raw/clima", exist_ok=True)

urls = {
    "2023": "https://portal.inmet.gov.br/uploads/dadoshistoricos/2023.zip",
    "2024": "https://portal.inmet.gov.br/uploads/dadoshistoricos/2024.zip",
    "2025": "https://portal.inmet.gov.br/uploads/dadoshistoricos/2025.zip"
}

for ano, url in urls.items():
    path = f"datalake/raw/clima/{ano}.zip"
    # The timeout keeps the cell from hanging forever on a stalled connection.
    with requests.get(url, stream=True, timeout=60) as r:
        # Fail loudly on an HTTP error *before* creating the file, so an
        # error page is never saved under a .zip name.
        r.raise_for_status()
        with open(path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)


In [7]:
# Unpack every ZIP archive found in the weather folder, extracting its
# contents into that same folder and deleting the archive afterwards.
output_dir = "datalake/raw/clima"
for file_name in os.listdir(output_dir):
    file_path = Path(output_dir, file_name)

    # Entries that are not ZIP archives are left untouched.
    if not file_name.endswith(".zip"):
        continue

    try:
        with zipfile.ZipFile(file_path, mode='r') as archive:
            archive.extractall(path=output_dir)
            print(f"Extraído: {file_name}")
    except zipfile.BadZipFile:
        # A corrupt archive is reported and kept on disk.
        print(f"Erro ao descompactar: {file_name}")
    else:
        # Remove the archive only after a successful extraction.
        file_path.unlink()

Extraído: 2023.zip
Extraído: 2024.zip
Extraído: 2025.zip


In [9]:
# Keep only the files whose name starts with the Manaus prefix; every other
# regular file directly inside the weather folder is deleted.
folder = Path("datalake/raw/clima")
prefixo_manaus = "INMET_N_AM_A101_MANAUS"

for entry in folder.iterdir():
    # Sub-directories (and anything else that is not a regular file) are skipped.
    if not entry.is_file():
        continue
    # Files matching the prefix are the ones to keep.
    if entry.name.startswith(prefixo_manaus):
        continue
    entry.unlink()

In [8]:
# Download the yearly SINAN dengue CSV archives, unpack them into the raw
# dengue folder and delete each archive once it has been extracted.
base_path = 'datalake/raw/dengue'
os.makedirs(base_path, exist_ok=True)

anos = [2023, 2024, 2025]

for ano in anos:
    # The remote file name uses the two-digit year (e.g. DENGBR23.csv.zip).
    sufixo = str(ano)[-2:]
    url = f'https://arquivosdadosabertos.saude.gov.br/ftp/SINAN/Dengue/csv/DENGBR{sufixo}.csv.zip'
    zip_path = os.path.join(base_path, f'dengue_{ano}.zip')

    print(f'Baixando: {url}')
    # Stream the body to disk in chunks instead of buffering the whole archive
    # in memory; the timeout prevents the cell from hanging on a stalled server.
    with requests.get(url, stream=True, timeout=60) as response:
        download_ok = response.status_code == 200
        if download_ok:
            with open(zip_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

    # is_zipfile checks the archive's structure, so a truncated download or an
    # HTML error page served with status 200 is rejected instead of crashing
    # the extraction.
    if download_ok and zipfile.is_zipfile(zip_path):
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(base_path)
        os.remove(zip_path)
    else:
        # Do not leave an invalid download behind; a file is only written
        # (and therefore only removed) when the status was 200.
        if download_ok:
            os.remove(zip_path)
        print(f'Erro ao baixar ou arquivo inválido para {ano}. Status: {response.status_code}\n')

Baixando: https://arquivosdadosabertos.saude.gov.br/ftp/SINAN/Dengue/csv/DENGBR23.csv.zip
Baixando: https://arquivosdadosabertos.saude.gov.br/ftp/SINAN/Dengue/csv/DENGBR24.csv.zip
Baixando: https://arquivosdadosabertos.saude.gov.br/ftp/SINAN/Dengue/csv/DENGBR25.csv.zip
