In [2]:
import datetime
import io
import os
import tempfile
from ftplib import FTP

import datasus_dbc
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from dbfread import DBF
from dbfread import DBF, FieldParser

In [3]:
class CustomFieldParser(FieldParser):
    """Field parser that tolerates date fields the base parser cannot handle."""

    def parseD(self, field, data):
        """Parse a date field, returning None when the base parser raises.

        Delegates to ``FieldParser.parseD``; any exception it raises is
        swallowed so that a single bad value does not abort reading the file.
        """
        try:
            parsed = super().parseD(field, data)
        except Exception:
            # Invalid dates become None.
            parsed = None
        return parsed

def _ler_dbc_remoto(ftp, nome_remoto):
    """Download one compressed file over ``ftp`` and return it as a DataFrame.

    The downloaded .dbc and the decompressed .dbf are written to a private
    temporary directory that is removed when this function exits, whether it
    returns normally, raises, or is interrupted.

    Args:
        ftp: A connected and logged-in ``ftplib.FTP`` session whose current
            directory contains ``nome_remoto``.
        nome_remoto: File name as reported by the server listing.

    Returns:
        pandas.DataFrame with one row per record in the decompressed file.
    """
    # basename() keeps a server-supplied name from escaping the temp directory;
    # splitext() yields distinct "<stem>.dbc" / "<stem>.dbf" paths.
    base = os.path.splitext(os.path.basename(nome_remoto))[0]

    with tempfile.TemporaryDirectory() as pasta_tmp:
        caminho_dbc = os.path.join(pasta_tmp, base + '.dbc')
        caminho_dbf = os.path.join(pasta_tmp, base + '.dbf')

        # Stream straight to disk instead of holding the whole file in memory.
        with open(caminho_dbc, 'wb') as destino:
            ftp.retrbinary(f'RETR {nome_remoto}', destino.write)

        datasus_dbc.decompress(caminho_dbc, caminho_dbf)

        # The custom parser turns unparseable dates into None instead of raising.
        records = DBF(caminho_dbf, encoding="latin1",
                      parserclass=CustomFieldParser,
                      ignore_missing_memofile=True)

        # Materialize every record before the temporary directory is deleted.
        return pd.DataFrame(iter(records))


def ger_dfs(cwd, doenca):
    """Download every matching file from the DATASUS FTP server into one DataFrame.

    Connects anonymously to ``ftp.datasus.gov.br``, changes to ``cwd``, and
    selects the listed files whose name contains ``doenca`` (case-insensitive).
    Each selected file is downloaded, decompressed and loaded; files that fail
    are reported and skipped so that one bad file does not abort the whole run.

    Args:
        cwd: Remote directory to list.
        doenca: Substring used to filter file names (case-insensitive).

    Returns:
        pandas.DataFrame: row-wise concatenation of all files that loaded
        successfully (index reset), or an empty DataFrame when none did.

    Raises:
        Any exception raised while connecting, logging in, changing directory
        or listing files propagates to the caller. Per-file ``Exception``s are
        caught and printed; ``KeyboardInterrupt`` is not caught, but the FTP
        session and temporary files are still released.
    """
    dfs = []
    filtro = doenca.upper()

    # The context manager closes the session on every exit path, including
    # errors before the loop and a user interrupt during a long download.
    with FTP('ftp.datasus.gov.br') as ftp:
        ftp.login('anonymous', 'anonymous@')
        ftp.cwd(cwd)
        lista = ftp.nlst()

        arquivos = [j for j in lista if filtro in j.upper()]
        print("Arquivos filtrados:", arquivos)

        for j in arquivos:
            try:
                df = _ler_dbc_remoto(ftp, j)
            except Exception as e:
                # Best effort: report the failure and move on to the next file.
                print(f"[ERRO] {j} -> {e}")
                continue

            dfs.append(df)
            print(f"[OK] {j} -> {df.shape[0]} linhas")

    if dfs:
        df_final = pd.concat(dfs, ignore_index=True)
        print(f"[OK] DataFrame final: {df_final.shape[0]} linhas")
        return df_final

    print("[ERRO] Nenhum DataFrame válido foi criado.")
    return pd.DataFrame()

In [4]:
# Load every file in the SINAN final-data directory whose name contains 'deng'.
df = ger_dfs(cwd='/dissemin/publicos/SINAN/DADOS/FINAIS', doenca='deng')

Arquivos filtrados: ['DENGBR00.dbc', 'DENGBR01.dbc', 'DENGBR02.dbc', 'DENGBR03.dbc', 'DENGBR04.dbc', 'DENGBR05.dbc', 'DENGBR06.dbc', 'DENGBR07.dbc', 'DENGBR08.dbc', 'DENGBR09.dbc', 'DENGBR10.dbc', 'DENGBR11.dbc', 'DENGBR12.dbc', 'DENGBR13.dbc', 'DENGBR14.dbc', 'DENGBR15.dbc', 'DENGBR16.dbc', 'DENGBR17.dbc', 'DENGBR18.dbc', 'DENGBR19.dbc', 'DENGBR20.dbc', 'DENGBR21.dbc', 'DENGBR22.dbc', 'DENGBR23.dbc', 'DENGBR24.dbc']


KeyboardInterrupt: 