<a href="https://colab.research.google.com/github/faguiarfaria/parser-convenios/blob/main/notebooks/parser_unimed_colab_xls.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Parser Unimed Odonto
Este notebook extrai procedimentos de um relatório Unimed Odonto em PDF e exporta para Excel (.xlsx).

In [None]:
# 📌 PASSO 1 - Instala as dependências necessárias
!pip install pdfplumber xlwt

Collecting pdfplumber
  Downloading pdfplumber-0.11.7-py3-none-any.whl.metadata (42 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/42.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting xlwt
  Downloading xlwt-1.3.0-py2.py3-none-any.whl.metadata (3.5 kB)
Collecting pdfminer.six==20250506 (from pdfplumber)
  Downloading pdfminer_six-20250506-py3-none-any.whl.metadata (4.2 kB)
Collecting pypdfium2>=4.18.0 (from pdfplumber)
  Downloading pypdfium2-4.30.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (48 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.5/48.5 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
Downloading pdfplumber-0.11.7-py3-none-any.whl (60 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.0/60.0 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pdfmine

In [None]:
# 📁 STEP 2 - Upload the PDF manually (or load it from Google Drive)
import os

from google.colab import files

uploaded = files.upload()

# `uploaded` is keyed by file name. When it is empty (e.g. the upload was
# cancelled) there is nothing to parse; fail here with a clear message instead
# of hitting a NameError below or silently reusing a pdf_path left over from a
# previous run of this cell.
if not uploaded:
    raise RuntimeError(
        "Nenhum arquivo foi enviado. Execute a célula novamente e selecione o PDF."
    )

nomes_enviados = list(uploaded.keys())
# Same choice as before when several files are sent (the last one wins), but
# now the user is told about it.
fname = nomes_enviados[-1]
if len(nomes_enviados) > 1:
    print("Aviso: mais de um arquivo enviado; usando o último:", fname)

pdf_path = os.path.abspath(fname)
print("PDF carregado:", pdf_path)

Saving Relatorio de convenio - modelos - unimed - 2025_06.pdf to Relatorio de convenio - modelos - unimed - 2025_06.pdf
PDF carregado: /content/Relatorio de convenio - modelos - unimed - 2025_06.pdf


In [None]:
# 🔎 PASSO 3 - Função principal para extração dos dados
import pdfplumber
import pandas as pd
import re

# Column order of the resulting DataFrame. Passing it explicitly keeps the
# headers even when no procedure line is found.
_COLUNAS_UNIMED = [
    "Data do atendimento",
    "Código do paciente",
    "Nome do paciente",
    "Descrição do procedimento",
    "Detalhe",
    "Face",
    "Número da GTO",
    "Status do procedimento",
    "Valor do procedimento (R$)",
    "Valor glosado (R$)",
    "Valor final (R$)",
]

# Header line that introduces a GTO and its beneficiary (code - name).
_RE_GTO = re.compile(
    r"GTO:(\d+)\s+CÓDIGO E NOME DO BENEFICIÁRIO:\s+(\d+)\s+-\s+(.+)"
)

# Procedure line: 8-digit code, description, detail/face token, status,
# three amounts and the date. Amounts accept "." so that values written with
# a thousands separator ("1.234,56") are no longer skipped.
_RE_PROCEDIMENTO = re.compile(
    r"(\d{8}) (.+?)\s+(\S+)\s+(Pago|Glosado|Deferido|Indeferido)"
    r"\s+([\d.,]+)\s+([\d.,]+)\s+([\d.,]+)\s+(\d{2}/\d{2}/\d{4})"
)

# Detail/face token made of leading digits followed by a non-digit suffix.
_RE_DETALHE_FACE = re.compile(r"(\d+)(\D+)")


def _normalizar_valor(texto):
    """Turn a comma-decimal amount ("1.234,56") into a dot-decimal string ("1234.56")."""
    if "," in texto:
        # With a decimal comma present, any "." is a thousands separator.
        return texto.replace(".", "").replace(",", ".")
    return texto


def _separar_detalhe_face(token):
    """Split the combined detail/face token into a ``(detalhe, face)`` pair."""
    if token.isdigit():
        return token, ""
    if token.isalpha():
        return "", token
    partes = _RE_DETALHE_FACE.fullmatch(token)
    if partes:
        # Split at the digit/non-digit boundary so the face may have any
        # length ("36O", "16MO", "17MOD"), not only two characters.
        return partes.group(1), partes.group(2)
    # Unrecognised shape: keep the historical "last two characters" split.
    return token[:-2], token[-2:]


def extrair_dados_unimed(pdf_path):
    """Extract the procedure lines of a Unimed Odonto report into a DataFrame.

    Every page is read line by line. A line matching the GTO header updates
    the current GTO number, beneficiary code and beneficiary name; each later
    line matching the procedure pattern yields one row tagged with those
    current values (they carry over across pages until the next header).

    Args:
        pdf_path: Path of the PDF report, passed to ``pdfplumber.open``.

    Returns:
        pandas.DataFrame with the columns listed in ``_COLUNAS_UNIMED``, one
        row per matched procedure line. All values are strings; the three
        amount columns use "." as decimal separator. The frame is empty (but
        still has its columns) when no line matches.
    """
    dados = []
    gto_atual = ""
    codigo_paciente = ""
    nome_paciente = ""

    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            # Guard against pages for which no text comes back (None or "").
            texto = page.extract_text() or ""
            for linha in texto.split("\n"):
                match_gto = _RE_GTO.match(linha)
                if match_gto:
                    gto_atual = match_gto.group(1).strip()
                    codigo_paciente = match_gto.group(2).strip()
                    nome_paciente = match_gto.group(3).strip()
                    continue

                match_proc = _RE_PROCEDIMENTO.match(linha)
                if not match_proc:
                    continue

                detalhe, face = _separar_detalhe_face(match_proc.group(3).strip())
                dados.append({
                    "Data do atendimento": match_proc.group(8),
                    "Código do paciente": codigo_paciente,
                    "Nome do paciente": nome_paciente,
                    "Descrição do procedimento": match_proc.group(2).strip(),
                    "Detalhe": detalhe,
                    "Face": face,
                    "Número da GTO": gto_atual,
                    "Status do procedimento": match_proc.group(4),
                    "Valor do procedimento (R$)": _normalizar_valor(match_proc.group(5)),
                    "Valor glosado (R$)": _normalizar_valor(match_proc.group(6)),
                    "Valor final (R$)": _normalizar_valor(match_proc.group(7)),
                })

    return pd.DataFrame(dados, columns=_COLUNAS_UNIMED)

# Parse the uploaded report and preview the first rows of the result.
df = extrair_dados_unimed(pdf_path=pdf_path)
df.head(n=5)


Unnamed: 0,Data do atendimento,Código do paciente,Nome do paciente,Descrição do procedimento,Detalhe,Face,Número da GTO,Status do procedimento,Valor do procedimento (R$),Valor glosado (R$),Valor final (R$)
0,20/02/2025,90000002893034005,JOZIANI MOTA VIEIRA,COROA TOTAL METALICA,37,,35957083,Deferido,481.32,0.0,481.32
1,07/03/2025,90000003124983007,WARLEY BRAZ COELHO,COROA TOTAL METALICA,36,,35920474,Indeferido,481.32,481.32,0.0
2,28/05/2025,90000002361418011,DAIANA DE CASTRO FERNANDES MARCOLINO,COROA PROVISORIA SEM PINO,17,,36338205,Pago,171.9,0.0,171.9
3,28/05/2025,90000002361418011,DAIANA DE CASTRO FERNANDES MARCOLINO,NUCLEO DE PREENCHIMENTO,17,,36338205,Pago,84.04,0.0,84.04
4,08/05/2025,90000002464878007,FABRICIO LORENCINI ZANONI,PINO PRE FABRICADO,45,,36195365,Pago,276.95,0.0,276.95


In [None]:

# 💾 STEP 4 - Export the DataFrame to Excel and download the generated file
arquivo_saida = "procedimentos_unimed.xlsx"
df.to_excel(arquivo_saida, engine="openpyxl", index=False)
files.download(arquivo_saida)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>