In [None]:
import nest_asyncio
nest_asyncio.apply()

import re
import csv
import requests
from bs4 import BeautifulSoup
from playwright.async_api import async_playwright

# =====================================================
# CONFIGURAÇÕES
# =====================================================

import os

# Credentials typed into the login form at LOGIN_URL.
# SECURITY NOTE(review): secrets should not live in source control. The
# environment variables below take precedence; the literal fallbacks are kept
# only so existing runs keep working and should be removed (and the password
# rotated) once the environment is configured.
LOGIN_EMAIL = os.environ.get("CONLICITACAO_EMAIL", "licitacao@boasnovasgestao.com")
LOGIN_SENHA = os.environ.get("CONLICITACAO_SENHA", "#BoasNovas@$10")

# Page that hosts the login form.
LOGIN_URL = "https://conlicitacao.com.br/"
# JSON index of bulletins.
BOLETINS_URL = "https://consultaonline.conlicitacao.com.br/boletim_web/public/boletins.json"
# HTML content of one bulletin (queried with `bulletin_filter_id`).
BOLETIM_CONTEUDO_URL = "https://consultaonline.conlicitacao.com.br/boletim_web/bulletin_filter_content"

# Keyword patterns applied with `re.search` to lowercased text, hence all
# lowercase. The accented alternatives were previously stored mis-encoded
# (UTF-8 bytes read as another charset) and could never match; they are
# restored here.
# NOTE(review): several patterns are intentionally or accidentally unaccented
# (e.g. "medico", "mao"); they only hit accented source text if the matcher
# folds accents before searching.
PADROES_REGEX = [
    r"consulta[s]?",
    r"enfermage(m|ns)",
    r"enfermeiro[s]?",
    r"equipe[s]?\s*&\s*enfermagem[s]?",
    r"equipe[s]?\s*de\s*enfermage(m|ns)",
    r"equipe[s]?\s*para\s*enfermage(m|ns)",
    r"equipe[s]?\s*medica[s]?",
    r"especialidade[s]?\s*medica[s]?",
    r"gest(ao|ão|ões|ôes)\s*enfermage(m|ns)",
    r"gest(ao|ão|ões|ôes)\s*medica[s]?",
    r"gest(ao|ão|ões|ôes)\s*medico[s]?",
    r"mao[s]?\s*de\s*obra[s]?\s*enfermage(m|ns)",
    r"mao[s]?\s*obra[s]?\s*enfermage(m|ns)",
    r"mao[s]?\s*de\s*obra[s]?\s*medica[s]?",
    r"medico[s]?",
    r"serviço[s]?\s*medic(o|a)[s]?",
    r"serviço[s]?\s*medico[s]?",
    r"tele\s*atendimento[s]?",
    r"teleatendimento[s]?",
    r"tele\s*medicina[s]?",
    r"telemedicina[s]?",
]

# =====================================================
# LOGIN AUTOMÁTICO
# =====================================================

async def obter_sessao():
    """Log in with a headless browser and return an authenticated session.

    Opens LOGIN_URL in headless Chromium, fills the e-mail and password
    inputs with LOGIN_EMAIL / LOGIN_SENHA, submits the form, and copies every
    cookie of the browser context into a new ``requests.Session``.

    Returns:
        requests.Session: session carrying the browser cookies plus
        User-Agent, Accept and Referer headers.
    """
    print("🔐 Logando...")

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            context = await browser.new_context()
            page = await context.new_page()

            await page.goto(LOGIN_URL)
            # Fixed pause to give the page time to render the login form.
            await page.wait_for_timeout(3000)

            await page.fill('input[type="email"]', LOGIN_EMAIL)
            await page.fill('input[type="password"]', LOGIN_SENHA)
            await page.click('button[type="submit"]')

            await page.wait_for_load_state("networkidle")
            # Extra pause after the network settles, before reading cookies.
            await page.wait_for_timeout(5000)

            cookies = await context.cookies()
        finally:
            # Close the browser even when a login step raises.
            await browser.close()

    session = requests.Session()
    session.headers.update({
        "User-Agent": "Mozilla/5.0",
        "Accept": "application/json",
        # Host corrected from "consulteonline" to match the
        # consultaonline.conlicitacao.com.br endpoints used by this file.
        "Referer": "https://consultaonline.conlicitacao.com.br/",
    })

    for c in cookies:
        session.cookies.set(c["name"], c["value"], domain=c.get("domain"))

    print("✅ Sessão ativa")
    return session

# =====================================================
# SCRAPER
# =====================================================

def bate_regex(texto, padroes=None):
    """Return True when *texto* matches at least one keyword pattern.

    The text is lowercased and each pattern is tried twice with
    ``re.search``: once as written against the lowercased text (the original
    behaviour), and once with diacritics stripped from both the pattern and
    the text. The second pass lets unaccented patterns such as ``medico`` hit
    accented text such as "médico", and vice versa.

    Args:
        texto: text to inspect.
        padroes: optional iterable of regex strings; defaults to the
            module-level PADROES_REGEX.

    Returns:
        bool: True if any pattern matches, False otherwise.
    """
    import unicodedata  # local import keeps this block self-contained

    def _sem_acentos(valor):
        # Decompose accented letters and drop the combining marks.
        decomposto = unicodedata.normalize("NFD", valor)
        return "".join(ch for ch in decomposto if not unicodedata.combining(ch))

    if padroes is None:
        padroes = PADROES_REGEX

    texto = texto.lower()
    texto_sem_acentos = _sem_acentos(texto)

    for padrao in padroes:
        if re.search(padrao, texto):
            return True
        if re.search(_sem_acentos(padrao), texto_sem_acentos):
            return True
    return False

def extrair_licitacoes(html):
    """Parse bulletin HTML into a list of bid dictionaries.

    Every element matching ``.bulletinItem`` produces one dict with the keys
    ``titulo``, ``orgao`` and ``objeto``, taken from the stripped text of the
    ``.bulletinTitle``, ``.bulletinAgency`` and ``.bulletinObject``
    descendants. A key whose element is absent is set to ``None``.

    Args:
        html: HTML markup of a bulletin.

    Returns:
        list[dict]: one dict per bulletin item, in document order.
    """
    campos = (
        ("titulo", ".bulletinTitle"),
        ("orgao", ".bulletinAgency"),
        ("objeto", ".bulletinObject"),
    )

    def _texto(no, seletor):
        # Same result as `el and el.get_text(strip=True)`: a missing (falsy)
        # element is returned as-is instead of being dereferenced.
        elemento = no.select_one(seletor)
        if not elemento:
            return elemento
        return elemento.get_text(strip=True)

    documento = BeautifulSoup(html, "html.parser")
    return [
        {chave: _texto(item, seletor) for chave, seletor in campos}
        for item in documento.select(".bulletinItem")
    ]

async def executar():
    """Download every bulletin, keep the bids that match, and write a CSV.

    Steps:
      1. Obtain an authenticated session via ``obter_sessao``.
      2. Fetch the bulletin index from BOLETINS_URL. The code expects a JSON
         list whose entries have a ``date`` and a ``boletins`` list of
         objects with ``id`` and ``name``.
      3. For each bulletin, fetch its HTML from BOLETIM_CONTEUDO_URL, extract
         the bids with ``extrair_licitacoes`` and keep those accepted by
         ``bate_regex``.
      4. Write the kept rows to ``licitacoes_filtradas.csv`` (UTF-8).

    Raises:
        requests.HTTPError: if the bulletin index request fails. A failure on
            a single bulletin is reported and skipped instead, so the
            remaining bulletins are still processed.
    """
    # Seconds to wait for each HTTP call; without a timeout a stalled
    # connection would hang the run indefinitely.
    timeout_s = 60

    session = await obter_sessao()
    resultados = []

    r = session.get(BOLETINS_URL, timeout=timeout_s)
    # Fail loudly here: an error response (e.g. rejected login) would
    # otherwise surface as an obscure JSON decoding error.
    r.raise_for_status()
    boletins = r.json()

    for dia in boletins:
        for b in dia["boletins"]:
            try:
                resposta = session.get(
                    BOLETIM_CONTEUDO_URL,
                    params={"bulletin_filter_id": b["id"]},
                    timeout=timeout_s,
                )
                resposta.raise_for_status()
            except requests.RequestException as exc:
                # Previously an error page was parsed as if it were a
                # bulletin and silently produced no rows.
                print(f"⚠️ Falha ao baixar boletim {b['id']}: {exc}")
                continue

            for lic in extrair_licitacoes(resposta.text):
                # Skip missing fields so they are not matched as the
                # literal text "None".
                texto = " ".join(
                    campo
                    for campo in (lic["titulo"], lic["orgao"], lic["objeto"])
                    if campo
                )
                if bate_regex(texto):
                    resultados.append({
                        "data": dia["date"],
                        "boletim": b["name"],
                        **lic
                    })

    with open("licitacoes_filtradas.csv", "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(
            f,
            fieldnames=["data", "boletim", "titulo", "orgao", "objeto"]
        )
        writer.writeheader()
        writer.writerows(resultados)

    print(f"🎉 Finalizado! {len(resultados)} registros salvos.")

# Run in Jupyter (top-level `await` is not valid in a plain Python script).
await executar()
