<a href="https://colab.research.google.com/github/dvwinck/bilu/blob/main/RDV_moleza.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install requests beautifulsoup4

Get:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,626 B]
Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:3 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:4 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Get:5 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:6 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]
Hit:7 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:8 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Get:9 https://r2u.stat.illinois.edu/ubuntu jammy/main all Packages [8,563 kB]
Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Get:11 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]
Get:12 http://security.ubuntu.com/ubuntu jammy-security/main amd64 Packages [2,517 kB]
Get:13 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 Packages [2

In [None]:
import csv
import html
import os
import re
import shutil
import time
import zipfile
from datetime import datetime

import requests
from bs4 import BeautifulSoup
from google.colab import files  # Explicit import

# Constants
_SLEEP_TIME = 1  # seconds passed to time.sleep() after each coupon request
NF_DIR = "NF"  # directory where the fetched invoice HTML pages are stored

def limpar_pastas():
    """Reset the invoice directory.

    Removes ``NF_DIR`` together with everything inside it when it already
    exists, then creates it again so every run starts from an empty folder.
    """
    pasta_ja_existe = os.path.exists(NF_DIR)
    if pasta_ja_existe:
        shutil.rmtree(NF_DIR)
    os.makedirs(NF_DIR, exist_ok=True)

def remover_caracteres_especiais(texto):
    """Return *texto* with every disallowed character removed.

    Kept characters: ASCII letters, digits, whitespace, ``:`` and ``/``
    (the last two so that dates and times survive). Everything else,
    including accented letters and punctuation, is dropped.
    """
    nao_permitidos = re.compile(r"[^a-zA-Z0-9\s:/]")
    return nao_permitidos.sub("", texto)

def obter_dados_cupom(qrcode_url, sequencial, timeout=30):
    """Fetch one coupon page and extract its total, emission date and time.

    The raw HTML is saved as ``{NF_DIR}/NF{sequencial}.html`` and then parsed:
    the total is read from ``<span class="totalNumb txtMax">`` and the
    date/time from the text node that follows ``<strong> Emissão: </strong>``.

    Args:
        qrcode_url: URL of the coupon consultation page.
        sequencial: Sequence number used in the saved file name and echoed
            back in the result.
        timeout: Seconds to wait for the server before giving up. ``None``
            waits indefinitely.

    Returns:
        A dict with the keys ``sequencial``, ``data``, ``hora``,
        ``valor_total`` and ``link``. When the request, the file write or the
        parsing raises, the dict carries ``"N/A"`` for the extracted fields
        plus an ``erro`` key with the exception message. The function itself
        never raises, so a single bad link cannot abort a batch.
    """
    try:
        # Browser-like headers for the request.
        # NOTE(review): advertising "br" assumes a Brotli decoder is available
        # to requests/urllib3 in the target environment - confirm.
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.1 Safari/537.36",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
            "Accept-Language": "pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
            "Upgrade-Insecure-Requests": "1",
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "none",
            "Sec-Fetch-User": "?1",
            "Cache-Control": "max-age=0",
        }

        # Without a timeout a stalled server would block the whole batch.
        response = requests.get(qrcode_url, headers=headers, timeout=timeout)
        response.raise_for_status()

        # Keep a copy of the invoice page exactly as it was served.
        arquivo_nf = f"{NF_DIR}/NF{sequencial}.html"
        with open(arquivo_nf, "w", encoding="utf-8") as f:
            f.write(response.text)

        soup = BeautifulSoup(response.text, "html.parser")

        # Coupon total.
        valor_element = soup.find("span", class_="totalNumb txtMax")
        valor_total = (
            valor_element.text.strip() if valor_element is not None else "Não encontrado"
        )

        # Emission date and time: the first two whitespace-separated tokens of
        # the text right after the label. Missing pieces stay "N/A" instead of
        # raising, so an already extracted total is not thrown away.
        data = "N/A"
        hora = "N/A"
        emissao_element = soup.find("strong", string=" Emissão: ")
        if emissao_element is not None:
            emissao_data = emissao_element.next_sibling
            # The sibling may be absent or another tag; only text is usable.
            if isinstance(emissao_data, str):
                # split() without arguments ignores leading/repeated
                # whitespace, which split(" ") would turn into empty tokens.
                partes = remover_caracteres_especiais(emissao_data).split()
                if partes:
                    data = partes[0]
                if len(partes) > 1:
                    hora = partes[1]
            print(f" DATA:{data} HORA:{hora}")

        return {
            "sequencial": sequencial,
            "data": data,
            "hora": hora,
            "valor_total": valor_total,
            "link": qrcode_url,
        }
    except Exception as e:
        # Deliberate catch-all at the per-link boundary: the failure is
        # recorded in the result row rather than propagated.
        return {
            "sequencial": sequencial,
            "data": "N/A",
            "hora": "N/A",
            "valor_total": "N/A",
            "link": qrcode_url,
            "erro": str(e),
        }

def processar_lista_links(lista_links, salvar_arquivo="resultados.html", salvar_csv="resultados.csv"):
    """Fetch every coupon link and produce the HTML/CSV reports and the zip.

    Args:
        lista_links: Iterable of coupon URLs. ``None`` is treated as empty;
            blank entries are skipped and surrounding whitespace is removed,
            so stray empty lines in the input do not become error rows.
        salvar_arquivo: File name of the HTML report.
        salvar_csv: File name of the CSV report.
    """
    # Start from an empty invoice directory.
    limpar_pastas()

    links = [link.strip() for link in (lista_links or []) if link and link.strip()]
    total_links = len(links)

    resultados = []
    for idx, link in enumerate(links, start=1):
        print(f"Processando: {link}")
        resultados.append(obter_dados_cupom(link, idx))
        # Pause between requests only; nothing follows the last one.
        if idx < total_links:
            time.sleep(_SLEEP_TIME)

    salvar_resultados_em_arquivo(resultados, salvar_arquivo)
    salvar_resultados_em_csv(resultados, salvar_csv)
    compactar_relatorio(salvar_arquivo, salvar_csv)

def _valor_para_float(texto):
    """Convert a coupon total such as ``"1.234,56"`` or ``"12,50"`` to float.

    Returns ``None`` when *texto* is not a plain non-negative number
    (for example ``"N/A"`` or ``"Não encontrado"``).
    """
    if not isinstance(texto, str):
        return None
    limpo = texto.strip()

    # Thousands-grouped form: dots group the integer part, comma is decimal.
    agrupado = re.fullmatch(r"([0-9]{1,3}(?:\.[0-9]{3})+),([0-9]+)", limpo)
    if agrupado:
        return float(f"{agrupado.group(1).replace('.', '')}.{agrupado.group(2)}")

    # Otherwise accept digits with at most one decimal separator.
    normalizado = limpo.replace(",", ".")
    if not normalizado.replace(".", "", 1).isdigit():
        return None
    try:
        return float(normalizado)
    except ValueError:
        # isdigit() also accepts characters float() cannot parse (e.g. "²").
        return None


def salvar_resultados_em_arquivo(resultados, nome_arquivo):
    """Write the coupon results as an HTML table followed by a grand total.

    Args:
        resultados: List of dicts with the keys ``sequencial``, ``data``,
            ``hora``, ``valor_total``, ``link`` and optionally ``erro``.
        nome_arquivo: Path of the HTML file to create (overwritten if present).

    Rows whose ``valor_total`` is not numeric are listed but left out of the
    total. All cell values and the link are HTML-escaped, because the fields
    hold text taken from the fetched page and the error column holds
    exception messages, either of which may contain markup characters.
    """
    valores = (_valor_para_float(r.get("valor_total")) for r in resultados)
    total_valor = sum(v for v in valores if v is not None)

    with open(nome_arquivo, "w", encoding="utf-8") as f:
        f.write("""
        <!DOCTYPE html>
        <html lang="en">
        <head>
            <meta charset="UTF-8">
            <meta name="viewport" content="width=device-width, initial-scale=1.0">
            <title>Extrato dos Cupons</title>
            <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha3/dist/css/bootstrap.min.css" rel="stylesheet">
        </head>
        <body class="bg-light">
            <div class="container mt-5">
                <h1 class="text-center text-primary mb-4">Extrato dos Cupons</h1>
                <table class="table table-striped table-hover table-bordered">
                    <thead class="table-dark">
        """)
        f.write("<tr><th>Sequência</th><th>Data</th><th>Hora</th><th>Valor Total</th><th>Erro</th><th>Link</th></tr></thead><tbody>")
        for r in resultados:
            campos = (
                r["sequencial"],
                r["data"],
                r["hora"],
                r["valor_total"],
                r.get("erro", "N/A"),
            )
            celulas = "".join(f"<td>{html.escape(str(campo))}</td>" for campo in campos)
            # quote=True also escapes the single quote delimiting the href.
            link = html.escape(str(r["link"]), quote=True)
            f.write(f"<tr>{celulas}<td><a href='{link}'>Abrir</a></td></tr>")
        f.write("</tbody></table>")

        f.write(f"""
        <div style="margin-top: 20px; text-align: right;">
            <h4><strong>Total Geral: R$ {total_valor:.2f}</strong></h4>
        </div>
        """)
        # Close the container div opened in the page header as well.
        f.write("</div></body></html>")

def salvar_resultados_em_csv(resultados, nome_csv):
    """Write the coupon results to *nome_csv* as UTF-8 CSV.

    The file starts with a header row; each result then becomes one row with
    sequence, date, time, total, error (``"N/A"`` when the result has no
    ``erro`` key) and link, in that order.
    """
    cabecalho = ["Sequência", "Data", "Hora", "Valor Total", "Erro", "Link"]
    linhas = (
        [
            item["sequencial"],
            item["data"],
            item["hora"],
            item["valor_total"],
            item.get("erro", "N/A"),
            item["link"],
        ]
        for item in resultados
    )
    with open(nome_csv, mode="w", newline="", encoding="utf-8") as saida:
        escritor = csv.writer(saida)
        escritor.writerow(cabecalho)
        escritor.writerows(linhas)

def compactar_relatorio(relatorio_arquivo, relatorio_csv):
    """Bundle both reports and the saved invoices into a zip and download it.

    Creates ``relatorio_e_notas.zip`` (deflate-compressed) containing the HTML
    report, the CSV report and every file found under ``NF_DIR``, then hands
    the archive to ``files.download``.
    """
    zip_filename = "relatorio_e_notas.zip"
    with zipfile.ZipFile(zip_filename, mode="w", compression=zipfile.ZIP_DEFLATED) as pacote:
        for relatorio in (relatorio_arquivo, relatorio_csv):
            pacote.write(relatorio)
        for pasta, _subpastas, nomes in os.walk(NF_DIR):
            for nome in nomes:
                pacote.write(os.path.join(pasta, nome))
    # Trigger the download of the finished archive.
    files.download(zip_filename)

def carregar_links_de_arquivo():
    """Prompt for a file upload and return the links it contains.

    Returns:
        A list with one entry per line of the uploaded content. Lines from
        every uploaded file are concatenated in upload order, and the list is
        empty when nothing was uploaded, so callers always get a list.
    """
    print("Carregue um arquivo contendo os links:")
    uploaded = files.upload()

    links = []
    # files.upload() maps each file name to the uploaded bytes, so the content
    # is decoded directly. "utf-8-sig" drops a leading BOM, which would
    # otherwise stay glued to the first link.
    for conteudo in uploaded.values():
        links.extend(conteudo.decode("utf-8-sig").splitlines())
    return links

# Run the program: load the links, then write reports named after the current timestamp.
lista_links = carregar_links_de_arquivo()
data_hora_atual = datetime.now().strftime("%Y%m%d_%H%M")
processar_lista_links(
    lista_links,
    salvar_arquivo=f"relatorio_cupons_{data_hora_atual}.html",
    salvar_csv=f"relatorio_cupons_{data_hora_atual}.csv",
)


Carregue um arquivo contendo os links:


Saving teest.txt to teest (11).txt
Processando: https://sat.sef.sc.gov.br/nfce/consulta?p=42241279901047000186650000002241761818517023|2|1|1|9354841EB8D460509571431C1BC0A2C51081FF38
ALL: ['22/12/2024', '14:06:32\r\n\t\t\t\t\t\t\t\t', 'Via', 'Consumidor', '2\r\n\t\t\t\t\t\t\t\t'] DATA:22/12/2024 HORA:14:06:32
Processando: https://sat.sef.sc.gov.br/nfce/consulta?p=42241033152447000135650010001371831001479833|2|1|1|C067A5BB9EB4C185CA6A8358077B20C53D8564D5
ALL: ['14/10/2024', '15:46:11\r\n\t\t\t\t\t\t\t\t', 'Via', 'Consumidor', '2\r\n\t\t\t\t\t\t\t\t'] DATA:14/10/2024 HORA:15:46:11
Processando: https://sat.sef.sc.gov.br/nfce/consulta?p=42241033152447000135650010001370631001478618|2|1|1|ED348AAEB053318AC97E617F7C45C176948A423F
ALL: ['14/10/2024', '08:24:47\r\n\t\t\t\t\t\t\t\t', 'Via', 'Consumidor', '2\r\n\t\t\t\t\t\t\t\t'] DATA:14/10/2024 HORA:08:24:47
Processando: https://sat.sef.sc.gov.br/nfce/consulta?p=42241033152447000135650010001372351001480350|2|1|1|7FD41BF1B051D134A4E2400CDC683FA60D

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>