In [4]:
import lxml.etree as ET
import os

# Input files: the XML document and the XSLT stylesheet that will be applied to it
xml_file = "exemplo.xml" # other candidates noted by the author: exemplo_full, exemplo
xslt_file = "style_csv.xsl" # other candidates noted by the author: style_csv, style

# Fail early with an explicit message if either input file is missing,
# instead of letting the parser raise a less specific error in a later cell.
if not os.path.exists(xslt_file):
    raise FileNotFoundError(f"Arquivo XSLT não encontrado: {xslt_file}")
if not os.path.exists(xml_file):
    raise FileNotFoundError(f"Arquivo XML não encontrado: {xml_file}")


In [5]:
# Parse the XML document and the XSLT stylesheet into element trees.
# `ET` is expected to be lxml.etree here (imported in the first cell).
xml = ET.parse(xml_file)
xslt = ET.parse(xslt_file)

# Build a callable transformer from the stylesheet and apply it to the XML tree;
# `result` holds the transformed output used by the next cell.
transform = ET.XSLT(xslt)
result = transform(xml)



In [6]:
# Write the transformation result to disk.
# The output path is defined once so the file written and the file named in the
# confirmation message can never drift apart (the message used to say 'saida.html'
# while the file actually written was 'saida3.html').
output_file = "saida3.html"

# NOTE(review): ET.tostring() serializes the result as XML and does not apply the
# stylesheet's <xsl:output> settings. If the stylesheet emits plain text (e.g. CSV),
# bytes(result) or result.write_output(output_file) may be the right call — confirm.
with open(output_file, "wb") as f:
    f.write(ET.tostring(result, pretty_print=True))
print(f"Transformação concluída. Verifique o arquivo '{output_file}'.")

Transformação concluída. Verifique o arquivo 'saida.html'.


In [None]:
# python -m http.server
# http://localhost:8000/exemplo.xml
# https://www.freeformatter.com/xml-formatter.html
# https://www.gov.br/inpi/pt-br/servicos/patentes/INPIBRAuthorityFileDefinitionFile_pt_v3.pdf
# https://www.gov.br/inpi/pt-br/servicos/patentes/informacao-tecnologica

In [7]:
import xml.etree.ElementTree as ET
import pandas as pd

# NOTE(review): this import rebinds `ET`, which the first cell bound to lxml.etree,
# to the standard-library ElementTree. The stdlib module has no XSLT class, so
# re-running the XSLT transformation cell after this one fails until the first
# cell is executed again.

# Path to the XML file
xml_file = "exemplo.xml"


def _child_text(parent, tag):
    """Return the text of the first `tag` child of `parent`, or None.

    None is returned when `parent` is None or has no such child, so a missing
    <document-id> no longer raises AttributeError.
    """
    if parent is None:
        return None
    node = parent.find(tag)
    return node.text if node is not None else None


def _code_attribute(element):
    """Return the `code` attribute of `element`, or None if the element is absent."""
    return element.attrib.get("code") if element is not None else None


# Parse the XML file
tree = ET.parse(xml_file)
root = tree.getroot()

# One dict per <authority-file-entry>; turned into a DataFrame in a single step below
data = []

# Iterate over every <authority-file-entry> element
for entry in root.findall(".//authority-file-entry"):
    # Publication identification from <publication-reference>/<document-id>.
    # `publication` may be None for an incomplete entry; the helper handles that.
    publication = entry.find(".//publication-reference/document-id")
    country = _child_text(publication, "country")
    doc_number = _child_text(publication, "doc-number")
    kind = _child_text(publication, "kind")
    date = _child_text(publication, "date")

    # `code` attribute of the description's <not-searchable-code>
    description_code = entry.find(".//searchable-description-code/not-searchable-code")
    description_code_value = _code_attribute(description_code)

    # `code` attribute of the claims' <not-searchable-code>
    claims_code = entry.find(".//searchable-claims-code/not-searchable-code")
    claims_code_value = _code_attribute(claims_code)

    # Text of the abstract's <searchable-language-code>
    abstract_code = entry.find(".//searchable-abstract-code/searchable-language-code")
    abstract_language = abstract_code.text if abstract_code is not None else None

    # Collect the row
    data.append({
        "Country": country,
        "Document Number": doc_number,
        "Kind": kind,
        "Date": date,
        "Description Code": description_code_value,
        "Claims Code": claims_code_value,
        "Abstract Language": abstract_language
    })

# Build the DataFrame; explicit columns keep the schema even when no entry is found
df = pd.DataFrame(data, columns=[
    "Country",
    "Document Number",
    "Kind",
    "Date",
    "Description Code",
    "Claims Code",
    "Abstract Language",
])

# Display the DataFrame
df


Unnamed: 0,Country,Document Number,Kind,Date,Description Code,Claims Code,Abstract Language
0,BR,PI0408054,A2,20060214,N,N,pt
1,BR,PI0303738,A2,20050412,N,N,pt
2,BR,PI0705988,A2,20090728,N,N,pt
3,BR,PI0502817,A2,20070227,N,N,pt
4,BR,PI9701861,A2,19981208,N,N,pt
...,...,...,...,...,...,...,...
56,BR,PI0806952,A8,20170919,N,N,pt
57,BR,PI0210184,A8,20060822,N,N,pt
58,BR,MU7503070,Y1,20030429,N,N,pt
59,BR,PI0904377,A8,20180327,N,N,pt
