# `element.removeAttribute(attname)`

## Example with simplified data

In [9]:
from xml.dom.pulldom import CHARACTERS, START_ELEMENT, parseString, END_ELEMENT
from xml.dom.minidom import Document


class Stack(list):
    """A plain list with stack vocabulary: push() and peek() next to list.pop()."""

    def peek(self):
        """Return the item on top of the stack without removing it.

        Raises IndexError if the stack is empty.
        """
        top_index = len(self) - 1
        return self[top_index]

    def push(self, item):
        """Put item on top of the stack."""
        self.append(item)


# Stack of the nodes that are currently open in the output tree. The output
# document sits at the bottom and is what gets serialized at the end.
open_elements = Stack()
d = Document()
open_elements.push(d)

# Sample input in which <p> and <word> are written as pairs of empty marker
# elements: one carrying @start and one carrying @end.
# The string is called xml_source rather than input so that the builtin
# input() is not rebound in the notebook's shared namespace.
xml_source = '''<root>
    <p start="p1"/>This is a <word start="w1"/>paragraph<word end="w1"/> that contains
    some stuff.<p end="p1"/>
</root>'''

for event, node in parseString(xml_source):
    if event == START_ELEMENT:
        if not node.hasAttribute('end'): # process pseudo-end-tags on END_ELEMENT event
            # Start markers and ordinary elements become the new innermost
            # open element; following content is appended inside them.
            open_elements.peek().appendChild(node)
            open_elements.push(node)
    elif event == END_ELEMENT:
        if node.hasAttribute('start'):
            # A start marker stays on the stack (no pop) so that it keeps
            # collecting content until the matching end marker arrives.
            node.removeAttribute('start') # can't remove @start until we're done with the node
        else:
            # Ordinary end-tags and @end markers close the innermost open element.
            open_elements.pop()
    elif event == CHARACTERS:
        t = d.createTextNode(node.data)
        open_elements.peek().appendChild(t)
    # every other event type is ignored

print(open_elements[0].toxml())

<?xml version="1.0" ?><root>
    <p>This is a <word>paragraph</word> that contains
    some stuff.</p>
</root>


So far, so good …

## Now with real data!

First, take a look at the input file:

In [3]:
# Show the raw input file. The handle is called xml_file rather than input so
# the builtin input() stays usable in later cells, and the encoding is given
# explicitly (the file declares UTF-8) instead of relying on the locale default.
with open('flattened.xml', encoding='utf-8') as xml_file:
    print(xml_file.read())

<?xml version="1.0" encoding="UTF-8"?>
<root xmlns:th="http://www.blackmesatech.com/2017/nss/trojan-horse">
    <p th:sID="d1e3"/>This is a <word th:sID="d1e5"/>paragraph<word th:eID="d1e5"/> that contains
    some <nonTrojan type="test"/> stuff.<p th:eID="d1e3"/>
    <p th:sID="d1e9"/>This is <emphasis role="bold">another</emphasis> paragraph <phrase
        th:sID="d1e11"/><word th:sID="d1e12"/>that<word th:eID="d1e12"/>
    <word th:sID="d1e15"/>contains<word th:eID="d1e15"/>
    <word th:sID="d1e18"/>more<word th:eID="d1e18"/><phrase th:eID="d1e11"/> stuff.<p th:eID="d1e9"
    />
</root>



Transform it, replacing each pair of Trojan-horse markers (`th:sID` / `th:eID`) with a single container element:

In [13]:
from xml.dom.pulldom import CHARACTERS, START_ELEMENT, parseString, END_ELEMENT
from xml.dom.minidom import Document


class Stack(list):
    """A plain list with stack vocabulary: push() and peek() next to list.pop()."""

    def peek(self):
        """Return the item on top of the stack without removing it.

        Raises IndexError if the stack is empty.
        """
        top_index = len(self) - 1
        return self[top_index]

    def push(self, item):
        """Put item on top of the stack."""
        self.append(item)


# Stack of the nodes that are currently open in the output tree. The output
# document sits at the bottom and is what gets serialized at the end.
open_elements = Stack()
d = Document()
open_elements.push(d)

# The handle is called xml_file rather than input so that the builtin input()
# is not rebound in the notebook's shared namespace. The encoding is passed
# explicitly so decoding does not depend on the platform's locale default.
with open('flattened.xml', encoding='utf-8') as xml_file:
    for event, node in parseString(xml_file.read()):
        if event == START_ELEMENT:
            if not node.hasAttribute('th:eID'): # process pseudo-end-tags on END_ELEMENT event
                # Start markers and ordinary elements become the new innermost
                # open element; following content is appended inside them.
                open_elements.peek().appendChild(node)
                open_elements.push(node)
        elif event == END_ELEMENT:
            if node.hasAttribute('xmlns:th'): # don't declare now-unused th: namespace
                node.removeAttribute('xmlns:th')
            if node.hasAttribute('th:sID'): # can't remove @th:sID until we're done with the node
                # A start marker stays on the stack (no pop) so that it keeps
                # collecting content until the matching th:eID marker arrives.
                node.removeAttribute('th:sID')
            else: # pop only on container elements and Trojan end-tags
                open_elements.pop()
        elif event == CHARACTERS:
            t = d.createTextNode(node.data)
            open_elements.peek().appendChild(t)
        # every other event type is ignored

print(open_elements[0].toxml())

<?xml version="1.0" ?><root>
    <p>This is a <word>paragraph</word> that contains
    some <nonTrojan type="test"/> stuff.</p>
    <p>This is <emphasis role="bold">another</emphasis> paragraph <phrase><word>that</word>
    <word>contains</word>
    <word>more</word></phrase> stuff.</p>
</root>
