In [1]:
import glob
import argparse
import lxml.etree as ET

In [None]:
# Interactive configuration cell: collects every option from the user and
# stores the normalised values in the `config` dictionary used by the
# processing cell below.
print("Add progressive numeric attribute to a XML tag.\n\n## Instructions ##\nDrag&drop the XML file on the 'Files' tab of Colab, then insert the options\n")
config = {}

# Pick the input file deterministically (alphabetical order) and fail with a
# readable message when nothing has been uploaded yet.
xml_files = sorted(glob.glob('*.xml'))
if not xml_files:
    raise FileNotFoundError("No XML file found in the current directory: upload one and run this cell again")
# Prefer files that are not the default output of a previous run
# (<original file name>_new.xml); fall back to the full list otherwise.
source_candidates = [name for name in xml_files if not name.endswith("_new.xml")] or xml_files

config["src"] = source_candidates[0]
print(f"Input file: {config['src']}")
config["tag"] = input("XML tag to which add id: ").strip()
config["dest"] = input("Name of output file (if not inserted, <original file name>_new.xml): ").strip()
# optional values
config["initial"] = input("Initial value of the attribute (1 if not inserted): ").strip()
config["attribute"] = input("Name of the counter attribute (n if not inserted): ").strip()
config["restart"] = input("Restart numbering after this tag (None if not inserted): ").strip()
config["recto_or_verso"] = input("Only for pb and lb tags, add recto/verso to number (no recto/verso if not inserted, r for start with recto, v for start with verso): ").strip()
config["root"] = input("Root tag (TEI if not inserted): ").strip()

# convert values
try:
    config["initial"] = int(config["initial"]) if config["initial"] else 1
except ValueError:
    raise ValueError(f"Initial value must be an integer, got {config['initial']!r}") from None
config["rectoverso"] = (config["recto_or_verso"] == 'r')
config["versorecto"] = (config["recto_or_verso"] == 'v')
# The source name always ends in ".xml" (it comes from the '*.xml' glob),
# so dropping the last four characters removes the extension.
config["dest"] = config["dest"] or config["src"][:-4] + "_new.xml"
config["attribute"] = config["attribute"] or "n"
# The attribute name is stored padded with one space on each side; the
# processing cell removes the padding before using it.
config["attribute"] = " " + config["attribute"] + " "
# An empty answer means "no restart tag" / "use the document root": store
# None so the processing cell can tell "not given" apart from a real tag name.
config["restart"] = config["restart"] or None
config["root"] = config["root"] or None

# print input data
print(f"\n\nRecap of your choices\nInput filename: {config['src']}\nTag: {config['tag']}\nOutput filename: {config['dest']}\nInitial value: {config['initial']}\nId attribute: {config['attribute']}\nRecto or verso: {config['recto_or_verso']}\nRestart on this tag: {config['restart']}\nRoot tag: {config['root']}\n")


Add progressive numeric attribute to a XML tag.

## Instructions ##
Drag&drop the XML file on the 'Files' tab of Colab, then insert the options

Input file: VB-DOTR.xml
XML tag to which add id: lb
Name of output file (if not inserted, <original file name>_new.xml): 
Initial value of the attribute (1 if not inserted): 
Name of the counter attribute (n if not inserted): 
Restart numbering after this tag (None if not inserted): pb
Only for pb and lb tags, add recto/verso to number (no recto/verso if not inserted, r for start with recto, v for start with verso): 
Root tag (TEI if not inserted): 


Recap of your choices
Input filename: VB-DOTR.xml
Tag: lb
Output filename: VB-DOTR_new.xml
Initial value: 1
Id attribute:  n 
Recto or verso: 
Restart on this tag: pb
Root tag: None



In [None]:
# Processing cell: parses the input XML, writes the progressive counter
# attribute on every occurrence of the chosen tag and saves the result.

# read input file
ns = {
    'tei': 'http://www.tei-c.org/ns/1.0',
    'xsi': 'http://www.w3.org/2001/XMLSchema-instance'
}
for ns_prefix, ns_uri in ns.items():
    ET.register_namespace(ns_prefix, ns_uri)
tree = ET.parse(config["src"])
root = tree.getroot()

# insert attribute
prefix = "{http://www.tei-c.org/ns/1.0}"  # Clark-notation TEI namespace
c_tag = config["tag"]
TAG = prefix + c_tag
count = config["initial"]
# The configuration cell stores the attribute name padded with spaces.
counter_tag = config["attribute"].strip()
rectoverso = config["rectoverso"] or config["versorecto"]
# Empty strings are treated like None ("not given") for both optional tags.
r_tag = (config["root"] or "").strip() or None
l_tag = (config["restart"] or "").strip() or None

# check errors
# NOTE: errors stop the cell with `raise SystemExit(1)`; in a notebook kernel
# the interactive `exit()` helper only requests a shutdown and may let the
# rest of the cell keep running.
if not c_tag:
    print("Error: the value of 'tag' must not be empty")
    raise SystemExit(1)

# check root tag: restrict the numbering to the first element with this name
if r_tag is not None:
    root = next(root.iter(prefix + r_tag), None)
    if root is None:
        print("Error: root tag '" + r_tag + "' not found")
        raise SystemExit(1)

if rectoverso and c_tag != "pb":
    print("Error: for recto/verso option the value of 'tag' must be pb")
    raise SystemExit(1)

if rectoverso:  # recto/verso to pages (c_tag is "pb" here)
    # Values go 1r, 1v, 2r, 2v, ... (or 1v, 2r, 2v, ... when starting with
    # verso): the number advances after each verso.
    side = "r" if config["rectoverso"] else "v"
    for item in root.iter(TAG):
        item.set(counter_tag, str(count) + side)
        if side == "v":
            count += 1
            side = "r"
        else:
            side = "v"
else:
    # Line breaks restart at each page break unless another restart tag
    # was explicitly requested.
    if l_tag is None and c_tag == "lb":
        l_tag = "pb"

    if l_tag is not None:  # count restart at each l_tag
        restart_tag = prefix + l_tag
        # Passing several tags to iter() yields every matching element in
        # document order, so no temporary marker attribute is needed.
        # NOTE(review): as in the original code, numbering here always starts
        # (and restarts) at 1, ignoring the configured initial value.
        count = 1
        for item in root.iter(restart_tag, TAG):
            if item.tag == restart_tag:
                count = 1
            else:
                item.set(counter_tag, str(count))
                count += 1
    else:  # common case: one progressive counter over the whole subtree
        for item in root.iter(TAG):
            item.set(counter_tag, str(count))
            count += 1

# write output file (always the whole document, even when a root tag was given)
with open(config["dest"], 'wb') as f:
    tree.write(f, pretty_print=True, xml_declaration=True, encoding="utf-8")

print(f"All done, file {config['dest']} created or updated")

All done, file VB-DOTR_new.xml created or updated
