In [None]:
import pdfplumber

# Path of the register-documentation PDF that is opened with pdfplumber below.
pdfpath = "SysReg_xml_A_profile-2023-09.pdf"

In [None]:
# Open the PDF once; the handle is deliberately left open because the
# parsing loop further down reads individual pages through `pdf.pages`.
pdf = pdfplumber.open(pdfpath)

In [None]:
def append_dict(target:dict, key: any, item: str):
	"""Append `item` followed by a newline to the text stored at `target[key]`.

	The entry is created as an empty string when `key` is not present yet,
	and that happens even if `item` itself is empty. An empty `item` adds
	nothing to the stored text.

	Args:
		target: Dictionary that accumulates text per key; modified in place.
		key: Key whose text is extended.
		item: Line of text to append (without trailing newline).
	"""
	# Make sure the slot exists before deciding whether to append.
	target.setdefault(key, "")

	if len(item) == 0:
		return

	target[key] += item + "\n"

def parse_purpose(register: dict, line: str):
	"""Add `line` to the text collected under the register's "purpose" key."""
	append_dict(target=register, key="purpose", item=line)

def parse_configuration(register: dict, line: str):
	"""Add `line` to the text collected under the register's "configuration" key."""
	append_dict(target=register, key="configuration", item=line)

def parse_attributes(register: dict, line: str):
	"""Add `line` to the text collected under the register's "attributes" key."""
	append_dict(target=register, key="attributes", item=line)

def parse_fields(register: dict, line: str, fields: str = None) -> str:
	"""Consume one line of a register's "Field descriptions" section.

	A line is taken as the heading of a new field when it ends with "]",
	contains "[" and the word "bit" (case-insensitive), and contains no ".".
	Such a line becomes a key of `register["fields"]` with an empty
	description. Any other line is appended to the description of the field
	named by `fields`, if one is given.

	Args:
		register: Register being built; `register["fields"]` is created on
			first use and modified in place.
		line: Text line to process. May be empty.
		fields: Heading of the field currently being described, i.e. the
			value returned by the previous call, or None if no field has
			been started yet.

	Returns:
		The heading of the field that is current after this line: `line`
		itself when it starts a new field, otherwise `fields`. None when the
		line is not a heading and no field is open (the line is dropped).
	"""
	field_table = register.setdefault("fields", {})

	# endswith() rather than line[-1]: an empty line must not raise IndexError.
	is_heading = (
		line.endswith("]")
		and "[" in line
		and "bit" in line.lower()
		and "." not in line
	)
	if is_heading:
		field_table[line] = ""
		return line

	if fields is not None:
		append_dict(field_table, fields, line)
		return fields

	return None

def parse_accessing(register: dict, line: str):
	"""Add `line` to the text collected under the register's "accessing" key."""
	append_dict(target=register, key="accessing", item=line)

# Walk the selected PDF pages and build one dict per register.
#
# The first text line of every page is taken as the register's full name; a
# page whose first line differs from the current register's full name starts
# a new register. Within a page, section headings ("Purpose", "Configuration",
# "Attributes", "Field descriptions", "Accessing <name>") select the parser
# that receives the following lines. Lines seen before any heading are
# collected under the "unresolved" key.
registers = []
register = {}
def_parser = lambda register, line : append_dict(register, "unresolved", line)
parser = def_parser
field_key = None

# [27, 3545) for system registers
# [4942, 6336) for external registers
for p in range(4942, 6336):
	text = pdf.pages[p].extract_text()

	# A page that yields no text (None or "") cannot name a register; skip it
	# instead of crashing or starting a register with an empty name.
	if not text:
		continue

	lines = text.split('\n')

	if not register or lines[0] != register["fullname"]:
		if register:
			registers.append(register)
			register = {}

		# A new register starts with no active section and no open field.
		parser = def_parser
		field_key = None

		register["name"] = lines[0].split(",")[0]
		register["fullname"] = lines[0]

		print("register %s" % (register["fullname"]))

	# Skip the first line (register name) and the last two lines of the page.
	# NOTE(review): the last two lines are presumably the page footer - confirm.
	for line in lines[1 : -2]:
		if line == "Purpose":
			parser = parse_purpose
		elif line == "Configuration":
			parser = parse_configuration
		elif line == "Attributes":
			parser = parse_attributes
		elif line == "Field descriptions":
			parser = parse_fields
		elif line == "Accessing %s" % (register["name"]):
			parser = parse_accessing
		elif parser is parse_fields:
			# parse_fields reports which field the next line belongs to.
			field_key = parser(register, line, field_key)
		else:
			field_key = None
			parser(register, line)

# The loop only stores a register when the next one begins, so the register
# still being built when the pages run out has to be stored here.
if register:
	registers.append(register)


In [None]:
# Dump every parsed register to stdout: each key on its own line followed by
# its text; the "fields" key is expanded into one indented entry per field.
for r in registers:
	for key, content in r.items():
		print(key + ":")

		if key != "fields":
			print(content)
			continue

		for field_name, field_text in content.items():
			print("        %s:" % field_name)
			print("            %s" % (field_text))

In [None]:
import sqlite3

# Write the parsed registers to SQLite: one row per text section and one row
# per bit field, all into the arm64_external_register table.
# NOTE(review): the table is not created here; presumably it already exists
# in arm64_registers.db - confirm.
insert_sql = (
	"INSERT INTO arm64_external_register "
	"(register, register_fullname, attribute, start_bit, end_bit, description) "
	"VALUES(?, ?, ?, ?, ?, ?)"
)

db = sqlite3.connect("arm64_registers.db")
try:
	cursor = db.cursor()

	for r in registers:
		for k, value in r.items():
			# "name"/"fullname" are stored in every row already; "unresolved"
			# is text that matched no section and is not exported.
			if k in ("name", "fullname", "unresolved"):
				continue

			entry = [r["name"], r["fullname"]]

			if k == "fields":
				for f, description in value.items():
					# The bit range is the text inside the first [...] of the
					# field heading, e.g. "[31:0]" or "[5]".
					bits = f.split('[')[1].split(']')[0].split(':')
					try:
						start_bit = int(bits[0])
						end_bit = int(bits[-1])
					except ValueError:
						# Comma-separated or otherwise non-numeric ranges
						# cannot be stored as a single pair; mark as unknown.
						bits = ["-1", "-1"]
						start_bit = end_bit = -1
					print(f, bits)
					cursor.execute(
						insert_sql,
						entry + [f, start_bit, end_bit, description])
			else:
				# Plain text sections carry no bit range.
				cursor.execute(insert_sql, entry + [k, -1, -1, value])

	db.commit()
finally:
	# Close the connection even if an insert fails; uncommitted rows are
	# discarded in that case.
	db.close()
