From 7b1c5c78aca9d250ff9ebaacbec7337b2d7f9b9d Mon Sep 17 00:00:00 2001 From: David J Birnbaum Date: Sun, 19 Aug 2018 16:06:43 +0200 Subject: [PATCH 1/2] minor cleanup of core_functions.py --- collatex-pythonport/collatex/core_functions.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/collatex-pythonport/collatex/core_functions.py b/collatex-pythonport/collatex/core_functions.py index 635621641..5c62a0527 100644 --- a/collatex-pythonport/collatex/core_functions.py +++ b/collatex-pythonport/collatex/core_functions.py @@ -3,7 +3,6 @@ @author: Ronald Haentjens Dekker """ -import re from xml.etree import ElementTree as etree from xml.dom.minidom import Document from collections import defaultdict @@ -42,7 +41,8 @@ def collate(collation, output="table", layout="horizontal", segmentation=True, n # assume collation is collation (by now); no error trapping if not astar: - algorithm = EditGraphAligner(collation, near_match=False, detect_transpositions=detect_transpositions, debug_scores=debug_scores, properties_filter=properties_filter) + algorithm = EditGraphAligner(collation, near_match=False, detect_transpositions=detect_transpositions, + debug_scores=debug_scores, properties_filter=properties_filter) else: algorithm = ExperimentalAstarAligner(collation, near_match=False, debug_scores=debug_scores) @@ -116,10 +116,11 @@ def export_alignment_table_as_xml(table): readings.append(result) return "" + "".join(readings) + "" + def export_alignment_table_as_tei(table, indent=None): d = Document() root = d.createElementNS("http://interedition.eu/collatex/ns/1.0", "cx:apparatus") # fake namespace declarations - root.setAttribute("xmlns:cx","http://interedition.eu/collatex/ns/1.0") + root.setAttribute("xmlns:cx", "http://interedition.eu/collatex/ns/1.0") root.setAttribute("xmlns", "http://www.tei-c.org/ns/1.0") d.appendChild(root) for column in table.columns: @@ -145,7 +146,7 @@ def export_alignment_table_as_tei(table, indent=None): ws_flag = False # add space after if any ends in whitespace app = d.createElementNS("http://www.tei-c.org/ns/1.0", "app") root.appendChild(app) - for key,value in value_dict.items(): + for key, value in value_dict.items(): # key is reading, value is list of witnesses rdg = d.createElementNS("http://www.tei-c.org/ns/1.0", "rdg") rdg.setAttribute("wit", " ".join(["#" + item for item in value_dict[key]])) From 075df1b1f03eb883296d5e2a2e0d21e1635ae8ec Mon Sep 17 00:00:00 2001 From: David J Birnbaum Date: Sun, 19 Aug 2018 16:25:51 +0200 Subject: [PATCH 2/2] group TEI elements independently of trailing whitespace --- .../collatex/core_functions.py | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/collatex-pythonport/collatex/core_functions.py b/collatex-pythonport/collatex/core_functions.py index 5c62a0527..2371b8e6f 100644 --- a/collatex-pythonport/collatex/core_functions.py +++ b/collatex-pythonport/collatex/core_functions.py @@ -125,9 +125,13 @@ def export_alignment_table_as_tei(table, indent=None): d.appendChild(root) for column in table.columns: value_dict = defaultdict(list) + ws_flag = False for key, value in sorted(column.tokens_per_witness.items()): - # key is reading, value is list of witnesses - value_dict["".join(str(item.token_data["t"]) for item in value)].append(key) + # value_dict key is reading, value is list of witnesses + t_readings = "".join(item.token_data["t"] for item in value) + if ws_flag == False and t_readings.endswith((" ", r"\u0009", r"\000a")): # space, tab, lf + ws_flag = True + value_dict[t_readings.strip()].append(key) # REVIEW [RHD]: Isn't there a method on table that can be used instead of this len(next(iter() etc? # otherwise I think there should be. Not sure what len(next(iter(etc))) represents. @@ -143,21 +147,18 @@ def export_alignment_table_as_tei(table, indent=None): root.appendChild(text_node) else: # variation is either more than one reading, or one reading plus nulls - ws_flag = False # add space after if any ends in whitespace app = d.createElementNS("http://www.tei-c.org/ns/1.0", "app") root.appendChild(app) for key, value in value_dict.items(): - # key is reading, value is list of witnesses + # key is reading (with trailing whitespace stripped), value is list of witnesses rdg = d.createElementNS("http://www.tei-c.org/ns/1.0", "rdg") rdg.setAttribute("wit", " ".join(["#" + item for item in value_dict[key]])) - if ws_flag == False and key.endswith((" ", r"\u0009", r"\u000a")): # space, tab, linefeed - ws_flag = True - text_node = d.createTextNode(key.strip()) + text_node = d.createTextNode(key) rdg.appendChild(text_node) app.appendChild(rdg) - if ws_flag: - text_node = d.createTextNode(" ") - root.appendChild(text_node) + if ws_flag: + text_node = d.createTextNode(" ") + root.appendChild(text_node) if indent: result = d.toprettyxml() else: