diff --git a/src/macrogen/diff.py b/src/macrogen/diff.py
index e8ff385..4e39201 100644
--- a/src/macrogen/diff.py
+++ b/src/macrogen/diff.py
@@ -2,6 +2,7 @@
from collections import Counter, defaultdict
from difflib import SequenceMatcher
from itertools import zip_longest, combinations
+from operator import sub
from typing import Union, Tuple, List, Mapping, Any
from macrogen import Reference, config
@@ -9,7 +10,7 @@
from .graph import MacrogenesisInfo
from pathlib import Path
-from .report import HtmlTable, _fmt_node, SingleItem, write_html
+from .report import HtmlTable, _fmt_node, SingleItem, write_html, AssertionTable, nodeformatter
import pandas as pd
logger = config.getLogger(__name__)
@@ -87,8 +88,8 @@ def diff_order_table(self) -> HtmlTable:
.column('nicht vor', attrs={'class': 'right'})
.column('nicht nach', attrs={'class': 'right'})
.column('Rang', attrs={'class': 'right'})
- .column(self.a.title, _fmt_node, attrs={'class': 'right border-right'})
- .column(self.b.title, _fmt_node)
+ .column(self.a.title, nodeformatter(self.a.title + '/'), attrs={'class': 'right border-right'})
+ .column(self.b.title, nodeformatter(self.b.title + '/'))
.column('nicht vor')
.column('nicht nach')
.column('Rang')
@@ -109,6 +110,29 @@ def diff_order_table(self) -> HtmlTable:
class_='equal pure-center ignore')
return table
+ def conflict_diffs(self):
+     """
+     Compares the conflict edges of both sides on the level of unsplit nodes.
+
+     Every conflict edge is reduced to its ``(source, target)`` pair; nodes
+     that are a `SplitReference` are replaced with the reference they wrap.
+
+     Returns:
+         a pair ``(only_a, only_b)`` of sets of ``(source, target)`` tuples:
+         the pairs that occur only in side a and only in side b, respectively
+     """
+     def endpoints(info):
+         # only the two end nodes of each conflict edge matter here,
+         # the edge key and the attributes are dropped
+         return {tuple(node.reference if isinstance(node, SplitReference) else node
+                       for node in (u, v))
+                 for u, v, _key, _attr in info.conflicts}
+
+     in_a = endpoints(self.a.info)
+     in_b = endpoints(self.b.info)
+     return in_a - in_b, in_b - in_a
+
+ def conflict_diffs_html(self):
+ # Builds one string that contains, for each of the two compared sides, a
+ # line with the number of conflicts found only on that side, followed by
+ # a formatted AssertionTable listing those conflict edges.
+ result = ""
+ # conflict_diffs() returns (only_a, only_b), so zipping pairs each side
+ # with the set of node pairs that conflict only there
+ for side, conflicts in zip([self.a, self.b], self.conflict_diffs()):
+ result += f'
{len(conflicts)} Konflikte nur in {side.title}
'
+ # the side's title plus '/' is handed to the table as node prefix
+ table = AssertionTable(prefix=side.title + '/')
+ for u, v in conflicts:
+ # the pairs consist of unsplit nodes; find_conflicts looks up the
+ # side's actual conflict edges between them
+ for w, x, k, attr in side.info.find_conflicts(u, v):
+ table.edge(w, x, attr)
+ result += table.format_table()
+ return result
+
def get_argparser() -> ArgumentParser:
parser = ArgumentParser(description="Compare the order of macrogenesis results")
@@ -132,7 +156,8 @@ def main():
.column("–")
.column("↔")
.column("=")
- .column("Rangänderungen"))
+ .column("Rangänderungen")
+ .column("Konflikte"))
for a, b in config.progress(pairs, unit=" Vergleiche"):
try:
logger.info('Comparing %s to %s ...', a, b)
@@ -140,18 +165,17 @@ def main():
table = diff.diff_order_table()
output: Path = options.output_dir / (diff.filename + ".php")
logger.info("Saving %s ...", output.absolute())
- write_html(output, table.format_table(),
+ write_html(output, table.format_table() + diff.conflict_diffs_html(),
diff.title)
opcounts = defaultdict(int)
for op, i1, i2, j1, j2 in diff.matcher.get_opcodes():
opcounts[op] += max(i2-i1, j2-j1)
rank_changed = sum((diff.a.info.details['rank'] - diff.b.info.details['rank']).dropna() != 0)
summary.row((diff, diff.matcher.ratio(), opcounts['insert'], opcounts['remove'], opcounts['replace'],
- opcounts['equal'], rank_changed))
+ opcounts['equal'], rank_changed, len(diff.b.info.conflicts) - len(diff.a.info.conflicts)))
except FileNotFoundError as e:
logger.error(e)
write_html(options.output_dir / "order-diff.php", summary.format_table(), head="Vergleiche")
-
if __name__ == '__main__':
main()
diff --git a/src/macrogen/graph.py b/src/macrogen/graph.py
index e266423..581c8a3 100644
--- a/src/macrogen/graph.py
+++ b/src/macrogen/graph.py
@@ -382,6 +382,7 @@ def _infer_details(self):
logger.info('Preparing details on references')
ordered_ref_nodes = self.order_refs()
is_split = any(isinstance(node, SplitReference) for node in ordered_ref_nodes)
+ self.is_split = is_split
if is_split:
refs_from_graphs = [ref for ref in ordered_ref_nodes if ref.side == Side.END] # FIXME Configurable?
refs_from_data = [ref.reference for ref in refs_from_graphs]
@@ -438,6 +439,24 @@ def unsplit_rank(node) -> int:
table['baseline_position'] = self.baseline_order()
self.details = table
+ def find_conflicts(self, from_: Node, to_: Node) -> List[Tuple[Node, Node, int, dict]]:
+     """
+     Collects the conflict edges that lead from one node to another.
+
+     If the graph is split (``self.is_split``), an edge end also matches
+     when it is a `SplitReference` whose ``reference`` equals the requested
+     node; otherwise only direct equality counts.
+
+     Args:
+         from_: start node of the edges looked for, never a `SplitReference`
+         to_: end node of the edges looked for, never a `SplitReference`
+
+     Returns:
+         the matching ``(u, v, key, attr)`` entries of ``self.conflicts``,
+         in the order in which they appear there
+     """
+     split = self.is_split
+
+     def hits(node, wanted):
+         if node == wanted:
+             return True
+         # wrapped references are only looked into for split graphs
+         return split and isinstance(node, SplitReference) and node.reference == wanted
+
+     found = []
+     for u, v, key, attr in self.conflicts:
+         if hits(u, from_) and hits(v, to_):
+             found.append((u, v, key, attr))
+     return found
+
def year_stats(self):
stats: pd.Series = self.details.avg_year.value_counts()
stats.index = pd.Int64Index(stats.index)
diff --git a/src/macrogen/report.py b/src/macrogen/report.py
index fd315c3..fe16fda 100644
--- a/src/macrogen/report.py
+++ b/src/macrogen/report.py
@@ -362,14 +362,19 @@ def write_bibliography_stats(graph: nx.MultiDiGraph):
writer.writerow([bibl, BiblSource(bibl).weight, total] + [bibls[bibl][kind] for kind in kinds])
-def _fmt_node(node: Union[Reference, object]):
+def _fmt_node(node: Union[Reference, date], prefix: str = None) -> str:
"""Formats a node by creating a link of possible"""
+ # a missing prefix (None) is treated like the empty string
+ prefix = prefix or ''
if isinstance(node, Reference):
- return f'{node}'
+ return f'{node}'
else:
return format(node)
+def nodeformatter(prefix: str) -> Callable[[Union[Reference, date]], str]:
+    """
+    Creates a node formatting function with a fixed prefix.
+
+    Args:
+        prefix: value that is passed on to `_fmt_node` as its ``prefix`` argument
+
+    Returns:
+        a callable that formats a single node using `_fmt_node`
+    """
+    formatter = partial(_fmt_node, prefix=prefix)
+    return formatter
+
+
def _edition_link(ref: Reference):
"""Creates a link or links into the edition for the node."""
if isinstance(ref, Witness):
@@ -548,12 +553,17 @@ def _last_ref_subpage(self, ref):
class AssertionTable(HtmlTable):
- def __init__(self, **table_attrs):
+ def __init__(self, prefix=None, **table_attrs):
super().__init__(data_sortable='true', **table_attrs)
+ self.prefix = prefix or ''
+ if prefix:
+ nodeformat = nodeformatter(self.prefix)
+ else:
+ nodeformat = _fmt_node
(self.column('berücksichtigt?', data_sortable_type="alpha")
- .column('Subjekt', _fmt_node, data_sortable_type="sigil")
+ .column('Subjekt', nodeformat, data_sortable_type="sigil")
.column('Relation', RELATION_LABELS.get, data_sortable_type="alpha")
- .column('Objekt', _fmt_node, data_sortable_type="sigil")
+ .column('Objekt', nodeformat, data_sortable_type="sigil")
.column('Quelle', _fmt_source, data_sortable_type="bibliography")
.column('Kommentare', _fmt_comments, data_sortable_type="alpha")
.column('XML', _fmt_xml, data_sortable_type="alpha"))
@@ -563,7 +573,7 @@ def edge(self, u: Reference, v: Reference, attr: Dict[str, object]):
if attr.get('ignore', False): classes.append('ignore')
if attr.get('delete', False): classes.append('delete')
self.row((
- f'nein' if attr.get('delete', False) else
+ f'nein' if attr.get('delete', False) else
'ignoriert' if attr.get('ignore', False) else 'ja',
u,
attr['kind'],