diff --git a/src/macrogen/diff.py b/src/macrogen/diff.py index e8ff385..4e39201 100644 --- a/src/macrogen/diff.py +++ b/src/macrogen/diff.py @@ -2,6 +2,7 @@ from collections import Counter, defaultdict from difflib import SequenceMatcher from itertools import zip_longest, combinations +from operator import sub from typing import Union, Tuple, List, Mapping, Any from macrogen import Reference, config @@ -9,7 +10,7 @@ from .graph import MacrogenesisInfo from pathlib import Path -from .report import HtmlTable, _fmt_node, SingleItem, write_html +from .report import HtmlTable, _fmt_node, SingleItem, write_html, AssertionTable, nodeformatter import pandas as pd logger = config.getLogger(__name__) @@ -87,8 +88,8 @@ def diff_order_table(self) -> HtmlTable: .column('nicht vor', attrs={'class': 'right'}) .column('nicht nach', attrs={'class': 'right'}) .column('Rang', attrs={'class': 'right'}) - .column(self.a.title, _fmt_node, attrs={'class': 'right border-right'}) - .column(self.b.title, _fmt_node) + .column(self.a.title, nodeformatter(self.a.title + '/'), attrs={'class': 'right border-right'}) + .column(self.b.title, nodeformatter(self.b.title + '/')) .column('nicht vor') .column('nicht nach') .column('Rang') @@ -109,6 +110,29 @@ def diff_order_table(self) -> HtmlTable: class_='equal pure-center ignore') return table + def conflict_diffs(self): + def unsplit(node): + if isinstance(node, SplitReference): + return node.reference + else: + return node + c_a = {(unsplit(u), unsplit(v)) for u, v, k, attr in self.a.info.conflicts} + c_b = {(unsplit(u), unsplit(v)) for u, v, k, attr in self.b.info.conflicts} + only_a = c_a - c_b + only_b = c_b - c_a + return only_a, only_b + + def conflict_diffs_html(self): + result = "" + for side, conflicts in zip([self.a, self.b], self.conflict_diffs()): + result += f'

{len(conflicts)} Konflikte nur in {side.title}

' + table = AssertionTable(prefix=side.title + '/') + for u, v in conflicts: + for w, x, k, attr in side.info.find_conflicts(u, v): + table.edge(w, x, attr) + result += table.format_table() + return result + def get_argparser() -> ArgumentParser: parser = ArgumentParser(description="Compare the order of macrogenesis results") @@ -132,7 +156,8 @@ def main(): .column("–") .column("↔") .column("=") - .column("Rangänderungen")) + .column("Rangänderungen") + .column("Konflikte")) for a, b in config.progress(pairs, unit=" Vergleiche"): try: logger.info('Comparing %s to %s ...', a, b) @@ -140,18 +165,17 @@ def main(): table = diff.diff_order_table() output: Path = options.output_dir / (diff.filename + ".php") logger.info("Saving %s ...", output.absolute()) - write_html(output, table.format_table(), + write_html(output, table.format_table() + diff.conflict_diffs_html(), diff.title) opcounts = defaultdict(int) for op, i1, i2, j1, j2 in diff.matcher.get_opcodes(): opcounts[op] += max(i2-i1, j2-j1) rank_changed = sum((diff.a.info.details['rank'] - diff.b.info.details['rank']).dropna() != 0) summary.row((diff, diff.matcher.ratio(), opcounts['insert'], opcounts['remove'], opcounts['replace'], - opcounts['equal'], rank_changed)) + opcounts['equal'], rank_changed, len(diff.b.info.conflicts) - len(diff.a.info.conflicts))) except FileNotFoundError as e: logger.error(e) write_html(options.output_dir / "order-diff.php", summary.format_table(), head="Vergleiche") - if __name__ == '__main__': main() diff --git a/src/macrogen/graph.py b/src/macrogen/graph.py index e266423..581c8a3 100644 --- a/src/macrogen/graph.py +++ b/src/macrogen/graph.py @@ -382,6 +382,7 @@ def _infer_details(self): logger.info('Preparing details on references') ordered_ref_nodes = self.order_refs() is_split = any(isinstance(node, SplitReference) for node in ordered_ref_nodes) + self.is_split = is_split if is_split: refs_from_graphs = [ref for ref in ordered_ref_nodes if ref.side == Side.END] # FIXME Configurable? refs_from_data = [ref.reference for ref in refs_from_graphs] @@ -438,6 +439,24 @@ def unsplit_rank(node) -> int: table['baseline_position'] = self.baseline_order() self.details = table + def find_conflicts(self, from_: Node, to_: Node) -> List[Tuple[Node, Node, int, dict]]: + """ + Finds conflict edges from a from_ node to a to_ node + + Args: + from_: source node, never a `SplitReference` + to_: end node, never a `SplitReference` + + Returns: + All info on the actual conflicting edges + """ + if self.is_split: + return [(u, v, k, attr) for (u, v, k, attr) in self.conflicts + if (u == from_ or isinstance(u, SplitReference) and u.reference == from_) + and (v == to_ or isinstance(v, SplitReference) and v.reference == to_)] + else: + return [(u, v, k, attr) for (u, v, k, attr) in self.conflicts if u == from_ and v == to_] + def year_stats(self): stats: pd.Series = self.details.avg_year.value_counts() stats.index = pd.Int64Index(stats.index) diff --git a/src/macrogen/report.py b/src/macrogen/report.py index fd315c3..fe16fda 100644 --- a/src/macrogen/report.py +++ b/src/macrogen/report.py @@ -362,14 +362,19 @@ def write_bibliography_stats(graph: nx.MultiDiGraph): writer.writerow([bibl, BiblSource(bibl).weight, total] + [bibls[bibl][kind] for kind in kinds]) -def _fmt_node(node: Union[Reference, object]): +def _fmt_node(node: Union[Reference, date], prefix: str = None) -> str: """Formats a node by creating a link of possible""" + prefix = prefix or '' if isinstance(node, Reference): - return f'{node}' + return f'{node}' else: return format(node) +def nodeformatter(prefix: str) -> Callable[[Union[Reference, date]], str]: + return partial(_fmt_node, prefix=prefix) + + def _edition_link(ref: Reference): """Creates a link or links into the edition for the node.""" if isinstance(ref, Witness): @@ -548,12 +553,17 @@ def _last_ref_subpage(self, ref): class AssertionTable(HtmlTable): - def __init__(self, **table_attrs): + def __init__(self, prefix=None, **table_attrs): super().__init__(data_sortable='true', **table_attrs) + self.prefix = prefix or '' + if prefix: + nodeformat = nodeformatter(self.prefix) + else: + nodeformat = _fmt_node (self.column('berücksichtigt?', data_sortable_type="alpha") - .column('Subjekt', _fmt_node, data_sortable_type="sigil") + .column('Subjekt', nodeformat, data_sortable_type="sigil") .column('Relation', RELATION_LABELS.get, data_sortable_type="alpha") - .column('Objekt', _fmt_node, data_sortable_type="sigil") + .column('Objekt', nodeformat, data_sortable_type="sigil") .column('Quelle', _fmt_source, data_sortable_type="bibliography") .column('Kommentare', _fmt_comments, data_sortable_type="alpha") .column('XML', _fmt_xml, data_sortable_type="alpha")) @@ -563,7 +573,7 @@ def edge(self, u: Reference, v: Reference, attr: Dict[str, object]): if attr.get('ignore', False): classes.append('ignore') if attr.get('delete', False): classes.append('delete') self.row(( - f'nein' if attr.get('delete', False) else + f'nein' if attr.get('delete', False) else 'ignoriert' if attr.get('ignore', False) else 'ja', u, attr['kind'],