Skip to content

Commit

Permalink
Link to files from the conflicting subsets
Browse files Browse the repository at this point in the history
  • Loading branch information
thvitt committed Jul 13, 2020
1 parent 93dad2b commit 1a412e1
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 13 deletions.
38 changes: 31 additions & 7 deletions src/macrogen/diff.py
Expand Up @@ -2,14 +2,15 @@
from collections import Counter, defaultdict
from difflib import SequenceMatcher
from itertools import zip_longest, combinations
from operator import sub
from typing import Union, Tuple, List, Mapping, Any

from macrogen import Reference, config
from macrogen.splitgraph import SplitReference, Side

from .graph import MacrogenesisInfo
from pathlib import Path
from .report import HtmlTable, _fmt_node, SingleItem, write_html
from .report import HtmlTable, _fmt_node, SingleItem, write_html, AssertionTable, nodeformatter
import pandas as pd

logger = config.getLogger(__name__)
Expand Down Expand Up @@ -87,8 +88,8 @@ def diff_order_table(self) -> HtmlTable:
.column('nicht vor', attrs={'class': 'right'})
.column('nicht nach', attrs={'class': 'right'})
.column('Rang', attrs={'class': 'right'})
.column(self.a.title, _fmt_node, attrs={'class': 'right border-right'})
.column(self.b.title, _fmt_node)
.column(self.a.title, nodeformatter(self.a.title + '/'), attrs={'class': 'right border-right'})
.column(self.b.title, nodeformatter(self.b.title + '/'))
.column('nicht vor')
.column('nicht nach')
.column('Rang')
Expand All @@ -109,6 +110,29 @@ def diff_order_table(self) -> HtmlTable:
class_='equal pure-center ignore')
return table

def conflict_diffs(self):
    """
    Determines the conflicts that occur in only one of the two compared results.

    Every conflict edge is reduced to a ``(source, target)`` pair. Endpoints
    that are a `SplitReference` are replaced by the reference they wrap
    before the two sides are compared.

    Returns:
        a pair ``(only_a, only_b)``: the set of node pairs that are
        conflicts only in ``self.a`` and the set of those only in ``self.b``
    """
    def plain(node):
        # a SplitReference is compared via the reference it wraps
        return node.reference if isinstance(node, SplitReference) else node

    def node_pairs(info):
        return {(plain(source), plain(target))
                for source, target, _key, _attr in info.conflicts}

    pairs_a = node_pairs(self.a.info)
    pairs_b = node_pairs(self.b.info)
    return pairs_a - pairs_b, pairs_b - pairs_a

def conflict_diffs_html(self):
    """
    Renders the conflicts that are unique to each side as HTML.

    For each of the two compared results, a heading with the number of
    conflicts only found in that result is followed by an `AssertionTable`
    listing the actual conflicting edges. Links in the table are prefixed
    with the side's title and a slash.

    Returns:
        the concatenated HTML for both sides
    """
    fragments = []
    for side, node_pairs in zip([self.a, self.b], self.conflict_diffs()):
        fragments.append(f'<h2>{len(node_pairs)} Konflikte nur in {side.title}</h2>')
        table = AssertionTable(prefix=side.title + '/')
        for source, target in node_pairs:
            # look up the real edges (possibly between split nodes) for the pair
            for u, v, _key, attr in side.info.find_conflicts(source, target):
                table.edge(u, v, attr)
        fragments.append(table.format_table())
    return "".join(fragments)


def get_argparser() -> ArgumentParser:
parser = ArgumentParser(description="Compare the order of macrogenesis results")
Expand All @@ -132,26 +156,26 @@ def main():
.column("–")
.column("↔")
.column("=")
.column("Rangänderungen"))
.column("Rangänderungen")
.column("Konflikte"))
for a, b in config.progress(pairs, unit=" Vergleiche"):
try:
logger.info('Comparing %s to %s ...', a, b)
diff = MacrogenDiff(a, b)
table = diff.diff_order_table()
output: Path = options.output_dir / (diff.filename + ".php")
logger.info("Saving %s ...", output.absolute())
write_html(output, table.format_table(),
write_html(output, table.format_table() + diff.conflict_diffs_html(),
diff.title)
opcounts = defaultdict(int)
for op, i1, i2, j1, j2 in diff.matcher.get_opcodes():
opcounts[op] += max(i2-i1, j2-j1)
rank_changed = sum((diff.a.info.details['rank'] - diff.b.info.details['rank']).dropna() != 0)
summary.row((diff, diff.matcher.ratio(), opcounts['insert'], opcounts['remove'], opcounts['replace'],
opcounts['equal'], rank_changed))
opcounts['equal'], rank_changed, len(diff.b.info.conflicts) - len(diff.a.info.conflicts)))
except FileNotFoundError as e:
logger.error(e)
write_html(options.output_dir / "order-diff.php", summary.format_table(), head="Vergleiche")


if __name__ == '__main__':
main()
19 changes: 19 additions & 0 deletions src/macrogen/graph.py
Expand Up @@ -382,6 +382,7 @@ def _infer_details(self):
logger.info('Preparing details on references')
ordered_ref_nodes = self.order_refs()
is_split = any(isinstance(node, SplitReference) for node in ordered_ref_nodes)
self.is_split = is_split
if is_split:
refs_from_graphs = [ref for ref in ordered_ref_nodes if ref.side == Side.END] # FIXME Configurable?
refs_from_data = [ref.reference for ref in refs_from_graphs]
Expand Down Expand Up @@ -438,6 +439,24 @@ def unsplit_rank(node) -> int:
table['baseline_position'] = self.baseline_order()
self.details = table

def find_conflicts(self, from_: Node, to_: Node) -> List[Tuple[Node, Node, int, dict]]:
    """
    Collects the conflict edges that lead from one node to another.

    If this result is split (``self.is_split``), an edge endpoint also
    matches when it is a `SplitReference` whose ``reference`` equals the
    requested node.

    Args:
        from_: source node, never a `SplitReference`
        to_: end node, never a `SplitReference`

    Returns:
        the matching ``(u, v, k, attr)`` entries of ``self.conflicts``
    """
    if self.is_split:
        def matches(node, wanted):
            return node == wanted or (isinstance(node, SplitReference) and node.reference == wanted)
    else:
        def matches(node, wanted):
            return node == wanted

    return [(u, v, k, attr)
            for (u, v, k, attr) in self.conflicts
            if matches(u, from_) and matches(v, to_)]

def year_stats(self):
stats: pd.Series = self.details.avg_year.value_counts()
stats.index = pd.Int64Index(stats.index)
Expand Down
22 changes: 16 additions & 6 deletions src/macrogen/report.py
Expand Up @@ -362,14 +362,19 @@ def write_bibliography_stats(graph: nx.MultiDiGraph):
writer.writerow([bibl, BiblSource(bibl).weight, total] + [bibls[bibl][kind] for kind in kinds])


def _fmt_node(node: Union[Reference, date], prefix: Union[str, None] = None) -> str:
    """
    Formats a node, creating a link if possible.

    Args:
        node: the node to format
        prefix: text inserted in front of the link target; ``None`` or an
            empty string means no prefix

    Returns:
        an HTML link labelled with the node if it is a `Reference`,
        otherwise ``format(node)``
    """
    prefix = prefix or ''
    if isinstance(node, Reference):
        return f'<a href="{prefix}{node.filename.stem}">{node}</a>'
    return format(node)


def nodeformatter(prefix: str) -> Callable[[Union[Reference, date]], str]:
    """Creates a formatter that works like `_fmt_node` with ``prefix`` preset."""
    formatter = partial(_fmt_node, prefix=prefix)
    return formatter


def _edition_link(ref: Reference):
"""Creates a link or links into the edition for the node."""
if isinstance(ref, Witness):
Expand Down Expand Up @@ -548,12 +553,17 @@ def _last_ref_subpage(self, ref):

class AssertionTable(HtmlTable):

def __init__(self, **table_attrs):
def __init__(self, prefix=None, **table_attrs):
super().__init__(data_sortable='true', **table_attrs)
self.prefix = prefix or ''
if prefix:
nodeformat = nodeformatter(self.prefix)
else:
nodeformat = _fmt_node
(self.column('berücksichtigt?', data_sortable_type="alpha")
.column('Subjekt', _fmt_node, data_sortable_type="sigil")
.column('Subjekt', nodeformat, data_sortable_type="sigil")
.column('Relation', RELATION_LABELS.get, data_sortable_type="alpha")
.column('Objekt', _fmt_node, data_sortable_type="sigil")
.column('Objekt', nodeformat, data_sortable_type="sigil")
.column('Quelle', _fmt_source, data_sortable_type="bibliography")
.column('Kommentare', _fmt_comments, data_sortable_type="alpha")
.column('XML', _fmt_xml, data_sortable_type="alpha"))
Expand All @@ -563,7 +573,7 @@ def edge(self, u: Reference, v: Reference, attr: Dict[str, object]):
if attr.get('ignore', False): classes.append('ignore')
if attr.get('delete', False): classes.append('delete')
self.row((
f'<a href="{Path(pathlink(u, v)).stem}">nein</a>' if attr.get('delete', False) else
f'<a href="{self.prefix}{Path(pathlink(u, v)).stem}">nein</a>' if attr.get('delete', False) else
'ignoriert' if attr.get('ignore', False) else 'ja',
u,
attr['kind'],
Expand Down

0 comments on commit 1a412e1

Please sign in to comment.