Skip to content

Commit

Permalink
Simple script to diff the order from two macrogenesis graphs
Browse files Browse the repository at this point in the history
  • Loading branch information
thvitt committed Jul 11, 2020
1 parent 4ee8b79 commit 32d9a75
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 4 deletions.
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@ packages = [
]

[tool.poetry.scripts]
macrogen = 'macrogen.main:main'
witness-report = 'macrogen.uris:_witness_report'
macrogen-diff = 'macrogen.diff:main'
macrogen = 'macrogen.main:main'


[tool.poetry.dependencies]
Expand Down
2 changes: 1 addition & 1 deletion src/macrogen/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
from .config import config
from macrogen.bibliography import BiblSource
from .uris import Reference, Witness, AmbiguousRef, Inscription
from .visualize import simplify_graph, write_dot, render_file, render_all
from .visualize import simplify_graph, write_dot, render_file, render_all
58 changes: 58 additions & 0 deletions src/macrogen/diff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
from argparse import ArgumentParser
from difflib import SequenceMatcher
from itertools import zip_longest, combinations
from typing import Union

from .graph import MacrogenesisInfo
from pathlib import Path
from .report import HtmlTable, _fmt_node, SingleItem, write_html


def load_info(path: Union[Path, str]) -> MacrogenesisInfo:
    """
    Load a macrogenesis result from ``path``.

    Args:
        path: Either the result file itself or a directory; for a directory,
            the file ``macrogen-info.zip`` inside it is used. Plain strings
            are converted to :class:`~pathlib.Path` first.

    Returns:
        The ``MacrogenesisInfo`` constructed from the resolved path.
    """
    location = path if isinstance(path, Path) else Path(path)
    archive = location / "macrogen-info.zip" if location.is_dir() else location
    return MacrogenesisInfo(archive)


def diff_order_table(a: MacrogenesisInfo, b: MacrogenesisInfo, title_a: str = "a", title_b: str = "b"):
    """
    Build a three-column table describing how the order in ``b`` differs from ``a``.

    The ``order`` sequences of both results are compared with
    :class:`difflib.SequenceMatcher`; every opcode is turned into table rows:

    * ``replace``: one row per pair of references, marked with ``↔``
    * ``delete``: one row per reference only present in ``a``, marked with ``−``
    * ``insert``: one row per reference only present in ``b``, marked with ``+``
    * ``equal``: a single summary row giving the number of identical
      references together with the first and the last one of the run

    Args:
        a: left-hand result
        b: right-hand result
        title_a: column heading for references from ``a``
        title_b: column heading for references from ``b``

    Returns:
        The populated ``HtmlTable``.
    """
    table = (HtmlTable()
             .column(title_a, _fmt_node, attrs={'class': 'pull-right'})
             .column('op', attrs={'class': 'pure-center'})
             .column(title_b, _fmt_node))
    order_a, order_b = a.order, b.order
    for op, i1, i2, j1, j2 in SequenceMatcher(a=order_a, b=order_b).get_opcodes():
        if op == "replace":
            # The two runs of a replace may differ in length. Pad the shorter
            # one with '' (the same placeholder the delete/insert rows use)
            # instead of zip_longest's default None, which would otherwise be
            # handed to the node formatter.
            for ref_a, ref_b in zip_longest(order_a[i1:i2], order_b[j1:j2], fillvalue=''):
                table.row((ref_a, '↔', ref_b), class_='replace')
        elif op == "delete":
            for ref_a in order_a[i1:i2]:
                table.row((ref_a, '−', ''), class_='delete')
        elif op == "insert":
            for ref_b in order_b[j1:j2]:
                table.row(('', '+', ref_b), class_='insert')
        elif op == "equal":
            # Equal runs are collapsed into one row; first and last reference
            # are separated by an ellipsis so they do not run into each other.
            summary = f'{i2-i1} gleiche Referenzen ({order_a[i1]} … {order_a[i2-1]})'
            table.row(SingleItem(summary), class_='equal pure-center ignore')
    return table


def get_argparser() -> ArgumentParser:
    """
    Create the command line parser for the order comparison script.

    Positional arguments are one ``base`` path followed by one or more
    ``compare`` paths. ``-p``/``--pairwise`` is a flag that is off unless
    given, and ``-o``/``--output-dir`` defaults to the current directory
    (``Path()``). All paths are parsed into :class:`~pathlib.Path` objects.
    """
    cli = ArgumentParser(
        description="Compare the order of macrogenesis results",
    )
    cli.add_argument(
        "base",
        type=Path,
        help="base path (or zip) for comparison",
    )
    cli.add_argument(
        "compare",
        type=Path,
        nargs="+",
        help="comparison paths (or zips)",
    )
    # store_true already implies a default of False
    cli.add_argument(
        "-p", "--pairwise",
        action="store_true",
        help="compare all paths pairwise instead of base to all",
    )
    cli.add_argument(
        "-o", "--output-dir",
        type=Path,
        default=Path(),
    )
    return cli

def main():
    """
    Command line entry point: write one order comparison page per pair of results.

    Without ``--pairwise`` the base result is compared with each of the
    comparison results; with ``--pairwise`` every combination of two of the
    given paths is compared. For every pair ``(a, b)`` a file
    ``order-<a.stem>.<b.stem>.php`` is written to the output directory, which
    is created if it does not exist yet.
    """
    options = get_argparser().parse_args()
    if options.pairwise:
        pairs = list(combinations([options.base] + options.compare, 2))
    else:
        pairs = [(options.base, compare) for compare in options.compare]
    options.output_dir.mkdir(parents=True, exist_ok=True)

    # The same path takes part in several pairs (the base in every pair, or
    # every path in pairwise mode), so each result is loaded only once and
    # then reused. Loading stays lazy: a path is first read when the first
    # pair that needs it is processed.
    loaded = {}

    def info_for(path):
        if path not in loaded:
            loaded[path] = load_info(path)
        return loaded[path]

    for a, b in pairs:
        table = diff_order_table(info_for(a), info_for(b), a.stem, b.stem)
        write_html(options.output_dir / f"order-{a.stem}.{b.stem}.php", table.format_table(), f"{a.stem} / {b.stem}")


if __name__ == '__main__':
    main()
27 changes: 25 additions & 2 deletions src/macrogen/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,26 @@
None: '???'
}

class SingleItem:
    """
    A sequence-like wrapper around exactly one item.

    It has a length of 1, iterating over it yields the wrapped item, and
    ``str()`` gives the item's string form. ``HtmlTable`` treats a row of
    this type specially and renders it as one cell spanning all columns.
    """

    def __init__(self, item):
        self.item = item

    def __len__(self):
        return 1

    def __getitem__(self, item):
        """
        Return the wrapped item if ``item`` addresses it.

        Valid are the integer indexes ``0`` and ``-1`` as well as every slice
        that, applied to a sequence of length 1, starts at 0 and stops at 1
        (e.g. ``[:]`` or ``[0:1]``). In both cases the wrapped item itself is
        returned.

        Raises:
            IndexError: if the index or slice does not address the single item
            TypeError: if ``item`` is neither an integer nor a slice
        """
        if isinstance(item, slice):
            start, stop, _step = item.indices(1)
            addressed = start == 0 and stop == 1
        elif isinstance(item, int):
            # slice(item) would mean [:item], so integer indexes are checked
            # directly: 0 and -1 are the only positions in a 1-element sequence
            addressed = item in (0, -1)
        else:
            raise TypeError(
                f"{self.__class__.__name__} indices must be integers or slices, not {type(item).__name__}")
        if not addressed:
            raise IndexError(f"SingleItem only has a single item, so you cannot index it with {item}")
        return self.item

    def __iter__(self):
        yield self.item

    def __str__(self):
        return str(self.item)

    def __repr__(self):
        return f"{self.__class__.__name__}({self.item!r})"


class HtmlTable:
"""
Expand Down Expand Up @@ -163,8 +183,11 @@ def _format_row(self, row: Iterable, **rowattrs) -> str:
"""
attributes = _build_attrs(rowattrs)
try:
return f'<tr{attributes}>' + ''.join(
self._format_column(index, column) for index, column in enumerate(row)) + '</tr>'
if isinstance(row, SingleItem):
return f'<tr{attributes}><td colspan="{len(self.titles)}">{row.item}</td></tr>'
else:
return f'<tr{attributes}>' + ''.join(
self._format_column(index, column) for index, column in enumerate(row)) + '</tr>'
except:
row_str = ", ".join(f"{title!s}: {value!r}" for title, value in zip_longest(self.titles, row))
logger.exception('Error formatting row (%s)', row_str)
Expand Down

0 comments on commit 32d9a75

Please sign in to comment.