From 0675d3efb5d16b266017f5a9eaef3e3de6d74187 Mon Sep 17 00:00:00 2001 From: Thorsten Vitt Date: Tue, 11 Sep 2018 12:37:30 +0200 Subject: [PATCH] More links for conflict edges --- datings.py | 12 ++++++------ graph.py | 39 +++++++++++++++++++++++++++++---------- report.py | 25 ++++++------------------- visualize.py | 4 ++++ 4 files changed, 45 insertions(+), 35 deletions(-) diff --git a/datings.py b/datings.py index 34f4dc8..81cf1ec 100644 --- a/datings.py +++ b/datings.py @@ -16,7 +16,7 @@ logger = logging.getLogger(__name__) -def _parse_datestr(datestr: str) -> datetime.date: +def parse_datestr(datestr: str) -> datetime.date: if datestr is None: return None @@ -137,11 +137,11 @@ class AbsoluteDating(_AbstractDating): def __init__(self, el: etree._Element): super().__init__(el) - self.from_ = _parse_datestr(el.get('from', None)) - self.to = _parse_datestr(el.get('to', None)) - self.not_before = _parse_datestr(el.get('notBefore', None)) - self.not_after = _parse_datestr(el.get('notAfter', None)) - self.when = _parse_datestr(el.get('when', None)) + self.from_ = parse_datestr(el.get('from', None)) + self.to = parse_datestr(el.get('to', None)) + self.not_before = parse_datestr(el.get('notBefore', None)) + self.not_after = parse_datestr(el.get('notAfter', None)) + self.when = parse_datestr(el.get('when', None)) self.normalized = el.get('type', '') == 'normalized' if self.start is None and self.end is None: diff --git a/graph.py b/graph.py index 7d1a6cc..b2e0746 100644 --- a/graph.py +++ b/graph.py @@ -1,17 +1,16 @@ +import csv +from collections import defaultdict from datetime import date, timedelta +from pathlib import Path +from typing import List, Callable, Any, Dict, Tuple, Union +import dateutil +import networkx as nx from dataclasses import dataclass +from datings import base_graph, BiblSource, parse_datestr from faust_logging import logging -import csv -from collections import defaultdict, namedtuple -from typing import List, Callable, Any, Dict, Tuple, Type, Union - -import networkx as nx - -from datings import base_graph, BiblSource from igraph_wrapper import to_igraph, nx_edges -from visualize import simplify_graph, write_dot from uris import Reference, Inscription, Witness, AmbiguousRef logger = logging.getLogger(__name__) @@ -21,6 +20,28 @@ DAY = timedelta(days=1) +def pathlink(*nodes) -> Path: + node_names: List[str] = [] + for node in nodes: + if isinstance(node, str): + if node.startswith('faust://'): + node = Witness.get(node) + else: + try: + node = parse_datestr(node) + except ValueError: + pass + + if isinstance(node, Reference): + node_names.append(node.filename.stem) + elif isinstance(node, date): + node_names.append(node.isoformat()) + else: + logger.warning('Unknown node type: %s (%s)', type(node), node) + node_names.append(str(hash(node))) + return Path("--".join(node_names) + '.php') + + def subgraphs_with_conflicts(graph: nx.MultiDiGraph) -> List[nx.MultiDiGraph]: """ Extracts the smallest conflicted subgraphs of the given graph, i.e. the @@ -61,8 +82,6 @@ def analyse_conflicts(graph): " / ".join(map(str, nodes))]) conflicts_file.flush() mark_edges_to_delete(subgraph, edges_to_remove) - write_dot(subgraph, f"conflict-{index:02d}.dot") - nx.write_graphml(simplify_graph(subgraph), f"conflict-{index:02d}.graphml") return [('List of conflicts', conflicts_file_name)] diff --git a/report.py b/report.py index 99df0a1..88cfb09 100644 --- a/report.py +++ b/report.py @@ -1,5 +1,5 @@ import json -from datetime import timedelta, date, datetime +from datetime import date, datetime from itertools import chain, repeat, groupby from operator import itemgetter @@ -10,7 +10,6 @@ from more_itertools import pairwise from faust_logging import logging -from graph import MacrogenesisInfo, EARLIEST, LATEST, DAY import csv from collections.__init__ import defaultdict, Counter @@ -22,6 +21,7 @@ import faust from datings import BiblSource +from graph import MacrogenesisInfo, pathlink, EARLIEST, LATEST, DAY from uris import Reference, Witness, Inscription, UnknownRef, AmbiguousRef from visualize import write_dot, simplify_graph @@ -342,7 +342,7 @@ def _last_ref_subpage(self, DAY, ref): .column('XML', format_spec=lambda xml: ":".join(map(str, xml)))) for (u, v, attr) in self.base.in_edges(ref, data=True): delete_ = 'delete' in attr and attr['delete'] - assertionTable.row((f'nein' if delete_ else 'ja', + assertionTable.row((f'nein' if delete_ else 'ja', kinds[attr['kind']], u + DAY if isinstance(u, date) else u, attr['source'], @@ -352,7 +352,7 @@ def _last_ref_subpage(self, DAY, ref): kinds['temp-pre'] = 'entstanden vor' for (u, v, attr) in self.base.out_edges(ref, data=True): delete_ = 'delete' in attr and attr['delete'] - assertionTable.row(('nein' if delete_ else 'ja', + assertionTable.row((f'nein' if delete_ else 'ja', kinds[attr['kind']], v - DAY if isinstance(v, date) else v, attr['source'], @@ -381,7 +381,7 @@ def edge(self, u: Reference, v: Reference, attr: Dict[str,object]): if attr.get('ignore', False): classes.append('ignore') if attr.get('delete', False): classes.append('delete') self.row(( - f'nein' if attr.get('delete', False) else \ + f'nein' if attr.get('delete', False) else \ 'ignoriert' if attr.get('ignore', False) else 'ja', u, attr['kind'], @@ -483,21 +483,8 @@ def report_missing(graphs: MacrogenesisInfo): head=format(ref)) -def _path_link(*nodes) -> Path: - node_names: List[str] = [] - for node in nodes: - if isinstance(node, Reference): - node_names.append(node.filename.stem) - elif isinstance(node, date): - node_names.append(node.isoformat()) - else: - logger.warning('Unknown node type: %s (%s)', type(node), node) - node_names.append(str(hash(node))) - return Path("--".join(node_names) + '.php') - - def _report_conflict(graphs: MacrogenesisInfo, u, v): - reportfile = _path_link(u, v) + reportfile = pathlink(u, v) graphfile = reportfile.with_name(reportfile.stem + '-graph.dot') relevant_nodes = {u} | set(graphs.base.predecessors(u)) | set(graphs.base.successors(u)) \ | {v} | set(graphs.base.predecessors(v)) | set(graphs.base.successors(v)) diff --git a/visualize.py b/visualize.py index ecfbdd8..25fe715 100644 --- a/visualize.py +++ b/visualize.py @@ -10,6 +10,7 @@ from datings import BiblSource, add_timeline_edges from faust_logging import logging +from graph import pathlink from uris import Reference logger = logging.getLogger(__name__) @@ -96,6 +97,9 @@ def write_dot(graph: nx.MultiDiGraph, target='base_graph.dot', style=_load_style if 'edge' in style: for u, v, k, attr in simplified.edges(data=True, keys=True): kind = attr.get('kind', None) + if attr.get('delete', False): + attr['URL'] = pathlink(u, v).stem + attr['target'] = '_top' if kind in style['edge']: simplified.edges[u, v, k].update(style['edge'][kind]) for styled_attr in attr.keys() & style['edge']: