Skip to content

Commit

Permalink
optional models for temp-syn
Browse files Browse the repository at this point in the history
  • Loading branch information
thvitt committed Oct 14, 2019
1 parent 0489e1c commit 3f01562
Show file tree
Hide file tree
Showing 7 changed files with 144 additions and 11 deletions.
1 change: 0 additions & 1 deletion setup.py
Expand Up @@ -32,7 +32,6 @@
'logging-tree',
'colorlog',
'tqdm',
'dataclasses',
'cvxpy',
'cvxopt' # just used to pull in GLPK
],
Expand Down
1 change: 1 addition & 0 deletions src/macrogen/etc/bibscores.tsv
Expand Up @@ -8,6 +8,7 @@ faust://bibliography/gsa-datenbank 75
faust://bibliography/inventare_2_2 50
faust://self 10000
faust://progress 20000
faust://temp-syn 1000
faust://model/inscription/inline 5000
faust://bibliography/hertz1931 0
faust://bibliography/pniower1924 1
Expand Down
5 changes: 3 additions & 2 deletions src/macrogen/etc/default.yaml
Expand Up @@ -37,9 +37,10 @@ solver_options: # pass options to the solvers. maps solver name or 'all' t
verbose: false
light_timeline: true # use exclusion instead of high weight for timeline edges

model: split-reverse # single: Individual wits, split: model start and end separately
model: single # single: Individual wits, split: model start and end separately
inscriptions: # zero or more of: orphan, copy, inline (the latter only for split models)
# - inline
- copy
temp_syn: farthest # what to do with temp-syn nodes: ignore, copy, closest, farthest

progressbar: true # allow to show a progress bar in some situations

Expand Down
2 changes: 2 additions & 0 deletions src/macrogen/etc/styles.yaml
Expand Up @@ -2,6 +2,8 @@
node:
date:
shape: none
SynAnchor:
shape: point
start:
shape: rarrow
end:
Expand Down
98 changes: 94 additions & 4 deletions src/macrogen/graph.py
Expand Up @@ -10,12 +10,13 @@
from io import TextIOWrapper
from operator import itemgetter
from pathlib import Path
from typing import List, Any, Dict, Tuple, Union, Sequence, Optional, Set, Iterable
from typing import List, Any, Dict, Tuple, Union, Sequence, Optional, Set, Iterable, FrozenSet
from warnings import warn
from zipfile import ZipFile, ZIP_DEFLATED

import networkx as nx
import pandas as pd
from dataclasses import dataclass

from .graphutils import mark_edges_to_delete, remove_edges, in_path, first
from .bibliography import BiblSource
Expand Down Expand Up @@ -219,14 +220,15 @@ def run_analysis(self):
add_edge_weights(base)
resolve_ambiguities(base)
base = collapse_edges_by_source(base)
if config.temp_syn and config.temp_syn != 'ignore':
base = add_syn_nodes(base)
self.base = base
add_iweight(base)
working = cleanup_graph(base).copy()
self.working = working
self.add_missing_wits(working)
sccs = scc_subgraphs(working)


logger.info('Calculating minimum feedback arc set for %d strongly connected components', len(sccs))

all_feedback_edges = []
Expand Down Expand Up @@ -598,7 +600,8 @@ def add_path(self, graph: nx.MultiDiGraph, source: Node, target: Node, weight='i

def subgraph(self, *nodes: Node, context: bool = True, path_to: Iterable[Node] = {}, abs_dates: bool = True,
path_from: Iterable[Node] = {}, paths: Iterable[Node] = {}, paths_without_timeline: bool = False,
paths_between_nodes: bool = True, keep_timeline: bool = False, direct_assertions: bool = False) \
paths_between_nodes: bool = True, keep_timeline: bool = False, direct_assertions: bool = False,
temp_syn_context: bool = False) \
-> nx.MultiDiGraph:
"""
Extracts a sensible subgraph from the base graph.
Expand Down Expand Up @@ -643,6 +646,10 @@ def subgraph(self, *nodes: Node, context: bool = True, path_to: Iterable[Node] =
if context:
for node in central_nodes:
relevant_nodes |= set(self.dag.pred[node]).union(self.dag.succ[node])
if temp_syn_context:
for node in set(relevant_nodes):
if isinstance(node, SynAnchor):
relevant_nodes |= set(self.dag.pred[node]).union(self.dag.succ[node]).union(node.syn_group)

subgraph: nx.MultiDiGraph = nx.subgraph(self.base, relevant_nodes).copy()
sources = set(path_from).union(paths)
Expand Down Expand Up @@ -847,7 +854,6 @@ def add_inscription_links(base: nx.MultiDiGraph):
base.add_edge(node, node.witness, kind='inscription', source=BiblSource('faust://model/inscription'))



def cleanup_graph(A: nx.MultiDiGraph) -> nx.MultiDiGraph:
logger.info('Removing edges to ignore')

Expand All @@ -867,6 +873,90 @@ def is_ignored(u, v, attr):
return remove_edges(A, is_ignored)


@dataclass(frozen=True, order=True)
class SynAnchor:
    """
    Artificial graph node that marks one side of a cluster of nodes connected by temp-syn edges.

    The dataclass is frozen, so instances are immutable and hashable and can be used as
    networkx nodes. Two anchors are equal iff both their group and their side are equal.
    """
    # the members of the temp-syn cluster this anchor belongs to
    syn_group: frozenset
    # the side of the cluster this anchor marks (Side.START before, Side.END after the cluster)
    side: Side

    def __str__(self):
        # frozenset iteration order is arbitrary, so sort the member labels to get a
        # reproducible label for the same cluster
        members = ', '.join(sorted(ref.label for ref in self.syn_group))
        return f"{self.side.label}({members})"


def add_syn_nodes(source_graph: nx.MultiDiGraph, mode: Optional[str] = None) -> nx.MultiDiGraph:
    """
    Creates a copy of the graph with appropriate handling of temp-syn edges.

    An edge u –temp-syn→ v models that u and v have been written approximately at the same time. Semantically,
    these are symmetric (or undirected) edges, so all nodes connected via (only) temp-syn edges form a cluster
    (more formally, these are the non-trivial weakly connected components of the subgraph induced by the temp-syn
    edges).

    This function places two artificial nodes of class `SynAnchor` before and after each cluster, connecting the
    SynAnchor nodes to the members of the cluster using regular ``temp-pre`` edges, and connects the SynAnchor
    nodes to the rest of the graph according to the given mode:

    - ``ignore``: No SynAnchors, just ignore the temp-syn edges completely
    - ``copy``: Copy all in-edges that come from nodes not in the cluster to the anchor before, and all out-edges
      that connect to nodes outside the cluster to the anchor after the cluster.
    - ``closest`` (synonym: ``nearest``): Connect the anchors to the closest date nodes of any of the witnesses
      in the group, i.e. the latest date before and the earliest date after the cluster
    - ``farthest``: Connect the anchors to the farthest date nodes of any of the witnesses in the group, i.e. the
      earliest date before and the latest date after the cluster

    Args:
        source_graph: The graph to work on. Is not modified
        mode: ignore, copy, closest or farthest; if None, take the value from the config option ``temp_syn``

    Returns:
        a modified copy of the input graph. In mode ``ignore`` (or with an empty mode) the input graph itself
        is returned unchanged, not a copy.
    """
    if mode is None:
        mode = config.temp_syn
    if not mode or mode == 'ignore':
        logger.warning('No temp-syn handling (mode==%s)', mode)
        return source_graph
    if mode == 'nearest':
        # 'nearest' is accepted as a synonym for 'closest'
        mode = 'closest'
    if mode not in ('copy', 'closest', 'farthest'):
        logger.warning('Unknown temp-syn mode %s: anchors will not be connected to nodes outside their cluster',
                       mode)

    syn_only = source_graph.edge_subgraph((u, v, k) for (u, v, k, kind) in source_graph.edges(keys=True, data='kind')
                                          if kind == 'temp-syn')
    syn_groups = [comp for comp in nx.weakly_connected_components(syn_only) if len(comp) > 1]
    logger.info('Adding temp-syn nodes in mode %s for %d clusters', mode, len(syn_groups))
    result = source_graph.copy()
    for component in syn_groups:
        syn_group: FrozenSet[Reference] = frozenset(component)

        # edges that cross the cluster boundary; edges between cluster members are dropped
        in_edge_view = source_graph.in_edges(nbunch=syn_group, keys=True, data=True)
        out_edge_view = source_graph.out_edges(nbunch=syn_group, keys=True, data=True)
        in_edges = [(u, v, k, attr) for u, v, k, attr in in_edge_view if u not in syn_group]
        out_edges = [(u, v, k, attr) for u, v, k, attr in out_edge_view if v not in syn_group]

        # frame every member of the cluster: before → member → after
        before = SynAnchor(syn_group, Side.START)
        after = SynAnchor(syn_group, Side.END)
        for ref in syn_group:
            result.add_edge(before, ref, kind='temp-pre', source=BiblSource('faust://temp-syn'))  # TODO add orig source
            result.add_edge(ref, after, kind='temp-pre', source=BiblSource('faust://temp-syn'))

        if mode == 'copy':
            # NOTE(review): the original edge keys are reused, so two in-edges u→a and u→b with the same key
            # would end up as a single edge u→before — confirm whether that is intended
            result.add_edges_from([(u, before, k, attr) for u, v, k, attr in in_edges])
            result.add_edges_from([(after, v, k, attr) for u, v, k, attr in out_edges])
        elif mode in ('closest', 'farthest'):
            # For in-edges the date node is the source (index 0), for out-edges it is the target (index 1);
            # the source of an out-edge is always a cluster member.
            dated_in = [edge for edge in in_edges if isinstance(edge[0], date)]
            dated_out = [edge for edge in out_edges if isinstance(edge[1], date)]
            if mode == 'closest':
                edge_before = max(dated_in, key=itemgetter(0), default=None)
                edge_after = min(dated_out, key=itemgetter(1), default=None)
            else:
                edge_before = min(dated_in, key=itemgetter(0), default=None)
                edge_after = max(dated_out, key=itemgetter(1), default=None)
            if edge_before is not None:
                result.add_edge(edge_before[0], before, **edge_before[-1])
            if edge_after is not None:
                result.add_edge(after, edge_after[1], **edge_after[-1])

    return result


class _ConflictInfo:
def __init__(self, graphs: MacrogenesisInfo, edge: MultiEdge):
self.u, self.v, self.k, self.attr = edge
Expand Down
42 changes: 39 additions & 3 deletions src/macrogen/graphutils.py
@@ -1,7 +1,7 @@
from collections import defaultdict
from datetime import date
from pathlib import Path
from typing import List, Iterable, Tuple, Any, Generator, Union, TypeVar, Callable, Dict, Sequence
from typing import List, Iterable, Tuple, Any, Generator, Union, TypeVar, Callable, Dict, Sequence, Optional

import networkx as nx

Expand Down Expand Up @@ -37,9 +37,11 @@ def pathlink(*nodes) -> Path:
node_names.append(node.filename.stem)
elif isinstance(node, date):
node_names.append(node.isoformat())
elif isinstance(node, str):
node_names.append(node)
else:
logger.warning('Unknown node type: %s (%s)', type(node), node)
node_names.append(str(hash(node)))
node_names.append(base_n(hash(node), 62))
return Path("--".join(node_names) + '.php')


Expand Down Expand Up @@ -240,4 +242,38 @@ def simplify_timeline(graph: nx.MultiDiGraph):
# else:
# if prev is not None:
# graph.add_edge(prev, node, kind='timeline')
# prev = node
# prev = node


def base_n(number: int, base: int = 10, neg: Optional[str] = '-') -> str:
    """
    Calculates a base-n string representation of the given number.

    Args:
        number: The number to convert
        base: 2-62. Digits are taken from ``0-9``, ``a-z``, ``A-Z``, in that order.
        neg: String to prepend to the representation of negative numbers. If None,
            negative numbers are rejected.

    Returns:
        string representing number in the given base

    Raises:
        TypeError: if number is not an integer
        ValueError: if base is out of range, if neg is part of the digit alphabet, or if
            number is negative and neg is None
    """
    alphabet = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

    if not isinstance(number, int):
        raise TypeError(f"Number must be an integer, not a {type(number)}")
    if neg is None and number < 0:
        raise ValueError("number must not be negative if no neg character is given")
    if base < 2 or base > len(alphabet):
        raise ValueError("Base must be between 2 and 62")
    # a sign taken from the digit alphabet would make the result ambiguous
    if neg is not None and neg in alphabet:
        raise ValueError(f"neg char, '{neg}', must not be from alphabet '{alphabet}'")

    if number == 0:
        return alphabet[0]

    # collect digits least significant first, then reverse
    digits = []
    rest = abs(number)
    while rest > 0:
        rest, digit = divmod(rest, base)
        digits.append(alphabet[digit])

    if number < 0:
        digits.append(neg)

    return "".join(reversed(digits))
6 changes: 5 additions & 1 deletion src/macrogen/visualize.py
Expand Up @@ -16,7 +16,7 @@
from .config import config
from .datings import add_timeline_edges
from .bibliography import BiblSource
from .graphutils import pathlink
from .graphutils import pathlink, base_n
from .uris import Reference
from .graph import Node
from .splitgraph import SplitReference
Expand Down Expand Up @@ -45,6 +45,10 @@ def simplify_graph(original_graph: nx.MultiDiGraph) -> nx.MultiDiGraph:
attrs['kind'] = node.side.value
else:
attrs['kind'] = node.__class__.__name__
else:
attrs['kind'] = type(node).__name__
attrs['label'] = str(node)
translation[node] = base_n(hash(node), 62) # use a stable, short representation
_simplify_attrs(attrs)

nx.relabel_nodes(graph, translation, copy=False)
Expand Down

0 comments on commit 3f01562

Please sign in to comment.