Skip to content

Commit

Permalink
Implement a less aggressive mode for heuristic dating supplementation.
Browse files Browse the repository at this point in the history
Dating supplementation works on the assumption that when a source states
that a witness was not created after a certain date but says nothing
about the earliest possible moment, it still suggests that the creation
date is reasonably close to that terminus ante quem (and the same holds
the other way around, for a terminus post quem).

The behaviour can be configured using the new option half_interval_mode:
- off: never supplement a missing interval limit
- always: whenever a source gives only one end of the possible dating
  interval, supplement the other interval limit
- light: when we have only sources about one interval limit but none
  about the other, take the strongest source (the one with the highest
  weight) and infer the other interval limit from that.

The length of the artificial interval (in days) can be configured using half_interval_correction.
  • Loading branch information
thvitt committed May 11, 2019
1 parent 80d6bbe commit 9bb2b03
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 4 deletions.
8 changes: 8 additions & 0 deletions src/macrogen/bibliography.py
Expand Up @@ -14,6 +14,14 @@
class BiblSource:
"""
A bibliographic source in a macrogenesis XML file.
Attributes and properties:
uri (str): The faust://bibliography URI
detail (str): Detail string like pages
weight (int): Score for the source
filename (str): Representation of the source (w/o detail) that is usable as part of a filename
citation (str): Short citation
long_citation (str): Detail string for the citation
"""

def __init__(self, uri, detail=''):
Expand Down
25 changes: 21 additions & 4 deletions src/macrogen/datings.py
Expand Up @@ -173,19 +173,19 @@ def add_to_graph(self, G: nx.MultiDiGraph):
if self.start is not None:
G.add_edge(self.date_before, item, kind=self.start_attr[0], source=source, dating=self,
xml=self.xmlsource, ignore=self.ignore, comments=self.comments)
if self.end is None and not self.ignore:
if self.end is None and config.half_interval_mode == 'always' and not self.ignore:
G.add_edge(item, min(self.date_before + timedelta(config.half_interval_correction),
date(1832, 3, 23)), kind='not_after',
trigger_node=self.date_before,
source=BiblSource('faust://heuristic'), xml=self.xmlsource)
source=BiblSource('faust://heuristic', source.citation), xml=self.xmlsource)
if self.end is not None:
G.add_edge(item, self.date_after, kind=self.end_attr[0], source=source, dating=self,
xml=self.xmlsource, ignore=self.ignore, comments=self.comments)
if self.start is None and not self.ignore:
if self.start is None and config.half_interval_mode == 'always' and not self.ignore:
G.add_edge(self.date_after - timedelta(config.half_interval_correction), item,
kind='not_before',
trigger_node=self.date_after,
source=BiblSource('faust://heuristic'), xml=self.xmlsource)
source=BiblSource('faust://heuristic', source.citation), xml=self.xmlsource)


class RelativeDating(_AbstractDating):
Expand Down Expand Up @@ -268,6 +268,23 @@ def build_datings_graph() -> nx.MultiDiGraph:
logger.info('Reading data to build base graph ...')
for dating in _parse_files():
dating.add_to_graph(graph)

if config.half_interval_mode == 'light':
for node in list(graph.nodes):
if isinstance(node, Reference):
post = [(v, attr) for (_, v, attr) in graph.out_edges(node, data=True) if isinstance(v, date)]
pre = [(u, attr) for (u, _, attr) in graph.in_edges(node, data=True) if isinstance(u, date)]
if pre and not post:
other_limit = max(pre, key=lambda item: item[1]['source'].weight)
graph.add_edge(node, other_limit[0] + timedelta(config.half_interval_correction),
source=BiblSource('faust://heuristic', other_limit[1]['source'].citation),
kind='not_after', xml=other_limit[1]['xml'])
elif post and not pre:
other_limit = max(post, key=lambda item: item[1]['source'].weight)
graph.add_edge(other_limit[0] - timedelta(config.half_interval_correction), node,
source=BiblSource('faust://heuristic', other_limit[1]['source'].citation),
kind='not_before', xml=other_limit[1]['xml'])

add_timeline_edges(graph)
return graph

Expand Down
1 change: 1 addition & 0 deletions src/macrogen/etc/default.yaml
Expand Up @@ -19,6 +19,7 @@ bibliography: https://raw.githubusercontent.com/faustedition/faust-gen-html/mast
xmlroot: https://github.com/faustedition/faust-xml/tree/master/xml/macrogenesis # base for links to xml files

## Limits
half_interval_mode: light # off, light, always
half_interval_correction: 182.5 # if only a start or only an end date is known, the other limit is assumed to be at most this many days away
render_node_limit: 1500 # do not layout graphs with more nodes than this
render_timeout: # max number of seconds before rendering a dot file is aborted
Expand Down

0 comments on commit 9bb2b03

Please sign in to comment.