From f9d3d28d67460e8d16eb21d8e0613f9c68fcb9b4 Mon Sep 17 00:00:00 2001
From: Thorsten Vitt <thorsten.vitt@uni-wuerzburg.de>
Date: Mon, 18 Feb 2019 10:48:20 +0100
Subject: [PATCH] Basic configuration setup

---
 logging.yaml                  |   2 +-
 src/macrogen/__init__.py      |   4 +-
 src/macrogen/bibliography.py  | 117 +++++++++++++++
 src/macrogen/config.py        | 270 ++++++++++++++++++++++++++++++++++
 src/macrogen/convert_table.py |   6 +
 src/macrogen/datings.py       | 184 +++++------------------
 src/macrogen/etc/default.yaml |  24 +++
 src/macrogen/graph.py         |  34 ++---
 src/macrogen/main.py          |   5 +-
 src/macrogen/report.py        |  12 +-
 src/macrogen/uris.py          |  13 +-
 src/macrogen/visualize.py     |  29 +++-
 tests/test_config.py          |  22 +++
 tests/test_main.py            |   2 +-
 14 files changed, 537 insertions(+), 187 deletions(-)
 create mode 100644 src/macrogen/bibliography.py
 create mode 100644 src/macrogen/config.py
 create mode 100644 src/macrogen/etc/default.yaml
 create mode 100644 tests/test_config.py

diff --git a/logging.yaml b/logging.yaml
index ee9adc8..0c71cea 100644
--- a/logging.yaml
+++ b/logging.yaml
@@ -13,7 +13,7 @@ handlers:
 root:
   handlers:
     - console
-  level: INFO
+  level: WARNING
 
 loggers:
   graph:
diff --git a/src/macrogen/__init__.py b/src/macrogen/__init__.py
index 3970e29..666cf4a 100644
--- a/src/macrogen/__init__.py
+++ b/src/macrogen/__init__.py
@@ -1,2 +1,4 @@
-from .graph import macrogenesis_graphs, BiblSource
+from .graph import macrogenesis_graphs
+from macrogen.bibliography import BiblSource
 from .uris import Reference, Witness, AmbiguousRef, Inscription
+from .visualize import simplify_graph, write_dot, render_file, render_all
\ No newline at end of file
diff --git a/src/macrogen/bibliography.py b/src/macrogen/bibliography.py
new file mode 100644
index 0000000..900a804
--- /dev/null
+++ b/src/macrogen/bibliography.py
@@ -0,0 +1,117 @@
+import csv
+from collections import namedtuple, defaultdict
+from typing import Dict, Union, IO
+
+import requests
+from lxml import etree
+
+from .config import config
+from . import faust
+
+BibEntry = namedtuple('BibEntry', ['uri', 'citation', 'reference', 'weight'])
+
+
+def _parse_bibliography(bibxml: Union[str, IO]) -> Dict[str, BibEntry]:
+    """Parses the bibliography XML in bibxml. Returns a dictionary mapping an URI to a corresponding bibliography entry."""
+    db: Dict[str, BibEntry] = {}
+    scores = config.bibscores or {}  # `bibscores:` is empty (None) in the default configuration
+    et = etree.parse(bibxml)
+    for bib in et.xpath('//f:bib', namespaces=faust.namespaces):
+        uri = bib.get('uri')
+        citation = bib.find('f:citation', namespaces=faust.namespaces).text
+        reference = bib.find('f:reference', namespaces=faust.namespaces).text
+        db[uri] = BibEntry(uri, citation, reference, scores.get(uri, 1))  # sources without a score weigh 1
+    return db
+
+
+_bib_labels = {
+    'faust://self': 'Faustedition',
+    'faust://model/inscription': 'Inskription von',
+    'faust://orphan/adoption': 'Datierungsinhalt für',
+    'faust://heuristic': 'künstliche Datierung'
+}
+
+
+class BiblSource:
+    """
+    One bibliographic source referenced from a macrogenesis XML file.
+    """
+
+    def __init__(self, uri, detail=''):
+        """
+        Sets up the source and looks up its weight in the configured bibliography.
+
+        Args:
+            uri: should be a faust://bibliography/ URI or one of the special values
+            detail: detail string like pages; None is stored as the empty string
+        """
+        self.uri = uri
+        self.detail = '' if detail is None else detail
+        known = uri in config.bibliography
+        self.weight = config.bibliography[uri].weight if known else 1
+
+    def __eq__(self, other):
+        """Sources are equal if both URI and detail match; other types use the default comparison."""
+        if not isinstance(other, BiblSource):
+            return super().__eq__(other)
+        return self.uri == other.uri and self.detail == other.detail
+
+    def __hash__(self):
+        return hash(self.detail) ^ hash(self.uri)
+
+    def __str__(self):
+        """The citation, followed by the detail on a line of its own (unless the detail is None)."""
+        if self.detail is None:
+            return self.citation
+        return self.citation + '\n' + self.detail
+
+    @property
+    def filename(self):
+        """
+        The source without detail, rendered so that it can be used as part of a filename.
+        """
+        uri = self.uri
+        if not uri.startswith('faust://bibliography'):
+            return uri.replace('faust://', '').replace('/', '-')
+        return uri.replace('faust://bibliography/', '')
+
+    @property
+    def citation(self):
+        """
+        Only the short citation, w/o detail: bibliography entry, special label or the filename form.
+
+        Example:
+            Bohnenkamp 19994
+        """
+        bibliography = config.bibliography
+        if self.uri in bibliography:
+            return bibliography[self.uri].citation
+        if self.uri in _bib_labels:
+            return _bib_labels[self.uri]
+        return self.filename
+
+    @property
+    def long_citation(self):
+        """The full reference if the bibliography knows the URI, the short citation otherwise."""
+        if self.uri not in config.bibliography:
+            return self.citation
+        return config.bibliography[self.uri].reference
+
+
+def read_scores(scorefile):
+    """
+    Parses the bibliography score file: tab-separated rows of URI and integer score.
+
+    Rows that are too short (e.g. blank lines) or have a non-integer score are skipped with a warning.
+
+    Returns:
+        Map uri -> score, defaulting to 1 for URIs without a row
+    """
+    scores = defaultdict(lambda: 1)
+    logger = config.getLogger(__name__)
+    for row in csv.reader(scorefile, delimiter='\t'):
+        try:
+            scores[row[0]] = int(row[1])
+        except (ValueError, IndexError) as e:
+            logger.warning('Skipping scorefile row %s: %s', row, e)
+    return scores
diff --git a/src/macrogen/config.py b/src/macrogen/config.py
new file mode 100644
index 0000000..5d9887b
--- /dev/null
+++ b/src/macrogen/config.py
@@ -0,0 +1,270 @@
+"""
+Configuration etc.
+
+
+Configuration and data files:
+
+- logging.yaml
+- styles.yaml
+- reference-normalisation.csv
+- bibscores.tsv
+- sigils.json
+- paralipomena.json
+- genetic_bar_graph.json
+
+Additionally:
+
+- macrogenesis data
+- output directory
+
+Optional:
+
+- graph file(s) to read from
+
+Additional stuff to configure:
+
+- Render / Render graphs up to ...
+- algorithm / threshold
+"""
+import json
+from io import BytesIO, StringIO
+from os.path import expanduser, expandvars
+from pathlib import Path
+from typing import Optional, IO, Callable, Any, Tuple, Mapping
+from urllib.parse import urlparse
+from .bibliography import _parse_bibliography
+
+import pkg_resources
+import requests
+from lxml import etree
+from ruamel.yaml import YAML
+
+import logging
+
+logger = logging.getLogger(__name__ + '.preliminary')
+
+
+class CachedFile:
+    """Loads data from a local file or from an URL, optionally caching downloaded data."""
+
+    def __init__(self, file_or_url: str, cache_dir: Optional[Path] = None):
+        """
+        Creates a caching file loader.
+
+        Args:
+            file_or_url: the url or path to the file to load
+            cache_dir: if present, a directory where to cache the file. If absent, don’t cache.
+        """
+        url = urlparse(file_or_url)
+
+        if url.scheme:
+            path = Path(url.path)
+            self.url = file_or_url
+            self.url_parsed = url
+            self.path = None
+            self.is_url = True
+        else:
+            path = Path(file_or_url)
+            self.url = None
+            self.path = path
+            self.is_url = False
+
+        if self.is_url and cache_dir is not None:
+            self.path = cache_dir.joinpath(path.name)
+
+    def open(self, offline=False, mode="rt") -> IO:
+        """
+        Opens the file or URL.
+
+        Args:
+            offline: Never access the internet, i.e. serve URLs from the cache only.
+            mode: file mode for `open`; text is read as UTF-8 (a BOM is skipped)
+
+        Returns:
+            open IO, either to the local or cached file or to the remotely fetched content
+        """
+        binary = "b" in mode
+        if self.is_url and not offline:
+            logger.debug('fetching %s', self.url)
+            response = requests.get(self.url)
+            response.raise_for_status()  # never cache or parse an HTTP error page
+            if self.path is None:
+                # no cache configured: serve the content from memory
+                return BytesIO(response.content) if binary else StringIO(response.text)
+            # dump to cache and serve from the cache file below
+            logger.debug('saving as %s', self.path)
+            self.path.parent.mkdir(parents=True, exist_ok=True)  # cache dir may not exist yet
+            if binary:
+                self.path.write_bytes(response.content)
+            else:
+                self.path.write_text(response.text, encoding='utf-8')
+
+        if self.path is None:
+            raise FileNotFoundError(f'{self.url} is not cached and must not be fetched in offline mode')
+        if binary:
+            return self.path.open(mode=mode)  # binary mode does not take an encoding
+        return self.path.open(mode=mode, encoding='utf-8-sig')
+
+
+class LazyConfigLoader:
+    """
+    Descriptor that lazily loads and parses a file named in the owner's ``config`` mapping, once per instance.
+    """
+
+    def __init__(self, name: str, parser: Optional[Callable[[IO], Any]] = None,
+                 fallback_resource: Optional[Tuple[str, str]] = None):
+        self.name = name  # configuration key holding the path or URL
+        self.parser = parser  # called with the open file; if not callable, the raw content is used
+        self.resource = fallback_resource  # (package, resource name) used if nothing is configured
+
+    def __get__(self, instance, owner):
+        if instance is None:  # accessed on the class itself
+            return self
+        if self.name not in instance.__dict__.setdefault('_data', {}):  # bypasses the owner's __getattr__
+            self.load_data(instance)
+        return instance._data[self.name]
+
+    def load_data(self, instance):
+        source = instance.config.get(self.name, None)
+        if source:
+            logger.info('Loading %s from %s', self.name, source)
+            cache = Path(instance.config.get('cache', '.cache'))
+            offline = instance.config.get('offline', False)
+            cached_file = CachedFile(source, cache)
+            with cached_file.open(offline) as file:
+                self.parse_data(file, instance)
+        elif self.resource:
+            logger.debug('Loading %s from internal configuration %s', self.name, self.resource)
+            with pkg_resources.resource_stream(*self.resource) as file:
+                self.parse_data(file, instance)
+        else:
+            raise ValueError(
+                    f"Cannot access property {self.name}: Neither configured source nor fallback resource available")
+
+    def parse_data(self, file, instance):
+        instance._data[self.name] = self.parser(file) if callable(self.parser) else file.read()
+
+
+_yaml = YAML(typ='rt')
+_cfg = 'macrogen'
+
+
+class _Proxy:
+    """Creates its target via ``constructor(*args, **kwargs)`` on first attribute access and delegates to it."""
+    def __init__(self, constructor, *args, **kwargs):
+        object.__setattr__(self, '_constructor', constructor)  # our own __setattr__ forwards to the target
+        object.__setattr__(self, '_args', args)
+        object.__setattr__(self, '_kwargs', kwargs)
+        object.__setattr__(self, '_target', None)
+
+    def _init_proxy(self):
+        if self._target is None:
+            object.__setattr__(self, '_target', self._constructor(*self._args, **self._kwargs))
+
+    def __getattr__(self, item):
+        self._init_proxy()
+        return getattr(self._target, item)
+
+    def __setattr__(self, key, value):
+        self._init_proxy()
+        return setattr(self._target, key, value)
+
+    def __delattr__(self, item):
+        self._init_proxy()
+        return delattr(self._target, item)
+
+class _Accessor:
+    """Turns attribute access into a call: ``accessor.name`` returns ``accessor_function('name')``."""
+    def __init__(self, accessor_function: Callable[[Any], Any]):
+        self._accessor = accessor_function
+
+    def __getattr__(self, name):
+        return self._accessor(name)
+
+
+class Configuration:
+    """
+    Ready to use configuration data for the application.
+
+    Settings are attributes, `path.<key>` returns them as a Path. Data coming from files is loaded lazily.
+    """
+
+    logging = LazyConfigLoader('logging', _yaml.load, (_cfg, 'etc/logging.yaml'))
+    styles = LazyConfigLoader('styles', _yaml.load, (_cfg, 'etc/styles.yaml'))
+    # reference-normalization.csv
+    # bibscores.tsv
+    sigils = LazyConfigLoader('sigils', json.load)
+    paralipomena = LazyConfigLoader('paralipomena', json.load)
+    genetic_bar_graph = LazyConfigLoader('genetic_bar_graph', json.load)
+    bibliography = LazyConfigLoader('bibliography', _parse_bibliography)
+
+    def __init__(self, config_override=None):
+        """Creates a configuration; `config_override` values take precedence over the configuration files."""
+        self.config_override = {} if config_override is None else config_override
+
+        def get_path(key):
+            return Path(getattr(self, key))
+
+        # config.path.<key> is the configured value of <key> as a Path
+        self.path = _Accessor(get_path)
+
+    @property
+    def config_override(self):
+        return self._config_override
+
+    @config_override.setter
+    def config_override(self, value):
+        if 'config' in self.__dict__:  # hasattr() would load the configuration as a side effect
+            logger.warning('Configuration has already been loaded. Some override values may not have any effect.')
+            self._apply_override(value)
+        self._config_override = value
+
+    def _apply_override(self, override=None):
+        """Copies all override values that are not None into the loaded configuration."""
+        override = self.config_override if override is None else override
+        for key, value in override.items():
+            if value is not None:
+                self.config[key] = value
+
+    def __getattr__(self, item):
+        if item == 'config':
+            self._load_config()
+            return self.__dict__['config']
+        if item in self.config:
+            return self.config[item]
+        raise AttributeError(f'No configuration item {item}')
+
+    def _load_config(self):
+        # First, load the default config
+        logger.debug("Loading default configuration")
+        with pkg_resources.resource_stream(_cfg, 'etc/default.yaml') as f:
+            config: Mapping = _yaml.load(f)
+        # now work through all config files configured in the default config
+        # if they exist
+        for fn in config.get('config_files') or ():
+            p = Path(expanduser(expandvars(fn)))
+            if p.exists():
+                logger.info('Loading configuration file %s', p)
+                with p.open(encoding='utf-8') as f:
+                    config.update(_yaml.load(f) or {})  # an empty file loads as None
+        # store before use: __getattr__('config') would otherwise call _load_config again (endless recursion)
+        self.__dict__['config'] = config
+        # now update using command line options etc.
+        self._apply_override()
+        # finally, let’s configure logging
+        self._init_logging()
+
+    def _init_logging(self):
+        global logger
+        from logging.config import dictConfig
+        dictConfig(self.logging)
+        logger = logging.getLogger(__name__)
+
+    def getLogger(self, name):
+        return _Proxy(logging.getLogger, name)
+
+    def relative_path(self, absolute_path):
+        return Path(absolute_path).relative_to(self.path.data)
+
+
+config = Configuration()
diff --git a/src/macrogen/convert_table.py b/src/macrogen/convert_table.py
index 81fb1a2..04b6265 100644
--- a/src/macrogen/convert_table.py
+++ b/src/macrogen/convert_table.py
@@ -8,6 +8,12 @@
 from lxml.builder import ElementMaker
 import re
 
+"""
+This script can be used to convert an excel table in a special format to macrogenesis XML files.
+
+TODO move out of macrogenesis package?
+"""
+
 F = ElementMaker(namespace=faust.namespaces['f'], nsmap={None: faust.namespaces['f']})
 
 def make_group(sigils, kind='', source='', comment='', notes=''):
diff --git a/src/macrogen/datings.py b/src/macrogen/datings.py
index 82fd929..24ffb05 100644
--- a/src/macrogen/datings.py
+++ b/src/macrogen/datings.py
@@ -1,27 +1,23 @@
 """
 Functions to parse the XML datings and build a graph out of them
 """
-import csv
-import datetime
 from abc import ABCMeta, abstractmethod
-from collections import namedtuple, defaultdict
-from typing import List, Tuple, Optional, Any, Dict, Generator
+from pathlib import Path
+from time import strptime
+from typing import List, Tuple, Optional, Any, Generator
 
 import networkx as nx
-import requests
+from datetime import date, datetime, timedelta
 from lxml import etree
 from more_itertools import pairwise
 
-from . import faust
-from .faust_logging import logging
+from .bibliography import BiblSource
 from .uris import Witness, Reference
+from .config import config
 
-logger = logging.getLogger(__name__)
+logger = config.getLogger(__name__)
 
-# When there is only one notbefore/notafter border -- how much should we adjust
-HALF_INTERVAL_CORRECTION = datetime.timedelta(365/2)
-
-def parse_datestr(datestr: str) -> datetime.date:
+def parse_datestr(datestr: str) -> date:
     """
     Parses a date str like 1799-01-01 to a date.
 
@@ -31,7 +27,7 @@ def parse_datestr(datestr: str) -> datetime.date:
     if datestr is None:
         return None
 
-    dt = datetime.datetime.strptime(datestr, '%Y-%m-%d')
+    dt = datetime.strptime(datestr, '%Y-%m-%d')
     if dt is not None:
         return dt.date()
     else:
@@ -48,118 +44,6 @@ def __init__(self, msg, element: Optional[etree._Element] = None):
         super().__init__(msg)
 
 
-# Entry in the bibliography
-BibEntry = namedtuple('BibEntry', ['uri', 'citation', 'reference', 'weight'])
-
-
-def _parse_bibliography(url: str) -> Dict[str, BibEntry]:
-    """Parses the bibliography file at url. Returns a dictionary mapping an URI to a corresponding bibliography entry."""
-    db: Dict[str, BibEntry] = {}
-    scores = _read_scores()
-    et = etree.fromstring(requests.get(url).content)
-    for bib in et.xpath('//f:bib', namespaces=faust.namespaces):
-        uri = bib.get('uri')
-        citation = bib.find('f:citation', namespaces=faust.namespaces).text
-        reference = bib.find('f:reference', namespaces=faust.namespaces).text
-        db[uri] = BibEntry(uri, citation, reference, scores[uri])
-    return db
-
-
-def _read_scores():
-    """
-    Parses the bibliography score file.
-
-    Returns:
-        Map uri -> score
-
-    """
-    scores = defaultdict(lambda: 1)
-    try:
-        with open('bibscores.tsv', encoding='utf-8') as scorefile:
-            r = csv.reader(scorefile, delimiter='\t')
-            for row in r:
-                try:
-                    scores[row[0]] = int(row[1])
-                except ValueError as e:
-                    logger.warning('Skipping row %s: %s', row, e)
-    except FileNotFoundError as e:
-        logger.warning('Could not read score file: %s. Will use default score', e)
-    return scores
-
-
-bibliography = _parse_bibliography('https://raw.githubusercontent.com/faustedition/faust-gen-html/master/xslt/bibliography.xml')
-
-_bib_labels = {
-    'faust://self': 'Faustedition',
-    'faust://model/inscription': 'Inskription von',
-    'faust://orphan/adoption': 'Datierungsinhalt für',
-    'faust://heuristic': 'künstliche Datierung'
-}
-
-class BiblSource:
-    """
-    A bibliographic source in a macrogenesis XML file.
-    """
-
-    def __init__(self, uri, detail=''):
-        """
-        Creates a bibliographic source.
-        Args:
-            uri: should be a faust://bibliography/ URI or one of the special values
-            detail: detail string like pages
-        """
-        self.uri = uri
-        if detail is None:
-            detail = ''
-        self.detail = detail
-        self.weight = bibliography[uri].weight if uri in bibliography else 1
-
-    def __eq__(self, other):
-        if isinstance(other, BiblSource):
-            return self.uri == other.uri and self.detail == other.detail
-        else:
-            return super().__eq__(other)
-
-    def __hash__(self):
-        return hash(self.uri) ^ hash(self.detail)
-
-    def __str__(self):
-        result = self.citation
-        if self.detail is not None:
-            result += '\n' + self.detail
-        return result
-
-    @property
-    def filename(self):
-        """
-        A string representation of the source (w/o detail) that is usable as part of a filename.
-        """
-        if self.uri.startswith('faust://bibliography'):
-            return self.uri.replace('faust://bibliography/', '')
-        else:
-            return self.uri.replace('faust://', '').replace('/', '-')
-
-    @property
-    def citation(self):
-        """
-        String representation of only the citation, w/o detail.
-
-        Example:
-            Bohnenkamp 19994
-        """
-        if self.uri in bibliography:
-            return bibliography[self.uri].citation
-        elif self.uri in _bib_labels:
-            return _bib_labels[self.uri]
-        else:
-            return self.filename
-
-    @property
-    def long_citation(self):
-        if self.uri in bibliography:
-            return bibliography[self.uri].reference
-        else:
-            return self.citation
 
 
 class _AbstractDating(metaclass=ABCMeta):
@@ -176,11 +60,11 @@ def __init__(self, el: etree._Element):
         Args:
             el: The basic assertion element. This will usually be <relation> or <date>.
         """
-        self.items: List[Reference] = [Witness.get(uri) for uri in el.xpath('f:item/@uri', namespaces=faust.namespaces)]
+        self.items: List[Reference] = [Witness.get(uri) for uri in el.xpath('f:item/@uri', namespaces=config.namespaces)]
         self.sources = tuple(BiblSource(source.get('uri'), source.text)
-                             for source in el.xpath('f:source', namespaces=faust.namespaces))
-        self.comments = tuple(comment.text for comment in el.xpath('f:comment', namespaces=faust.namespaces))
-        self.xmlsource: Tuple[str, int] = (faust.relative_path(el.getroottree().docinfo.URL), el.sourceline)
+                             for source in el.xpath('f:source', namespaces=config.namespaces))
+        self.comments = tuple(comment.text for comment in el.xpath('f:comment', namespaces=config.namespaces))
+        self.xmlsource: Tuple[str, int] = (config.relative_path(el.getroottree().docinfo.URL), el.sourceline)
         self.ignore = el.get('ignore', 'no') == 'yes'
 
     @abstractmethod
@@ -236,7 +120,7 @@ def __init__(self, el: etree._Element):
             raise InvalidDatingError('Backwards dating (%s), this would have caused a conflict' % self, el)
 
     @property
-    def start_attr(self) -> Tuple[str, datetime.date]:
+    def start_attr(self) -> Tuple[str, date]:
         """
         The attribute representing the start of the interval.
 
@@ -246,7 +130,7 @@ def start_attr(self) -> Tuple[str, datetime.date]:
         return _firstattr(self, 'from_', 'when', 'not_before')
 
     @property
-    def end_attr(self) -> Tuple[str, datetime.date]:
+    def end_attr(self) -> Tuple[str, date]:
         """
         The attribute representing the end of the interval.
 
@@ -256,22 +140,22 @@ def end_attr(self) -> Tuple[str, datetime.date]:
         return _firstattr(self, 'to', 'when', 'not_after')
 
     @property
-    def start(self) -> Optional[datetime.date]:
+    def start(self) -> Optional[date]:
         """The start date, regardless of the detailed semantics"""
         return self.start_attr[1]
 
     @property
-    def end(self) -> Optional[datetime.date]:
+    def end(self) -> Optional[date]:
         """The end date, regardless of the detailed semantics"""
         return self.end_attr[1]
 
     @property
-    def date_before(self) -> Optional[datetime.date]:
-        return self.start - datetime.timedelta(days=1) if self.start is not None else None
+    def date_before(self) -> Optional[date]:
+        return self.start - timedelta(days=1) if self.start is not None else None
 
     @property
-    def date_after(self) -> Optional[datetime.date]:
-        return self.end + datetime.timedelta(days=1) if self.end is not None else None
+    def date_after(self) -> Optional[date]:
+        return self.end + timedelta(days=1) if self.end is not None else None
 
     def __str__(self):
         if self.when is not None:
@@ -287,14 +171,17 @@ def add_to_graph(self, G: nx.MultiDiGraph):
         for item in self.items:
             for source in self.sources:
                 if self.start is not None:
-                    G.add_edge(self.date_before, item, kind=self.start_attr[0], source=source, dating=self, xml=self.xmlsource, ignore=self.ignore, comments=self.comments)
+                    G.add_edge(self.date_before, item, kind=self.start_attr[0], source=source, dating=self,
+                               xml=self.xmlsource, ignore=self.ignore, comments=self.comments)
                     if self.end is None and not self.ignore:
-                        G.add_edge(item, self.date_before + HALF_INTERVAL_CORRECTION, kind='not_after', source=BiblSource('faust://heuristic'), xml=self.xmlsource)
+                        G.add_edge(item, self.date_before + timedelta(config.half_interval_correction), kind='not_after',
+                                   source=BiblSource('faust://heuristic'), xml=self.xmlsource)
                 if self.end is not None:
-                    G.add_edge(item, self.date_after, kind=self.end_attr[0], source=source, dating=self, xml=self.xmlsource, ignore=self.ignore, comments=self.comments)
+                    G.add_edge(item, self.date_after, kind=self.end_attr[0], source=source, dating=self,
+                               xml=self.xmlsource, ignore=self.ignore, comments=self.comments)
                     if self.start is None and not self.ignore:
-                        G.add_edge(self.date_after - HALF_INTERVAL_CORRECTION, item, kind='not_before', source=BiblSource('faust://heuristic'), xml=self.xmlsource)
-
+                        G.add_edge(self.date_after - timedelta(config.half_interval_correction), item, kind='not_before',
+                                   source=BiblSource('faust://heuristic'), xml=self.xmlsource)
 
 
 class RelativeDating(_AbstractDating):
@@ -334,9 +221,9 @@ def _parse_file(filename: str) -> Generator[_AbstractDating, None, None]:
 
     """
     tree = etree.parse(filename)
-    for element in tree.xpath('//f:relation', namespaces=faust.namespaces):
+    for element in tree.xpath('//f:relation', namespaces=config.namespaces):
         yield RelativeDating(element)
-    for element in tree.xpath('//f:date', namespaces=faust.namespaces):
+    for element in tree.xpath('//f:date', namespaces=config.namespaces):
         try:
             yield AbsoluteDating(element)
         except InvalidDatingError as e:
@@ -350,7 +237,7 @@ def _parse_files() -> Generator[_AbstractDating, None, None]:
 
     """
 
-    for file in faust.macrogenesis_files():
+    for file in Path(config.data, 'macrogenesis').rglob('**/*.xml'):
         yield from _parse_file(file)
 
 
@@ -360,7 +247,7 @@ def add_timeline_edges(graph):
 
     Afterwards, each date node in the graph will have an edge to the next date represented in the graph.
     """
-    date_nodes = sorted(node for node in graph.nodes if isinstance(node, datetime.date))
+    date_nodes = sorted(node for node in graph.nodes if isinstance(node, date))
     for earlier, later in pairwise(date_nodes):
         if earlier != later and (earlier, later) not in graph.edges:
             graph.add_edge(earlier, later, kind='timeline')
@@ -382,10 +269,11 @@ def simplify_timeline(graph: nx.MultiDiGraph):
                     1798-01-01  ->  1798-02-01
                        `-----> H.x -----^
     """
-    date_nodes = sorted(node for node in graph.nodes if isinstance(node, datetime.date))
+    date_nodes = sorted(node for node in graph.nodes if isinstance(node, date))
     prev = None
     for node in date_nodes:
-        if prev is not None and graph.in_degree(node) == graph.out_degree(node) == 1 and isinstance(graph.successors(node)[0], datetime.date):
+        if prev is not None and graph.in_degree(node) == graph.out_degree(node) == 1 and isinstance(
+                graph.successors(node)[0], date):
             graph.remove_node(node)
         else:
             if prev is not None:
diff --git a/src/macrogen/etc/default.yaml b/src/macrogen/etc/default.yaml
new file mode 100644
index 0000000..60123ca
--- /dev/null
+++ b/src/macrogen/etc/default.yaml
@@ -0,0 +1,24 @@
+## Default configuration for the macrogenesis software
+
+# configuration and data files, either paths or URIs
+data:       data/xml  # XML Data, this is a folder
+logging:    # YAML file with logging configuration
+styles:     # YAML file with styling for the graphviz graphs
+references: # CSV file featuring manual normalizations for references
+bibscores:  # TSV file (FIXME) with scores by source
+sigils:     # JSON file mapping URIs to sigils
+paralipomena: # JSON file mapping paralipomena to sources
+genetic_bar_graph: # JSON file
+report_dir: target/macrogen  # where to save reports and graphs
+
+bibliography: https://raw.githubusercontent.com/faustedition/faust-gen-html/master/xslt/bibliography.xml # The bibliography
+
+## Limits
+half_interval_correction: 182.5    # if we only have a start or end date, the other limit is max. this many days away
+
+## Other data
+namespaces:
+  f: http://www.faustedition.net/ns
+  tei: http://www.tei-c.org/ns/1.0
+
+
diff --git a/src/macrogen/graph.py b/src/macrogen/graph.py
index aaa51d1..418b920 100644
--- a/src/macrogen/graph.py
+++ b/src/macrogen/graph.py
@@ -5,20 +5,20 @@
 
 import csv
 from collections import defaultdict, Counter
+from dataclasses import dataclass
 from datetime import date, timedelta
 from pathlib import Path
-from typing import List, Callable, Any, Dict, Tuple, Union, Hashable, Set
+from typing import List, Callable, Any, Dict, Tuple, Union
 
-import dateutil
 import networkx as nx
-from dataclasses import dataclass
 
-from .datings import base_graph, BiblSource, parse_datestr
-from .faust_logging import logging
+from .bibliography import BiblSource
+from .datings import base_graph, parse_datestr
 from .igraph_wrapper import to_igraph, nx_edges
 from .uris import Reference, Inscription, Witness, AmbiguousRef
+from .config import config
 
-logger = logging.getLogger(__name__)
+logger = config.getLogger(__name__)
 
 EARLIEST = date(1749, 8, 28)
 LATEST = date.today()
@@ -409,32 +409,32 @@ def macrogenesis_graphs() -> MacrogenesisInfo:
         all_conflicting_edges.extend(selfloops)
 
     logger.info('Building DAG from remaining data')
-    dag = working.copy()
-    dag.remove_edges_from(all_conflicting_edges)
+    result_graph = working.copy()
+    result_graph.remove_edges_from(all_conflicting_edges)
 
-    if not nx.is_directed_acyclic_graph(dag):
+    if not nx.is_directed_acyclic_graph(result_graph):
         logger.error('After removing %d conflicting edges, the graph is still not a DAG!', len(all_conflicting_edges))
-        cycles = list(nx.simple_cycles(dag))
+        cycles = list(nx.simple_cycles(result_graph))
         logger.error('It contains %d simple cycles', len(cycles))
     else:
-        logging.info('Double-checking removed edges ...')
+        logger.info('Double-checking removed edges ...')
         for u, v, k, attr in sorted(all_conflicting_edges, key=lambda edge: edge[3].get('weight', 1), reverse=True):
-            dag.add_edge(u, v, **attr)
-            if nx.is_directed_acyclic_graph(dag):
+            result_graph.add_edge(u, v, **attr)
+            if nx.is_directed_acyclic_graph(result_graph):
                 all_conflicting_edges.remove((u, v, k, attr))
-                logging.info('Added edge %s -> %s (%d) back without introducing a cycle.', u, v, attr.get('weight', 1))
+                logger.info('Added edge %s -> %s (%d) back without introducing a cycle.', u, v, attr.get('weight', 1))
             else:
-                dag.remove_edge(u, v)
+                result_graph.remove_edge(u, v)
 
     logger.info('Marking %d conflicting edges for deletion', len(all_conflicting_edges))
     mark_edges_to_delete(base, all_conflicting_edges)
 
     logger.info('Removed %d of the original %d edges', len(all_conflicting_edges), len(working.edges))
 
-    closure = nx.transitive_closure(dag)
+    closure = nx.transitive_closure(result_graph)
     add_inscription_links(base)
 
-    return MacrogenesisInfo(base, working, dag, closure, conflicts)
+    return MacrogenesisInfo(base, working, result_graph, closure, conflicts)
 
 
 def cleanup_graph(A: nx.MultiDiGraph) -> nx.MultiDiGraph:
diff --git a/src/macrogen/main.py b/src/macrogen/main.py
index 8386d5d..f8d3339 100755
--- a/src/macrogen/main.py
+++ b/src/macrogen/main.py
@@ -1,14 +1,13 @@
 #!/usr/bin/env python3
 
-from .faust_logging import logging
-
 import sys
 
+from macrogen.config import config
 from . import graph
 from . import report
 from .visualize import render_all
 
-logger = logging.getLogger('main')
+logger = config.getLogger('main')
 
 
 def main(argv=sys.argv):
diff --git a/src/macrogen/report.py b/src/macrogen/report.py
index cf18fa2..86d2f9e 100644
--- a/src/macrogen/report.py
+++ b/src/macrogen/report.py
@@ -1,4 +1,5 @@
 import json
+from collections import defaultdict, Counter
 from datetime import date, datetime
 from itertools import chain, repeat, groupby
 from operator import itemgetter
@@ -9,24 +10,20 @@
 from lxml.etree import Comment
 from more_itertools import pairwise
 
-from .faust_logging import logging
-
 import csv
-from collections.__init__ import defaultdict, Counter
 from html import escape
 from pathlib import Path
 from typing import Iterable, List, Dict, Mapping, Tuple, Sequence, Union, Generator, Any, Optional
 
 import networkx as nx
 
-from . import faust
-from .datings import BiblSource
+from .config import config
+from .bibliography import BiblSource
 from .graph import MacrogenesisInfo, pathlink, EARLIEST, LATEST, DAY
 from .uris import Reference, Witness, Inscription, UnknownRef, AmbiguousRef
 from .visualize import write_dot, simplify_graph
 
-logger = logging.getLogger(__name__)
-target = Path(faust.config.get('macrogenesis', 'output-dir'))
+logger = config.getLogger(__name__)
 
 RELATION_LABELS = {'not_before': 'nicht vor',
                    'not_after': 'nicht nach',
@@ -242,6 +239,7 @@ def write_html(filename: Path, content: str, head: str = None, breadcrumbs: List
 
 
 def report_components(graphs: MacrogenesisInfo):
+    target = config.path.report_dir
     logger.info('Writing component overview to %s', target)
     target.mkdir(parents=True, exist_ok=True)
     report = f"""<h3>{len(graphs.conflicts)} stark zusammenhängende Komponenten</h3>
diff --git a/src/macrogen/uris.py b/src/macrogen/uris.py
index d050096..3230578 100644
--- a/src/macrogen/uris.py
+++ b/src/macrogen/uris.py
@@ -4,8 +4,6 @@
 """
 from pathlib import Path
 
-from .faust_logging import logging
-
 import codecs
 import csv
 import json
@@ -21,9 +19,9 @@
 import requests
 from lxml import etree
 
-from . import faust
+from .config import config
 
-logger = logging.getLogger(__name__)
+logger = config.getLogger(__name__)
 
 
 def call_recorder(function=None, argument_picker=None):
@@ -408,13 +406,14 @@ def filename(self):
 
 def _collect_wits():
     items = defaultdict(list)  # type: Dict[Union[Witness, Inscription, UnknownRef], List[Tuple[str, int]]]
-    for macrogenetic_file in faust.macrogenesis_files():
+    macrogenesis_files = [f.as_posix() for f in config.path.data.joinpath('macrogenesis').rglob('**/*.xml')]
+    for macrogenetic_file in macrogenesis_files:
         tree = etree.parse(macrogenetic_file)  # type: etree._ElementTree
-        for element in tree.xpath('//f:item', namespaces=faust.namespaces):  # type: etree._Element
+        for element in tree.xpath('//f:item', namespaces=config.namespaces):  # type: etree._Element
             uri = element.get('uri')
             wit = Witness.get(uri, allow_duplicate=True)
             items[wit].append((macrogenetic_file.split('macrogenesis/')[-1], element.sourceline))
-    logger.info('Collected %d references in %d macrogenesis files', len(items), len(faust.macrogenesis_files()))
+    logger.info('Collected %d references in %d macrogenesis files', len(items), len(macrogenesis_files))
     return items
 
 
diff --git a/src/macrogen/visualize.py b/src/macrogen/visualize.py
index 500da9c..3e73e21 100644
--- a/src/macrogen/visualize.py
+++ b/src/macrogen/visualize.py
@@ -1,3 +1,4 @@
+from collections.abc import Sequence
 from datetime import date
 from multiprocessing.pool import Pool
 from pathlib import Path
@@ -8,7 +9,8 @@
 from pygraphviz import AGraph
 from tqdm import tqdm
 
-from .datings import BiblSource, add_timeline_edges
+from .datings import add_timeline_edges
+from .bibliography import BiblSource
 from .faust_logging import logging
 from .graph import pathlink
 from .uris import Reference
@@ -53,16 +55,39 @@ def _simplify_attrs(attrs):
                 attrs[key + '_detail'] = value.detail
         elif value is None:
             del attrs[key]
+        elif isinstance(value, Sequence) and not isinstance(value, str):
+            attrs[key] = " ".join(item.uri if hasattr(item, 'uri') else str(item) for item in value)
         elif type(value) not in {str, int, float, bool}:
             attrs[key] = str(value)
 
 
 def _load_style(filename):
+    """
+    Loads a YAML style file for :func:`write_dot`.
+    :param filename: path to a YAML file with style directions; parsed with ``yaml.load``, so it must be trusted
+    :return: the parsed YAML document (expected to be a dictionary with style directions)
+    """
     with open(filename, encoding='utf-8') as f:
         return yaml.load(f)
 
 
-def write_dot(graph: nx.MultiDiGraph, target='base_graph.dot', style=_load_style('styles.yaml'), highlight=None, record='auto', edge_labels=True):
+def write_dot(graph: nx.MultiDiGraph, target='base_graph.dot', style=_load_style('styles.yaml'),
+              highlight=None, record='auto', edge_labels=True):
+    """
+    Writes a properly styled graphviz file for the given graph.
+
+    Args:
+        graph: the subgraph to draw
+        target: dot file that should be written, may be a Path
+        style (dict): rules for styling the graph
+        highlight: if a node, highlight that in the graph. If a tuple of nodes, highlight the shortest path(s) from the
+                   first to the second node
+        record: record in the queue for `render_all`. If ``"auto"`` dependent on graph size
+        edge_labels (bool): Should we paint edge labels?
+
+    Returns:
+        None.
+    """
     logger.info('Writing %s ...', target)
     target_path = Path(target)
     target_path.parent.mkdir(exist_ok=True, parents=True)
diff --git a/tests/test_config.py b/tests/test_config.py
new file mode 100644
index 0000000..79d9214
--- /dev/null
+++ b/tests/test_config.py
@@ -0,0 +1,22 @@
+import pytest
+
+from macrogen.config import CachedFile
+
+
+@pytest.fixture(scope='session')
+def cache_dir(tmp_path_factory):
+    return tmp_path_factory.mktemp('cache')
+
+
+def test_cf_url(cache_dir):
+    cf = CachedFile('http://faustedition.net/data/paralipomena.js', cache_dir)
+    assert cf.is_url
+    assert cf.path.name == 'paralipomena.js'
+
+
+def test_cf_open(cache_dir):
+    cf = CachedFile('http://faustedition.net/data/paralipomena.js', cache_dir)
+    with cf.open() as content:
+        text = content.read()
+        assert text
+        assert cf.path.exists()
diff --git a/tests/test_main.py b/tests/test_main.py
index 9111fba..fdf7f85 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -1,7 +1,7 @@
 import networkx as nx
 import pytest
 
-from macrogen.datings import BiblSource
+from macrogen import BiblSource
 from macrogen.graph import collapse_edges
 from macrogen.uris import Witness