Implement predefined sets of parameters using "presets.json"

earthobservations · Mar 14, 2018 · c100464 · c100464
1 parent 548aec9
commit c100464
Show file tree

Hide file tree

Showing 8 changed files with 75 additions and 10 deletions.
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -8,6 +8,9 @@ development
 - More compact date output for "tabular" mode
 - Fix datetime coercion when encountering invalid datetime values
 - Humanize searching in observation and forecast data for stations, species, phases and quality information
+- Implement predefined sets of parameters using ``presets.json``.
+  Apply with e.g. "--species-preset=mellifera-de-primary".
+  See also https://community.hiveeyes.org/t/phanologischer-kalender-entwicklung/664/23.
 
 2018-03-14 0.5.0
 ================

diff --git a/MANIFEST.in b/MANIFEST.in
@@ -5,3 +5,4 @@ exclude .bumpversion.cfg
 exclude .gitignore
 exclude Makefile
 exclude MANIFEST.in
+recursive-include phenodata *.json
diff --git a/README.rst b/README.rst
@@ -304,7 +304,9 @@ Same observations but only value which have seen corrections::
 
 Forecasting
 ===========
-Forecast based on "beginning of flowering" events of 2015-2017 in Thüringen and Bayern for the given list of species::
+Forecast based on "beginning of flowering" events of 2015-2017 in Thüringen and Bayern for the given list of species.
+Sort by species and date.
+::
 
     phenodata forecast \
         --source=dwd --dataset=annual --partition=recent \
@@ -315,6 +317,23 @@ Forecast based on "beginning of flowering" events of 2015-2017 in Thüringen and
         --humanize --language=german \
         --sort=Spezies,Datum
 
+Forecast based on "beginning of flowering" events of 2015-2017 in Berlin for the named list of species "mellifera-de-primary".
+Sort by date.
+::
+
+    phenodata forecast \
+        --source=dwd --dataset=annual --partition=recent \
+        --station=berlin \
+        --phase="beginning of flowering" \
+        --year=2015,2016,2017 \
+        --humanize --language=german \
+        --sort=Datum \
+        --species-preset=mellifera-de-primary
+
+.. note::
+
+    The species presets like ``mellifera-de-primary`` are currently stored in
+    `presets.json <https://github.com/hiveeyes/phenodata/blob/master/phenodata/dwd/presets.json>`__.
 
 
 *******************

diff --git a/phenodata/command.py b/phenodata/command.py
@@ -8,7 +8,7 @@
 from phenodata.ftp import FTPSession
 from phenodata.dwd.cdc import DwdCdcClient
 from phenodata.dwd.pheno import DwdPhenoData, DwdPhenoDataHumanizer
-from phenodata.util import boot_logging, normalize_options
+from phenodata.util import boot_logging, normalize_options, options_convert_lists
 
 """
 phenodata is a data acquisition and manipulation toolkit for open access phenology data.
@@ -31,7 +31,7 @@ def run():
       phenodata list-quality-bytes --source=dwd [--format=csv]
       phenodata list-filenames --source=dwd --dataset=immediate --partition=recent [--filename=Hasel,Schneegloeckchen] [--year=2017]
       phenodata list-urls --source=dwd --dataset=immediate --partition=recent [--filename=Hasel,Schneegloeckchen] [--year=2017]
-      phenodata (observations|forecast) --source=dwd --dataset=immediate --partition=recent [--filename=Hasel,Schneegloeckchen] [--station-id=164,717] [--species-id=113,127] [--phase-id=5] [--quality-level=10] [--quality-byte=1,2,3] [--station=berlin,brandenburg] [--species=hazel,snowdrop] [--phase=flowering] [--quality=blubb] [--year=2017] [--humanize] [--show-ids] [--language=german] [--long-station] [--sort=Datum] [--format=csv]
+      phenodata (observations|forecast) --source=dwd --dataset=immediate --partition=recent [--filename=Hasel,Schneegloeckchen] [--station-id=164,717] [--species-id=113,127] [--phase-id=5] [--quality-level=10] [--quality-byte=1,2,3] [--station=berlin,brandenburg] [--species=hazel,snowdrop] [--species-preset=mellifera-primary] [--phase=flowering] [--quality=ROUTKLI] [--year=2017] [--humanize] [--show-ids] [--language=german] [--long-station] [--sort=Datum] [--format=csv]
       phenodata --version
       phenodata (-h | --help)
 
@@ -51,6 +51,7 @@ def run():
       --station=<station>       Filter by strings from "stations" data (comma-separated list)
       --species=<species>       Filter by strings from "species" data (comma-separated list)
       --phase=<phase>           Filter by strings from "phases" data (comma-separated list)
+      --species-preset=<preset> Filter by strings from "species" data (comma-separated list) loaded from ``presets.json`` file
 
     Data output options:
       --format=<format>         Output data in designated format. Choose one of "tabular", "json", "csv" or "string".
@@ -76,7 +77,15 @@ def run():
     boot_logging(options)
 
     # Normalize commandline options
-    options = normalize_options(options, encoding='utf-8', list_items=[
+    options = normalize_options(options, encoding='utf-8')
+
+    # Expand options
+    preset_name = options['species-preset']
+    if preset_name:
+        options['species'] = DwdPhenoData.load_preset('options', 'species', preset_name)
+
+    # Coerce comma-separated list fields
+    options_convert_lists(options, list_items=[
 
         # Acquisition parameters
         'filename',
@@ -117,7 +126,9 @@ def run():
         humanizer = DwdPhenoDataHumanizer(language=options['language'], long_station=options['long-station'], show_ids=options['show-ids'])
         client = DwdPhenoData(cdc=cdc_client, humanizer=humanizer, dataset=options.get('dataset'))
     else:
-        raise DocoptExit('Data source "{}" not implemented'.format(options['source']))
+        message = 'Data source "{}" not implemented'.format(options['source'])
+        logger.error(message)
+        raise DocoptExit(message)
 
     # Dispatch command
     data = None

diff --git a/phenodata/dwd/pheno.py b/phenodata/dwd/pheno.py
@@ -1,8 +1,10 @@
 # -*- coding: utf-8 -*-
 # (c) 2018 Andreas Motl <andreas@hiveeyes.org>
+import json
 import attr
 import logging
 import pandas as pd
+import pkg_resources
 from datetime import datetime
 from phenodata.util import haversine_distance
 
@@ -411,6 +413,17 @@ def scan_files(self, partition, include=None, field=None):
 
         return results
 
+    @classmethod
+    def load_preset(cls, section, option, name):
+        resource = pkg_resources.resource_stream(__name__, 'presets.json')
+        presets = json.load(resource)
+        try:
+            value = presets[section][option][name]
+            return value
+        except KeyError:
+            message = 'Preset "{}" not found in file "{}"'.format(name, resource.name)
+            logger.error(message)
+            raise KeyError(message)
 
 @attr.s
 class DwdPhenoDataHumanizer(object):

diff --git a/phenodata/dwd/presets.json b/phenodata/dwd/presets.json
@@ -0,0 +1,12 @@
+{
+    "options": {
+        "species": {
+            "mellifera-de-primary": "Hasel, Schneeglöckchen, Sal-Weide, Löwenzahn, Süßkirsche, Apfel, Winterraps, Robinie, Winter-Linde, Heidekraut",
+            "mellifera-de-secondary": "Kornellkirsche, Traubenkirsche, Stachelbeere, Johannisbeere, Schlehe, Huflattich, Ahorn, Sonnenblume, Birne, Schwarz-Erle, Rosskastanie",
+            "mellifera-de-honeydew": "Fichte, Kiefer, Tanne, Ahorn, Birke, Birne, Linde, Eiche, Kastanie, Holunder",
+            "mellifera-de-forensic": "Mais, Raps, Sonnenblume",
+            "mellifera-de-interests": "Pflaume, Himbeere, Brombeere, Traubenkirsche, Zweigriffliger Weißdorn",
+            "mellifera-de-primary-openhive": "Winterraps, Sonnenblume, Mais, Apfel, Birne, Süßkirsche, Sauerkirsche, Himbeere, Brombeere, Hasel, Heidekraut, Löwenzahn, Robinie, Rosskastanie, Sal-Weide, Spitz-Ahorn, Winter-Linde"
+        }
+    }
+}
diff --git a/phenodata/util.py b/phenodata/util.py
@@ -22,7 +22,6 @@ def setup_logging(level=logging.INFO):
 
 def normalize_options(options, encoding=None, list_items=None):
     normalized = {}
-    list_items = list_items or []
     for key, value in options.items():
 
         # Sanitize key
@@ -32,16 +31,20 @@ def normalize_options(options, encoding=None, list_items=None):
         if encoding and (type(value) is str):
             value = value.decode(encoding)
 
+        normalized[key] = value
+
+    return normalized
+
+def options_convert_lists(options, list_items=None):
+    list_items = list_items or []
+    for key, value in options.items():
         # Decode list options
         if key in list_items:
             if value is None:
                 value = []
             else:
                 value = read_list(value)
-
-        normalized[key] = value
-
-    return normalized
+            options[key] = value
 
 def to_list(obj):
     """Convert an object to a list if it is not already one"""

diff --git a/setup.py b/setup.py
@@ -62,6 +62,9 @@
     packages=find_packages(),
     include_package_data=True,
     package_data={
+        'phenodata': [
+            'dwd/*.json',
+        ],
     },
     zip_safe=False,
     test_suite='nose.collector',