Skip to content

Commit

Permalink
#71 Allow Environment vars to substitute/override config vars
Browse files Browse the repository at this point in the history
  • Loading branch information
justb4 committed Apr 3, 2018
1 parent 755d449 commit 33c9272
Show file tree
Hide file tree
Showing 7 changed files with 126 additions and 5 deletions.
31 changes: 27 additions & 4 deletions docs/using.rst
Original file line number Diff line number Diff line change
Expand Up @@ -235,8 +235,12 @@ or even OGC WPS servers (planned).
Reusable Stetl Configs
----------------------
What we saw in the last example is that it is hard to reuse this `etl.cfg` when we have for example a different input file
or want to map to different output files. For this Stetl supports `parameter substitution`. Here command line parameters are substituted
for variables in `etl.cfg`. A variable is declared between curly brackets like `{out_xml}`. See
or want to map to different output files. For this Stetl supports `config parameter substitution`.

Dynamic or secret (e.g. database credentials) parameters in `etl.cfg` are declared
symbolically and substituted at runtime via the commandline or the OS environment (new in 2018).

A variable is declared between curly brackets like `{out_xml}`. See
example `6_cmdargs <https://github.com/geopython/stetl/tree/master/examples/basics/6_cmdargs>`_. ::

[etl]
Expand All @@ -259,7 +263,7 @@ One, passing the arguments on the commandline, like ::

stetl -c etl.cfg -a "in_xml=input/cities.xml in_xsl=cities2gml.xsl out_xml=output/gmlcities.gml"

Two, passing the arguments in a properties file, here called `etl.args` (the name of the suffix .args is not significant). ::
Two, passing the arguments in a properties file, here called `etl.args` (the `.args` suffix is not significant; it could be `.env` as well). ::

stetl -c etl.cfg -a etl.args

Expand All @@ -270,7 +274,26 @@ Where the content of the `etl.args` properties file is: ::
in_xsl=cities2gml.xsl
out_xml=output/gmlcities.gml

This makes an ETL chain highly reusable. A very elaborate Stetl config with parameter substitution can be seen in the
A third way is to pass these key/value pairs (partly) as OS Environment variables.
This is especially handy in Docker-based deployments like Docker Compose and Kubernetes.
In this case the variable names must be prefixed with `STETL_` or `stetl_` so as
not to mix them up with unrelated OS environment variables. A mixture of commandline args (file)
and environment vars is possible. The rule is that *OS Environment variables always take precedence*.

For example, the above args could also be passed as follows: ::

export stetl_in_xml="input/cities.xml"
export stetl_in_xsl="cities2gml.xsl"
export stetl_out_xml="output/gmlcities.gml"
stetl -c etl.cfg

or only override the input file name from `etl.args`: ::

export stetl_in_xml="input/cities2.xml"
stetl -c etl.cfg -a etl.args

This makes an ETL chain highly reusable.
A very elaborate Stetl config with parameter substitution can be seen in the
`Top10NL ETL <https://github.com/geopython/stetl/blob/master/examples/top10nl/etl-top10nl.cfg>`_.

Connection Compatibility
Expand Down
Binary file modified examples/basics/12_gdal_ogr/output/cities.dbf
Binary file not shown.
Binary file modified examples/basics/3_shape/output/gmlcities.dbf
Binary file not shown.
25 changes: 25 additions & 0 deletions stetl/etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def __init__(self, options_dict, args_dict=None):
self.configdict = ConfigParser()

sys.path.append(ETL.CONFIG_DIR)
args_dict = self.env_expand_args_dict(args_dict)

try:
log.info("Reading config_file = %s" % config_file)
Expand All @@ -74,6 +75,30 @@ def __init__(self, options_dict, args_dict=None):
except Exception as e:
log.error("Fatal Error reading config file: err=%s" % str(e))

def env_expand_args_dict(self, args_dict):
    """
    Expand values in dict with equivalent values from the OS environment.

    Every environment variable whose name starts with ``STETL_`` or
    ``stetl_`` (the prefix is matched case-insensitively) is taken over
    under its name with the prefix stripped, e.g. ``STETL_HOST`` becomes
    ``HOST`` and ``stetl_host`` becomes ``host``. The prefix is required
    so as to NOT get overrides by accident from unrelated environment
    variables. Environment values always win over values already present
    in ``args_dict``.

    The dict passed in is never modified: overrides are applied to a copy.

    :param args_dict: dict mapping arg names to values, or None
    :return: a new dict with the environment overrides applied, or the
             ``args_dict`` that was passed in (possibly None) when no
             prefixed environment variable is set
    """
    prefix = 'stetl_'

    overrides = {}
    for name, value in os.environ.items():
        if not name.lower().startswith(prefix):
            continue

        # Get real key, e.g. "STETL_HOST" becomes "HOST",
        # "stetl_host" becomes "host".
        args_key = name[len(prefix):]
        if not args_key:
            # A variable named just "STETL_" carries no arg name: ignore it
            continue

        overrides[args_key] = value

    if not overrides:
        # Nothing to expand: hand back the input untouched
        return args_dict

    # Work on a copy so the caller's dict is not altered as a side effect
    expanded = dict(args_dict) if args_dict else dict()
    expanded.update(overrides)
    return expanded

def run(self):
# The main ETL processing
log.info("START")
Expand Down
12 changes: 12 additions & 0 deletions tests/configs/copy_in_out_file.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Copies an input file to an output file using substitutable args.

[etl]
chains = input_file|output_file

[input_file]
class = inputs.fileinput.XmlFileInput
file_path = {in_file}

[output_file]
class = outputs.fileoutput.FileOutput
file_path = {out_file}
60 changes: 60 additions & 0 deletions tests/test_args.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# testing: to be called by nosetests

import os

from stetl.etl import ETL
from tests.stetl_test_case import StetlTestCase


class ConfigTest(StetlTestCase):
    """Tests for config arg substitution from an args dict and the OS environment."""

    # Environment variables set by these tests; removed before and after
    # every test so values cannot leak between tests or into other modules.
    ENV_VARS = ('stetl_in_file', 'stetl_out_file')

    def setUp(self):
        super(ConfigTest, self).setUp()

        self._clear_env()

        # Initialize Stetl
        self.curr_dir = os.path.dirname(os.path.realpath(__file__))
        self.cfg_dict = {'config_file': os.path.join(self.curr_dir, 'configs/copy_in_out_file.cfg')}

    def tearDown(self):
        self._clear_env()
        # NOTE(review): assumes StetlTestCase derives from unittest.TestCase
        # (it already provides setUp/assertEqual) - confirm.
        super(ConfigTest, self).tearDown()

    def _clear_env(self):
        """Remove each test variable independently; missing ones are ignored."""
        for name in self.ENV_VARS:
            os.environ.pop(name, None)

    def test_args_dict(self):
        """Substitute all args from the args dict only."""
        args_dict = {'in_file': 'infile.txt', 'out_file': 'outfile.txt'}
        etl = ETL(self.cfg_dict, args_dict)

        # Test args substitution from args_dict
        self.assertEqual(etl.configdict.get('input_file', 'file_path'), 'infile.txt')
        self.assertEqual(etl.configdict.get('output_file', 'file_path'), 'outfile.txt')

    def test_args_dict_env_override(self):
        """An OS env value overrides the matching value in the args dict."""
        args_dict = {'in_file': 'infile.txt', 'out_file': 'outfile.txt'}

        # Override in OS env
        os.environ['stetl_in_file'] = 'env_infile.txt'

        etl = ETL(self.cfg_dict, args_dict)

        # in_file must come from the OS env, out_file still from args_dict
        self.assertEqual(etl.configdict.get('input_file', 'file_path'), os.environ['stetl_in_file'])
        self.assertEqual(etl.configdict.get('output_file', 'file_path'), 'outfile.txt')

    def test_args_dict_env_all(self):
        """
        Substitute ALL args from OS env.
        :return:
        """

        # Set all args in OS env
        os.environ['stetl_in_file'] = 'env_infile.txt'
        os.environ['stetl_out_file'] = 'env_outfile.txt'

        args_dict = None
        etl = ETL(self.cfg_dict, args_dict)

        # Test args substitution from the OS env only
        self.assertEqual(etl.configdict.get('input_file', 'file_path'), os.environ['stetl_in_file'])
        self.assertEqual(etl.configdict.get('output_file', 'file_path'), os.environ['stetl_out_file'])
3 changes: 2 additions & 1 deletion tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from stetl.etl import ETL
from tests.stetl_test_case import StetlTestCase


class ConfigTest(StetlTestCase):
"""Basic configuration tests"""

Expand All @@ -18,6 +19,6 @@ def setUp(self):

def test_type(self):
self.assertEqual(self.etl.configdict.get('etl', 'chains'), 'input_xml_file|output_std')

def test_run(self):
self.etl.run()

0 comments on commit 33c9272

Please sign in to comment.