Skip to content

Commit

Permalink
Merge PR #69 from fsteggink/stetl_bgt_improvements
Browse files Browse the repository at this point in the history
Stetl improvements, coming out of NLExtract BGT enhancements.
  • Loading branch information
justb4 committed Feb 27, 2018
2 parents e0099b9 + 07670a6 commit d50832b
Show file tree
Hide file tree
Showing 10 changed files with 302 additions and 4 deletions.
53 changes: 53 additions & 0 deletions stetl/filters/execfilter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Executes the given command and returns the captured output.
#
# Author: Frank Steggink
#
import subprocess
import os
from stetl.filter import Filter
from stetl.util import Util
from stetl.packet import FORMAT

log = Util.get_log('execfilter')


class ExecFilter(Filter):
    """
    Executes any command (abstract base class).

    Subclasses decide which command to run and call ``execute_cmd`` to run it
    and capture its output.
    """

    def __init__(self, configdict, section, consumes, produces):
        Filter.__init__(self, configdict, section, consumes, produces)

    def invoke(self, packet):
        """Hand the packet on unchanged; subclasses override this."""
        return packet

    def execute_cmd(self, cmd):
        """
        Run ``cmd`` and return the output captured by ``subprocess.check_output``.

        :param cmd: the command to execute
        :return: the captured output of the command
        :raises subprocess.CalledProcessError: if the command exits with a non-zero status
        """
        # The command is run through the shell everywhere except on Windows ('nt').
        use_shell = os.name != 'nt'

        # Wrap cmd in a 1-tuple: a tuple command would otherwise be unpacked
        # as the format arguments and make the log call itself raise TypeError.
        log.info("executing cmd=%s" % (cmd,))
        result = subprocess.check_output(cmd, shell=use_shell)
        log.info("execute done")
        return result


class CommandExecFilter(ExecFilter):
    """
    Runs the command carried by the incoming packet and replaces it with the captured output.

    consumes=FORMAT.string, produces=FORMAT.string
    """

    def __init__(self, configdict, section):
        ExecFilter.__init__(self, configdict, section, consumes=FORMAT.string, produces=FORMAT.string)

    def invoke(self, packet):
        # Nothing to execute for an empty packet: pass it on untouched.
        if packet.data is None:
            return packet

        packet.data = self.execute_cmd(packet.data)
        return packet
63 changes: 63 additions & 0 deletions stetl/filters/regexfilter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Extracts data from a string using a regular expression and generates a record.
#
# Author: Frank Steggink

from stetl.component import Config
from stetl.filter import Filter
from stetl.packet import FORMAT
from stetl.util import Util
import re

log = Util.get_log("regexfilter")


class RegexFilter(Filter):
    """
    Matches a regular expression against the incoming string and emits the named groups as a record.

    consumes=FORMAT.string, produces=FORMAT.record
    """

    # Start attribute config meta
    # Applying Decorator pattern with the Config class to provide
    # read-only config values from the configured properties.

    @Config(ptype=str, default=None, required=True)
    def pattern_string(self):
        """
        Regex pattern string. Should contain named groups.
        """

    # End attribute config meta

    # Constructor
    def __init__(self, configdict, section, consumes=FORMAT.string, produces=FORMAT.record):
        Filter.__init__(self, configdict, section, consumes, produces)

        # re.S (DOTALL) lets '.' match newlines, so one pattern can span multi-line input.
        self.regex_object = re.compile(self.pattern_string, flags=re.S)

    def init(self):
        log.info('Init: regex filter')
        if self.pattern_string is not None:
            return

        # No pattern_string present: report it and refuse to continue.
        err_s = 'The pattern_string needs to be configured'
        log.error(err_s)
        raise ValueError(err_s)

    def exit(self):
        log.info('Exit: regex filter')

    def invoke(self, packet):
        text = packet.data
        if text is None:
            return packet

        # match() anchors at the start of the text; a failed match yields an empty record.
        found = self.regex_object.match(text)
        packet.data = {} if found is None else found.groupdict()
        return packet
21 changes: 18 additions & 3 deletions stetl/filters/templatingfilter.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,15 @@ class StringTemplatingFilter(TemplatingFilter):
consumes=FORMAT.record or FORMAT.record_array, produces=FORMAT.string
"""

@Config(ptype=bool, default=False, required=False)
def safe_substitution(self):
    """
    Apply safe substitution? When enabled, string.Template.safe_substitute is invoked instead of
    string.Template.substitute: placeholders that are missing from the mapping are left intact in
    the resulting string instead of raising an exception.
    """

def __init__(self, configdict, section):
TemplatingFilter.__init__(self, configdict, section, consumes=[FORMAT.record, FORMAT.record_array])

Expand All @@ -111,10 +120,16 @@ def create_template(self):
self.template = Template(self.template_string)

def render_template(self, packet):
    """
    Render the template with the packet's data and store the result in the packet.

    A list is rendered item by item and produces a list of strings; any
    other value is rendered as a single mapping and produces one string.

    :param packet: packet whose ``data`` is a mapping or a list of mappings
    :return: the same packet, with ``data`` replaced by the rendered output
    :raises KeyError: if a placeholder is missing and safe_substitution is off
    """
    # Choose the substitution method once instead of duplicating the
    # list/non-list branches for each mode.
    if self.safe_substitution:
        render = self.template.safe_substitute
    else:
        render = self.template.substitute

    data = packet.data
    if isinstance(data, list):
        packet.data = [render(item) for item in data]
    else:
        packet.data = render(data)

    return packet

Expand Down
9 changes: 8 additions & 1 deletion stetl/filters/zipfileextractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,13 @@ def file_path(self):
"""
pass

@Config(ptype=bool, default=True, required=False)
def delete_file(self):
    """
    Delete the file when the chain has been completed?
    """

# End attribute config meta

# Constructor
Expand Down Expand Up @@ -58,7 +65,7 @@ def invoke(self, packet):

def after_chain_invoke(self, packet):
    """
    Remove the current file if deletion is enabled.

    :param packet: not used by this method
    :return: always True
    """
    import os.path

    # Test the cheap config flag first so the filesystem is not touched when
    # deletion is disabled; only regular files are ever removed.
    if self.delete_file and os.path.isfile(self.cur_file_path):
        os.remove(self.cur_file_path)

    return True
1 change: 1 addition & 0 deletions tests/data/commandexecfilter.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
python -c "print '{0}/{1}'.format('foo','bar')"
48 changes: 48 additions & 0 deletions tests/data/ogrinfo_output.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
INFO: Open of `bgt_gebouwinstallatie.gml'
using driver `GML' successful.

Layer name: BuildingInstallation
Geometry: Curve Polygon
Feature Count: 1162
Extent: (93971.990000, 433941.050000) - (96020.190000, 436004.350000)
Layer SRS WKT:
PROJCS["Amersfoort / RD New",
GEOGCS["Amersfoort",
DATUM["Amersfoort",
SPHEROID["Bessel 1841",6377397.155,299.1528128,
AUTHORITY["EPSG","7004"]],
TOWGS84[565.2369,50.0087,465.658,-0.406857,0.350733,-1.87035,4.0812],
AUTHORITY["EPSG","6289"]],
PRIMEM["Greenwich",0,
AUTHORITY["EPSG","8901"]],
UNIT["degree",0.0174532925199433,
AUTHORITY["EPSG","9122"]],
AXIS["Latitude",NORTH],
AXIS["Longitude",EAST],
AUTHORITY["EPSG","4289"]],
PROJECTION["Oblique_Stereographic"],
PARAMETER["latitude_of_origin",52.15616055555555],
PARAMETER["central_meridian",5.38763888888889],
PARAMETER["scale_factor",0.9999079],
PARAMETER["false_easting",155000],
PARAMETER["false_northing",463000],
UNIT["metre",1,
AUTHORITY["EPSG","9001"]],
AXIS["X",EAST],
AXIS["Y",NORTH],
AUTHORITY["EPSG","28992"]]
gml_id: String (0.0) NOT NULL
creationDate: String (10.0)
LV-publicatiedatum: String (23.0)
relatieveHoogteligging: Integer (0.0)
inOnderzoek: Integer(Boolean) (0.0)
tijdstipRegistratie: String (23.0)
namespace: String (8.0)
lokaalID: String (38.0)
bronhouder: String (5.0)
bgt-status: String (8.0)
plus-status: String (10.0)
function: String (8.0)
plus-typeGebouwInstallatie: String (12.0)
terminationDate: String (10.0)
eindRegistratie: String (23.0)
17 changes: 17 additions & 0 deletions tests/filters/configs/commandexecfilter.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Config file for unit testing CommandExecFilter.

[etl]
chains = input_string_file|command_executor|packet_buffer|output_std

[input_string_file]
class = inputs.fileinput.StringFileInput
file_path = tests/data/commandexecfilter.txt

[command_executor]
class = filters.execfilter.CommandExecFilter

[packet_buffer]
class = filters.packetbuffer.PacketBuffer

[output_std]
class = outputs.standardoutput.StandardOutput
18 changes: 18 additions & 0 deletions tests/filters/configs/regexfilter.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Config file for unit testing RegexFilter.

[etl]
chains = input_string_file|regex_filter|packet_buffer|output_std

[input_string_file]
class = inputs.fileinput.StringFileInput
file_path = tests/data/ogrinfo_output.txt

[regex_filter]
class = filters.regexfilter.RegexFilter
pattern_string = .*Layer name: (\w+:)?(?P<elemtype>\w+).*Feature Count: (?P<featurecount>[0-9]+).*

[packet_buffer]
class = filters.packetbuffer.PacketBuffer

[output_std]
class = outputs.standardoutput.StandardOutput
38 changes: 38 additions & 0 deletions tests/filters/test_command_exec_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import os

from stetl.etl import ETL
from stetl.filters.packetbuffer import PacketBuffer
from stetl.filters.execfilter import CommandExecFilter
from tests.stetl_test_case import StetlTestCase

class CommandExecFilterTest(StetlTestCase):
    """Unit tests for CommandExecFilter"""

    def setUp(self):
        super(CommandExecFilterTest, self).setUp()

        # Initialize Stetl from the config file that sits next to this test module.
        here = os.path.dirname(os.path.realpath(__file__))
        config_path = os.path.join(here, 'configs/commandexecfilter.cfg')
        self.etl = ETL({'config_file': config_path})

    def test_class(self):
        # The component at index 1 of the chain must be configured as the exec filter.
        chain = StetlTestCase.get_chain(self.etl)
        section = StetlTestCase.get_section(chain, 1)
        configured_class = self.etl.configdict.get(section, 'class')

        self.assertEqual('filters.execfilter.CommandExecFilter', configured_class)

    def test_instance(self):
        chain = StetlTestCase.get_chain(self.etl)
        component = chain.get_by_index(1)

        self.assertTrue(isinstance(component, CommandExecFilter))

    def test_execute(self):
        chain = StetlTestCase.get_chain(self.etl)
        chain.run()

        # The packet buffer holds the packets that passed through the chain.
        packets = chain.get_by_class(PacketBuffer).packet_list
        output = packets[0].data.strip()

        self.assertEqual(output, "foo/bar")
38 changes: 38 additions & 0 deletions tests/filters/test_regex_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import os

from stetl.etl import ETL
from stetl.filters.packetbuffer import PacketBuffer
from stetl.filters.regexfilter import RegexFilter
from tests.stetl_test_case import StetlTestCase

class RegexFilterTest(StetlTestCase):
    """Unit tests for RegexFilter"""

    def setUp(self):
        super(RegexFilterTest, self).setUp()

        # Initialize Stetl from the config file that sits next to this test module.
        curr_dir = os.path.dirname(os.path.realpath(__file__))
        cfg_dict = {'config_file': os.path.join(curr_dir, 'configs/regexfilter.cfg')}
        self.etl = ETL(cfg_dict)

    def test_class(self):
        # The component at index 1 of the chain must be configured as the regex filter.
        chain = StetlTestCase.get_chain(self.etl)
        section = StetlTestCase.get_section(chain, 1)
        class_name = self.etl.configdict.get(section, 'class')

        self.assertEqual('filters.regexfilter.RegexFilter', class_name)

    def test_instance(self):
        chain = StetlTestCase.get_chain(self.etl)

        self.assertTrue(isinstance(chain.get_by_index(1), RegexFilter))

    def test_execute(self):
        chain = StetlTestCase.get_chain(self.etl)
        chain.run()

        buffer_filter = chain.get_by_class(PacketBuffer)
        packet_list = buffer_filter.packet_list

        # Compare the record itself rather than its str() form, which depends
        # on dict key order and on how the string values are represented.
        expected = {'elemtype': 'BuildingInstallation', 'featurecount': '1162'}
        self.assertEqual(packet_list[0].data, expected)

0 comments on commit d50832b

Please sign in to comment.