-
Notifications
You must be signed in to change notification settings - Fork 34
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge PR #69 from fsteggink/stetl_bgt_improvements
Stetl improvements, coming out of NLExtract BGT enhancements.
- Loading branch information
Showing
10 changed files
with
302 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
# | ||
# Executes the given command and returns the captured output. | ||
# | ||
# Author: Frank Steggink | ||
# | ||
import subprocess | ||
import os | ||
from stetl.filter import Filter | ||
from stetl.util import Util | ||
from stetl.packet import FORMAT | ||
|
||
# Module-level logger, obtained from Util under the name 'execfilter'.
log = Util.get_log('execfilter') | ||
|
||
|
||
class ExecFilter(Filter):
    """
    Executes any command (abstract base class).

    ``invoke`` hands the packet back unchanged at this level; ``execute_cmd``
    is the helper that actually runs a command and captures its output.
    """

    def __init__(self, configdict, section, consumes, produces):
        Filter.__init__(self, configdict, section, consumes, produces)

    def invoke(self, packet):
        # Nothing happens here: the packet is returned untouched.
        return packet

    def execute_cmd(self, cmd):
        """
        Run ``cmd`` and return the output captured by
        ``subprocess.check_output``.

        The command is passed through the shell, except when ``os.name``
        is 'nt', in which case the shell is bypassed.
        """
        via_shell = os.name != 'nt'

        start_msg = "executing cmd=%s" % cmd
        log.info(start_msg)
        captured = subprocess.check_output(cmd, shell=via_shell)
        log.info("execute done")
        return captured
|
||
|
||
class CommandExecFilter(ExecFilter):
    """
    Runs the command held in the incoming packet and replaces the packet
    data with the captured output.

    consumes=FORMAT.string, produces=FORMAT.string
    """

    def __init__(self, configdict, section):
        ExecFilter.__init__(
            self,
            configdict,
            section,
            consumes=FORMAT.string,
            produces=FORMAT.string,
        )

    def invoke(self, packet):
        # A packet without data carries no command: pass it on as-is.
        if packet.data is None:
            return packet

        packet.data = self.execute_cmd(packet.data)
        return packet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
# | ||
# Extracts data from a string using a regular expression and generates a record. | ||
# | ||
# Author: Frank Steggink | ||
|
||
from stetl.component import Config | ||
from stetl.filter import Filter | ||
from stetl.packet import FORMAT | ||
from stetl.util import Util | ||
import re | ||
|
||
# Module-level logger, obtained from Util under the name "regexfilter".
log = Util.get_log("regexfilter") | ||
|
||
|
||
class RegexFilter(Filter):
    """
    Applies a regular expression to the incoming string and emits the named
    groups of the match as a record (dict).

    consumes=FORMAT.string, produces=FORMAT.record
    """

    # Start attribute config meta
    # The Config decorator exposes configured properties as read-only values.

    @Config(ptype=str, default=None, required=True)
    def pattern_string(self):
        """
        Regex pattern string. Should contain named groups.
        """
        pass

    # End attribute config meta

    # Constructor
    def __init__(self, configdict, section, consumes=FORMAT.string, produces=FORMAT.record):
        Filter.__init__(self, configdict, section, consumes, produces)

        # Compiled once up front; DOTALL lets '.' span newlines as well.
        self.regex_object = re.compile(self.pattern_string, re.DOTALL)

    def init(self):
        log.info('Init: regex filter')
        if self.pattern_string is not None:
            return

        # No pattern_string configured: log it and refuse to continue.
        err_s = 'The pattern_string needs to be configured'
        log.error(err_s)
        raise ValueError(err_s)

    def exit(self):
        log.info('Exit: regex filter')

    def invoke(self, packet):
        """
        Replace the packet data with the named groups of the match.

        A packet whose data is None is returned unchanged. When the pattern
        does not match at the start of the data, the data becomes an empty
        dict.
        """
        text = packet.data
        if text is None:
            return packet

        found = self.regex_object.match(text)
        packet.data = {} if found is None else found.groupdict()
        return packet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
python -c "print '{0}/{1}'.format('foo','bar')" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
INFO: Open of `bgt_gebouwinstallatie.gml' | ||
using driver `GML' successful. | ||
|
||
Layer name: BuildingInstallation | ||
Geometry: Curve Polygon | ||
Feature Count: 1162 | ||
Extent: (93971.990000, 433941.050000) - (96020.190000, 436004.350000) | ||
Layer SRS WKT: | ||
PROJCS["Amersfoort / RD New", | ||
GEOGCS["Amersfoort", | ||
DATUM["Amersfoort", | ||
SPHEROID["Bessel 1841",6377397.155,299.1528128, | ||
AUTHORITY["EPSG","7004"]], | ||
TOWGS84[565.2369,50.0087,465.658,-0.406857,0.350733,-1.87035,4.0812], | ||
AUTHORITY["EPSG","6289"]], | ||
PRIMEM["Greenwich",0, | ||
AUTHORITY["EPSG","8901"]], | ||
UNIT["degree",0.0174532925199433, | ||
AUTHORITY["EPSG","9122"]], | ||
AXIS["Latitude",NORTH], | ||
AXIS["Longitude",EAST], | ||
AUTHORITY["EPSG","4289"]], | ||
PROJECTION["Oblique_Stereographic"], | ||
PARAMETER["latitude_of_origin",52.15616055555555], | ||
PARAMETER["central_meridian",5.38763888888889], | ||
PARAMETER["scale_factor",0.9999079], | ||
PARAMETER["false_easting",155000], | ||
PARAMETER["false_northing",463000], | ||
UNIT["metre",1, | ||
AUTHORITY["EPSG","9001"]], | ||
AXIS["X",EAST], | ||
AXIS["Y",NORTH], | ||
AUTHORITY["EPSG","28992"]] | ||
gml_id: String (0.0) NOT NULL | ||
creationDate: String (10.0) | ||
LV-publicatiedatum: String (23.0) | ||
relatieveHoogteligging: Integer (0.0) | ||
inOnderzoek: Integer(Boolean) (0.0) | ||
tijdstipRegistratie: String (23.0) | ||
namespace: String (8.0) | ||
lokaalID: String (38.0) | ||
bronhouder: String (5.0) | ||
bgt-status: String (8.0) | ||
plus-status: String (10.0) | ||
function: String (8.0) | ||
plus-typeGebouwInstallatie: String (12.0) | ||
terminationDate: String (10.0) | ||
eindRegistratie: String (23.0) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# Config file for unit testing CommandExecFilter. | ||
|
||
[etl] | ||
chains = input_string_file|command_executor|packet_buffer|output_std | ||
|
||
[input_string_file] | ||
class = inputs.fileinput.StringFileInput | ||
file_path = tests/data/commandexecfilter.txt | ||
|
||
[command_executor] | ||
class = filters.execfilter.CommandExecFilter | ||
|
||
[packet_buffer] | ||
class = filters.packetbuffer.PacketBuffer | ||
|
||
[output_std] | ||
class = outputs.standardoutput.StandardOutput |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# Config file for unit testing RegexFilter. | ||
|
||
[etl] | ||
chains = input_string_file|regex_filter|packet_buffer|output_std | ||
|
||
[input_string_file] | ||
class = inputs.fileinput.StringFileInput | ||
file_path = tests/data/ogrinfo_output.txt | ||
|
||
[regex_filter] | ||
class = filters.regexfilter.RegexFilter | ||
pattern_string = .*Layer name: (\w+:)?(?P<elemtype>\w+).*Feature Count: (?P<featurecount>[0-9]+).* | ||
|
||
[packet_buffer] | ||
class = filters.packetbuffer.PacketBuffer | ||
|
||
[output_std] | ||
class = outputs.standardoutput.StandardOutput |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
import os | ||
|
||
from stetl.etl import ETL | ||
from stetl.filters.packetbuffer import PacketBuffer | ||
from stetl.filters.execfilter import CommandExecFilter | ||
from tests.stetl_test_case import StetlTestCase | ||
|
||
class CommandExecFilterTest(StetlTestCase):
    """Unit tests for CommandExecFilter"""

    def setUp(self):
        super(CommandExecFilterTest, self).setUp()

        # Build the ETL from the config file that sits next to this test.
        here = os.path.dirname(os.path.realpath(__file__))
        config_path = os.path.join(here, 'configs/commandexecfilter.cfg')
        self.etl = ETL({'config_file': config_path})

    def test_class(self):
        # The second chain component must be configured as the filter class.
        chain = StetlTestCase.get_chain(self.etl)
        section = StetlTestCase.get_section(chain, 1)
        configured_class = self.etl.configdict.get(section, 'class')

        self.assertEqual('filters.execfilter.CommandExecFilter', configured_class)

    def test_instance(self):
        chain = StetlTestCase.get_chain(self.etl)
        component = chain.get_by_index(1)

        self.assertIsInstance(component, CommandExecFilter)

    def test_execute(self):
        # Run the whole chain, then inspect what the packet buffer captured.
        chain = StetlTestCase.get_chain(self.etl)
        chain.run()

        packets = chain.get_by_class(PacketBuffer).packet_list

        self.assertEqual(packets[0].data.strip(), "foo/bar")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
import os | ||
|
||
from stetl.etl import ETL | ||
from stetl.filters.packetbuffer import PacketBuffer | ||
from stetl.filters.regexfilter import RegexFilter | ||
from tests.stetl_test_case import StetlTestCase | ||
|
||
class RegexFilterTest(StetlTestCase):
    """Unit tests for RegexFilter"""

    def setUp(self):
        super(RegexFilterTest, self).setUp()

        # Build the ETL from the config file that sits next to this test.
        here = os.path.dirname(os.path.realpath(__file__))
        config_path = os.path.join(here, 'configs/regexfilter.cfg')
        self.etl = ETL({'config_file': config_path})

    def test_class(self):
        # The second chain component must be configured as the filter class.
        chain = StetlTestCase.get_chain(self.etl)
        section = StetlTestCase.get_section(chain, 1)
        configured_class = self.etl.configdict.get(section, 'class')

        self.assertEqual('filters.regexfilter.RegexFilter', configured_class)

    def test_instance(self):
        chain = StetlTestCase.get_chain(self.etl)
        component = chain.get_by_index(1)

        self.assertIsInstance(component, RegexFilter)

    def test_execute(self):
        # Run the whole chain, then inspect what the packet buffer captured.
        chain = StetlTestCase.get_chain(self.etl)
        chain.run()

        packets = chain.get_by_class(PacketBuffer).packet_list
        rendered = str(packets[0].data)

        self.assertEqual(rendered, "{'elemtype': 'BuildingInstallation', 'featurecount': '1162'}")