Skip to content

Commit

Permalink
#13 add Config for attrs in think all Components
Browse files Browse the repository at this point in the history
  • Loading branch information
justb4 committed Nov 7, 2017
1 parent 20c3cbd commit 7398eb6
Show file tree
Hide file tree
Showing 6 changed files with 195 additions and 32 deletions.
14 changes: 13 additions & 1 deletion docs/code.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ from an :class:`stetl.input.Input` via zero or more :class:`stetl.filter.Filter`

As a trivial example: an :class:`stetl.input.Input` could be an XML file, a :class:`stetl.filter.Filter` could represent
an XSLT file and an :class:`stetl.output.Output` a PostGIS database. This is effected by specialized classes in
the subpackages inputs, filters, and outputs. New in 1.1.0: :class:`stetl.Splitter` to split data to multiple Outputs.
the subpackages inputs, filters, and outputs. New in 1.1.0: :class:`stetl.Splitter` to split data to multiple Outputs
and :class:`stetl.Merger` to combine multiple `Inputs`.

.. automodule:: stetl.factory
:members:
Expand Down Expand Up @@ -64,6 +65,9 @@ the subpackages inputs, filters, and outputs. New in 1.1.0: :class:`stetl.Splitt
:members:
:show-inheritance:

.. automodule:: stetl.merger
:members:
:show-inheritance:

Components: Inputs
------------------
Expand Down Expand Up @@ -99,6 +103,10 @@ Components: Filters
:members:
:show-inheritance:

.. automodule:: stetl.filters.xmlelementreader
:members:
:show-inheritance:

.. automodule:: stetl.filters.xmlvalidator
:members:
:show-inheritance:
Expand Down Expand Up @@ -142,6 +150,10 @@ Components: Outputs
:members:
:show-inheritance:

.. automodule:: stetl.outputs.execoutput
:members:
:show-inheritance:

.. automodule:: stetl.outputs.dboutput
:members:
:show-inheritance:
Expand Down
49 changes: 42 additions & 7 deletions stetl/inputs/deegreeinput.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import codecs
import re

from stetl.component import Config
from stetl.postgis import PostGIS
from stetl.input import Input
from stetl.util import Util, etree, StringIO
Expand All @@ -23,13 +24,47 @@ class DeegreeBlobstoreInput(Input):
produces=FORMAT.etree_doc
"""

# Start attribute config meta

@Config(ptype=int, required=False, default=10000)
def max_features_per_doc(self):
    """
    Maximum number of features to read from the input feature GML stream
    into a single internal document.
    """

@Config(ptype=str, required=True, default=None)
def start_container(self):
    """Tag that opens the container; has no default and must be configured."""

@Config(ptype=str, required=True, default=None)
def end_container(self):
    """Tag that closes the container; has no default and must be configured."""

# The default is None rather than False: the declared ptype is str, so a
# boolean default is a type mismatch, and the sibling end_feature_tag
# attribute already uses None for "not configured".
@Config(ptype=str, required=False, default=None)
def start_feature_tag(self):
    """
    XML tag that starts a Feature (optional).
    """
    pass

@Config(ptype=str, required=False, default=None)
def end_feature_tag(self):
    """XML tag that ends a Feature (optional)."""

# End attribute config meta

def __init__(self, configdict, section):
Input.__init__(self, configdict, section, produces=FORMAT.etree_doc)
self.max_features_per_doc = self.cfg.get_int('max_features_per_doc', 10000)
self.start_container = self.cfg.get('start_container')
self.end_container = self.cfg.get('end_container')
self.start_feature_tag = self.cfg.get('start_feature_tag')
self.end_feature_tag = self.cfg.get('end_feature_tag')
self.cur_feature_blob = None
self.rowcount = 0

Expand Down Expand Up @@ -156,13 +191,13 @@ def buffer_to_doc(self, packet):
# print '[' + self.buffer.getvalue() + ']'
packet.data = etree.parse(self.buffer, self.xml_parser)
# print buffer.getvalue()
except Exception, e:
except Exception as e:
bufStr = self.buffer.getvalue()
if not bufStr:
log.info("parse buffer empty: content=[%s]" % bufStr)
else:
log.error("error in buffer parsing %s" % str(e))
print bufStr
# print(bufStr)
raise
self.buffer.close()
self.buffer = None
10 changes: 8 additions & 2 deletions stetl/inputs/httpinput.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,15 +188,21 @@ class ApacheDirInput(HttpInput):
date time is too fragile over different Apache servers).
"""

@Config(ptype=str, default='xml', required=False)
def file_ext(self):
    """
    File extension of the target files in the Apache directory listing;
    it is inserted into the default ``file_reg_exp`` href pattern.
    """

def __init__(self, configdict, section, produces=FORMAT.record):
HttpInput.__init__(self, configdict, section, produces)
# look for a link + a timestamp + a size ('-' for dir)
# self.parse_re = re.compile('href="([^"]*)".*(..-...-.... ..:..).*?(\d+[^\s<]*|-)')
# This appeared to be too fragile, e.g. different date formats per apache server

# default file extension to filter
self.file_ext = self.cfg.get('file_ext', 'xml')
# default regular expresion for file
# default regular expression for file
self.file_reg_exp = self.cfg.get('file_reg_exp', 'href="([^"]*%s)"' % self.file_ext)
self.parse_re = re.compile(self.file_reg_exp)
self.file_list = None
Expand Down
117 changes: 99 additions & 18 deletions stetl/inputs/ogrinput.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,101 @@ class OgrPostgisInput(Input):
produces=FORMAT.xml_line_stream
"""

# Start attribute config meta
@Config(ptype=str, required=False, default='localhost')
def in_pg_host(self):
    """Host of the input DB (``host=`` in the PG connection string)."""

@Config(ptype=str, required=False, default='5432')
def in_pg_port(self):
    """Port of the input DB (``port=`` in the PG connection string)."""

@Config(ptype=str, required=True, default=None)
def in_pg_db(self):
    """
    Database name of the input DB (``dbname=`` in the PG connection string);
    has no default and must be configured.
    """

# Default is 'public', the value init() used before the attributes were
# moved to Config declarations. With a None default the connection template
# ("... active_schema=%s ...") would be formatted with the literal text
# "None" whenever the schema is not configured.
@Config(ptype=str, required=False, default='public')
def in_pg_schema(self):
    """
    DB schema name of the input DB (``active_schema=`` in the PG connection string).
    """
    pass

@Config(ptype=str, required=False, default='postgres')
def in_pg_user(self):
    """User name for the input DB (``user=`` in the PG connection string)."""

@Config(ptype=str, required=False, default='postgres')
def in_pg_password(self):
    """Password for the input DB (``password=`` in the PG connection string)."""

@Config(ptype=str, required=False, default=None)
def in_srs(self):
    """
    SRS (projection) of the input DB, passed to ogr2ogr as -s_srs,
    e.g. 'EPSG:28992'.
    """

@Config(ptype=str, required=False, default=None)
def in_pg_sql(self):
    """The input query (string) to fire, passed to ogr2ogr as -SQL."""

@Config(ptype=str, required=False, default=None)
def out_srs(self):
    """Target SRS code of the output stream, passed to ogr2ogr as -t_srs."""

@Config(ptype=str, required=False, default='2')
def out_dimension(self):
    """
    Dimension of the features in the output stream, passed to ogr2ogr
    as the layer creation option DIM=N.
    """

@Config(ptype=str, required=False, default=None)
def out_gml_format(self):
    """
    OGR name of the GML format of the output stream, e.g. 'GML3'; passed
    to ogr2ogr as the dataset creation option FORMAT=.
    """

@Config(ptype=str, required=False, default=None)
def out_layer_name(self):
    """
    New layer name in the output stream, passed to ogr2ogr as -nln,
    e.g. 'address'.
    """

# Default is '' (empty string), the value init() used before the attributes
# were moved to Config declarations. The value is substituted as the last
# field of the ogr2ogr command template, so a None default would end up as
# the literal argument "None" on the command line.
@Config(ptype=str, required=False, default='')
def out_geotype(self):
    """
    OGR geometry type of the new layer in the output stream, e.g. POINT.
    """
    pass

# End attribute config meta

# TODO make this template configurable so we can have generic ogr2ogr input....
pg_conn_tmpl = "PG:host=%s dbname=%s active_schema=%s user=%s password=%s port=%s"
cmd_tmpl = 'ogr2ogr|-t_srs|%s|-s_srs|%s|-f|GML|%s|-dsco|FORMAT=%s|-lco|DIM=%s|%s|-SQL|%s|-nln|%s|%s'
Expand All @@ -201,33 +296,19 @@ def init(self):
self.ogr_process = None
self.eof_stdout = False
self.eof_stderr = False

in_pg_host = self.cfg.get('in_pg_host', 'localhost')
in_pg_db = self.cfg.get('in_pg_db')
in_pg_schema = self.cfg.get('in_pg_schema', 'public')
in_pg_user = self.cfg.get('in_pg_user', 'postgres')
in_pg_password = self.cfg.get('in_pg_password', 'postgres')
in_pg_port = self.cfg.get('in_pg_port', '5432')
in_srs = self.cfg.get('in_srs')
in_pg_sql = self.cfg.get('in_pg_sql')

out_srs = self.cfg.get('out_srs')
out_file = '/vsistdout/'
out_gml_format = self.cfg.get('out_gml_format')
out_dimension = self.cfg.get('out_dimension', '2')
out_layer_name = self.cfg.get('out_layer_name')
out_geotype = self.cfg.get('out_geotype', '')
self.out_file = '/vsistdout/'

#
# Build ogr2ogr command line
#
# PostGIS PG: options
self.pg = OgrPostgisInput.pg_conn_tmpl % (
in_pg_host, in_pg_db, in_pg_schema, in_pg_user, in_pg_password, in_pg_port)
self.in_pg_host, self.in_pg_db, self.in_pg_schema, self.in_pg_user, self.in_pg_password, self.in_pg_port)

# Entire ogr2ogr command line
self.cmd = OgrPostgisInput.cmd_tmpl % (
out_srs, in_srs, out_file, out_gml_format, out_dimension, self.pg, in_pg_sql, out_layer_name, out_geotype)
self.out_srs, self.in_srs, self.out_file, self.out_gml_format, self.out_dimension, self.pg, self.in_pg_sql,
self.out_layer_name, self.out_geotype)

# Make array to make it easy for Popen with quotes etc
self.cmd = self.cmd.split('|')
Expand Down
1 change: 1 addition & 0 deletions stetl/outputs/dboutput.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def schema(self):
pass

# End attribute config meta

def __init__(self, configdict, section):
DbOutput.__init__(self, configdict, section, consumes=FORMAT.string)

Expand Down
36 changes: 32 additions & 4 deletions stetl/outputs/wfsoutput.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#
# Author: Just van den Broecke
#
from stetl.component import Config
from stetl.output import Output
from stetl.util import Util
from stetl.packet import FORMAT
Expand All @@ -20,6 +21,37 @@ class WFSTOutput(Output):
consumes=FORMAT.etree_doc
"""

# Start attribute config meta
# NOTE(review): __init__ used to read this value from the 'host' config key.
# If Config derives the key from the attribute name, existing configurations
# must now use 'wfs_host' - confirm and document the rename.
@Config(ptype=str, required=True, default=None)
def wfs_host(self):
    """Hostname part of the WFS URL, e.g. geodata.ngr.nl."""

# NOTE(review): __init__ used to read this value from the 'port' config key.
# If Config derives the key from the attribute name, existing configurations
# must now use 'wfs_port' - confirm and document the rename.
@Config(ptype=str, required=False, default='80')
def wfs_port(self):
    """Port part of the WFS URL."""

# NOTE(review): __init__ used to read this value from the 'path' config key.
# If Config derives the key from the attribute name, existing configurations
# must now use 'wfs_path' - confirm and document the rename.
@Config(ptype=str, required=True, default=None)
def wfs_path(self):
    """Path part of the WFS URL, e.g. '/bag/wfs'."""

@Config(ptype=str, required=False, default='GenerateNew')
def idgen(self):
    """
    Id generation mode: how the WFS server assigns Ids to incoming Features
    (the default 'GenerateNew' lets the server generate new Ids).
    """

# End attribute config meta

wfst_req = '''<?xml version="1.0" encoding="UTF-8"?>
<wfs:Transaction version="1.1.0" service="WFS"
xmlns:wfs="http://www.opengis.net/wfs"
Expand All @@ -35,10 +67,6 @@ class WFSTOutput(Output):

def __init__(self, configdict, section):
Output.__init__(self, configdict, section, consumes=FORMAT.etree_doc)
self.wfs_host = self.cfg.get('host')
self.wfs_port = self.cfg.get('port', '80')
self.wfs_path = self.cfg.get('path')
self.idgen = self.cfg.get('idgen', 'GenerateNew')

def write(self, packet):
if packet.data is None:
Expand Down

0 comments on commit 7398eb6

Please sign in to comment.