# Stetl ETL configuration file (INI format).
# Example of process-chains for extracting Top10NL source data from GML to PostGIS.
# A Chain is a series of Components: one Input, zero or more Filters and one Output.
# The output of a Component is connected to the input of the next Component (except for
# the final Output Component, which writes to the final destination, e.g. Postgres).
# Currently 3 chains are executed in the following order:
# - SQL pre: DB initialization, delete tables, create schema
# - Main ETL chain, consists of the following components
# 1. input_big_gml_files: read input file(s) and output feature elements
# 2. xml_assembler: assemble feature elements into smaller (etree) docs
# 3. xml_schema_validator: validation against top10nl XSD of (etree) doc [OPTIONAL]
# 4. transformer_xslt: transform each (etree) doc
# 5. output_ogr2ogr: output using ogr2ogr; input is a GML etree doc, output can be any OGR output
# - SQL post: remove duplicates
# Any substitutable values are specified in curly brackets e.g. {password}.
# Actual values can be passed as args to Stetl or as arguments from a wrapper program.
# Here are the 3 chains:
# Chain definitions. Each chain is a "|"-separated list of component section
# names; chains are separated by "," and are executed in the order listed.
# Continuation lines of the multi-line value must be indented.
# The optional xml_schema_validator step can be inserted between xml_assembler
# and transformer_xslt in the main chain.
[etl]
chains = input_sql_pre|schema_name_filter|output_postgres,
    input_big_gml_files|xml_assembler|transformer_xslt|output_ogr2ogr,
    input_sql_post|schema_name_filter|output_postgres
# Alternative chains for testing
#chains = input_big_gml_files|xml_assembler|transformer_xslt|output_ogr2ogr,
#    input_big_gml_files|xml_assembler|transformer_xslt|output_std,
#    input_big_gml_files|xml_assembler|transformer_xslt|output_multifile
# Pre SQL file inputs to be executed before the main ETL chain:
# drops existing tables, then creates the schema.
[input_sql_pre]
class = stetl.inputs.fileinput.StringFileInput
# Comma-separated list of SQL files
file_path = sql/drop-tables.sql,sql/create-schema.sql
# Post SQL file inputs to be executed after the main ETL chain: removes duplicates.
[input_sql_post]
class = stetl.inputs.fileinput.StringFileInput
file_path = sql/delete-duplicates.sql
# Generic filter to substitute Python-format string values like {schema} in strings
# passing through the chain (here: the SQL read by the SQL input components).
[schema_name_filter]
class = stetl.filters.stringfilter.StringSubstitutionFilter
# format args: name:value pair; {schema} is the schema name
format_args = schema:{schema}
# Output to a PostgreSQL database; used as the final component of the SQL pre/post chains.
# All connection values are substitutable arguments (curly brackets) supplied at run time.
[output_postgres]
class = stetl.outputs.dboutput.PostgresDbOutput
database = {database}
host = {host}
port = {port}
user = {user}
password = {password}
schema = {schema}
# Reads the source input file(s) given by {gml_files} and produces
# gml:featureMember elements.
[input_big_gml_files]
class = stetl.inputs.fileinput.XmlElementStreamerFileInput
file_path = {gml_files}
# Tag name of the elements to emit
element_tags = featureMember
# Assembles etree docs from gml:featureMember elements, each doc holding up to
# "max_elements" elements.
[xml_assembler]
class = stetl.filters.xmlassembler.XmlAssembler
max_elements = {max_features}
# NOTE(review): the container document below has an XML declaration and a closing
# </gml:FeatureCollection> tag but no opening tag. The opening
# <gml:FeatureCollection ...> element (with its namespace declarations) appears to
# have been lost and must be restored before this section is usable.
# Multi-line value: continuation lines must be indented.
container_doc = <?xml version="1.0" encoding="UTF-8"?>
    </gml:FeatureCollection >
# Tag name of the element in container_doc that receives the assembled elements
element_container_tag = FeatureCollection
# Transforms each doc into simple feature data (single geometry, single attrs)
# using the XSLT script below.
[transformer_xslt]
class = stetl.filters.xsltfilter.XsltFilter
script = top10-split_v1.1.1.xsl
# The ogr2ogr command-line, may use any output here, as long as
# the input is a GML file. The "temp_file" is where etree-docs
# are saved. It has to be the same file as in the ogr2ogr command.
# TODO: find a way to use a GML-stream through stdin to ogr2ogr
[output_ogr2ogr]
class = stetl.outputs.ogroutput.Ogr2OgrOutput
temp_file = {temp_dir}/top10-tmp.gml
gfs_file = top10-v1.1.1.gfs
# lco will only be added to ogr2ogr on first run
# spatial_extent translates to -spat xmin ymin xmax ymax
spatial_extent = {spatial_extent}
# NOTE(review): temp_file is not named in the command below; presumably the
# output component appends it when invoking ogr2ogr - confirm.
# Multi-line value: continuation lines must be indented so they are read as
# part of ogr2ogr_cmd rather than as separate keys.
ogr2ogr_cmd = ogr2ogr
    -f PostgreSQL
    "PG:dbname={database} host={host} port={port} user={user} password={password} active_schema={schema}"
    -gt 65536
    -a_srs epsg:28992
    -s_srs epsg:28992
    --config PG_USE_COPY YES
# Validator for XML: validates each doc against an XSD (optional chain step).
[xml_schema_validator]
class = stetl.filters.xmlvalidator.XmlSchemaValidator
# NOTE(review): xsd is empty; the schema location (the top10nl XSD) must be
# filled in before this component is added to a chain.
xsd =
enabled = True
# Below: alternative outputs for testing
# Send to stdout
[output_std]
class = stetl.outputs.standardoutput.StandardXmlOutput
# Send to a single output file.
# NOTE(review): section name inferred from the component class; it is not
# referenced by any chain in this file - confirm the intended name.
[output_file]
class = stetl.outputs.fileoutput.FileOutput
file_path = output/top10nl-fc.gml
# Output multiple files ala Top10 file chunks GML
# Use numbering as in file expression (%03d in file_path).
[output_multifile]
class = stetl.outputs.fileoutput.MultiFileOutput
file_path = output/top10nl-%03d.gml
# End of configuration.