# Example of process-chains for extracting Top10NL source data from GML to PostGIS.
# A Chain is a series of Components: one Input, zero or more Filters and one Output.
# The output of a Component is connected to the input of the next Component (except for
# the final Output Component, which writes to the final destination, e.g. Postgres).
#
# Currently 3 chains are executed in the following order:
# - SQL pre: DB initialization, delete tables, create schema
# - Main ETL chain, consists of the following components
# 1. input_big_gml_files: read input file(s) and output feature elements
# 2. xml_assembler: assemble feature elements into smaller (etree) docs
# 3. xml_schema_validator: validation against top10nl XSD of (etree) doc [OPTIONAL]
# 4. transformer_xslt: transform each (etree) doc
# 5. output_ogr2ogr: output using ogr2ogr, input is GML etree docs, output can be any OGR output
# - SQL post: remove duplicates
#
# Any substitutable values are specified in curly brackets e.g. {password}.
# Actual values can be passed as args to Stetl main.py or as arguments from a wrapper program
# like top10extract.py to etl.py. Here are the 3 chains:
[etl]
# Chains to execute, in the order listed. Chains are separated by commas and
# the components of one chain by "|"; each component name refers to a section
# of this file. The value spans several lines: every continuation line must be
# indented, otherwise it is not read as part of "chains".
chains = input_sql_pre|schema_name_filter|output_postgres,
    input_big_gml_files|xml_assembler|transformer_xslt|output_ogr2ogr,
    input_sql_post|schema_name_filter|output_postgres
# alternative chains for testing
#chains = input_big_gml_files|xml_assembler|transformer_xslt|output_ogr2ogr,
#    input_big_gml_files|xml_assembler|transformer_xslt|output_std,
#    input_big_gml_files|xml_assembler|transformer_xslt|output_multifile
# Pre SQL file inputs to be executed.
# Input of the first chain in [etl]. file_path names two SQL scripts separated
# by a comma: drop the tables, then create the schema.
[input_sql_pre]
class = stetl.inputs.fileinput.StringFileInput
file_path = sql/drop-tables.sql,sql/create-schema.sql
# Post SQL file input to be executed.
# Input of the last chain in [etl]: the SQL script that removes duplicates.
[input_sql_post]
class = stetl.inputs.fileinput.StringFileInput
file_path = sql/delete-duplicates.sql
# Generic filter that substitutes Python-format string values like {schema} in the
# string passing through it. Both SQL chains in [etl] use it between the SQL file
# input and the Postgres output.
[schema_name_filter]
class = stetl.filters.stringfilter.StringSubstitutionFilter
# format_args is a name:value pair; here the name "schema" is given the schema name
format_args = schema:{schema}
# Postgres output, the final component of the SQL pre and SQL post chains in [etl].
# All settings below are substitutable values in curly brackets, filled in from
# the arguments passed to Stetl.
[output_postgres]
class = stetl.outputs.dboutput.PostgresDbOutput
database = {database}
host = {host}
port = {port}
user = {user}
password = {password}
schema = {schema}
# Reads the source input file(s) and produces gml:featureMember elements.
# First component of the main ETL chain in [etl].
[input_big_gml_files]
class = stetl.inputs.fileinput.XmlElementStreamerFileInput
# the input file(s) are supplied through a substitutable argument
file_path = {gml_files}
# name of the element to extract from the input
element_tags = featureMember
# Assembles etree docs from gml:featureMember elements, each doc holding at most
# "max_elements" elements.
[xml_assembler]
class = stetl.filters.xmlassembler.XmlAssembler
max_elements = {max_features}
# container_doc is the empty XML document that the feature elements are placed in.
# It is a multi-line value: every continuation line must be indented, otherwise
# lines such as xmlns:xsi=... are parsed as separate keys of this section.
container_doc = <?xml version="1.0" encoding="UTF-8"?>
    <gml:FeatureCollection
        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xmlns:xs="http://www.w3.org/2001/XMLSchema"
        xmlns:top10nl="http://www.kadaster.nl/schemas/top10nl/v20120116"
        xmlns:gml="http://www.opengis.net/gml"
        xmlns:xlink="http://www.w3.org/1999/xlink"
        xmlns:smil20="http://www.w3.org/2001/SMIL20/"
        xmlns:smil20lang="http://www.w3.org/2001/SMIL20/Language"
        xsi:schemaLocation="http://www.kadaster.nl/schemas/top10nl/v20120116 http://www.kadaster.nl/schemas/top10nl/v20120116/TOP10NL_1_1_1.xsd">
    </gml:FeatureCollection >
# tag of the element in container_doc that receives the assembled elements
element_container_tag = FeatureCollection
# Transforms each (etree) doc into simple feature data (single geometry, single attrs).
[transformer_xslt]
class = stetl.filters.xsltfilter.XsltFilter
# the XSLT stylesheet to apply
script = top10-split_v1.1.1.xsl
# The ogr2ogr command-line, may use any output here, as long as
# the input is a GML file. The "temp_file" is where etree-docs
# are saved. It has to be the same file as in the ogr2ogr command.
# TODO: find a way to use a GML-stream through stdin to ogr2ogr
[output_ogr2ogr]
class = stetl.outputs.ogroutput.Ogr2OgrOutput
temp_file = {temp_dir}/top10-tmp.gml
gfs_file = top10-v1.1.1.gfs
# lco will only be added to ogr2ogr on first run
lco = -lco LAUNDER=YES -lco PRECISION=NO
# spatial_extent translates to -spat xmin ymin xmax ymax
spatial_extent = {spatial_extent}
# The command is a multi-line value: every continuation line must be indented,
# otherwise the options below are not read as part of "ogr2ogr_cmd".
# The GML file argument must stay identical to "temp_file" above.
ogr2ogr_cmd = ogr2ogr
    -append
    -f PostgreSQL
    "PG:dbname={database} host={host} port={port} user={user} password={password} active_schema={schema}"
    -gt 65536
    -a_srs epsg:28992
    -s_srs epsg:28992
    {temp_dir}/top10-tmp.gml
    {multi_opts}
    --config PG_USE_COPY YES
# Optional validator for XML: validates an (etree) doc against the top10nl XSD.
# NOTE: no chain in [etl] references this component; to use it, add it to a chain
# (the file header lists it between xml_assembler and transformer_xslt).
[xml_schema_validator]
class = stetl.filters.xmlvalidator.XmlSchemaValidator
# location of the XSD to validate against
xsd = http://www.kadaster.nl/schemas/top10nl/v20120116/TOP10NL_1_1_1.xsd
enabled = True
# Below: alternative outputs for testing.
# Send to stdout (referenced by the commented-out alternative chains in [etl]).
[output_std]
class = stetl.outputs.standardoutput.StandardXmlOutput
# Write to the single file named by file_path.
# NOTE: no chain in this file, active or commented-out, references this output.
[output_file]
class = stetl.outputs.fileoutput.FileOutput
file_path = output/top10nl-fc.gml
# Output multiple files a la Top10 file chunks GML.
# Use numbering as in the file expression: the %03d in file_path is
# presumably replaced by a zero-padded file counter (confirm against MultiFileOutput).
[output_multifile]
class = stetl.outputs.fileoutput.MultiFileOutput
file_path = output/top10nl-%03d.gml