From 2e2a22a2f21e6b1acf7744f964a12ae5229b8b55 Mon Sep 17 00:00:00 2001 From: darinyu-coursera Date: Tue, 17 Nov 2015 10:57:06 -0800 Subject: [PATCH 1/3] allow new step to take in script_arguments --- dataduct/steps/load_reload_pk.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/dataduct/steps/load_reload_pk.py b/dataduct/steps/load_reload_pk.py index b35b1c0..d96820f 100644 --- a/dataduct/steps/load_reload_pk.py +++ b/dataduct/steps/load_reload_pk.py @@ -21,8 +21,8 @@ class LoadReloadAndPrimaryKeyStep(ETLStep): def __init__(self, id, input_node, staging_table_definition, production_table_definition, pipeline_name, - analyze_table=True, non_transactional=False, - log_to_s3=False, **kwargs): + script_arguments=None, analyze_table=True, + non_transactional=False, log_to_s3=False, **kwargs): """Constructor for the LoadReloadAndPrimaryKeyStep class Args: @@ -39,7 +39,8 @@ def __init__(self, id, input_node, staging_table_definition, # they support all the parameters create_and_load_pipeline_object = self.create_and_load_redshift( table_definition=staging_table_definition, - input_node=input_node + input_node=input_node, + script_arguments=script_arguments ) reload_pipeline_object = self.reload( @@ -139,7 +140,10 @@ def reload(self, source, destination, depends_on, return reload_pipeline_object - def create_and_load_redshift(self, table_definition, input_node): + def create_and_load_redshift(self, table_definition, + input_node, script_arguments): + if not script_arguments: + script_arguments = list() table = self.get_table_from_def(table_definition) if isinstance(input_node, dict): From d562a69ab682c0898760e9ecbdc46d20fc22982f Mon Sep 17 00:00:00 2001 From: darinyu-coursera Date: Tue, 17 Nov 2015 10:59:44 -0800 Subject: [PATCH 2/3] fix script_arguments override --- dataduct/steps/load_reload_pk.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dataduct/steps/load_reload_pk.py b/dataduct/steps/load_reload_pk.py index 
d96820f..5472ac5 100644 --- a/dataduct/steps/load_reload_pk.py +++ b/dataduct/steps/load_reload_pk.py @@ -151,10 +151,10 @@ def create_and_load_redshift(self, table_definition, else: input_paths = [input_node.path().uri] - script_arguments = [ + script_arguments.extend([ '--table_definition=%s' % table.sql().sql(), '--s3_input_paths' - ] + ]) script_arguments.extend(input_paths) steps_path = os.path.abspath(os.path.dirname(__file__)) From b456b133c1cd49b3bc92f1c249e572e88420208d Mon Sep 17 00:00:00 2001 From: darinyu-coursera Date: Tue, 17 Nov 2015 11:49:56 -0800 Subject: [PATCH 3/3] Add documentation for new step --- docs/steps.rst | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/docs/steps.rst b/docs/steps.rst index ff26a1e..e7e83f9 100644 --- a/docs/steps.rst +++ b/docs/steps.rst @@ -307,6 +307,46 @@ Example - step_type: create-load-redshift table_definition: tables/dev.example_table.sql +Load, Reload, Primary Key Check +---------------------------------- + +Combine ``create-load-redshift``, ``reload`` and ``primary-key-check`` into one single step. + +Properties +^^^^^^^^^^ + +- ``staging_table_definition``: Intermediate staging schema file for the table to be loaded into. + (Required) +- ``production_table_definition``: Production schema file for the table to be reloaded into. + (Required) +- ``script_arguments``: Arguments for the runner. + + - ``--max_error``: The maximum number of errors to be ignored during + the load. Usage: ``--max_error=5`` + - ``--replace_invalid_char``: Character to replace non-utf8 + characters with. Usage: ``--replace_invalid_char='?'`` + - ``--no_escape``: If passed, does not escape special characters. + Usage: ``--no_escape`` + - ``--gzip``: If passed, compresses the output with gzip. Usage: + ``--gzip`` + - ``--command_options``: A custom SQL string as the options for the + copy command. 
Usage: ``--command_options="DELIMITER '\t'"`` + + - Note: If ``--command_options`` is passed, script arguments + ``--max_error``, ``--replace_invalid_char``, ``--no_escape``, + and ``--gzip`` have no effect. + +Example +^^^^^^^ + +:: + + - step_type: load-reload-pk + staging_table_definition: tables/staging.example_table.sql + production_table_definition: tables/dev.example_table.sql + script_arguments: + - "--foo=bar" + Upsert -------------------------