Skip to content

Commit

Permalink
Parse options and update documentation (#26)
Browse files Browse the repository at this point in the history
* rename  project

* parse dict options with str values

* parse column_data_retention option

* fix example project

* Documentation update

* v1.5.26
  • Loading branch information
tanyshak authored Jul 26, 2023
1 parent 8737678 commit 3564f4b
Show file tree
Hide file tree
Showing 11 changed files with 521 additions and 469 deletions.
2 changes: 1 addition & 1 deletion dbt/adapters/upsolver/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
version = "1.5.25"
version = "1.5.26"
28 changes: 28 additions & 0 deletions dbt/adapters/upsolver/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,12 +88,36 @@ def render_option_from_dict(self, option_value):
item.append(' ,'.join(value))
item.append(')')
else:
item.append("'") if key.lower() == 'column' else False
item.append(value)
item.append("'") if key.lower() == 'column' else False
res.append(''.join(item))
return f"({' ,'.join(res)})"
except Exception:
raise dbt.exceptions.ParsingError(f"Error while parsing value: {value}. Expected type: dictionary")

def render_option_from_dict_str(self, option_value):
    """Render a dict of string options as an Upsolver option clause.

    Each key/value pair becomes key='value' (the value is always
    single-quoted), and pairs are joined with ' ,' to produce e.g.
    "(INSERT_TIME='CURRENT_TIMESTAMP()' ,MY_VALUE='x')".

    :param option_value: mapping of option names to string values.
    :returns: the rendered "(k='v' ,k='v')" string.
    :raises dbt.exceptions.ParsingError: if option_value is not a mapping
        of renderable values.
    """
    try:
        # NOTE: the ' ,' separator (space before comma) matches the other
        # render_option_* helpers in this class — keep it consistent.
        rendered = [f"{key}='{value}'" for key, value in option_value.items()]
        return f"({' ,'.join(rendered)})"
    except Exception:
        # Report the whole input: `value` from the comprehension may be
        # unbound here (e.g. when option_value is not a dict), which
        # previously raised NameError instead of ParsingError.
        raise dbt.exceptions.ParsingError(
            f"Error while parsing value: {option_value}. Expected type: dictionary"
        )

def render_option_from_list_dict(self, option_value):
    """Render a list of option dicts as an Upsolver option clause.

    Each element is rendered with render_option_from_dict and the results
    are joined with ' ,', producing e.g. "((COLUMN='c' ,DURATION='2 DAYS'))".
    Used for list_dict-typed options such as column_data_retention.

    :param option_value: iterable of dicts, each a single option group.
    :returns: the rendered "((...) ,(...))" string.
    :raises dbt.exceptions.ParsingError: if option_value is not an iterable
        of renderable dicts.
    """
    try:
        rendered = [self.render_option_from_dict(item) for item in option_value]
        return f"({' ,'.join(rendered)})"
    except Exception:
        # Report the whole input: the old message interpolated the loop
        # variable, which is unbound when option_value itself is not
        # iterable (NameError), and wrongly claimed "dictionary" for what
        # is a list-of-dictionaries option.
        raise dbt.exceptions.ParsingError(
            f"Error while parsing value: {option_value}. Expected type: list of dictionaries"
        )

def render_option_from_list(self, option_value):
try:
if isinstance(option_value, list) and len(option_value) > 1:
Expand All @@ -114,6 +138,10 @@ def enrich_options(self, config_options, source, options_type):
value = self.render_option_from_list(value)
elif options[option.lower()]['type'] == 'dict':
value = self.render_option_from_dict(value)
elif options[option.lower()]['type'] == 'dict_str':
value = self.render_option_from_dict_str(value)
elif options[option.lower()]['type'] == 'list_dict':
value = self.render_option_from_list_dict(value)
enriched_options[option] = find_value
enriched_options[option]['value'] = value
else:
Expand Down
221 changes: 0 additions & 221 deletions dbt/adapters/upsolver/options/connection_properties.md

This file was deleted.

221 changes: 221 additions & 0 deletions dbt/adapters/upsolver/options/dbt_docs/connection_properties.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,25 @@

def write_header(options_header, file):
file.write(f"\n\n## {options_header}\n\n")
file.write("| Option | Storage | Type | Editable | Optional | Config Syntax |\n")
file.write("| --------| --------- | ---- | -------- | -------- | ------------- |\n")
file.write("| Option | Storage | Editable | Optional | Config Syntax |\n")
file.write("| -------| --------- | -------- | -------- | ------------- |\n")

def write_options_to_md(options_category, options_header, file):
write_header(options_header, file)
for key_con, value in options_category.items():
for key, value in value.items():
#formated_description = (' '.join(value.get('description', '').split())).replace('[\\t\\n\\r]+',' ')
md_file.write(f"| {key} | {key_con} | {value['type']} | {value['editable']} | {value['optional']} | {value.get('syntax', '')} |\n")
md_file.write(f"| {key} | {key_con} | {value['editable']} | {value['optional']} | {value.get('syntax', '')} |\n")

def write_copy_options_to_md(options_category, options_header, file):
file.write(f"\n\n## {options_header}\n\n")
file.write("| Option | Storage | Category | Type | Editable | Optional | Config Syntax |\n")
file.write("| -------| ---------- | -------- | -----| -------- | -------- | ------------- |\n")
file.write("| Option | Storage | Category | Editable | Optional | Config Syntax |\n")
file.write("| -------| ---------- | -------- | -------- | -------- | ------------- |\n")
count = 0
for key_con, value_con in options_category.items():
for key_job, value_job in value_con.items():
for key, value in value_job.items():
md_file.write(f"| {key} | {key_con} | {key_job} | {value['type']} | {value['editable']} | {value['optional']} | {value.get('syntax', '')} |\n")
md_file.write(f"| {key} | {key_con} | {key_job} | {value['editable']} | {value['optional']} | {value.get('syntax', '')} |\n")

with open('connection_properties.md', 'w') as md_file:
write_options_to_md(Connection_options, 'Connection options', md_file)
Expand Down
454 changes: 249 additions & 205 deletions dbt/adapters/upsolver/options/dbt_docs/upsolver-configs.md

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions dbt/adapters/upsolver/options/target_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@
For example, if the current time is 2023-02-23 12:30:00 UTC, and you have defined TABLE_DATA_RETENTION = 2 days, you can expect data written during 2023-02-23, 2023-02-22, and 2023-02-21 to exist in the table. The retention threshold truncates data to the nearest day, so when the time changes to 2023-02-24 00:00:00 UTC, you can no longer expect data from 2023-02-21 to be present in the table, although it might be there for a while.
Note that you need at least one date partition column for this option to work.
Value: <integer> DAYS"""},
"column_data_retention": {"type": "dict", "editable": True, "optional": True,
"syntax":"'column_data_retention': {'COLUMN' : '<column>','DURATION': N DAYS}",
"column_data_retention": {"type": "list_dict", "editable": True, "optional": True,
"syntax":"'column_data_retention': ({'COLUMN' : '<column>','DURATION': '<N DAYS>'})",
"description":"""When set, after the duration of a column elapses in a partition, the data is rewritten without the contents of that column. Number of days can range between 1 and 9999.
Note that you need at least one date partition column for this to work.
Type: list of (<column_name>, <integer> DAYS) pairs"""},
Expand Down
44 changes: 12 additions & 32 deletions dbt/adapters/upsolver/options/transformation_options.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,12 @@
Transformation_options = {
"s3": {
"run_interval": {"type": "ineger", "editable": False, "optional": True,
"run_interval": {"type": "integer", "editable": False, "optional": True,
"syntax":"'run_interval': '<N MINUTES/HOURS/DAYS>'",
"description":"""How often the job runs.
The runs take place over a set period of time defined by this interval and they must be divisible by the number of hours in a day.
For example, you can set RUN_INTERVAL to 2 hours (the job runs 12 times per day), but trying to set RUN_INTERVAL to 5 hours would fail since 24 hours is not evenly divisible by 5.RUN_INTERVAL
Value: <integer> { MINUTE[S] | HOUR[S] | DAY[S] }
Default: 1 MINUTE
(Optional) How often the job runs.
The runs take place over a set period of time defined by this interval and they must be divisible by the number of hours in a day.
For example, you can set RUN_INTERVAL to 2 hours (the job runs 12 times per day), but trying to set RUN_INTERVAL to 5 hours would fail since 24 hours is not evenly divisible by 5.
Value: <integer> { MINUTE[S] | HOUR[S] | DAY[S] }"""},
Default: 1 MINUTE"""},
"start_from": {"type": "value", "editable": False, "optional": True,
"syntax":"'start_from': '<timestamp>/NOW/BEGINNING'",
"description":"""Configures the time to start inserting data from. Data before the specified time is ignored.
Expand Down Expand Up @@ -84,17 +80,13 @@
Usually, it's recommended to include padding to ensure alphabetical sorting of the output files."""}
},
"elasticsearch": {
"run_interval": {"type": "identifier", "editable": False, "optional": True,
"run_interval": {"type": "integer", "editable": False, "optional": True,
"syntax":"'run_interval': '<N MINUTES/HOURS/DAYS>'",
"description":"""How often the job runs.
The runs take place over a set period of time defined by this interval and they must be divisible by the number of hours in a day.
For example, you can set RUN_INTERVAL to 2 hours (the job runs 12 times per day), but trying to set RUN_INTERVAL to 5 hours would fail since 24 hours is not evenly divisible by 5.RUN_INTERVAL
Value: <integer> { MINUTE[S] | HOUR[S] | DAY[S] }
Default: 1 MINUTE
(Optional) How often the job runs.
The runs take place over a set period of time defined by this interval and they must be divisible by the number of hours in a day.
For example, you can set RUN_INTERVAL to 2 hours (the job runs 12 times per day), but trying to set RUN_INTERVAL to 5 hours would fail since 24 hours is not evenly divisible by 5.
Value: <integer> { MINUTE[S] | HOUR[S] | DAY[S] }"""},
Default: 1 MINUTE"""},
"start_from": {"type": "value", "editable": False, "optional": True,
"syntax":"'start_from': '<timestamp>/NOW/BEGINNING'",
"description":"""Configures the time to start inserting data from. Data before the specified time is ignored.
Expand Down Expand Up @@ -145,13 +137,13 @@
"description":"""A description or comment regarding this job."""}
},
"snowflake": {
"custom_insert_expressions": {"type": "dict", "editable": True, "optional": True,
"custom_insert_expressions": {"type": "dict_str", "editable": True, "optional": True,
"syntax":"'custom_insert_expressions': {'INSERT_TIME' : 'CURRENT_TIMESTAMP()','MY_VALUE': '<value>'}",
"description":""" Configure a list of custom expression transformations to apply to the value of each column when inserting unmatched (new) rows. Note this is only used in Merge Jobs.
Note: You can use {} as a placeholder for the mapped value from the select statement.
Type: array[(column, expression)]
Default: ()"""},
"custom_update_expressions": {"type": "dict", "editable": True, "optional": True,
"custom_update_expressions": {"type": "dict_str", "editable": True, "optional": True,
"syntax":"'custom_update_expressions': {'UPDATE_TIME' : 'CURRENT_TIMESTAMP()','MY_VALUE': '<value>'}",
"description":"""Configure a list of custom expression transformations to apply to the value of each column when updating matched rows. Note this is only used in Merge Jobs.
Note: You can use {} as a placeholder for the mapped value from the select statement.
Expand All @@ -166,17 +158,13 @@
"description":"""When true, columns that don't exist in the target table are added automatically when encountered.
When false, you cannot do SELECT * within the SELECT statement of your transformation job.
Default: false"""},
"run_interval": {"type": "identifier", "editable": False, "optional": True,
"run_interval": {"type": "integer", "editable": False, "optional": True,
"syntax":"'run_interval': '<N MINUTES/HOURS/DAYS>'",
"description":"""How often the job runs.
The runs take place over a set period of time defined by this interval and they must be divisible by the number of hours in a day.
For example, you can set RUN_INTERVAL to 2 hours (the job runs 12 times per day), but trying to set RUN_INTERVAL to 5 hours would fail since 24 hours is not evenly divisible by 5.RUN_INTERVAL
Value: <integer> { MINUTE[S] | HOUR[S] | DAY[S] }
Default: 1 MINUTE
(Optional) How often the job runs.
The runs take place over a set period of time defined by this interval and they must be divisible by the number of hours in a day.
For example, you can set RUN_INTERVAL to 2 hours (the job runs 12 times per day), but trying to set RUN_INTERVAL to 5 hours would fail since 24 hours is not evenly divisible by 5.
Value: <integer> { MINUTE[S] | HOUR[S] | DAY[S] }"""},
Default: 1 MINUTE"""},
"start_from": {"type": "value", "editable": False, "optional": True,
"syntax":"'start_from': '<timestamp>/NOW/BEGINNING'",
"description":"""Configures the time to start inserting data from. Data before the specified time is ignored.
Expand Down Expand Up @@ -223,17 +211,13 @@
"description":""" When true, columns that don't exist in the target table are added automatically when encountered.
When false, you cannot do SELECT * within the SELECT statement of your transformation job.
Default: false"""},
"run_interval": {"type": "identifier", "editable": False, "optional": True,
"run_interval": {"type": "integer", "editable": False, "optional": True,
"syntax":"'run_interval': '<N MINUTES/HOURS/DAYS>'",
"description":"""How often the job runs.
The runs take place over a set period of time defined by this interval and they must be divisible by the number of hours in a day.
For example, you can set RUN_INTERVAL to 2 hours (the job runs 12 times per day), but trying to set RUN_INTERVAL to 5 hours would fail since 24 hours is not evenly divisible by 5.RUN_INTERVAL
Value: <integer> { MINUTE[S] | HOUR[S] | DAY[S] }
Default: 1 MINUTE
(Optional) How often the job runs.
The runs take place over a set period of time defined by this interval and they must be divisible by the number of hours in a day.
For example, you can set RUN_INTERVAL to 2 hours (the job runs 12 times per day), but trying to set RUN_INTERVAL to 5 hours would fail since 24 hours is not evenly divisible by 5.
Value: <integer> { MINUTE[S] | HOUR[S] | DAY[S] }"""},
Default: 1 MINUTE"""},
"start_from": {"type": "value", "editable": False, "optional": True,
"syntax":"'start_from': '<timestamp>/NOW/BEGINNING'",
"description":"""Configures the time to start inserting data from. Data before the specified time is ignored.
Expand Down Expand Up @@ -275,17 +259,13 @@
"description":"""A description or comment regarding this job."""}
},
"redshift": {
"run_interval": {"type": "identifier", "editable": False, "optional": True,
"run_interval": {"type": "integer", "editable": False, "optional": True,
"syntax":"'run_interval': '<N MINUTES/HOURS/DAYS>'",
"description":"""How often the job runs.
The runs take place over a set period of time defined by this interval and they must be divisible by the number of hours in a day.
For example, you can set RUN_INTERVAL to 2 hours (the job runs 12 times per day), but trying to set RUN_INTERVAL to 5 hours would fail since 24 hours is not evenly divisible by 5.RUN_INTERVAL
Value: <integer> { MINUTE[S] | HOUR[S] | DAY[S] }
Default: 1 MINUTE
(Optional) How often the job runs.
The runs take place over a set period of time defined by this interval and they must be divisible by the number of hours in a day.
For example, you can set RUN_INTERVAL to 2 hours (the job runs 12 times per day), but trying to set RUN_INTERVAL to 5 hours would fail since 24 hours is not evenly divisible by 5.
Value: <integer> { MINUTE[S] | HOUR[S] | DAY[S] }"""},
Default: 1 MINUTE"""},
"start_from": {"type": "value", "editable": False, "optional": True,
"syntax":"'start_from': '<timestamp>/NOW/BEGINNING'",
"description":"""Configures the time to start inserting data from. Data before the specified time is ignored.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@
)
}}

SELECT * FROM {{ ref('s3_connection_2') }}
SELECT * FROM {{ ref('s3_connection') }}
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ SELECT
nettotal AS total,
$event_time AS partition_date

FROM {{ ref('orders_raw_data_2')}}
FROM {{ ref('orders_raw_data')}}
LET customer_name = customer.firstname || ' ' || customer.lastname
WHERE ordertype = 'SHIPPING'
AND $event_time BETWEEN run_start_time() AND run_end_time()

0 comments on commit 3564f4b

Please sign in to comment.