Skip to content

Commit

Permalink
Parse options and update documentation (#26)
Browse files Browse the repository at this point in the history
* rename  project

* parse dict options with str values

* parse column_data_retention option

* fix example project

* Documentation update

* v1.5.26
  • Loading branch information
tanyshak authored Jul 26, 2023
1 parent 8737678 commit 3564f4b
Show file tree
Hide file tree
Showing 11 changed files with 521 additions and 469 deletions.
2 changes: 1 addition & 1 deletion dbt/adapters/upsolver/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
version = "1.5.25"
version = "1.5.26"
28 changes: 28 additions & 0 deletions dbt/adapters/upsolver/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,12 +88,36 @@ def render_option_from_dict(self, option_value):
item.append(' ,'.join(value))
item.append(')')
else:
item.append("'") if key.lower() == 'column' else False
item.append(value)
item.append("'") if key.lower() == 'column' else False
res.append(''.join(item))
return f"({' ,'.join(res)})"
except Exception:
raise dbt.exceptions.ParsingError(f"Error while parsing value: {value}. Expected type: dictionary")

def render_option_from_dict_str(self, option_value):
    """Render a dict of string options as an Upsolver option clause.

    Each key/value pair becomes key='value' (the value is always
    single-quoted), and pairs are joined with ' ,' to produce e.g.
    "(INSERT_TIME='CURRENT_TIMESTAMP()' ,MY_VALUE='x')".

    :param option_value: mapping of option names to string values.
    :returns: the rendered "(k='v' ,k='v')" string.
    :raises dbt.exceptions.ParsingError: if option_value is not a mapping
        of renderable values.
    """
    try:
        # NOTE: the ' ,' separator (space before comma) matches the other
        # render_option_* helpers in this class — keep it consistent.
        rendered = [f"{key}='{value}'" for key, value in option_value.items()]
        return f"({' ,'.join(rendered)})"
    except Exception:
        # Report the whole input: `value` from the comprehension may be
        # unbound here (e.g. when option_value is not a dict), which
        # previously raised NameError instead of ParsingError.
        raise dbt.exceptions.ParsingError(
            f"Error while parsing value: {option_value}. Expected type: dictionary"
        )

def render_option_from_list_dict(self, option_value):
    """Render a list of option dicts as an Upsolver option clause.

    Each element is rendered with render_option_from_dict and the results
    are joined with ' ,', producing e.g. "((COLUMN='c' ,DURATION='2 DAYS'))".
    Used for list_dict-typed options such as column_data_retention.

    :param option_value: iterable of dicts, each a single option group.
    :returns: the rendered "((...) ,(...))" string.
    :raises dbt.exceptions.ParsingError: if option_value is not an iterable
        of renderable dicts.
    """
    try:
        rendered = [self.render_option_from_dict(item) for item in option_value]
        return f"({' ,'.join(rendered)})"
    except Exception:
        # Report the whole input: the old message interpolated the loop
        # variable, which is unbound when option_value itself is not
        # iterable (NameError), and wrongly claimed "dictionary" for what
        # is a list-of-dictionaries option.
        raise dbt.exceptions.ParsingError(
            f"Error while parsing value: {option_value}. Expected type: list of dictionaries"
        )

def render_option_from_list(self, option_value):
try:
if isinstance(option_value, list) and len(option_value) > 1:
Expand All @@ -114,6 +138,10 @@ def enrich_options(self, config_options, source, options_type):
value = self.render_option_from_list(value)
elif options[option.lower()]['type'] == 'dict':
value = self.render_option_from_dict(value)
elif options[option.lower()]['type'] == 'dict_str':
value = self.render_option_from_dict_str(value)
elif options[option.lower()]['type'] == 'list_dict':
value = self.render_option_from_list_dict(value)
enriched_options[option] = find_value
enriched_options[option]['value'] = value
else:
Expand Down
221 changes: 0 additions & 221 deletions dbt/adapters/upsolver/options/connection_properties.md

This file was deleted.

221 changes: 221 additions & 0 deletions dbt/adapters/upsolver/options/dbt_docs/connection_properties.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,25 @@

def write_header(options_header, file):
file.write(f"\n\n## {options_header}\n\n")
file.write("| Option | Storage | Type | Editable | Optional | Config Syntax |\n")
file.write("| --------| --------- | ---- | -------- | -------- | ------------- |\n")
file.write("| Option | Storage | Editable | Optional | Config Syntax |\n")
file.write("| -------| --------- | -------- | -------- | ------------- |\n")

def write_options_to_md(options_category, options_header, file):
write_header(options_header, file)
for key_con, value in options_category.items():
for key, value in value.items():
#formated_description = (' '.join(value.get('description', '').split())).replace('[\\t\\n\\r]+',' ')
md_file.write(f"| {key} | {key_con} | {value['type']} | {value['editable']} | {value['optional']} | {value.get('syntax', '')} |\n")
md_file.write(f"| {key} | {key_con} | {value['editable']} | {value['optional']} | {value.get('syntax', '')} |\n")

def write_copy_options_to_md(options_category, options_header, file):
file.write(f"\n\n## {options_header}\n\n")
file.write("| Option | Storage | Category | Type | Editable | Optional | Config Syntax |\n")
file.write("| -------| ---------- | -------- | -----| -------- | -------- | ------------- |\n")
file.write("| Option | Storage | Category | Editable | Optional | Config Syntax |\n")
file.write("| -------| ---------- | -------- | -------- | -------- | ------------- |\n")
count = 0
for key_con, value_con in options_category.items():
for key_job, value_job in value_con.items():
for key, value in value_job.items():
md_file.write(f"| {key} | {key_con} | {key_job} | {value['type']} | {value['editable']} | {value['optional']} | {value.get('syntax', '')} |\n")
md_file.write(f"| {key} | {key_con} | {key_job} | {value['editable']} | {value['optional']} | {value.get('syntax', '')} |\n")

with open('connection_properties.md', 'w') as md_file:
write_options_to_md(Connection_options, 'Connection options', md_file)
Expand Down
454 changes: 249 additions & 205 deletions dbt/adapters/upsolver/options/dbt_docs/upsolver-configs.md

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions dbt/adapters/upsolver/options/target_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@
For example, if the current time is 2023-02-23 12:30:00 UTC, and you have defined TABLE_DATA_RETENTION = 2 days, you can expect data written during 2023-02-23, 2023-02-22, and 2023-02-21 to exist in the table. The retention threshold truncates data to the nearest day, so when the time changes to 2023-02-24 00:00:00 UTC, you can no longer expect data from 2023-02-21 to be present in the table, although it might be there for a while.
Note that you need at least one date partition column for this option to work.
Value: <integer> DAYS"""},
"column_data_retention": {"type": "dict", "editable": True, "optional": True,
"syntax":"'column_data_retention': {'COLUMN' : '<column>','DURATION': N DAYS}",
"column_data_retention": {"type": "list_dict", "editable": True, "optional": True,
"syntax":"'column_data_retention': ({'COLUMN' : '<column>','DURATION': '<N DAYS>'})",
"description":"""When set, after the duration of a column elapses in a partition, the data is rewritten without the contents of that column. Number of days can range between 1 and 9999.
Note that you need at least one date partition column for this to work.
Type: list of (<column_name>, <integer> DAYS) pairs"""},
Expand Down
44 changes: 12 additions & 32 deletions dbt/adapters/upsolver/options/transformation_options.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,12 @@
Transformation_options = {
"s3": {
"run_interval": {"type": "ineger", "editable": False, "optional": True,
"run_interval": {"type": "integer", "editable": False, "optional": True,
"syntax":"'run_interval': '<N MINUTES/HOURS/DAYS>'",
"description":"""How often the job runs.
The runs take place over a set period of time defined by this interval and they must be divisible by the number of hours in a day.
For example, you can set RUN_INTERVAL to 2 hours (the job runs 12 times per day), but trying to set RUN_INTERVAL to 5 hours would fail since 24 hours is not evenly divisible by 5.RUN_INTERVAL
Value: <integer> { MINUTE[S] | HOUR[S] | DAY[S] }
Default: 1 MINUTE
(Optional) How often the job runs.
The runs take place over a set period of time defined by this interval and they must be divisible by the number of hours in a day.
For example, you can set RUN_INTERVAL to 2 hours (the job runs 12 times per day), but trying to set RUN_INTERVAL to 5 hours would fail since 24 hours is not evenly divisible by 5.
Value: <integer> { MINUTE[S] | HOUR[S] | DAY[S] }"""},
Default: 1 MINUTE"""},
"start_from": {"type": "value", "editable": False, "optional": True,
"syntax":"'start_from': '<timestamp>/NOW/BEGINNING'",
"description":"""Configures the time to start inserting data from. Data before the specified time is ignored.
Expand Down Expand Up @@ -84,17 +80,13 @@
Usually, it's recommended to include padding to ensure alphabetical sorting of the output files."""}
},
"elasticsearch": {
"run_interval": {"type": "identifier", "editable": False, "optional": True,
"run_interval": {"type": "integer", "editable": False, "optional": True,
"syntax":"'run_interval': '<N MINUTES/HOURS/DAYS>'",
"description":"""How often the job runs.
The runs take place over a set period of time defined by this interval and they must be divisible by the number of hours in a day.
For example, you can set RUN_INTERVAL to 2 hours (the job runs 12 times per day), but trying to set RUN_INTERVAL to 5 hours would fail since 24 hours is not evenly divisible by 5.RUN_INTERVAL
Value: <integer> { MINUTE[S] | HOUR[S] | DAY[S] }
Default: 1 MINUTE
(Optional) How often the job runs.
The runs take place over a set period of time defined by this interval and they must be divisible by the number of hours in a day.
For example, you can set RUN_INTERVAL to 2 hours (the job runs 12 times per day), but trying to set RUN_INTERVAL to 5 hours would fail since 24 hours is not evenly divisible by 5.
Value: <integer> { MINUTE[S] | HOUR[S] | DAY[S] }"""},
Default: 1 MINUTE"""},
"start_from": {"type": "value", "editable": False, "optional": True,
"syntax":"'start_from': '<timestamp>/NOW/BEGINNING'",
"description":"""Configures the time to start inserting data from. Data before the specified time is ignored.
Expand Down Expand Up @@ -145,13 +137,13 @@
"description":"""A description or comment regarding this job."""}
},
"snowflake": {
"custom_insert_expressions": {"type": "dict", "editable": True, "optional": True,
"custom_insert_expressions": {"type": "dict_str", "editable": True, "optional": True,
"syntax":"'custom_insert_expressions': {'INSERT_TIME' : 'CURRENT_TIMESTAMP()','MY_VALUE': '<value>'}",
"description":""" Configure a list of custom expression transformations to apply to the value of each column when inserting unmatched (new) rows. Note this is only used in Merge Jobs.
Note: You can use {} as a placeholder for the mapped value from the select statement.
Type: array[(column, expression)]
Default: ()"""},
"custom_update_expressions": {"type": "dict", "editable": True, "optional": True,
"custom_update_expressions": {"type": "dict_str", "editable": True, "optional": True,
"syntax":"'custom_update_expressions': {'UPDATE_TIME' : 'CURRENT_TIMESTAMP()','MY_VALUE': '<value>'}",
"description":"""Configure a list of custom expression transformations to apply to the value of each column when updating matched rows. Note this is only used in Merge Jobs.
Note: You can use {} as a placeholder for the mapped value from the select statement.
Expand All @@ -166,17 +158,13 @@
"description":"""When true, columns that don't exist in the target table are added automatically when encountered.
When false, you cannot do SELECT * within the SELECT statement of your transformation job.
Default: false"""},
"run_interval": {"type": "identifier", "editable": False, "optional": True,
"run_interval": {"type": "integer", "editable": False, "optional": True,
"syntax":"'run_interval': '<N MINUTES/HOURS/DAYS>'",
"description":"""How often the job runs.
The runs take place over a set period of time defined by this interval and they must be divisible by the number of hours in a day.
For example, you can set RUN_INTERVAL to 2 hours (the job runs 12 times per day), but trying to set RUN_INTERVAL to 5 hours would fail since 24 hours is not evenly divisible by 5.RUN_INTERVAL
Value: <integer> { MINUTE[S] | HOUR[S] | DAY[S] }
Default: 1 MINUTE
(Optional) How often the job runs.
The runs take place over a set period of time defined by this interval and they must be divisible by the number of hours in a day.
For example, you can set RUN_INTERVAL to 2 hours (the job runs 12 times per day), but trying to set RUN_INTERVAL to 5 hours would fail since 24 hours is not evenly divisible by 5.
Value: <integer> { MINUTE[S] | HOUR[S] | DAY[S] }"""},
Default: 1 MINUTE"""},
"start_from": {"type": "value", "editable": False, "optional": True,
"syntax":"'start_from': '<timestamp>/NOW/BEGINNING'",
"description":"""Configures the time to start inserting data from. Data before the specified time is ignored.
Expand Down Expand Up @@ -223,17 +211,13 @@
"description":""" When true, columns that don't exist in the target table are added automatically when encountered.
When false, you cannot do SELECT * within the SELECT statement of your transformation job.
Default: false"""},
"run_interval": {"type": "identifier", "editable": False, "optional": True,
"run_interval": {"type": "integer", "editable": False, "optional": True,
"syntax":"'run_interval': '<N MINUTES/HOURS/DAYS>'",
"description":"""How often the job runs.
The runs take place over a set period of time defined by this interval and they must be divisible by the number of hours in a day.
For example, you can set RUN_INTERVAL to 2 hours (the job runs 12 times per day), but trying to set RUN_INTERVAL to 5 hours would fail since 24 hours is not evenly divisible by 5.RUN_INTERVAL
Value: <integer> { MINUTE[S] | HOUR[S] | DAY[S] }
Default: 1 MINUTE
(Optional) How often the job runs.
The runs take place over a set period of time defined by this interval and they must be divisible by the number of hours in a day.
For example, you can set RUN_INTERVAL to 2 hours (the job runs 12 times per day), but trying to set RUN_INTERVAL to 5 hours would fail since 24 hours is not evenly divisible by 5.
Value: <integer> { MINUTE[S] | HOUR[S] | DAY[S] }"""},
Default: 1 MINUTE"""},
"start_from": {"type": "value", "editable": False, "optional": True,
"syntax":"'start_from': '<timestamp>/NOW/BEGINNING'",
"description":"""Configures the time to start inserting data from. Data before the specified time is ignored.
Expand Down Expand Up @@ -275,17 +259,13 @@
"description":"""A description or comment regarding this job."""}
},
"redshift": {
"run_interval": {"type": "identifier", "editable": False, "optional": True,
"run_interval": {"type": "integer", "editable": False, "optional": True,
"syntax":"'run_interval': '<N MINUTES/HOURS/DAYS>'",
"description":"""How often the job runs.
The runs take place over a set period of time defined by this interval and they must be divisible by the number of hours in a day.
For example, you can set RUN_INTERVAL to 2 hours (the job runs 12 times per day), but trying to set RUN_INTERVAL to 5 hours would fail since 24 hours is not evenly divisible by 5.RUN_INTERVAL
Value: <integer> { MINUTE[S] | HOUR[S] | DAY[S] }
Default: 1 MINUTE
(Optional) How often the job runs.
The runs take place over a set period of time defined by this interval and they must be divisible by the number of hours in a day.
For example, you can set RUN_INTERVAL to 2 hours (the job runs 12 times per day), but trying to set RUN_INTERVAL to 5 hours would fail since 24 hours is not evenly divisible by 5.
Value: <integer> { MINUTE[S] | HOUR[S] | DAY[S] }"""},
Default: 1 MINUTE"""},
"start_from": {"type": "value", "editable": False, "optional": True,
"syntax":"'start_from': '<timestamp>/NOW/BEGINNING'",
"description":"""Configures the time to start inserting data from. Data before the specified time is ignored.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@
)
}}

SELECT * FROM {{ ref('s3_connection_2') }}
SELECT * FROM {{ ref('s3_connection') }}
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ SELECT
nettotal AS total,
$event_time AS partition_date

FROM {{ ref('orders_raw_data_2')}}
FROM {{ ref('orders_raw_data')}}
LET customer_name = customer.firstname || ' ' || customer.lastname
WHERE ordertype = 'SHIPPING'
AND $event_time BETWEEN run_start_time() AND run_end_time()

0 comments on commit 3564f4b

Please sign in to comment.