From f12df79a8e5836960179427c956a6b7c3b1ea63c Mon Sep 17 00:00:00 2001
From: Siarhei Nekhviadovich
Date: Thu, 14 Apr 2022 08:31:38 +0200
Subject: [PATCH 1/5] Athena / Presto support for get tables by pattern

---
 macros/sql/get_tables_by_pattern_sql.sql | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/macros/sql/get_tables_by_pattern_sql.sql b/macros/sql/get_tables_by_pattern_sql.sql
index 4d5a8fc9..88a11439 100644
--- a/macros/sql/get_tables_by_pattern_sql.sql
+++ b/macros/sql/get_tables_by_pattern_sql.sql
@@ -16,6 +16,30 @@
 
 {% endmacro %}
 
+{#
+    Athena implementation of get_tables_by_pattern_sql.
+
+    Case-insensitive matching is done by lower-casing both sides of a plain
+    `like`, so the patterns keep SQL LIKE wildcard semantics (`%`, `_`)
+    instead of being interpreted as regular expressions.
+#}
+{% macro athena__get_tables_by_pattern_sql(schema_pattern, table_pattern, exclude='', database=target.database) %}
+
+        select distinct
+            table_schema as "table_schema",
+            table_name as "table_name",
+            {{ dbt_utils.get_table_types_sql() }}
+        from {{ database }}.information_schema.tables
+        where lower(table_schema) like lower('{{ schema_pattern }}')
+        and lower(table_name) like lower('{{ table_pattern }}')
+        and lower(table_name) not like lower('{{ exclude }}')
+
+{% endmacro %}
+
+{# presto reuses the athena implementation instead of duplicating the query #}
+{% macro presto__get_tables_by_pattern_sql(schema_pattern, table_pattern, exclude='', database=target.database) %}
+    {{ return(dbt_utils.athena__get_tables_by_pattern_sql(schema_pattern, table_pattern, exclude, database)) }}
+{% endmacro %}
 
 {% macro bigquery__get_tables_by_pattern_sql(schema_pattern, table_pattern, exclude='', database=target.database) %}
 

From 45e8fcd7eaa46a494cfebafc733c3c643858a83a Mon Sep 17 00:00:00 2001
From: Siarhei Nekhviadovich
Date: Thu, 14 Apr 2022 09:52:55 +0200
Subject: [PATCH 2/5] Updated Changelog

---
 CHANGELOG.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e8e4bbe8..a1e5320d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,11 @@
+# dbt-utils v0.8.next
+
+## Fixes
+- `get_tables_by_pattern_sql()` support of Athena / Presto adapters ([#546](https://github.com/dbt-labs/dbt-utils/issues/546))
+
+## Contributors:
+- [@SOVALINUX](https://github.com/SOVALINUX) (#546)
+
 # dbt-utils v0.8.3
 ## New features
 - A macro for deduplicating data, `deduplicate()` ([#335](https://github.com/dbt-labs/dbt-utils/issues/335), [#512](https://github.com/dbt-labs/dbt-utils/pull/512))

From 31cb6ddefcbf80bd23d58bd3b04de6d0fb047b96 Mon Sep 17 00:00:00 2001
From: Siarhei Nekhviadovich
Date: Fri, 15 Apr 2022 15:39:25 +0200
Subject: [PATCH 3/5] Not Null Proportion support for Athena / Presto

---
 macros/generic_tests/not_null_proportion.sql | 47 ++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/macros/generic_tests/not_null_proportion.sql b/macros/generic_tests/not_null_proportion.sql
index 45b9050c..2263018a 100644
--- a/macros/generic_tests/not_null_proportion.sql
+++ b/macros/generic_tests/not_null_proportion.sql
@@ -24,3 +24,50 @@ select
 from validation_errors
 
 {% endmacro %}
+
+{% macro athena__test_not_null_proportion(model) %}
+
+{% set column_name = kwargs.get('column_name', kwargs.get('arg')) %}
+{% set at_least = kwargs.get('at_least', kwargs.get('arg')) %}
+{% set at_most = kwargs.get('at_most', kwargs.get('arg', 1)) %}
+
+with validation as (
+  select
+    sum(case when {{ column_name }} is null then 0 else 1 end) / cast(count(*) as double) as not_null_proportion
+  from {{ model }}
+),
+validation_errors as (
+  select
+    not_null_proportion
+  from validation
+  where not_null_proportion < {{ at_least }} or not_null_proportion > {{ at_most }}
+)
+select
+  *
+from validation_errors
+
+{% endmacro %}
+
+{% macro presto__test_not_null_proportion(model) %}
+
+{% set column_name = kwargs.get('column_name', kwargs.get('arg')) %}
+{% set at_least = kwargs.get('at_least', kwargs.get('arg')) %}
+{% set at_most = kwargs.get('at_most', kwargs.get('arg', 1)) %}
+
+with validation as (
+  select
+    sum(case when {{ column_name }} is null then 0 else 1 end) / cast(count(*) as double) as not_null_proportion
+  from {{ model }}
+),
+validation_errors as (
+  select
+    not_null_proportion
+  from validation
+  where not_null_proportion < {{ at_least }} or not_null_proportion > {{ at_most }}
+)
+select
+  *
+from validation_errors
+
+{% endmacro %}
+

From d1d21ea400f10aa78dd7cf0f36ff5b4507bda351 Mon Sep 17 00:00:00 2001
From: Siarhei Nekhviadovich
Date: Fri, 15 Apr 2022 15:55:01 +0200
Subject: [PATCH 4/5] Data Types and test fixes around numeric type and Athena / Presto adapters

---
 CHANGELOG.md                                 |  4 +-
 macros/cross_db_utils/datatypes.sql          |  9 ++++
 macros/generic_tests/not_null_proportion.sql | 48 +-------------------
 3 files changed, 13 insertions(+), 48 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a1e5320d..ad04ce3e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,9 +2,11 @@
 
 ## Fixes
 - `get_tables_by_pattern_sql()` support of Athena / Presto adapters ([#546](https://github.com/dbt-labs/dbt-utils/issues/546))
+- `not_null_proportion` and `datatype numeric` support of Athena / Presto adapters ([#553](https://github.com/dbt-labs/dbt-utils/issues/553))
 
 ## Contributors:
-- [@SOVALINUX](https://github.com/SOVALINUX) (#546)
+- [@SOVALINUX](https://github.com/SOVALINUX) (#546, #553)
+
 
 # dbt-utils v0.8.3
 ## New features
diff --git a/macros/cross_db_utils/datatypes.sql b/macros/cross_db_utils/datatypes.sql
index f115b4e2..40bf2e7a 100644
--- a/macros/cross_db_utils/datatypes.sql
+++ b/macros/cross_db_utils/datatypes.sql
@@ -69,6 +69,15 @@
     numeric
 {% endmacro %}
 
+{# athena / presto: exact fixed-point type (28 digits, 6 after the point) rather than approximate double #}
+{% macro athena__type_numeric() %}
+    decimal(28, 6)
+{% endmacro %}
+
+{% macro presto__type_numeric() %}
+    decimal(28, 6)
+{% endmacro %}
+
 
 {# bigint ------------------------------------------------- #}
 
diff --git a/macros/generic_tests/not_null_proportion.sql b/macros/generic_tests/not_null_proportion.sql
index 2263018a..f78de820 100644
--- a/macros/generic_tests/not_null_proportion.sql
+++ b/macros/generic_tests/not_null_proportion.sql
@@ -10,53 +10,7 @@
 
 with validation as (
   select
-    sum(case when {{ column_name }} is null then 0 else 1 end) / cast(count(*) as numeric) as not_null_proportion
-  from {{ model }}
-),
-validation_errors as (
-  select
-    not_null_proportion
-  from validation
-  where not_null_proportion < {{ at_least }} or not_null_proportion > {{ at_most }}
-)
-select
-  *
-from validation_errors
-
-{% endmacro %}
-
-{% macro athena__test_not_null_proportion(model) %}
-
-{% set column_name = kwargs.get('column_name', kwargs.get('arg')) %}
-{% set at_least = kwargs.get('at_least', kwargs.get('arg')) %}
-{% set at_most = kwargs.get('at_most', kwargs.get('arg', 1)) %}
-
-with validation as (
-  select
-    sum(case when {{ column_name }} is null then 0 else 1 end) / cast(count(*) as double) as not_null_proportion
-  from {{ model }}
-),
-validation_errors as (
-  select
-    not_null_proportion
-  from validation
-  where not_null_proportion < {{ at_least }} or not_null_proportion > {{ at_most }}
-)
-select
-  *
-from validation_errors
-
-{% endmacro %}
-
-{% macro presto__test_not_null_proportion(model) %}
-
-{% set column_name = kwargs.get('column_name', kwargs.get('arg')) %}
-{% set at_least = kwargs.get('at_least', kwargs.get('arg')) %}
-{% set at_most = kwargs.get('at_most', kwargs.get('arg', 1)) %}
-
-with validation as (
-  select
-    sum(case when {{ column_name }} is null then 0 else 1 end) / cast(count(*) as double) as not_null_proportion
+    sum(case when {{ column_name }} is null then 0 else 1 end) / {{ dbt_utils.safe_cast('count(*)', dbt_utils.type_numeric() ) }} as not_null_proportion
   from {{ model }}
 ),
 validation_errors as (

From e8c90d0fa5b7cf0bd0395b48497680d135cc506b Mon Sep 17 00:00:00 2001
From: Siarhei Nekhviadovich
Date: Fri, 15 Apr 2022 23:54:45 +0200
Subject: [PATCH 5/5] Current Timestamp and Dateadd fixes for Athena / Presto

---
 macros/cross_db_utils/current_timestamp.sql |  8 ++++++++
 macros/cross_db_utils/dateadd.sql           | 18 ++++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/macros/cross_db_utils/current_timestamp.sql b/macros/cross_db_utils/current_timestamp.sql
index 66ad8dc6..01b9f5c5 100644
--- a/macros/cross_db_utils/current_timestamp.sql
+++ b/macros/cross_db_utils/current_timestamp.sql
@@ -14,6 +14,14 @@
     current_timestamp
 {% endmacro %}
 
+{% macro athena__current_timestamp() %}
+    now()
+{% endmacro %}
+
+{% macro presto__current_timestamp() %}
+    now()
+{% endmacro %}
+
 
 
 {% macro current_timestamp_in_utc() -%}
diff --git a/macros/cross_db_utils/dateadd.sql b/macros/cross_db_utils/dateadd.sql
index 09c0f115..43d6d6f4 100644
--- a/macros/cross_db_utils/dateadd.sql
+++ b/macros/cross_db_utils/dateadd.sql
@@ -35,3 +35,21 @@
     {{ return(dbt_utils.default__dateadd(datepart, interval, from_date_or_timestamp)) }}
 
 {% endmacro %}
+
+{# the unit is emitted as a string literal; single quotes already present in datepart are stripped first #}
+{% macro athena__dateadd(datepart, interval, from_date_or_timestamp) %}
+
+    date_add(
+        '{{ datepart | replace("'", "") }}',
+        {{ interval }},
+        {{ from_date_or_timestamp }}
+    )
+
+{% endmacro %}
+
+{# presto reuses the athena implementation instead of duplicating it #}
+{% macro presto__dateadd(datepart, interval, from_date_or_timestamp) %}
+
+    {{ return(dbt_utils.athena__dateadd(datepart, interval, from_date_or_timestamp)) }}
+
+{% endmacro %}