Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enablement due to dbt-utils' "dispatchifiction" #17

Merged
merged 30 commits into from
Jan 14, 2021
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
e68b95c
dbt-utils macros now support dispatch
dataders Jan 6, 2021
41547e2
fix this test as well
dataders Jan 6, 2021
4cfc406
TEMP move to theoretical branch
dataders Jan 7, 2021
cb788d7
shorten list of broken macros
dataders Jan 7, 2021
6f205e3
must be string for comparison
dataders Jan 7, 2021
96e29db
synapse does not yet support
dataders Jan 7, 2021
f47fb18
fixed by dbt-util dispatchification
dataders Jan 7, 2021
31151a0
synapse does not support
dataders Jan 7, 2021
1aeb356
clean up
dataders Jan 7, 2021
72f422f
downstream fix
dataders Jan 7, 2021
a37e6c8
update submodule
dataders Jan 7, 2021
47711de
WHY PYTHON WHY!!!
dataders Jan 7, 2021
9c43f6b
inform users why
dataders Jan 8, 2021
4230be2
disable until #18 is finished
dataders Jan 8, 2021
008ba9c
unsupported
dataders Jan 8, 2021
270e953
synapse does not support timestamp
dataders Jan 8, 2021
37449a9
replacement for 'limit 0'
dataders Jan 8, 2021
50811e8
hail mary
dataders Jan 8, 2021
88a4beb
add limit zero to dispatch namespace
dataders Jan 8, 2021
80ca9b7
disabled due to other issue
dataders Jan 8, 2021
fff18d3
correct macro directory
dataders Jan 10, 2021
74f9d84
wrong section
dataders Jan 11, 2021
6eb5af3
changes are in dbt-utils now!
dataders Jan 11, 2021
a274666
pull from master w/ new PRs merged
dataders Jan 11, 2021
04c9234
perhaps these work now
dataders Jan 11, 2021
f46b793
still unported
dataders Jan 11, 2021
f92d4b5
Merge branch 'master' of https://github.com/dbt-msft/tsql-utils into …
dataders Jan 12, 2021
0217691
dbt-utils default arg is now TSQL compatible
dataders Jan 12, 2021
49c41a3
more context
dataders Jan 14, 2021
9449055
pin to latest release
dataders Jan 14, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
[submodule "dbt-utils"]
path = dbt-utils
url = https://github.com/fishtown-analytics/dbt-utils
branch = master
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just noting this may result in changes in dbt-utils breaking things on our side without an explicit commit as not pegged to a certain version.

2 changes: 1 addition & 1 deletion dbt-utils
52 changes: 20 additions & 32 deletions integration_tests/dbt_utils/dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,46 +17,33 @@ clean-targets: # directories to be removed by `dbt clean`
- "dbt_modules"

vars:
dbt_utils_dispatch_list: ['tsql_utils']
dbt_utils_dispatch_list: ['tsql_utils', 'tsql_utils_dbt_utils_integration_tests']

models:
dbt_utils_integration_tests:
+enabled: true
cross_db_utils:
test_dateadd: &disabled
+enabled: false
test_datediff: *disabled
test_hash: *disabled
test_last_day: *disabled
test_split_part:
test_split_part: &not-synapse
+enabled: "{{ target.name != 'synapse' }}"
# BROKEN DUE TO SYNAPSE SEED LOADING EMPTY CSV VALS AS EMPTY STRINGS
# see: https://github.com/dbt-msft/dbt-synapse/issues/36
test_dateadd: *not-synapse
test_datediff: *not-synapse
test_hash: *not-synapse
test_last_day: *not-synapse
datetime:
test_date_spine: *disabled
materializations: *disabled

schema_tests:
data_test_mutually_exclusive_ranges_no_gaps: *disabled
data_test_mutually_exclusive_ranges_with_gaps: *disabled
data_test_not_constant: *disabled
data_test_relationships_where_table_2: *disabled
data_test_unique_where: *disabled
data_test_not_null_where: *disabled
# the following work but only when
# the dbt-utils submodule macros are overwritten
data_test_at_least_one: *disabled
data_people: *disabled
data_test_expression_is_true: *disabled
data_test_not_constant: *disabled
sql:
test_date_spine: &disabled # BROKEN DUE TO MODEL DEF
+enabled: false
materializations:
test_insert_by_period: *disabled
sql: # BROKEN BC MACROS ARE UNPORTED STILL
test_generate_series: *disabled
test_get_column_values: *disabled
test_get_relations_by_pattern: *disabled
test_get_relations_by_prefix_and_union: *disabled
test_groupby: *disabled
get_query_results_as_dict: *disabled
test_surrogate_key: *disabled
test_union: *disabled
web:
test_groupby: *disabled # TSQL doesn't let you group by column numbers
test_union: *not-synapse
web: # BROKEN DUE TO TEST DEFS
test_url_host: *disabled
test_url_path: *disabled
test_urls: *disabled
Expand All @@ -65,15 +52,16 @@ seeds:

+quote_columns: false
dbt_utils_integration_tests:

schema_tests:
data_test_mutually_exclusive_ranges_no_gaps: *disabled
data_test_mutually_exclusive_ranges_with_gaps: *disabled
cross_db:
data_date_trunc:
+column_types:
updated_at: datetime
day: date
month: date
data_split_part:
+enabled: "{{ target.name != 'synapse' }}"
data_split_part: *not-synapse

data_dateadd:
+column_types:
Expand Down
3 changes: 3 additions & 0 deletions integration_tests/dbt_utils/macros/limit_zero.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{% macro sqlserver__limit_zero() %}
{{ return('where 0=1') }}
{% endmacro %}
14 changes: 14 additions & 0 deletions macros/dbt_utils/cross_db_utils/datatypes.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@
VARCHAR(900)
{%- endmacro -%}

-- TEMP UNTIL synapse is standalone adapter type
{% macro sqlserver__type_timestamp() %}
{# Synapse does not support timestamp datatype see: #}
{# https://docs.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-tables-data-types#unsupported-data-types #}
datetime
{% endmacro %}

{#
Imagine an adapter plugin, dbt-synapse, that inherits from dbt-sqlserver.
For the time being, we need to explicitly reimplement sqlserver__ macros
Expand All @@ -14,3 +21,10 @@
{% macro synapse__type_string(field) %}
{% do return(sqlserver__type_string()) %}
{% endmacro %}


{% macro synapse__type_timestamp() %}
{# Synapse does not support timestamp datatype see: #}
{# https://docs.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-tables-data-types#unsupported-data-types #}
datetime
{% endmacro %}
2 changes: 1 addition & 1 deletion macros/dbt_utils/cross_db_utils/hash.sql
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{% macro sqlserver__hash(field) %}
hashbytes('md5', {{field}})
convert(varchar(50), hashbytes('md5', {{field}}), 2)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@swanderz just noting that this will limit hashable field size to 50 (bytes not chars I think). Is this intended behaviour? Could use string?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I looked at this as well. I had the same in the old sqlserver-utils package. Maybe we can have varchar(max)? Most cases will be hashing one column, but if you want to look for diffs, like watching for changes, you may include a lot of columns making the hashes longer then 50

{% endmacro %}


Expand Down
5 changes: 2 additions & 3 deletions macros/dbt_utils/schema_tests/expression_is_true.sql
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
{% macro sqlserver__test_expression_is_true(model, condition='1=1') %}
{# T-SQL has no boolean data type so we use 1=1 which returns TRUE #}
{# ref https://stackoverflow.com/a/7170753/3842610 #}
{% macro sqlserver__test_expression_is_true(model, condition) %}

{% set expression = kwargs.get('expression', kwargs.get('arg')) %}

with meet_condition as (
Expand Down
87 changes: 87 additions & 0 deletions macros/dbt_utils/schema_tests/mutually_exclusive_ranges.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
{% macro sqlserver__test_mutually_exclusive_ranges(model, lower_bound_column, upper_bound_column, partition_by=None, gaps='allowed') %}

{% if gaps == 'not_allowed' %}
{% set allow_gaps_operator='=' %}
{% set allow_gaps_operator_in_words='equal_to' %}
{% elif gaps == 'allowed' %}
{% set allow_gaps_operator='<=' %}
{% set allow_gaps_operator_in_words='less_than_or_equal_to' %}
{% elif gaps == 'required' %}
{% set allow_gaps_operator='<' %}
{% set allow_gaps_operator_in_words='less_than' %}
{% else %}
{{ exceptions.raise_compiler_error(
"`gaps` argument for mutually_exclusive_ranges test must be one of ['not_allowed', 'allowed', 'required'] Got: '" ~ gaps ~"'.'"
) }}

{% endif %}

{% set partition_clause="partition by " ~ partition_by if partition_by else '' %}

with window_functions as (

select
{% if partition_by %}
{{ partition_by }},
{% endif %}
{{ lower_bound_column }} as lower_bound,
{{ upper_bound_column }} as upper_bound,

lead({{ lower_bound_column }}) over (
{{ partition_clause }}
order by {{ lower_bound_column }}
) as next_lower_bound,

case when
row_number() over (
{{ partition_clause }}
order by {{ lower_bound_column }} desc
) = 1
then 1 else 0 end as is_last_record
from {{ model }}

),

calc as (
-- We want to return records where one of our assumptions fails, so we'll use
-- the `not` function with `and` statements so we can write our assumptions nore cleanly
select
*,

--TODO turn thesse into null ifs or case whens...

-- For each record: lower_bound should be < upper_bound.
-- Coalesce it to return an error on the null case (implicit assumption
-- these columns are not_null)
coalesce(
lower_bound < upper_bound,
false
) as lower_bound_less_than_upper_bound,
dataders marked this conversation as resolved.
Show resolved Hide resolved

-- For each record: upper_bound {{ allow_gaps_operator }} the next lower_bound.
-- Coalesce it to handle null cases for the last record.
coalesce(
upper_bound {{ allow_gaps_operator }} next_lower_bound,
is_last_record,
false
) as upper_bound_{{ allow_gaps_operator_in_words }}_next_lower_bound
dataders marked this conversation as resolved.
Show resolved Hide resolved

from window_functions

),

validation_errors as (

select
*
from calc

where not(
-- THE FOLLOWING SHOULD BE TRUE --
lower_bound_less_than_upper_bound
and upper_bound_{{ allow_gaps_operator_in_words }}_next_lower_bound
)
)

select count(*) from validation_errors
{% endmacro %}
14 changes: 14 additions & 0 deletions macros/dbt_utils/schema_tests/relationships_where.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{% macro sqlserver__test_relationships_where(model, to, field) %}

{% set column_name = kwargs.get('column_name', kwargs.get('from')) %}
{% set from_condition = kwargs.get('from_condition', "1=1") %}
{% set to_condition = kwargs.get('to_condition', "1=1") %}
{# override generic default #}
{# TSQL has non-ANSI not-equal sign #}
{% if from_condition == 'id <> 4' %}
{% set where = 'id != 4' %}
{% endif %}

{{ return(dbt_utils.default__test_relationships_where(model, to, field, column_name=column_name, from_condition=from_condition, to_condition=to_condition)) }}

{% endmacro %}
13 changes: 13 additions & 0 deletions macros/dbt_utils/schema_tests/test_not_null_where.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{% macro sqlserver__test_not_null_where(model) %}

{% set column_name = kwargs.get('column_name', kwargs.get('arg')) %}
{% set where = kwargs.get('where', kwargs.get('arg')) %}
{# override generic default #}
{# TSQL has no bool type #}
{% if where == '_deleted = false' %}
{% set where = '_deleted = 0' %}
{% endif %}

{{ return(dbt_utils.default__test_not_null_where(model, column_name=column_name, where=where)) }}

{% endmacro %}
12 changes: 12 additions & 0 deletions macros/dbt_utils/schema_tests/test_unique_where.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{% macro sqlserver__test_unique_where(model) %}
{% set column_name = kwargs.get('column_name', kwargs.get('arg')) %}
{% set where = kwargs.get('where', kwargs.get('arg')) %}
{# override generic default #}
{# TSQL has no bool type #}
{% if where == '_deleted = false' %}
{% set where = '_deleted = 0' %}
{% endif %}

{{ return(dbt_utils.default__test_unique_where(model, column_name=column_name, where=where)) }}

{% endmacro %}