Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Illustration of migration from raw code to dbtplyr (persistent visual diff for related blog post; not to be merged) #2

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
@@ -1,4 +1,4 @@

dbt_packages/
target/
dbt_modules/
logs/
4 changes: 2 additions & 2 deletions dbt_project.yml
Expand Up @@ -12,10 +12,10 @@ profile: 'default'
# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that models in this project can be
# found in the "models/" directory. You probably won't need to change these!
source-paths: ["models"]
model-paths: ["models"]
analysis-paths: ["analysis"]
test-paths: ["tests"]
data-paths: ["data"]
seed-paths: ["data"]
macro-paths: ["macros"]
snapshot-paths: ["snapshots"]

Expand Down
Empty file removed macros/.gitkeep
Empty file.
20 changes: 0 additions & 20 deletions macros/get_column_names.sql

This file was deleted.

12 changes: 0 additions & 12 deletions macros/get_matches.sql

This file was deleted.

19 changes: 0 additions & 19 deletions macros/macro.yml

This file was deleted.

40 changes: 13 additions & 27 deletions models/model_monitor.sql
@@ -1,6 +1,6 @@
{{
config(
materialized='incremental',
materialized='view',
unique_key='id',
partition_by={
"field": "dt_county",
Expand All @@ -10,35 +10,21 @@
)
}}

{% set cols = get_column_names( ref('model_monitor_staging') ) %}
{% set cols_n = get_matches(cols, '^n_.*') %}
{% set cols_dt = get_matches(cols, '^dt_.*') %}
{% set cols_prop = get_matches(cols, '^prop_.*') %}
{% set cols_ind = get_matches(cols, '^ind_.*') %}
{% set cols_oth = cols
| reject('in', cols_n)
| reject('in', cols_dt)
| reject('in', cols_prop)
| reject('in', cols_ind) %}
{# Classify the staging model's columns by naming-convention prefix so that
   each group can receive its own transformation in the select list.
   Each prefix includes the trailing underscore so that only convention-named
   columns match: with a bare 'n' prefix a column such as `name` would be
   picked up and cast to int64. This also keeps the model consistent with the
   project's tests, which select on 'n_' and 'prop_'. #}
{% set cols = dbtplyr.get_column_names( ref('model_monitor_staging') ) %}
{% set cols_n = dbtplyr.starts_with('n_', cols) %}
{% set cols_dt = dbtplyr.starts_with('dt_', cols) %}
{% set cols_prop = dbtplyr.starts_with('prop_', cols) %}
{% set cols_ind = dbtplyr.starts_with('ind_', cols) %}
{# Union of every column that matched one of the typed prefixes above. #}
{% set cols_class = cols_n + cols_dt + cols_prop + cols_ind %}
{# Whatever is left over carries no typed prefix. #}
{% set cols_oth = dbtplyr.not_one_of(cols_class, cols) %}

select

{%- for c in cols_oth %}
{{c}},
{% endfor -%}
{%- for c in cols_n %}
cast({{c}} as int64) as {{c}},
{% endfor %}
{%- for c in cols_dt %}
date({{c}}) as {{c}},
{% endfor -%}
{%- for c in cols_prop %}
round({{c}}, 3) as {{c}},
{% endfor -%}
{%- for c in cols_ind %}
coalesce({{c}}, 0) as {{c}}
{% if not loop.last %},{% endif %}
{% endfor -%}
-- Select list: one dbtplyr.across call per column group.
-- NOTE(review): the literal commas between calls assume every group renders
-- at least one column; an empty group would presumably leave a dangling
-- comma. Confirm against the staging schema.
-- Columns without a typed prefix; no template is passed, so they are
-- presumably emitted unchanged (verify against the across macro default).
{{ dbtplyr.across(cols_oth) }},
-- Count columns are cast to int64 and keep their original name.
{{ dbtplyr.across(cols_n, "cast({{var}} as int64) as {{var}}") }},
-- Date columns are passed through date() and keep their original name.
{{ dbtplyr.across(cols_dt, "date({{var}}) as {{var}}")}},
-- Proportion columns are rounded to 3 decimal places.
{{ dbtplyr.across(cols_prop, "round({{var}}, 3) as {{var}}")}},
-- Indicator columns have nulls replaced with 0.
{{ dbtplyr.across(cols_ind, "coalesce({{var}}, 0) as {{var}}")}}

from {{ ref('model_monitor_staging') }}

Expand Down
6 changes: 4 additions & 2 deletions packages.yml
@@ -1,3 +1,5 @@
packages:
- package: fishtown-analytics/dbt_utils
version: 0.6.4
- package: dbt-labs/dbt_utils
version: 0.9.1
- package: emilyriederer/dbtplyr
version: 0.3.1
12 changes: 4 additions & 8 deletions tests/final_n_gt_zero.sql
@@ -1,12 +1,8 @@
{% set cols = get_column_names( ref('model_monitor') ) %}
{% set cols_n = get_matches(cols, '^n_.*') %}
{% set cols = dbtplyr.get_column_names( ref('model_monitor') ) %}
{% set cols_n = dbtplyr.starts_with('n_', cols) %}

select *
from {{ ref('model_monitor') }}
where
{%- for c in cols_n %} ({{c}} < 0) or
{% endfor %}
FALSE



{# Fail (return rows) when any count column is negative.
   If no column matches the prefix, fall back to a constant FALSE so the
   test returns no rows; appending `or FALSE` to an empty expansion would
   render an invalid `where or FALSE`. #}
{% if cols_n %}
{{ dbtplyr.if_any(cols_n, "{{var}} < 0") }}
{% else %}
FALSE
{% endif %}
13 changes: 5 additions & 8 deletions tests/final_n_not_null.sql
@@ -1,12 +1,9 @@
{% set cols = get_column_names( ref('model_monitor') ) %}
{% set cols_n = get_matches(cols, '^n_.*') %}
{% set cols = dbtplyr.get_column_names( ref('model_monitor') ) %}
{% set cols_n = dbtplyr.starts_with('n_', cols) %}

select *
from {{ ref('model_monitor') }}
where
{%- for c in cols_n %} ({{c}} is null) or
{% endfor %}
FALSE



{# Fail (return rows) when any count column is null.
   If no column matches the prefix, fall back to a constant FALSE so the
   test returns no rows; appending `or FALSE` to an empty expansion would
   render an invalid `where or FALSE`. #}
{% if cols_n %}
{{ dbtplyr.if_any(cols_n, "{{var}} is null") }}
{% else %}
FALSE
{% endif %}

10 changes: 4 additions & 6 deletions tests/final_prop_0_1.sql
@@ -1,10 +1,8 @@
{% set cols = get_column_names( ref('model_monitor') ) %}
{% set cols_n = get_matches(cols, '^prop_.*') %}
{% set cols = dbtplyr.get_column_names( ref('model_monitor') ) %}
{% set cols_prop = dbtplyr.starts_with('prop_', cols) %}

select *
from {{ ref('model_monitor') }}
where
{%- for c in cols_n %} ({{c}} < 0 or {{c}} > 1) or
{% endfor %}
FALSE

{# Fail (return rows) when any proportion column falls outside [0, 1].
   If no column matches the prefix, fall back to a constant FALSE so the
   test returns no rows; appending `or FALSE` to an empty expansion would
   render an invalid `where or FALSE`. #}
{% if cols_prop %}
{{ dbtplyr.if_any(cols_prop, "{{var}} < 0 or {{var}} > 1") }}
{% else %}
FALSE
{% endif %}
9 changes: 4 additions & 5 deletions tests/staging_n_int_like.sql
@@ -1,9 +1,8 @@
{% set cols = get_column_names( ref('model_monitor_staging') ) %}
{% set cols_n = get_matches(cols, '^n_.*') %}
{% set cols = dbtplyr.get_column_names( ref('model_monitor_staging') ) %}
{% set cols_n = dbtplyr.starts_with('n_', cols) %}

select *
from {{ ref('model_monitor_staging') }}
where
{%- for c in cols_n %} abs({{c}} - cast({{c}} as int64)) > 0.01 or
{% endfor %}
FALSE
{# Fail (return rows) when any staging count column differs from its int64
   cast by more than 0.01, i.e. the value is not integer-like.
   If no column matches the prefix, fall back to a constant FALSE so the
   test returns no rows; appending `or FALSE` to an empty expansion would
   render an invalid `where or FALSE`. #}
{% if cols_n %}
{{ dbtplyr.if_any(cols_n, "abs( {{var}} - cast({{var}} as int64) ) > 0.01") }}
{% else %}
FALSE
{% endif %}