Skip to content

Commit

Permalink
Prettify code. Add docs for macros, seed
Browse files Browse the repository at this point in the history
  • Loading branch information
jtcohen6 committed Mar 27, 2020
1 parent 0dea017 commit b13977b
Show file tree
Hide file tree
Showing 6 changed files with 104 additions and 4 deletions.
13 changes: 13 additions & 0 deletions data/snowplow_seeds.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# dbt schema file (version 2) documenting the seed data shipped with this
# package.
version: 2

seeds:
  - name: country_codes
    # Folded block scalar: the lines below are joined into one paragraph.
    description: >
      English names for countries based on their two-letter ISO code, which is
      stored in the `geo_country` column of `snowplow_page_views` and
      `snowplow_sessions`. Not directly used in any of the snowplow package's
      sessionization logic.
    columns:
      - name: name
      - name: two_letter_iso_code
      - name: three_letter_iso_code
2 changes: 1 addition & 1 deletion macros/adapters/is_adapter.sql
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{% macro is_adapter(adapter='default') %}

{% if adapter == 'default' %}
{% set adapters = ['redshift', 'snowflake', 'postgres'] %}
{% set adapters = ['postgres', 'redshift', 'snowflake'] %}
{% elif adapter is string %}
{% set adapters = [adapter] %}
{% else %}
Expand Down
87 changes: 87 additions & 0 deletions macros/snowplow_macros.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# dbt schema file (version 2) documenting this package's macros.
version: 2

# One list entry per documented macro.
macros:

# Macro entry: user-agent patterns used to flag probable bot page views.
- name: bot_any
  description: >
    List of text values to check if user agents are `like`. If matched,
    the page view is probably by a bot visitor.
# Macro entry: timezone conversion, with default and Postgres syntax.
- name: convert_timezone
  description: >
    Adapter macro that supports default and Postgres syntax for converting
    timestamps from one timezone into another.
  arguments:
  - name: in_tz
    type: string
    description: time zone the timestamp is currently in
  - name: out_tz
    type: string
    description: time zone of the output
  - name: in_timestamp
    type: timestamp
    description: timestamp to convert

# Macro entry: builds the "max of a field" query used by `get_start_ts`.
- name: get_max_sql
  description: >
    Basically, `select max(`field`) from {{relation}}`.
    Used in `get_start_ts`.
  arguments:
  - name: relation
  - name: field

# Macro entry: executes `get_max_sql` and returns the parsed result.
- name: get_most_recent_record
  description: >
    Runs `get_max_sql` using `run_query` and parses the result.
    Used in `get_start_ts`.
  arguments:
  - name: relation
  - name: field

# Macro entry: start timestamp/date for an incremental run.
- name: get_start_ts
  # Folded block scalar: the lines below are joined into one paragraph.
  description: >
    Determine the start timestamp/date for this incremental run based on the
    max timestamp/date in the current version of this table.
    In the default implementation (Postgres/Redshift/Snowflake), this
    resolves to a subquery (`select max(timestamp) from {{this}}`).
    On BigQuery, the method for executing this macro depends on the
    incremental strategy.
    *Merge:* Uses `run_query` to get the result of
    `select max(partition_field) from {{this}}`,
    and feeds the result into the model SQL.
    *Insert overwrite*: If user supplies `partitions` config, resolves to
    the `least()` of all static partition values. Otherwise, resolves to
    `_dbt_max_partition` or `date(_dbt_max_partition)`, which dbt will generate
    as part of the materialization script.
  arguments:
  - name: relation
    type: relation
    description: always pass `this`
  - name: field
    type: string
    # Literal block (`|`) keeps one bullet per line so the list renders as a
    # list; a folded block (`>`) would join the bullets into a single line.
    description: |
      - name of the timestamp/date column to get max value
      - default value: `collector_tstamp`
      - not needed on Bigquery, which always uses the model's configured partition field
# Macro entry: enables or disables models based on the run's `target.type`.
- name: is_adapter
  description: >
    Determine whether a model should be enabled depending on the `target.type`
    of the current run. Returns `true` or `false`. All `default` models run on
    Postgres, Redshift, and Snowflake. All `bigquery` models run on BigQuery.
  arguments:
  - name: adapter
    type: string
    # Must be quoted: a plain scalar starting with `*` is parsed as a YAML
    # alias to an undefined anchor and fails to load.
    description: '*default* or *bigquery*'

# Macro entry: Snowflake-specific timestamp handling; passthrough by default.
- name: timestamp_ntz
  description: >
    Adapter macro that supports Snowflake's good-yet-eccentric behavior
    around timestamps and timezones. The default implementation returns the
    input unchanged.
  arguments:
  - name: field
    type: timestamp
    description: timestamp to return as `timestampntz`
2 changes: 1 addition & 1 deletion models/identification/bigquery/snowplow_id_map.sql
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ with all_events as (
select * from {{ ref('snowplow_base_events') }}

{% if is_incremental() %}
where DATE(collector_tstamp) >= {{get_start_ts(this)}}
where date(collector_tstamp) >= {{get_start_ts(this)}}
{% endif %}

),
Expand Down
2 changes: 1 addition & 1 deletion models/page_views/bigquery/snowplow_page_views.sql
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ with all_events as (

{% if is_incremental() %}

where DATE(collector_tstamp) >=
where date(collector_tstamp) >=
date_sub(
{{get_start_ts(this)}},
interval {{var('snowplow:page_view_lookback_days')}} day
Expand Down
2 changes: 1 addition & 1 deletion models/sessions/bigquery/snowplow_sessions_tmp.sql
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ with all_page_views as (
select * from {{ ref('snowplow_page_views') }}

{% if is_incremental() %}
where DATE(page_view_start) >= {{get_start_ts(this)}}
where date(page_view_start) >= {{get_start_ts(this)}}
{% endif %}

),
Expand Down

0 comments on commit b13977b

Please sign in to comment.