Skip to content

Commit

Permalink
Upgrade to 0.16.0 BQ incremental syntax
Browse files (browse the repository at this point in the history)
  • Loading branch information
jtcohen6 committed Feb 24, 2020
1 parent a1574ec commit 3041f15
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 20 deletions.
2 changes: 2 additions & 0 deletions dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ analysis-paths: ["analysis"]
data-paths: ["data"]
macro-paths: ["macros"]

require-dbt-version: ">=0.16.0"

models:
snowplow:
base:
Expand Down
14 changes: 14 additions & 0 deletions integration_tests/dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,17 @@ models:
'snowplow:context:useragent': FALSE
'snowplow:pass_through_columns': ['test_add_col']

seeds:
sp_event_update:

seeds:
snowplow_integration_tests:
snowplow:
sp_event_update:
column_types:
collector_tstamp: timestamp
derived_tstamp: timestamp
sp_event:
column_types:
collector_tstamp: timestamp
derived_tstamp: timestamp
18 changes: 12 additions & 6 deletions macros/adapters/bigquery/identification/snowplow_id_map.sql
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,25 @@
{{
config(
materialized='incremental',
partition_by='DATE(max_tstamp)',
partition_by={
'field': 'max_tstamp',
'data_type': 'timestamp'
},
unique_key="domain_userid"
)
}}

{% set start_date = get_most_recent_record(this, "max_tstamp", "2001-01-01") %}

with all_events as (

select *
from {{ ref('snowplow_base_events') }}
where DATE(collector_tstamp) >= date_sub('{{ start_date }}', interval 1 day)

{% if is_incremental() %}
where DATE(collector_tstamp) >= date_sub(
DATE(_dbt_max_partition),
interval {{var('snowplow:page_view_lookback_days')}} day
)
{% endif %}

),

Expand All @@ -29,7 +36,6 @@ new_sessions as (
domain_sessionid

from all_events
where DATE(collector_tstamp) >= '{{ start_date }}'

),

Expand Down Expand Up @@ -61,7 +67,7 @@ prep as (
rows between unbounded preceding and unbounded following
) as user_id,

max(timestamp(collector_tstamp)) over (
max(collector_tstamp) over (
partition by domain_userid
) as max_tstamp

Expand Down
18 changes: 10 additions & 8 deletions macros/adapters/bigquery/pageviews/snowplow_page_views.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@
{{
config(
materialized='incremental',
partition_by='DATE(page_view_start)',
partition_by={
'field': 'page_view_start',
'data_type': 'timestamp'
},
unique_key="page_view_id"
)
}}

{% set timezone = var('snowplow:timezone', 'UTC') %}
{% set start_date = get_most_recent_record(this, "page_view_start", "2001-01-01") %}

/*
General approach: find sessions that happened since the last time
Expand All @@ -27,9 +29,12 @@ with all_events as (
select *
from {{ ref('snowplow_base_events') }}

-- load up events from the start date, and the day before it, to ensure
-- that we capture pageviews that span midnight
where DATE(collector_tstamp) >= date_sub('{{ start_date }}', interval 1 day)
{% if is_incremental() %}
where DATE(collector_tstamp) >= date_sub(
DATE(_dbt_max_partition),
interval {{var('snowplow:page_view_lookback_days')}} day
)
{% endif %}

),

Expand All @@ -40,9 +45,6 @@ new_sessions as (

from all_events

-- only consider events for sessions that occurred on or after the start_date
where DATE(collector_tstamp) >= '{{ start_date }}'

),

relevant_events as (
Expand Down
5 changes: 4 additions & 1 deletion macros/adapters/bigquery/sessions/snowplow_sessions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@
{{
config(
materialized='table',
partition_by='DATE(session_start)'
partition_by={
'field': 'session_start',
'data_type': 'timestamp'
}
)
}}

Expand Down
16 changes: 11 additions & 5 deletions macros/adapters/bigquery/sessions/snowplow_sessions_tmp.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,24 @@
{{
config(
materialized='incremental',
partition_by='DATE(session_start)',
partition_by={
'field': 'session_start',
'data_type': 'timestamp'
},
unique_key="session_id"
)
}}

{% set start_date = get_most_recent_record(this, "session_start", "2001-01-01") %}

with all_page_views as (

select * from {{ ref('snowplow_page_views') }}
where DATE(page_view_start) >= date_sub('{{ start_date }}', interval 1 day)

{% if is_incremental() %}
where DATE(page_view_start) >= date_sub(
DATE(_dbt_max_partition),
interval {{var('snowplow:page_view_lookback_days')}} day
)
{% endif %}

),

Expand All @@ -24,7 +31,6 @@ new_page_views as (
session_id

from all_page_views
where DATE(page_view_start) >= '{{ start_date }}'

),

Expand Down

0 comments on commit 3041f15

Please sign in to comment.