BigQuery models: timestamp partitioning and is_incremental() lookback filters

Summary of what this patch does, file by file:

  * dbt_project.yml
      Adds require-dbt-version ">=0.16.0".

  * integration_tests/dbt_project.yml
      Declares column_types for the sp_event_update and sp_event seeds so that
      collector_tstamp and derived_tstamp are loaded as timestamp. The seeds
      key is declared exactly once; an earlier revision of this patch also
      carried an empty, duplicate top-level "seeds:" stub, which is dropped.

  * snowplow_id_map.sql, snowplow_page_views.sql, snowplow_sessions_tmp.sql
      partition_by moves from the string form 'DATE(col)' to the dict form
      {'field': col, 'data_type': 'timestamp'}.
      The start_date / get_most_recent_record() lookup is removed. Instead,
      only when is_incremental() is true, rows are restricted to
      DATE(_dbt_max_partition) minus a lookback expressed in days.
      The lookback is read from var 'snowplow:page_view_lookback_days' and
      falls back to 1, the interval that was hard-coded before this patch, so
      projects that have not set the var keep the previous window.
      The second start_date filter (new_sessions / new_page_views) is removed.
      In snowplow_id_map.sql, max_tstamp no longer wraps collector_tstamp in
      timestamp().
      NOTE(review): _dbt_max_partition is not defined anywhere in this patch;
      it is expected to be supplied by dbt's BigQuery incremental
      materialization. Confirm for the incremental strategy in use.

  * snowplow_sessions.sql
      partition_by moves to the dict form (materialized='table', no filter).

Caveats:
  * Leading indentation inside the hunks was reconstructed and may not match
    the target files byte for byte.
  * The index lines are carried over from the originally generated patch; the
    post-image blob ids no longer correspond to the files adjusted here.

diff --git a/dbt_project.yml b/dbt_project.yml
index 963c066..4375b40 100644
--- a/dbt_project.yml
+++ b/dbt_project.yml
@@ -11,6 +11,8 @@ analysis-paths: ["analysis"]
 data-paths: ["data"]
 macro-paths: ["macros"]
 
+require-dbt-version: ">=0.16.0"
+
 models:
   snowplow:
     base:
diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml
index 7e4bdf3..06790cb 100644
--- a/integration_tests/dbt_project.yml
+++ b/integration_tests/dbt_project.yml
@@ -31,3 +31,14 @@ models:
       'snowplow:context:useragent': FALSE
       'snowplow:pass_through_columns': ['test_add_col']
 
+seeds:
+  snowplow_integration_tests:
+    snowplow:
+      sp_event_update:
+        column_types:
+          collector_tstamp: timestamp
+          derived_tstamp: timestamp
+      sp_event:
+        column_types:
+          collector_tstamp: timestamp
+          derived_tstamp: timestamp
diff --git a/macros/adapters/bigquery/identification/snowplow_id_map.sql b/macros/adapters/bigquery/identification/snowplow_id_map.sql
index 0e79e21..b6c3b2f 100644
--- a/macros/adapters/bigquery/identification/snowplow_id_map.sql
+++ b/macros/adapters/bigquery/identification/snowplow_id_map.sql
@@ -8,18 +8,25 @@
 {{
     config(
         materialized='incremental',
-        partition_by='DATE(max_tstamp)',
+        partition_by={
+            'field': 'max_tstamp',
+            'data_type': 'timestamp'
+        },
         unique_key="domain_userid"
     )
 }}
 
-{% set start_date = get_most_recent_record(this, "max_tstamp", "2001-01-01") %}
-
 with all_events as (
 
     select *
     from {{ ref('snowplow_base_events') }}
-    where DATE(collector_tstamp) >= date_sub('{{ start_date }}', interval 1 day)
+
+    {% if is_incremental() %}
+    where DATE(collector_tstamp) >= date_sub(
+        DATE(_dbt_max_partition),
+        interval {{ var('snowplow:page_view_lookback_days', 1) }} day
+    )
+    {% endif %}
 
 ),
 
@@ -29,7 +36,6 @@ new_sessions as (
         domain_sessionid
 
     from all_events
-    where DATE(collector_tstamp) >= '{{ start_date }}'
 
 ),
 
@@ -61,7 +67,7 @@ prep as (
             rows between unbounded preceding and unbounded following
         ) as user_id,
 
-        max(timestamp(collector_tstamp)) over (
+        max(collector_tstamp) over (
             partition by domain_userid
         ) as max_tstamp
 
diff --git a/macros/adapters/bigquery/pageviews/snowplow_page_views.sql b/macros/adapters/bigquery/pageviews/snowplow_page_views.sql
index e74cc74..ef1ef55 100644
--- a/macros/adapters/bigquery/pageviews/snowplow_page_views.sql
+++ b/macros/adapters/bigquery/pageviews/snowplow_page_views.sql
@@ -4,13 +4,15 @@
 {{
     config(
         materialized='incremental',
-        partition_by='DATE(page_view_start)',
+        partition_by={
+            'field': 'page_view_start',
+            'data_type': 'timestamp'
+        },
         unique_key="page_view_id"
     )
 }}
 
 {% set timezone = var('snowplow:timezone', 'UTC') %}
-{% set start_date = get_most_recent_record(this, "page_view_start", "2001-01-01") %}
 
 /*
     General approach: find sessions that happened since the last time
@@ -27,9 +29,12 @@ with all_events as (
 
     select *
     from {{ ref('snowplow_base_events') }}
-    -- load up events from the start date, and the day before it, to ensure
-    -- that we capture pageviews that span midnight
-    where DATE(collector_tstamp) >= date_sub('{{ start_date }}', interval 1 day)
+    {% if is_incremental() %}
+    where DATE(collector_tstamp) >= date_sub(
+        DATE(_dbt_max_partition),
+        interval {{ var('snowplow:page_view_lookback_days', 1) }} day
+    )
+    {% endif %}
 
 ),
 
@@ -40,9 +45,6 @@ new_sessions as (
 
     from all_events
 
-    -- only consider events for sessions that occurred on or after the start_date
-    where DATE(collector_tstamp) >= '{{ start_date }}'
-
 ),
 
 relevant_events as (
diff --git a/macros/adapters/bigquery/sessions/snowplow_sessions.sql b/macros/adapters/bigquery/sessions/snowplow_sessions.sql
index 27c719b..74944fb 100644
--- a/macros/adapters/bigquery/sessions/snowplow_sessions.sql
+++ b/macros/adapters/bigquery/sessions/snowplow_sessions.sql
@@ -4,7 +4,10 @@
 {{
     config(
         materialized='table',
-        partition_by='DATE(session_start)'
+        partition_by={
+            'field': 'session_start',
+            'data_type': 'timestamp'
+        }
     )
 }}
 
diff --git a/macros/adapters/bigquery/sessions/snowplow_sessions_tmp.sql b/macros/adapters/bigquery/sessions/snowplow_sessions_tmp.sql
index 0935540..b9365d2 100644
--- a/macros/adapters/bigquery/sessions/snowplow_sessions_tmp.sql
+++ b/macros/adapters/bigquery/sessions/snowplow_sessions_tmp.sql
@@ -4,17 +4,24 @@
 {{
     config(
         materialized='incremental',
-        partition_by='DATE(session_start)',
+        partition_by={
+            'field': 'session_start',
+            'data_type': 'timestamp'
+        },
         unique_key="session_id"
     )
 }}
 
-{% set start_date = get_most_recent_record(this, "session_start", "2001-01-01") %}
-
 with all_page_views as (
     select *
     from {{ ref('snowplow_page_views') }}
-    where DATE(page_view_start) >= date_sub('{{ start_date }}', interval 1 day)
+
+    {% if is_incremental() %}
+    where DATE(page_view_start) >= date_sub(
+        DATE(_dbt_max_partition),
+        interval {{ var('snowplow:page_view_lookback_days', 1) }} day
+    )
+    {% endif %}
 
 ),
 
@@ -24,7 +31,6 @@ new_page_views as (
         session_id
 
     from all_page_views
-    where DATE(page_view_start) >= '{{ start_date }}'
 
 ),
 