🎉 Incremental Normalization #7162
Changes from 18 commits
@@ -0,0 +1,9 @@
{#
    This macro controls how incremental models are updated in Airbyte's normalization step
#}

{%- macro incremental_clause(col_emitted_at) -%}
{% if is_incremental() %}
and {{ col_emitted_at }} > (select max({{ col_emitted_at }}) from {{ this }})
{% endif %}
{%- endmacro -%}
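For context, when a model that calls this macro runs incrementally, `is_incremental()` is true and the clause renders into a plain SQL predicate. A minimal sketch of what `incremental_clause('_airbyte_emitted_at')` expands to, using an illustrative table name in place of `{{ this }}`:

-- Rendered sketch only: the quoted table name stands in for {{ this }} (the model being built).
and _airbyte_emitted_at > (
    select max(_airbyte_emitted_at) from "test_normalization"."dedup_exchange_rate_scd"
)
-- On the first run, or with --full-refresh, is_incremental() is false and the macro renders nothing.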
@@ -0,0 +1,51 @@
{#
    This overrides the behavior of the macro `should_full_refresh` so that a full refresh is triggered if:
    - the dbt cli is run with the --full-refresh flag, or the model is explicitly configured to full_refresh
    - the column _airbyte_ab_id does not exist in the normalized tables, so a rebuild can make sure it is well populated.
#}
{%- macro need_full_refresh(col_ab_id, target_table=this) -%}
    {%- if not execute -%}
        {{ return(false) }}
    {%- endif -%}
    {%- set found_column = [] %}
    {%- set cols = adapter.get_columns_in_relation(target_table) -%}
    {%- for col in cols -%}
        {%- if col.column == col_ab_id -%}
            {% do found_column.append(col.column) %}
        {%- endif -%}
    {%- endfor -%}
    {%- if found_column -%}
        {{ return(false) }}
    {%- else -%}
        {{ dbt_utils.log_info(target_table ~ "." ~ col_ab_id ~ " does not exist. The table needs to be rebuilt in full_refresh") }}
        {{ return(true) }}
    {%- endif -%}
{%- endmacro -%}
Review comment: where is this macro called? Can't find its usage apart from the comment in

Reply: I believe it's called here: https://github.com/dbt-labs/dbt-core/blob/34c23fe6500afda763d49f83c0ebdf4846501663/core/dbt/include/global_project/macros/materializations/incremental/incremental.sql#L9. It's an internal thing to dbt: https://docs.getdbt.com/reference/resource-configs/full_refresh#description

{%- macro should_full_refresh() -%}
    {% set config_full_refresh = config.get('full_refresh') %}
    {%- if config_full_refresh is none -%}
        {% set config_full_refresh = flags.FULL_REFRESH %}
    {%- endif -%}
    {%- if not config_full_refresh -%}
        {% set config_full_refresh = need_full_refresh(get_col_ab_id(), this) %}
    {%- endif -%}
    {% do return(config_full_refresh) %}
{%- endmacro -%}

{%- macro get_col_ab_id() -%}
    {{ adapter.dispatch('get_col_ab_id')() }}
{%- endmacro -%}

{%- macro default__get_col_ab_id() -%}
    _airbyte_ab_id
{%- endmacro -%}

{%- macro oracle__get_col_ab_id() -%}
    "_AIRBYTE_AB_ID"
{%- endmacro -%}

{%- macro snowflake__get_col_ab_id() -%}
    _AIRBYTE_AB_ID
{%- endmacro -%}
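Not part of this diff, but to illustrate the `adapter.dispatch` pattern used by `get_col_ab_id` above: dbt resolves the call to an adapter-prefixed macro (`<adapter>__get_col_ab_id`) and falls back to `default__get_col_ab_id`, so supporting another warehouse only needs one more candidate. A hedged sketch for a hypothetical additional adapter:

{#
    Sketch only: a hypothetical adapter-specific variant, not in this PR.
    dbt would pick this up automatically when running against that adapter;
    every other warehouse keeps using default__get_col_ab_id.
#}
{%- macro bigquery__get_col_ab_id() -%}
    _airbyte_ab_id
{%- endmacro -%}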
@@ -0,0 +1,38 @@
{{ config(
    schema = "test_normalization",
    unique_key = env_var('AIRBYTE_DEFAULT_UNIQUE_KEY', '_airbyte_ab_id'),
    tags = [ "top-level" ]
) }}
-- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key
select
    {{ dbt_utils.surrogate_key([
        adapter.quote('id'),
        'currency',
        'nzd',
    ]) }} as _airbyte_unique_key,
    {{ adapter.quote('id') }},
    currency,
    {{ adapter.quote('date') }},
    timestamp_col,
    {{ adapter.quote('HKD@spéçiäl & characters') }},
    hkd_special___characters,
    nzd,
    usd,
    {{ adapter.quote('date') }} as _airbyte_start_at,
    lag({{ adapter.quote('date') }}) over (
        partition by {{ adapter.quote('id') }}, currency, cast(nzd as {{ dbt_utils.type_string() }})
        order by {{ adapter.quote('date') }} is null asc, {{ adapter.quote('date') }} desc, _airbyte_emitted_at desc
    ) as _airbyte_end_at,
    case when lag({{ adapter.quote('date') }}) over (
        partition by {{ adapter.quote('id') }}, currency, cast(nzd as {{ dbt_utils.type_string() }})
        order by {{ adapter.quote('date') }} is null asc, {{ adapter.quote('date') }} desc, _airbyte_emitted_at desc
    ) is null then 1 else 0 end as _airbyte_active_row,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    _airbyte_dedup_exchange_rate_hashid
from {{ ref('dedup_exchange_rate_ab4') }}
-- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }}
where 1 = 1
and _airbyte_row_num = 1
{{ incremental_clause('_airbyte_emitted_at') }}
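To make the SCD columns concrete, here is a hedged sketch of how downstream queries would typically read this table (the schema-qualified name and the key placeholder are illustrative):

-- Current snapshot: the latest version of each (id, currency, nzd) key.
select * from test_normalization.dedup_exchange_rate_scd
where _airbyte_active_row = 1;

-- History of one key, ordered by its validity window.
select _airbyte_unique_key, _airbyte_start_at, _airbyte_end_at, usd
from test_normalization.dedup_exchange_rate_scd
where _airbyte_unique_key = '...'  -- surrogate key of the record of interest
order by _airbyte_start_at;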
@@ -0,0 +1,25 @@
{{ config(
    schema = "test_normalization",
    unique_key = "_airbyte_unique_key",
    tags = [ "top-level" ]
) }}
-- Final base SQL model
select
    _airbyte_unique_key,
    {{ adapter.quote('id') }},
    currency,
    {{ adapter.quote('date') }},
    timestamp_col,
    {{ adapter.quote('HKD@spéçiäl & characters') }},
    hkd_special___characters,
    nzd,
    usd,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    _airbyte_dedup_exchange_rate_hashid
from {{ ref('dedup_exchange_rate_scd') }}
-- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }}
where 1 = 1
and _airbyte_active_row = 1
{{ incremental_clause('_airbyte_emitted_at') }}
Review comment: FYI @andresbravog
The incremental clause is isolated in a dbt macro to make it easier for a user to override it without having to rebuild the normalization docker image. It would be doable by exporting the generated dbt project and editing the macro file to behave differently, as mentioned here: #4286 (comment)
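As a hedged sketch of that override path (the lookback variable and the interval arithmetic are made-up, Postgres-flavored examples, not part of this PR): after exporting the generated dbt project, a user could edit the macro in place, for instance to widen the incremental window with a configurable lookback:

{#
    Sketch only: an edited copy of the incremental_clause macro inside the
    exported dbt project. 'incremental_lookback_days' is a hypothetical dbt var.
#}
{%- macro incremental_clause(col_emitted_at) -%}
{% if is_incremental() %}
and {{ col_emitted_at }} >= (
    select max({{ col_emitted_at }}) - interval '{{ var("incremental_lookback_days", 3) }} days'
    from {{ this }}
)
{% endif %}
{%- endmacro -%}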
Review comment: is it important for this `col_emitted_at` to be indexed so that we avoid a full table scan on this query?

Review comment: having an `and` in here feels wrong; the calling context should have knowledge of how to chain these predicates together, whereas this macro can't be expected to know that. So shouldn't the context have the `and`?

Reply (to the indexing question): Yes, it's important for READ performance, and it depends on the destination. That's why, on some warehouse destinations, we would need to introduce the option of partitioning/clustering on raw tables. Maybe on database destinations, it'd make sense to create an index. Without those changes on the destination side, this PR at least starts to introduce optimization on the WRITE side.
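For database-style destinations, the index mentioned above could look like the following sketch (Postgres syntax; the table and index names are illustrative, and nothing in this PR creates it automatically):

-- Sketch only: index the emitted_at column so the max() lookup and the
-- filter produced by incremental_clause avoid a full table scan.
create index if not exists idx_dedup_exchange_rate_scd_emitted_at
    on test_normalization.dedup_exchange_rate_scd (_airbyte_emitted_at);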
Review comment: @ChristopheDuong should this be `>=`? Do we have a guarantee that, in between normalization runs, another record with the same timestamp cannot be added? I don't think we have that guarantee, and it's especially dodgy since the emitted_at timestamp is created by the worker. Since we can't rely on timestamps being monotonically increasing, I think we always have to do `>=`. I think that's okay, because you handle deduping records with airbyte_ab_id, so the only cost is we may re-process a handful of records. That seems fine relative to the potential of missing a few records. (This is another argument for keeping the raw data around, like we were talking about the other day. It is definitely nice to be able to go back and re-process if we make a mistake in normalization without having to resend data.)
Reply: yes, we can make it `>=` just in case.
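A minimal sketch of what that follow-up change to the macro would look like (illustrative; the diff above still uses `>`):

{%- macro incremental_clause(col_emitted_at) -%}
{% if is_incremental() %}
-- '>=' may re-process rows sharing the max emitted_at value, which is safe because
-- downstream models dedupe on _airbyte_ab_id / _airbyte_unique_key.
and {{ col_emitted_at }} >= (select max({{ col_emitted_at }}) from {{ this }})
{% endif %}
{%- endmacro -%}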