Skip to content

Commit

Permalink
🎉 Base Normalization: handle airbyte_type from stream schema in norma…
Browse files Browse the repository at this point in the history
…lization (#13591)

* add datatypes

* up

* up

* add MySQL

* add MSSQL

* fix

* add macros

* add macros

* upd

* upd

* upd for clickhouse

* Return datetime2 for MS SQL

* Upd time type for mysql

* Upd datetime for MySQL

* update

* upd date type for clickhouse

* up

* auto-generate

* bump version

* bump version
  • Loading branch information
annalvova05 committed Jul 26, 2022
1 parent 6d812bf commit 4963698
Show file tree
Hide file tree
Showing 67 changed files with 897 additions and 261 deletions.
2 changes: 1 addition & 1 deletion airbyte-integrations/bases/base-normalization/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,5 @@ WORKDIR /airbyte
# Path of the entrypoint script; the same path is used by ENTRYPOINT below.
ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh"
ENTRYPOINT ["/airbyte/entrypoint.sh"]

# Image metadata. The version label is declared exactly once so the image
# carries a single, unambiguous value.
LABEL io.airbyte.version=0.2.11
LABEL io.airbyte.name=airbyte/normalization
120 changes: 99 additions & 21 deletions ...rations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/datatypes.sql
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -116,22 +116,6 @@
{% endmacro %}


{# timestamp ------------------------------------------------- --#}
{#-- MySQL-prefixed variant of type_timestamp; renders the type name `time`. --#}
{#-- NOTE(review): `time` has no date component - confirm this is the intended type for timestamp columns. --#}
{% macro mysql__type_timestamp() %}
time
{% endmacro %}

{#-- SQL Server-prefixed variant of type_timestamp; renders `datetime` with surrounding whitespace trimmed. --#}
{%- macro sqlserver__type_timestamp() -%}
{#-- in TSQL timestamp is really datetime --#}
{#-- https://docs.microsoft.com/en-us/sql/t-sql/functions/date-and-time-data-types-and-functions-transact-sql?view=sql-server-ver15#DateandTimeDataTypes --#}
datetime
{%- endmacro -%}

{#-- ClickHouse-prefixed variant of type_timestamp; renders the type name `DateTime64`. --#}
{% macro clickhouse__type_timestamp() %}
DateTime64
{% endmacro %}


{# timestamp with time zone ------------------------------------------------- #}

{%- macro type_timestamp_with_timezone() -%}
Expand All @@ -146,23 +130,117 @@
timestamp
{% endmacro %}

{#-- MySQL doesn't allow the cast operation to work with TIMESTAMP, so we have to use char --#}
{#-- MySQL doesn't allow a cast combined with nullif to work with DATETIME and doesn't support storing a time zone, so we have to use char --#}
{#-- https://bugs.mysql.com/bug.php?id=77805 --#}
{#-- MySQL-prefixed variant of type_timestamp_with_timezone. --#}
{#-- Renders exactly one type token, `char(1024)`, so the result is usable as a cast target; --#}
{#-- the size matches mysql__type_time_with_timezone. Whitespace around the token is trimmed. --#}
{%- macro mysql__type_timestamp_with_timezone() -%}
char(1024)
{%- endmacro -%}

{#-- Oracle-prefixed variant of type_timestamp_with_timezone; renders `varchar2(4000)`, a string type rather than a temporal one. --#}
{% macro oracle__type_timestamp_with_timezone() %}
varchar2(4000)
{% endmacro %}

{#-- SQL Server-prefixed variant of type_timestamp_with_timezone; renders `datetimeoffset` with surrounding whitespace trimmed. --#}
{%- macro sqlserver__type_timestamp_with_timezone() -%}
datetimeoffset
{%- endmacro -%}

{#-- Redshift-prefixed variant of type_timestamp_with_timezone; renders `TIMESTAMPTZ`. --#}
{% macro redshift__type_timestamp_with_timezone() %}
TIMESTAMPTZ
{% endmacro %}

{#-- ClickHouse-prefixed variant of type_timestamp_with_timezone; renders `DateTime64`. --#}
{% macro clickhouse__type_timestamp_with_timezone() %}
DateTime64
{% endmacro %}


{# timestamp without time zone ------------------------------------------------- #}

{#-- Entry point for the timestamp-without-time-zone column type. --#}
{#-- Looks up the macro named 'type_timestamp_without_timezone' through adapter.dispatch, calls it, --#}
{#-- and renders its result with surrounding whitespace trimmed by this wrapper. --#}
{%- macro type_timestamp_without_timezone() -%}
{{ adapter.dispatch('type_timestamp_without_timezone')() }}
{%- endmacro -%}

{#-- default__ variant of type_timestamp_without_timezone; renders `timestamp`. --#}
{#-- Under dbt's dispatch naming convention this is the fallback when no adapter-prefixed macro exists. --#}
{% macro default__type_timestamp_without_timezone() %}
timestamp
{% endmacro %}

{#-- SQL Server-prefixed variant of type_timestamp_without_timezone; renders `datetime2` with surrounding whitespace trimmed. --#}
{%- macro sqlserver__type_timestamp_without_timezone() -%}
{#-- in TSQL timestamp is really datetime or datetime2 --#}
{#-- https://docs.microsoft.com/en-us/sql/t-sql/functions/date-and-time-data-types-and-functions-transact-sql?view=sql-server-ver15#DateandTimeDataTypes --#}
datetime2
{%- endmacro -%}

{#-- BigQuery-prefixed variant of type_timestamp_without_timezone; renders `datetime`. --#}
{#-- Defined as a standalone top-level macro with balanced macro/endmacro tags so that --#}
{#-- adapter.dispatch('type_timestamp_without_timezone') can find it by name. --#}
{% macro bigquery__type_timestamp_without_timezone() %}
datetime
{% endmacro %}

{#-- Oracle-prefixed variant of type_timestamp_without_timezone; renders `varchar2(4000)`, a string type rather than a temporal one. --#}
{% macro oracle__type_timestamp_without_timezone() %}
varchar2(4000)
{% endmacro %}

{#-- Redshift-prefixed variant of type_timestamp_without_timezone; renders `TIMESTAMP`. --#}
{% macro redshift__type_timestamp_without_timezone() %}
TIMESTAMP
{% endmacro %}


{# time without time zone ------------------------------------------------- #}

{#-- Entry point for the time-without-time-zone column type. --#}
{#-- Looks up the macro named 'type_time_without_timezone' through adapter.dispatch, calls it, --#}
{#-- and renders its result with surrounding whitespace trimmed by this wrapper. --#}
{%- macro type_time_without_timezone() -%}
{{ adapter.dispatch('type_time_without_timezone')() }}
{%- endmacro -%}

{#-- default__ variant of type_time_without_timezone; renders `time`. --#}
{#-- Under dbt's dispatch naming convention this is the fallback when no adapter-prefixed macro exists. --#}
{% macro default__type_time_without_timezone() %}
time
{% endmacro %}

{#-- Oracle-prefixed variant of type_time_without_timezone; renders `varchar2(4000)`, a string type rather than a temporal one. --#}
{% macro oracle__type_time_without_timezone() %}
varchar2(4000)
{% endmacro %}

{#-- Redshift-prefixed variant of type_time_without_timezone; renders `TIME`. --#}
{% macro redshift__type_time_without_timezone() %}
TIME
{% endmacro %}

{#-- ClickHouse-prefixed variant of type_time_without_timezone; renders `String`, so the value is typed as text. --#}
{% macro clickhouse__type_time_without_timezone() %}
String
{% endmacro %}


{# time with time zone ------------------------------------------------- #}

{#-- Entry point for the time-with-time-zone column type. --#}
{#-- Looks up the macro named 'type_time_with_timezone' through adapter.dispatch, calls it, --#}
{#-- and renders its result with surrounding whitespace trimmed by this wrapper. --#}
{%- macro type_time_with_timezone() -%}
{{ adapter.dispatch('type_time_with_timezone')() }}
{%- endmacro -%}

{#-- default__ variant of type_time_with_timezone; renders the multi-word type `time with time zone`. --#}
{#-- Under dbt's dispatch naming convention this is the fallback when no adapter-prefixed macro exists. --#}
{% macro default__type_time_with_timezone() %}
time with time zone
{% endmacro %}

{#-- MySQL-prefixed variant of type_time_with_timezone; renders `char(1024)` with surrounding whitespace trimmed. --#}
{%- macro mysql__type_time_with_timezone() -%}
char(1024)
{%- endmacro -%}

{#-- SQL Server-prefixed variant of type_time_with_timezone; renders `NVARCHAR(max)` with surrounding whitespace trimmed. --#}
{%- macro sqlserver__type_time_with_timezone() -%}
NVARCHAR(max)
{%- endmacro -%}

{#-- BigQuery-prefixed variant of type_time_with_timezone; renders `STRING`, so the value is typed as text. --#}
{% macro bigquery__type_time_with_timezone() %}
STRING
{% endmacro %}

{#-- Oracle-prefixed variant of type_time_with_timezone; renders `varchar2(4000)`, a string type rather than a temporal one. --#}
{% macro oracle__type_time_with_timezone() %}
varchar2(4000)
{% endmacro %}

{#-- Snowflake-prefixed variant of type_time_with_timezone; renders `varchar`, so the value is typed as text. --#}
{% macro snowflake__type_time_with_timezone() %}
varchar
{% endmacro %}

{#-- Redshift-prefixed variant of type_time_with_timezone; renders `TIMETZ`. --#}
{% macro redshift__type_time_with_timezone() %}
TIMETZ
{% endmacro %}

{#-- ClickHouse-prefixed variant of type_time_with_timezone; renders `String`, so the value is typed as text. --#}
{% macro clickhouse__type_time_with_timezone() %}
String
{% endmacro %}


Expand All @@ -185,5 +263,5 @@
{%- endmacro -%}

{#-- ClickHouse-prefixed variant of type_date. --#}
{#-- Renders exactly one type name, `Date32`, so the result is a valid single type token. --#}
{% macro clickhouse__type_date() %}
Date32
{% endmacro %}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ select
json_extract_scalar(_airbyte_data, "$['NZD']") as NZD,
json_extract_scalar(_airbyte_data, "$['USD']") as USD,
json_extract_scalar(_airbyte_data, "$['column___with__quotes']") as column___with__quotes,
json_extract_scalar(_airbyte_data, "$['datetime_tz']") as datetime_tz,
json_extract_scalar(_airbyte_data, "$['datetime_no_tz']") as datetime_no_tz,
json_extract_scalar(_airbyte_data, "$['time_tz']") as time_tz,
json_extract_scalar(_airbyte_data, "$['time_no_tz']") as time_no_tz,
_airbyte_ab_id,
_airbyte_emitted_at,
CURRENT_TIMESTAMP() as _airbyte_normalized_at
Expand Down Expand Up @@ -58,6 +62,18 @@ select
cast(column___with__quotes as
string
) as column___with__quotes,
cast(nullif(datetime_tz, '') as
timestamp
) as datetime_tz,
cast(nullif(datetime_no_tz, '') as
datetime
) as datetime_no_tz,
cast(nullif(time_tz, '') as
STRING
) as time_tz,
cast(nullif(time_no_tz, '') as
time
) as time_no_tz,
_airbyte_ab_id,
_airbyte_emitted_at,
CURRENT_TIMESTAMP() as _airbyte_normalized_at
Expand Down Expand Up @@ -87,6 +103,14 @@ select
string
), ''), '-', coalesce(cast(column___with__quotes as
string
), ''), '-', coalesce(cast(datetime_tz as
string
), ''), '-', coalesce(cast(datetime_no_tz as
string
), ''), '-', coalesce(cast(time_tz as
string
), ''), '-', coalesce(cast(time_no_tz as
string
), '')) as
string
))) as _airbyte_exchange_rate_hashid,
Expand All @@ -106,6 +130,10 @@ select
NZD,
USD,
column___with__quotes,
datetime_tz,
datetime_no_tz,
time_tz,
time_no_tz,
_airbyte_ab_id,
_airbyte_emitted_at,
CURRENT_TIMESTAMP() as _airbyte_normalized_at,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ select
NZD,
USD,
column___with__quotes,
datetime_tz,
datetime_no_tz,
time_tz,
time_no_tz,
_airbyte_ab_id,
_airbyte_emitted_at,
{{ current_timestamp() }} as _airbyte_normalized_at,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ select
json_extract_scalar(_airbyte_data, "$['NZD']") as NZD,
json_extract_scalar(_airbyte_data, "$['USD']") as USD,
json_extract_scalar(_airbyte_data, "$['column___with__quotes']") as column___with__quotes,
json_extract_scalar(_airbyte_data, "$['datetime_tz']") as datetime_tz,
json_extract_scalar(_airbyte_data, "$['datetime_no_tz']") as datetime_no_tz,
json_extract_scalar(_airbyte_data, "$['time_tz']") as time_tz,
json_extract_scalar(_airbyte_data, "$['time_no_tz']") as time_no_tz,
_airbyte_ab_id,
_airbyte_emitted_at,
CURRENT_TIMESTAMP() as _airbyte_normalized_at
Expand Down Expand Up @@ -58,6 +62,18 @@ select
cast(column___with__quotes as
string
) as column___with__quotes,
cast(nullif(datetime_tz, '') as
timestamp
) as datetime_tz,
cast(nullif(datetime_no_tz, '') as
datetime
) as datetime_no_tz,
cast(nullif(time_tz, '') as
STRING
) as time_tz,
cast(nullif(time_no_tz, '') as
time
) as time_no_tz,
_airbyte_ab_id,
_airbyte_emitted_at,
CURRENT_TIMESTAMP() as _airbyte_normalized_at
Expand Down Expand Up @@ -87,6 +103,14 @@ select
string
), ''), '-', coalesce(cast(column___with__quotes as
string
), ''), '-', coalesce(cast(datetime_tz as
string
), ''), '-', coalesce(cast(datetime_no_tz as
string
), ''), '-', coalesce(cast(time_tz as
string
), ''), '-', coalesce(cast(time_no_tz as
string
), '')) as
string
))) as _airbyte_exchange_rate_hashid,
Expand All @@ -106,6 +130,10 @@ select
NZD,
USD,
column___with__quotes,
datetime_tz,
datetime_no_tz,
time_tz,
time_no_tz,
_airbyte_ab_id,
_airbyte_emitted_at,
CURRENT_TIMESTAMP() as _airbyte_normalized_at,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ select
JSONExtractRaw(assumeNotNull(_airbyte_data), 'NZD') as NZD,
JSONExtractRaw(assumeNotNull(_airbyte_data), 'USD') as USD,
JSONExtractRaw(assumeNotNull(_airbyte_data), 'column`_''with\"_quotes') as "column`_'with""_quotes",
JSONExtractRaw(assumeNotNull(_airbyte_data), 'datetime_tz') as datetime_tz,
JSONExtractRaw(assumeNotNull(_airbyte_data), 'datetime_no_tz') as datetime_no_tz,
JSONExtractRaw(assumeNotNull(_airbyte_data), 'time_tz') as time_tz,
JSONExtractRaw(assumeNotNull(_airbyte_data), 'time_no_tz') as time_no_tz,
_airbyte_ab_id,
_airbyte_emitted_at,
now() as _airbyte_normalized_at
Expand All @@ -40,7 +44,7 @@ select
BIGINT
') as id,
nullif(accurateCastOrNull(trim(BOTH '"' from currency), 'String'), 'null') as currency,
parseDateTimeBestEffortOrNull(trim(BOTH '"' from nullif(date, ''))) as date,
toDate(parseDateTimeBestEffortOrNull(trim(BOTH '"' from nullif(date, '')))) as date,
parseDateTime64BestEffortOrNull(trim(BOTH '"' from nullif(timestamp_col, ''))) as timestamp_col,
accurateCastOrNull("HKD@spéçiäl & characters", '
Float64
Expand All @@ -53,6 +57,14 @@ select
Float64
') as USD,
nullif(accurateCastOrNull(trim(BOTH '"' from "column`_'with""_quotes"), 'String'), 'null') as "column`_'with""_quotes",
parseDateTime64BestEffortOrNull(trim(BOTH '"' from nullif(datetime_tz, ''))) as datetime_tz,
parseDateTime64BestEffortOrNull(trim(BOTH '"' from nullif(datetime_no_tz, ''))) as datetime_no_tz,
nullif(accurateCastOrNull(trim(BOTH '"' from time_tz), '
String
'), 'null') as time_tz,
nullif(accurateCastOrNull(trim(BOTH '"' from time_no_tz), '
String
'), 'null') as time_no_tz,
_airbyte_ab_id,
_airbyte_emitted_at,
now() as _airbyte_normalized_at
Expand Down Expand Up @@ -90,7 +102,19 @@ select
toString(USD) || '~' ||


toString("column`_'with""_quotes")
toString("column`_'with""_quotes") || '~' ||


toString(datetime_tz) || '~' ||


toString(datetime_no_tz) || '~' ||


toString(time_tz) || '~' ||


toString(time_no_tz)

))) as _airbyte_exchange_rate_hashid,
tmp.*
Expand All @@ -109,6 +133,10 @@ select
NZD,
USD,
"column`_'with""_quotes",
datetime_tz,
datetime_no_tz,
time_tz,
time_no_tz,
_airbyte_ab_id,
_airbyte_emitted_at,
now() as _airbyte_normalized_at,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ select
BIGINT
') as id,
nullif(accurateCastOrNull(trim(BOTH '"' from currency), 'String'), 'null') as currency,
parseDateTimeBestEffortOrNull(trim(BOTH '"' from nullif(date, ''))) as date,
toDate(parseDateTimeBestEffortOrNull(trim(BOTH '"' from nullif(date, '')))) as date,
parseDateTime64BestEffortOrNull(trim(BOTH '"' from nullif(timestamp_col, ''))) as timestamp_col,
accurateCastOrNull("HKD@spéçiäl & characters", '
Float64
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
select
accurateCastOrNull(id, '{{ dbt_utils.type_bigint() }}') as id,
nullif(accurateCastOrNull(trim(BOTH '"' from currency), '{{ dbt_utils.type_string() }}'), 'null') as currency,
parseDateTimeBestEffortOrNull(trim(BOTH '"' from {{ empty_string_to_null('date') }})) as date,
toDate(parseDateTimeBestEffortOrNull(trim(BOTH '"' from {{ empty_string_to_null('date') }}))) as date,
parseDateTime64BestEffortOrNull(trim(BOTH '"' from {{ empty_string_to_null('timestamp_col') }})) as timestamp_col,
accurateCastOrNull({{ quote('HKD@spéçiäl & characters') }}, '{{ dbt_utils.type_float() }}') as {{ quote('HKD@spéçiäl & characters') }},
nullif(accurateCastOrNull(trim(BOTH '"' from HKD_special___characters), '{{ dbt_utils.type_string() }}'), 'null') as HKD_special___characters,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ select
NZD,
USD,
{{ quote('column`_\'with""_quotes') }},
datetime_tz,
datetime_no_tz,
time_tz,
time_no_tz,
_airbyte_ab_id,
_airbyte_emitted_at,
{{ current_timestamp() }} as _airbyte_normalized_at,
Expand Down

0 comments on commit 4963698

Please sign in to comment.