Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ClickHouse normalization update #10905

Merged
merged 9 commits into from Mar 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion airbyte-integrations/bases/base-normalization/Dockerfile
Expand Up @@ -28,5 +28,5 @@ WORKDIR /airbyte
ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh"
ENTRYPOINT ["/airbyte/entrypoint.sh"]

LABEL io.airbyte.version=0.1.73
LABEL io.airbyte.version=0.1.74
LABEL io.airbyte.name=airbyte/normalization
Expand Up @@ -5,6 +5,7 @@
- Redshift: -> https://blog.getdbt.com/how-to-unnest-arrays-in-redshift/
- postgres: unnest() -> https://www.postgresqltutorial.com/postgresql-array/
- MSSQL: openjson() –> https://docs.microsoft.com/en-us/sql/relational-databases/json/validate-query-and-change-json-data-with-built-in-functions-sql-server?view=sql-server-ver15
- ClickHouse: ARRAY JOIN –> https://clickhouse.com/docs/zh/sql-reference/statements/select/array-join/
#}

{# cross_join_unnest ------------------------------------------------- #}
Expand All @@ -21,6 +22,10 @@
cross join unnest({{ array_col }}) as {{ array_col }}
{%- endmacro %}

{% macro clickhouse__cross_join_unnest(stream_name, array_col) -%}
ARRAY JOIN {{ array_col }}
{%- endmacro %}

{% macro oracle__cross_join_unnest(stream_name, array_col) -%}
{% do exceptions.warn("Normalization does not support unnesting for Oracle yet.") %}
{%- endmacro %}
Expand Down
Expand Up @@ -135,9 +135,9 @@

{% macro clickhouse__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
{%- if from_table|string() == '' %}
JSONExtractRaw({{ json_column }}, {{ format_json_path(json_path_list) }})
JSONExtractRaw(assumeNotNull({{ json_column }}), {{ format_json_path(json_path_list) }})
{% else %}
JSONExtractRaw({{ from_table }}.{{ json_column }}, {{ format_json_path(json_path_list) }})
JSONExtractRaw(assumeNotNull({{ from_table }}.{{ json_column }}), {{ format_json_path(json_path_list) }})
{% endif -%}
{%- endmacro %}

Expand Down Expand Up @@ -180,7 +180,7 @@
{%- endmacro %}

{% macro clickhouse__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
JSONExtractRaw({{ json_column }}, {{ format_json_path(json_path_list) }})
JSONExtractRaw(assumeNotNull({{ json_column }}), {{ format_json_path(json_path_list) }})
{%- endmacro %}

{# json_extract_array ------------------------------------------------- #}
Expand Down Expand Up @@ -222,5 +222,5 @@
{%- endmacro %}

{% macro clickhouse__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
JSONExtractArrayRaw({{ json_column }}, {{ format_json_path(json_path_list) }})
JSONExtractArrayRaw(assumeNotNull({{ json_column }}), {{ format_json_path(json_path_list) }})
{%- endmacro %}
Expand Up @@ -57,6 +57,11 @@
cast({{ field }} as bit)
{%- endmacro %}

{# -- ClickHouse does not support converting string directly to Int8, it must go through int first #}
{% macro clickhouse__cast_to_boolean(field) -%}
IF(lower({{ field }}) = 'true', 1, 0)
{%- endmacro %}

{# empty_string_to_null ------------------------------------------------- #}
{% macro empty_string_to_null(field) -%}
{{ return(adapter.dispatch('empty_string_to_null')(field)) }}
Expand Down
Expand Up @@ -14,7 +14,7 @@
public class NormalizationRunnerFactory {

public static final String BASE_NORMALIZATION_IMAGE_NAME = "airbyte/normalization";
public static final String NORMALIZATION_VERSION = "0.1.73";
public static final String NORMALIZATION_VERSION = "0.1.74";

static final Map<String, ImmutablePair<String, DefaultNormalizationRunner.DestinationType>> NORMALIZATION_MAPPING =
ImmutableMap.<String, ImmutablePair<String, DefaultNormalizationRunner.DestinationType>>builder()
Expand Down
1 change: 1 addition & 0 deletions docs/understanding-airbyte/basic-normalization.md
Expand Up @@ -350,6 +350,7 @@ Therefore, in order to "upgrade" to the desired normalization version, you need

| Airbyte Version | Normalization Version | Date | Pull Request | Subject |
|:----------------| :--- | :--- | :--- | :--- |
| 0.35.61-alpha | 0.1.74 | 2022-03-24 | [\#10905](https://github.com/airbytehq/airbyte/pull/10905) | Update clickhouse dbt version |
| 0.35.60-alpha | 0.1.73 | 2022-03-25 | [\#11267](https://github.com/airbytehq/airbyte/pull/11267) | Set `--event-buffer-size` to reduce memory usage |
| 0.35.59-alpha | 0.1.72 | 2022-03-24 | [\#11093](https://github.com/airbytehq/airbyte/pull/11093) | Added Snowflake OAuth2.0 support |
| 0.35.53-alpha | 0.1.71 | 2022-03-14 | [\#11077](https://github.com/airbytehq/airbyte/pull/11077) | Enable BigQuery to handle project ID embedded inside dataset ID |
Expand Down