Skip to content

Commit

Permalink
Normalization: handle non-object top-level schemas; treat binary data as string (#22165)
Browse files Browse the repository at this point in the history

* handle dumb top-level schemas

* version bump

* also definitions

* treat binary as string

* fallback case

* format

* new variable
  • Loading branch information
edgao committed Jan 31, 2023
1 parent 2c97aa3 commit 8276d03
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
icon: bigquery.svg
normalizationConfig:
normalizationRepository: airbyte/normalization
normalizationTag: 0.3.1
normalizationTag: 0.3.2
normalizationIntegrationType: bigquery
supportsDbt: true
resourceRequirements:
Expand Down Expand Up @@ -91,7 +91,7 @@
releaseStage: alpha
normalizationConfig:
normalizationRepository: airbyte/normalization-clickhouse
normalizationTag: 0.3.1
normalizationTag: 0.3.2
normalizationIntegrationType: clickhouse
supportsDbt: true
- name: Cloudflare R2
Expand Down Expand Up @@ -213,7 +213,7 @@
releaseStage: alpha
normalizationConfig:
normalizationRepository: airbyte/normalization-mssql
normalizationTag: 0.3.1
normalizationTag: 0.3.2
normalizationIntegrationType: mssql
supportsDbt: true
- name: MeiliSearch
Expand All @@ -239,7 +239,7 @@
releaseStage: alpha
normalizationConfig:
normalizationRepository: airbyte/normalization-mysql
normalizationTag: 0.3.1
normalizationTag: 0.3.2
normalizationIntegrationType: mysql
supportsDbt: true
- name: Oracle
Expand All @@ -251,7 +251,7 @@
releaseStage: alpha
normalizationConfig:
normalizationRepository: airbyte/normalization-oracle
normalizationTag: 0.3.1
normalizationTag: 0.3.2
normalizationIntegrationType: oracle
supportsDbt: true
- name: Postgres
Expand All @@ -263,7 +263,7 @@
releaseStage: alpha
normalizationConfig:
normalizationRepository: airbyte/normalization
normalizationTag: 0.3.1
normalizationTag: 0.3.2
normalizationIntegrationType: postgres
supportsDbt: true
- name: Pulsar
Expand Down Expand Up @@ -295,7 +295,7 @@
icon: redshift.svg
normalizationConfig:
normalizationRepository: airbyte/normalization-redshift
normalizationTag: 0.3.1
normalizationTag: 0.3.2
normalizationIntegrationType: redshift
supportsDbt: true
resourceRequirements:
Expand Down Expand Up @@ -353,7 +353,7 @@
icon: snowflake.svg
normalizationConfig:
normalizationRepository: airbyte/normalization-snowflake
normalizationTag: 0.3.1
normalizationTag: 0.3.2
normalizationIntegrationType: snowflake
supportsDbt: true
resourceRequirements:
Expand Down Expand Up @@ -407,7 +407,7 @@
releaseStage: alpha
normalizationConfig:
normalizationRepository: airbyte/normalization-tidb
normalizationTag: 0.3.1
normalizationTag: 0.3.2
normalizationIntegrationType: tidb
supportsDbt: true
- name: Typesense
Expand Down
2 changes: 1 addition & 1 deletion airbyte-integrations/bases/base-normalization/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,5 @@ WORKDIR /airbyte
ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh"
ENTRYPOINT ["/airbyte/entrypoint.sh"]

LABEL io.airbyte.version=0.3.1
LABEL io.airbyte.version=0.3.2
LABEL io.airbyte.name=airbyte/normalization
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,17 @@ def build_stream_processor(
primary_key = get_field(configured_stream, "primary_key", f"Undefined primary key for stream {stream_name}")

message = f"'json_schema'.'properties' are not defined for stream {stream_name}"
properties = get_field(get_field(stream_config, "json_schema", message), "properties", message)
stream_schema = get_field(stream_config, "json_schema", f"'json_schema' is not defined for stream {stream_name}")
if "properties" in stream_schema:
properties = get_field(stream_schema, "properties", message)
elif "oneOf" in stream_schema:
options = list(filter(lambda option: "properties" in option, stream_schema["oneOf"]))
if len(options) == 0:
raise KeyError(f"Stream {stream_name} does not have any properties")
# If there are multiple oneOf options, just pick the first one - we don't really support oneOf to begin with
properties = options[0]["properties"]
else:
raise KeyError(f"Stream {stream_name} does not have any properties and no oneOf option with properties")

from_table = dbt_macro.Source(schema_name, raw_table_name)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,14 @@ def is_reftype(definition: dict) -> bool:


def is_string(definition: dict) -> bool:
return is_type_included(definition, get_reftype_function(data_type.STRING_TYPE))
return is_type_included(definition, get_reftype_function(data_type.STRING_TYPE)) or is_type_included(
definition, get_reftype_function(data_type.BINARY_DATA_TYPE)
)


def is_binary_datatype(definition: dict) -> bool:
return is_type_included(definition, get_reftype_function(data_type.BINARY_DATA_TYPE))
return False
# return is_type_included(definition, get_reftype_function(data_type.BINARY_DATA_TYPE))


def is_datetime(definition: dict) -> bool:
Expand Down
1 change: 1 addition & 0 deletions docs/understanding-airbyte/basic-normalization.md
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,7 @@ Therefore, in order to "upgrade" to the desired normalization version, you need

| Airbyte Version | Normalization Version | Date | Pull Request | Subject |
|:----------------|:----------------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------|
| | 0.3.2 | 2023-01-31 | [\#22165](https://github.com/airbytehq/airbyte/pull/22165) | Fix support for non-object top-level schemas; treat binary data as string |
| | 0.3.1 | 2023-01-31 | [\#22161](https://github.com/airbytehq/airbyte/pull/22161) | Fix handling for combined primitive types |
| | 0.3.0 | 2023-01-30 | [\#19721](https://github.com/airbytehq/airbyte/pull/19721) | Update normalization to airbyte-protocol v1.0.0 |
| | 0.2.25 | 2022-12-05 | [\#19573](https://github.com/airbytehq/airbyte/pull/19573) | Update Clickhouse dbt version |
Expand Down

0 comments on commit 8276d03

Please sign in to comment.