airbyte.types

Type conversion methods for SQL Caches.

  1# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
  2
  3"""Type conversion methods for SQL Caches."""
  4from __future__ import annotations
  5
  6from typing import cast
  7
  8import sqlalchemy
  9from rich import print
 10
 11
 12# Compare to documentation here: https://docs.airbyte.com/understanding-airbyte/supported-data-types
 13CONVERSION_MAP = {
 14    "string": sqlalchemy.types.VARCHAR,
 15    "integer": sqlalchemy.types.BIGINT,
 16    "number": sqlalchemy.types.DECIMAL,
 17    "boolean": sqlalchemy.types.BOOLEAN,
 18    "date": sqlalchemy.types.DATE,
 19    "timestamp_with_timezone": sqlalchemy.types.TIMESTAMP,
 20    "timestamp_without_timezone": sqlalchemy.types.TIMESTAMP,
 21    "time_with_timezone": sqlalchemy.types.TIME,
 22    "time_without_timezone": sqlalchemy.types.TIME,
 23    # Technically 'object' and 'array' as JSON Schema types, not airbyte types.
 24    # We include them here for completeness.
 25    "object": sqlalchemy.types.JSON,
 26    "array": sqlalchemy.types.JSON,
 27}
 28
 29
 30class SQLTypeConversionError(Exception):
 31    """An exception to be raised when a type conversion fails."""
 32
 33
 34def _get_airbyte_type(  # noqa: PLR0911  # Too many return statements
 35    json_schema_property_def: dict[str, str | dict | list],
 36) -> tuple[str, str | None]:
 37    """Get the airbyte type and subtype from a JSON schema property definition.
 38
 39    Subtype is only used for array types. Otherwise, subtype will return None.
 40    """
 41    airbyte_type = cast(str, json_schema_property_def.get("airbyte_type", None))
 42    if airbyte_type:
 43        return airbyte_type, None
 44
 45    json_schema_type = json_schema_property_def.get("type", None)
 46    json_schema_format = json_schema_property_def.get("format", None)
 47
 48    # if json_schema_type is an array of two strings with one of them being null, pick the other one
 49    # this strategy is often used by connectors to indicate a field might not be set all the time
 50    if isinstance(json_schema_type, list):
 51        non_null_types = [t for t in json_schema_type if t != "null"]
 52        if len(non_null_types) == 1:
 53            json_schema_type = non_null_types[0]
 54
 55    if json_schema_type == "string":
 56        if json_schema_format == "date":
 57            return "date", None
 58
 59        if json_schema_format == "date-time":
 60            return "timestamp_with_timezone", None
 61
 62        if json_schema_format == "time":
 63            return "time_without_timezone", None
 64
 65    if json_schema_type in ["string", "number", "boolean", "integer"]:
 66        return cast(str, json_schema_type), None
 67
 68    if json_schema_type == "object":
 69        return "object", None
 70
 71    if json_schema_type == "array":
 72        items_def = json_schema_property_def.get("items", None)
 73        if isinstance(items_def, dict):
 74            try:
 75                subtype, _ = _get_airbyte_type(items_def)
 76            except SQLTypeConversionError:
 77                # We have enough information, so we can ignore parsing errors on subtype.
 78                subtype = None
 79
 80            return "array", subtype
 81
 82        return "array", None
 83
 84    err_msg = f"Could not determine airbyte type from JSON schema type: {json_schema_property_def}"
 85    raise SQLTypeConversionError(err_msg)
 86
 87
 88class SQLTypeConverter:
 89    """A base class to perform type conversions."""
 90
 91    def __init__(
 92        self,
 93        conversion_map: dict | None = None,
 94    ) -> None:
 95        self.conversion_map = conversion_map or CONVERSION_MAP
 96
 97    @classmethod
 98    def get_string_type(cls) -> sqlalchemy.types.TypeEngine:
 99        """Get the type to use for string data."""
100        return sqlalchemy.types.VARCHAR()
101
102    @classmethod
103    def get_failover_type(cls) -> sqlalchemy.types.TypeEngine:
104        """Get the 'last resort' type to use if no other type is found."""
105        return cls.get_string_type()
106
107    @classmethod
108    def get_json_type(cls) -> sqlalchemy.types.TypeEngine:
109        """Get the type to use for nested JSON data."""
110        return sqlalchemy.types.JSON()
111
112    def to_sql_type(
113        self,
114        json_schema_property_def: dict[str, str | dict | list],
115    ) -> sqlalchemy.types.TypeEngine:
116        """Convert a value to a SQL type."""
117        try:
118            airbyte_type, _ = _get_airbyte_type(json_schema_property_def)
119            return self.conversion_map[airbyte_type]()
120        except SQLTypeConversionError:
121            print(f"Could not determine airbyte type from JSON schema: {json_schema_property_def}")
122        except KeyError:
123            print(f"Could not find SQL type for airbyte type: {airbyte_type}")
124
125        json_schema_type = json_schema_property_def.get("type", None)
126        json_schema_format = json_schema_property_def.get("format", None)
127
128        if json_schema_type == "string" and json_schema_format == "date":
129            return sqlalchemy.types.DATE()
130
131        if json_schema_type == "string" and json_schema_format == "date-time":
132            return sqlalchemy.types.TIMESTAMP()
133
134        if json_schema_type == "array":
135            return sqlalchemy.types.JSON()
136
137        if json_schema_type == "object":
138            return sqlalchemy.types.JSON()
139
140        return self.get_failover_type()
CONVERSION_MAP = {'string': <class 'sqlalchemy.sql.sqltypes.VARCHAR'>, 'integer': <class 'sqlalchemy.sql.sqltypes.BIGINT'>, 'number': <class 'sqlalchemy.sql.sqltypes.DECIMAL'>, 'boolean': <class 'sqlalchemy.sql.sqltypes.BOOLEAN'>, 'date': <class 'sqlalchemy.sql.sqltypes.DATE'>, 'timestamp_with_timezone': <class 'sqlalchemy.sql.sqltypes.TIMESTAMP'>, 'timestamp_without_timezone': <class 'sqlalchemy.sql.sqltypes.TIMESTAMP'>, 'time_with_timezone': <class 'sqlalchemy.sql.sqltypes.TIME'>, 'time_without_timezone': <class 'sqlalchemy.sql.sqltypes.TIME'>, 'object': <class 'sqlalchemy.sql.sqltypes.JSON'>, 'array': <class 'sqlalchemy.sql.sqltypes.JSON'>}
class SQLTypeConversionError(builtins.Exception):
31class SQLTypeConversionError(Exception):
32    """An exception to be raised when a type conversion fails."""

An exception to be raised when a type conversion fails.

Inherited Members
builtins.Exception
Exception
builtins.BaseException
with_traceback
args
class SQLTypeConverter:
 89class SQLTypeConverter:
 90    """A base class to perform type conversions."""
 91
 92    def __init__(
 93        self,
 94        conversion_map: dict | None = None,
 95    ) -> None:
 96        self.conversion_map = conversion_map or CONVERSION_MAP
 97
 98    @classmethod
 99    def get_string_type(cls) -> sqlalchemy.types.TypeEngine:
100        """Get the type to use for string data."""
101        return sqlalchemy.types.VARCHAR()
102
103    @classmethod
104    def get_failover_type(cls) -> sqlalchemy.types.TypeEngine:
105        """Get the 'last resort' type to use if no other type is found."""
106        return cls.get_string_type()
107
108    @classmethod
109    def get_json_type(cls) -> sqlalchemy.types.TypeEngine:
110        """Get the type to use for nested JSON data."""
111        return sqlalchemy.types.JSON()
112
113    def to_sql_type(
114        self,
115        json_schema_property_def: dict[str, str | dict | list],
116    ) -> sqlalchemy.types.TypeEngine:
117        """Convert a value to a SQL type."""
118        try:
119            airbyte_type, _ = _get_airbyte_type(json_schema_property_def)
120            return self.conversion_map[airbyte_type]()
121        except SQLTypeConversionError:
122            print(f"Could not determine airbyte type from JSON schema: {json_schema_property_def}")
123        except KeyError:
124            print(f"Could not find SQL type for airbyte type: {airbyte_type}")
125
126        json_schema_type = json_schema_property_def.get("type", None)
127        json_schema_format = json_schema_property_def.get("format", None)
128
129        if json_schema_type == "string" and json_schema_format == "date":
130            return sqlalchemy.types.DATE()
131
132        if json_schema_type == "string" and json_schema_format == "date-time":
133            return sqlalchemy.types.TIMESTAMP()
134
135        if json_schema_type == "array":
136            return sqlalchemy.types.JSON()
137
138        if json_schema_type == "object":
139            return sqlalchemy.types.JSON()
140
141        return self.get_failover_type()

A base class to perform type conversions.

SQLTypeConverter(conversion_map: dict | None = None)
92    def __init__(
93        self,
94        conversion_map: dict | None = None,
95    ) -> None:
96        self.conversion_map = conversion_map or CONVERSION_MAP
conversion_map
@classmethod
def get_string_type(cls) -> sqlalchemy.sql.type_api.TypeEngine:
 98    @classmethod
 99    def get_string_type(cls) -> sqlalchemy.types.TypeEngine:
100        """Get the type to use for string data."""
101        return sqlalchemy.types.VARCHAR()

Get the type to use for string data.

@classmethod
def get_failover_type(cls) -> sqlalchemy.sql.type_api.TypeEngine:
103    @classmethod
104    def get_failover_type(cls) -> sqlalchemy.types.TypeEngine:
105        """Get the 'last resort' type to use if no other type is found."""
106        return cls.get_string_type()

Get the 'last resort' type to use if no other type is found.

@classmethod
def get_json_type(cls) -> sqlalchemy.sql.type_api.TypeEngine:
108    @classmethod
109    def get_json_type(cls) -> sqlalchemy.types.TypeEngine:
110        """Get the type to use for nested JSON data."""
111        return sqlalchemy.types.JSON()

Get the type to use for nested JSON data.

def to_sql_type( self, json_schema_property_def: dict[str, str | dict | list]) -> sqlalchemy.sql.type_api.TypeEngine:
113    def to_sql_type(
114        self,
115        json_schema_property_def: dict[str, str | dict | list],
116    ) -> sqlalchemy.types.TypeEngine:
117        """Convert a value to a SQL type."""
118        try:
119            airbyte_type, _ = _get_airbyte_type(json_schema_property_def)
120            return self.conversion_map[airbyte_type]()
121        except SQLTypeConversionError:
122            print(f"Could not determine airbyte type from JSON schema: {json_schema_property_def}")
123        except KeyError:
124            print(f"Could not find SQL type for airbyte type: {airbyte_type}")
125
126        json_schema_type = json_schema_property_def.get("type", None)
127        json_schema_format = json_schema_property_def.get("format", None)
128
129        if json_schema_type == "string" and json_schema_format == "date":
130            return sqlalchemy.types.DATE()
131
132        if json_schema_type == "string" and json_schema_format == "date-time":
133            return sqlalchemy.types.TIMESTAMP()
134
135        if json_schema_type == "array":
136            return sqlalchemy.types.JSON()
137
138        if json_schema_type == "object":
139            return sqlalchemy.types.JSON()
140
141        return self.get_failover_type()

Convert a value to a SQL type.