airbyte.types
Type conversion methods for SQL Caches.
1# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 2 3"""Type conversion methods for SQL Caches.""" 4from __future__ import annotations 5 6from typing import cast 7 8import sqlalchemy 9from rich import print 10 11 12# Compare to documentation here: https://docs.airbyte.com/understanding-airbyte/supported-data-types 13CONVERSION_MAP = { 14 "string": sqlalchemy.types.VARCHAR, 15 "integer": sqlalchemy.types.BIGINT, 16 "number": sqlalchemy.types.DECIMAL, 17 "boolean": sqlalchemy.types.BOOLEAN, 18 "date": sqlalchemy.types.DATE, 19 "timestamp_with_timezone": sqlalchemy.types.TIMESTAMP, 20 "timestamp_without_timezone": sqlalchemy.types.TIMESTAMP, 21 "time_with_timezone": sqlalchemy.types.TIME, 22 "time_without_timezone": sqlalchemy.types.TIME, 23 # Technically 'object' and 'array' as JSON Schema types, not airbyte types. 24 # We include them here for completeness. 25 "object": sqlalchemy.types.JSON, 26 "array": sqlalchemy.types.JSON, 27} 28 29 30class SQLTypeConversionError(Exception): 31 """An exception to be raised when a type conversion fails.""" 32 33 34def _get_airbyte_type( # noqa: PLR0911 # Too many return statements 35 json_schema_property_def: dict[str, str | dict | list], 36) -> tuple[str, str | None]: 37 """Get the airbyte type and subtype from a JSON schema property definition. 38 39 Subtype is only used for array types. Otherwise, subtype will return None. 40 """ 41 airbyte_type = cast(str, json_schema_property_def.get("airbyte_type", None)) 42 if airbyte_type: 43 return airbyte_type, None 44 45 json_schema_type = json_schema_property_def.get("type", None) 46 json_schema_format = json_schema_property_def.get("format", None) 47 48 # if json_schema_type is an array of two strings with one of them being null, pick the other one 49 # this strategy is often used by connectors to indicate a field might not be set all the time 50 if isinstance(json_schema_type, list): 51 non_null_types = [t for t in json_schema_type if t != "null"] 52 if len(non_null_types) == 1: 53 json_schema_type = non_null_types[0] 54 55 if json_schema_type == "string": 56 if json_schema_format == "date": 57 return "date", None 58 59 if json_schema_format == "date-time": 60 return "timestamp_with_timezone", None 61 62 if json_schema_format == "time": 63 return "time_without_timezone", None 64 65 if json_schema_type in ["string", "number", "boolean", "integer"]: 66 return cast(str, json_schema_type), None 67 68 if json_schema_type == "object": 69 return "object", None 70 71 if json_schema_type == "array": 72 items_def = json_schema_property_def.get("items", None) 73 if isinstance(items_def, dict): 74 try: 75 subtype, _ = _get_airbyte_type(items_def) 76 except SQLTypeConversionError: 77 # We have enough information, so we can ignore parsing errors on subtype. 78 subtype = None 79 80 return "array", subtype 81 82 return "array", None 83 84 err_msg = f"Could not determine airbyte type from JSON schema type: {json_schema_property_def}" 85 raise SQLTypeConversionError(err_msg) 86 87 88class SQLTypeConverter: 89 """A base class to perform type conversions.""" 90 91 def __init__( 92 self, 93 conversion_map: dict | None = None, 94 ) -> None: 95 self.conversion_map = conversion_map or CONVERSION_MAP 96 97 @classmethod 98 def get_string_type(cls) -> sqlalchemy.types.TypeEngine: 99 """Get the type to use for string data.""" 100 return sqlalchemy.types.VARCHAR() 101 102 @classmethod 103 def get_failover_type(cls) -> sqlalchemy.types.TypeEngine: 104 """Get the 'last resort' type to use if no other type is found.""" 105 return cls.get_string_type() 106 107 @classmethod 108 def get_json_type(cls) -> sqlalchemy.types.TypeEngine: 109 """Get the type to use for nested JSON data.""" 110 return sqlalchemy.types.JSON() 111 112 def to_sql_type( 113 self, 114 json_schema_property_def: dict[str, str | dict | list], 115 ) -> sqlalchemy.types.TypeEngine: 116 """Convert a value to a SQL type.""" 117 try: 118 airbyte_type, _ = _get_airbyte_type(json_schema_property_def) 119 return self.conversion_map[airbyte_type]() 120 except SQLTypeConversionError: 121 print(f"Could not determine airbyte type from JSON schema: {json_schema_property_def}") 122 except KeyError: 123 print(f"Could not find SQL type for airbyte type: {airbyte_type}") 124 125 json_schema_type = json_schema_property_def.get("type", None) 126 json_schema_format = json_schema_property_def.get("format", None) 127 128 if json_schema_type == "string" and json_schema_format == "date": 129 return sqlalchemy.types.DATE() 130 131 if json_schema_type == "string" and json_schema_format == "date-time": 132 return sqlalchemy.types.TIMESTAMP() 133 134 if json_schema_type == "array": 135 return sqlalchemy.types.JSON() 136 137 if json_schema_type == "object": 138 return sqlalchemy.types.JSON() 139 140 return self.get_failover_type()
CONVERSION_MAP =
{'string': <class 'sqlalchemy.sql.sqltypes.VARCHAR'>, 'integer': <class 'sqlalchemy.sql.sqltypes.BIGINT'>, 'number': <class 'sqlalchemy.sql.sqltypes.DECIMAL'>, 'boolean': <class 'sqlalchemy.sql.sqltypes.BOOLEAN'>, 'date': <class 'sqlalchemy.sql.sqltypes.DATE'>, 'timestamp_with_timezone': <class 'sqlalchemy.sql.sqltypes.TIMESTAMP'>, 'timestamp_without_timezone': <class 'sqlalchemy.sql.sqltypes.TIMESTAMP'>, 'time_with_timezone': <class 'sqlalchemy.sql.sqltypes.TIME'>, 'time_without_timezone': <class 'sqlalchemy.sql.sqltypes.TIME'>, 'object': <class 'sqlalchemy.sql.sqltypes.JSON'>, 'array': <class 'sqlalchemy.sql.sqltypes.JSON'>}
class
SQLTypeConversionError(builtins.Exception):
31class SQLTypeConversionError(Exception): 32 """An exception to be raised when a type conversion fails."""
An exception to be raised when a type conversion fails.
Inherited Members
- builtins.Exception
- Exception
- builtins.BaseException
- with_traceback
- args
class
SQLTypeConverter:
89class SQLTypeConverter: 90 """A base class to perform type conversions.""" 91 92 def __init__( 93 self, 94 conversion_map: dict | None = None, 95 ) -> None: 96 self.conversion_map = conversion_map or CONVERSION_MAP 97 98 @classmethod 99 def get_string_type(cls) -> sqlalchemy.types.TypeEngine: 100 """Get the type to use for string data.""" 101 return sqlalchemy.types.VARCHAR() 102 103 @classmethod 104 def get_failover_type(cls) -> sqlalchemy.types.TypeEngine: 105 """Get the 'last resort' type to use if no other type is found.""" 106 return cls.get_string_type() 107 108 @classmethod 109 def get_json_type(cls) -> sqlalchemy.types.TypeEngine: 110 """Get the type to use for nested JSON data.""" 111 return sqlalchemy.types.JSON() 112 113 def to_sql_type( 114 self, 115 json_schema_property_def: dict[str, str | dict | list], 116 ) -> sqlalchemy.types.TypeEngine: 117 """Convert a value to a SQL type.""" 118 try: 119 airbyte_type, _ = _get_airbyte_type(json_schema_property_def) 120 return self.conversion_map[airbyte_type]() 121 except SQLTypeConversionError: 122 print(f"Could not determine airbyte type from JSON schema: {json_schema_property_def}") 123 except KeyError: 124 print(f"Could not find SQL type for airbyte type: {airbyte_type}") 125 126 json_schema_type = json_schema_property_def.get("type", None) 127 json_schema_format = json_schema_property_def.get("format", None) 128 129 if json_schema_type == "string" and json_schema_format == "date": 130 return sqlalchemy.types.DATE() 131 132 if json_schema_type == "string" and json_schema_format == "date-time": 133 return sqlalchemy.types.TIMESTAMP() 134 135 if json_schema_type == "array": 136 return sqlalchemy.types.JSON() 137 138 if json_schema_type == "object": 139 return sqlalchemy.types.JSON() 140 141 return self.get_failover_type()
A base class to perform type conversions.
@classmethod
def
get_string_type(cls) -> sqlalchemy.sql.type_api.TypeEngine:
98 @classmethod 99 def get_string_type(cls) -> sqlalchemy.types.TypeEngine: 100 """Get the type to use for string data.""" 101 return sqlalchemy.types.VARCHAR()
Get the type to use for string data.
@classmethod
def
get_failover_type(cls) -> sqlalchemy.sql.type_api.TypeEngine:
103 @classmethod 104 def get_failover_type(cls) -> sqlalchemy.types.TypeEngine: 105 """Get the 'last resort' type to use if no other type is found.""" 106 return cls.get_string_type()
Get the 'last resort' type to use if no other type is found.
@classmethod
def
get_json_type(cls) -> sqlalchemy.sql.type_api.TypeEngine:
108 @classmethod 109 def get_json_type(cls) -> sqlalchemy.types.TypeEngine: 110 """Get the type to use for nested JSON data.""" 111 return sqlalchemy.types.JSON()
Get the type to use for nested JSON data.
def
to_sql_type( self, json_schema_property_def: dict[str, str | dict | list]) -> sqlalchemy.sql.type_api.TypeEngine:
113 def to_sql_type( 114 self, 115 json_schema_property_def: dict[str, str | dict | list], 116 ) -> sqlalchemy.types.TypeEngine: 117 """Convert a value to a SQL type.""" 118 try: 119 airbyte_type, _ = _get_airbyte_type(json_schema_property_def) 120 return self.conversion_map[airbyte_type]() 121 except SQLTypeConversionError: 122 print(f"Could not determine airbyte type from JSON schema: {json_schema_property_def}") 123 except KeyError: 124 print(f"Could not find SQL type for airbyte type: {airbyte_type}") 125 126 json_schema_type = json_schema_property_def.get("type", None) 127 json_schema_format = json_schema_property_def.get("format", None) 128 129 if json_schema_type == "string" and json_schema_format == "date": 130 return sqlalchemy.types.DATE() 131 132 if json_schema_type == "string" and json_schema_format == "date-time": 133 return sqlalchemy.types.TIMESTAMP() 134 135 if json_schema_type == "array": 136 return sqlalchemy.types.JSON() 137 138 if json_schema_type == "object": 139 return sqlalchemy.types.JSON() 140 141 return self.get_failover_type()
Convert a value to a SQL type.