airbyte.caches.base
A SQL Cache implementation.
1# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 2"""A SQL Cache implementation.""" 3 4from __future__ import annotations 5 6import abc 7from pathlib import Path 8from typing import TYPE_CHECKING, Any, Optional, cast, final 9 10from pydantic import BaseModel, PrivateAttr 11 12from airbyte import exceptions as exc 13from airbyte.caches._catalog_manager import CatalogManager 14from airbyte.datasets._sql import CachedDataset 15 16 17if TYPE_CHECKING: 18 from collections.abc import Generator 19 20 from sqlalchemy.engine import Engine 21 22 from airbyte._processors.sql.base import SqlProcessorBase 23 from airbyte.datasets._base import DatasetBase 24 25 26# TODO: meta=EnforceOverrides (Pydantic doesn't like it currently.) 27class CacheBase(BaseModel): 28 """Base configuration for a cache.""" 29 30 cache_dir: Path = Path(".cache") 31 """The directory to store the cache in.""" 32 33 cleanup: bool = True 34 """Whether to clean up the cache after use.""" 35 36 schema_name: str = "airbyte_raw" 37 """The name of the schema to write to.""" 38 39 table_prefix: Optional[str] = None 40 """ A prefix to add to all table names. 41 If 'None', a prefix will be created based on the source name. 
42 """ 43 44 table_suffix: str = "" 45 """A suffix to add to all table names.""" 46 47 _deployed_api_root: Optional[str] = PrivateAttr(default=None) 48 _deployed_workspace_id: Optional[str] = PrivateAttr(default=None) 49 _deployed_destination_id: Optional[str] = PrivateAttr(default=None) 50 _deployed_connection_id: Optional[str] = PrivateAttr(default=None) 51 52 _sql_processor_class: type[SqlProcessorBase] = PrivateAttr() 53 _sql_processor: Optional[SqlProcessorBase] = PrivateAttr(default=None) 54 55 @final 56 @property 57 def processor(self) -> SqlProcessorBase: 58 """Return the SQL processor instance.""" 59 if self._sql_processor is None: 60 self._sql_processor = self._sql_processor_class(cache=self) 61 return self._sql_processor 62 63 @final 64 def get_sql_engine(self) -> Engine: 65 """Return a new SQL engine to use.""" 66 return self.processor.get_sql_engine() 67 68 @abc.abstractmethod 69 def get_sql_alchemy_url(self) -> str: 70 """Returns a SQL Alchemy URL.""" 71 ... 72 73 @abc.abstractmethod 74 def get_database_name(self) -> str: 75 """Return the name of the database.""" 76 ... 77 78 @final 79 @property 80 def streams( 81 self, 82 ) -> dict[str, CachedDataset]: 83 """Return a temporary table name.""" 84 result = {} 85 stream_names = self.processor.expected_streams 86 if self._has_catalog_manager: 87 stream_names |= set(self._catalog_manager.stream_names) 88 for stream_name in stream_names: 89 result[stream_name] = CachedDataset(self, stream_name) 90 91 return result 92 93 def _get_state( 94 self, 95 source_name: str, 96 streams: list[str] | None, 97 ) -> list[dict[str, Any]] | None: 98 return self._catalog_manager.get_state( 99 source_name=source_name, 100 streams=streams, 101 ) 102 103 @property 104 def _has_catalog_manager( 105 self, 106 ) -> bool: 107 """Return whether the cache has a catalog manager.""" 108 # Member is private until we have a public API for it. 
109 return self.processor._catalog_manager is not None # noqa: SLF001 110 111 @property 112 def _catalog_manager( 113 self, 114 ) -> CatalogManager: 115 if not self._has_catalog_manager: 116 raise exc.PyAirbyteInternalError( 117 message="Catalog manager should exist but does not.", 118 ) 119 120 # Member is private until we have a public API for it. 121 return cast(CatalogManager, self.processor._catalog_manager) # noqa: SLF001 122 123 def __getitem__(self, stream: str) -> DatasetBase: 124 return self.streams[stream] 125 126 def __contains__(self, stream: str) -> bool: 127 return stream in (self.processor.expected_streams) 128 129 def __iter__(self) -> Generator[tuple[str, Any], None, None]: 130 return ((name, dataset) for name, dataset in self.streams.items())
class CacheBase(BaseModel):
    """Base configuration for a cache."""

    cache_dir: Path = Path(".cache")
    """The directory to store the cache in."""

    cleanup: bool = True
    """Whether to clean up the cache after use."""

    schema_name: str = "airbyte_raw"
    """The name of the schema to write to."""

    table_prefix: Optional[str] = None
    """ A prefix to add to all table names.
    If 'None', a prefix will be created based on the source name.
    """

    table_suffix: str = ""
    """A suffix to add to all table names."""

    _deployed_api_root: Optional[str] = PrivateAttr(default=None)
    _deployed_workspace_id: Optional[str] = PrivateAttr(default=None)
    _deployed_destination_id: Optional[str] = PrivateAttr(default=None)
    _deployed_connection_id: Optional[str] = PrivateAttr(default=None)

    _sql_processor_class: type[SqlProcessorBase] = PrivateAttr()
    _sql_processor: Optional[SqlProcessorBase] = PrivateAttr(default=None)

    @final
    @property
    def processor(self) -> SqlProcessorBase:
        """Lazily build (on first access) and return the SQL processor."""
        existing = self._sql_processor
        if existing is not None:
            return existing
        created = self._sql_processor_class(cache=self)
        self._sql_processor = created
        return created

    @final
    def get_sql_engine(self) -> Engine:
        """Return a new SQL engine to use."""
        return self.processor.get_sql_engine()

    @abc.abstractmethod
    def get_sql_alchemy_url(self) -> str:
        """Returns a SQL Alchemy URL."""
        ...

    @abc.abstractmethod
    def get_database_name(self) -> str:
        """Return the name of the database."""
        ...

    @final
    @property
    def streams(
        self,
    ) -> dict[str, CachedDataset]:
        """Return one `CachedDataset` per known stream, keyed by stream name."""
        stream_names = self.processor.expected_streams
        if self._has_catalog_manager:
            stream_names |= set(self._catalog_manager.stream_names)
        return {name: CachedDataset(self, name) for name in stream_names}

    def _get_state(
        self,
        source_name: str,
        streams: list[str] | None,
    ) -> list[dict[str, Any]] | None:
        """Fetch persisted state for the given source, limited to `streams`."""
        manager = self._catalog_manager
        return manager.get_state(source_name=source_name, streams=streams)

    @property
    def _has_catalog_manager(
        self,
    ) -> bool:
        """True when the processor already holds a catalog manager."""
        # Member is private until we have a public API for it.
        return self.processor._catalog_manager is not None  # noqa: SLF001

    @property
    def _catalog_manager(
        self,
    ) -> CatalogManager:
        """Return the catalog manager, or raise if none has been attached."""
        if self._has_catalog_manager:
            # Member is private until we have a public API for it.
            return cast(CatalogManager, self.processor._catalog_manager)  # noqa: SLF001

        raise exc.PyAirbyteInternalError(
            message="Catalog manager should exist but does not.",
        )

    def __getitem__(self, stream: str) -> DatasetBase:
        """Dict-style access: `cache["stream_name"]` -> dataset."""
        return self.streams[stream]

    def __contains__(self, stream: str) -> bool:
        """Membership test against the processor's expected streams."""
        return stream in self.processor.expected_streams

    def __iter__(self) -> Generator[tuple[str, Any], None, None]:
        """Yield (stream_name, dataset) pairs."""
        yield from self.streams.items()
Base configuration for a cache.
table_prefix: Optional[str]
A prefix to add to all table names. If 'None', a prefix will be created based on the source name.
processor: airbyte._processors.sql.base.SqlProcessorBase
56 @final 57 @property 58 def processor(self) -> SqlProcessorBase: 59 """Return the SQL processor instance.""" 60 if self._sql_processor is None: 61 self._sql_processor = self._sql_processor_class(cache=self) 62 return self._sql_processor
Return the SQL processor instance.
@final
def
get_sql_engine(self) -> sqlalchemy.engine.base.Engine:
64 @final 65 def get_sql_engine(self) -> Engine: 66 """Return a new SQL engine to use.""" 67 return self.processor.get_sql_engine()
Return a new SQL engine to use.
@abc.abstractmethod
def
get_sql_alchemy_url(self) -> str:
69 @abc.abstractmethod 70 def get_sql_alchemy_url(self) -> str: 71 """Returns a SQL Alchemy URL.""" 72 ...
Returns a SQL Alchemy URL.
@abc.abstractmethod
def
get_database_name(self) -> str:
74 @abc.abstractmethod 75 def get_database_name(self) -> str: 76 """Return the name of the database.""" 77 ...
Return the name of the database.
streams: dict[str, airbyte.datasets._sql.CachedDataset]
79 @final 80 @property 81 def streams( 82 self, 83 ) -> dict[str, CachedDataset]: 84 """Return a temporary table name.""" 85 result = {} 86 stream_names = self.processor.expected_streams 87 if self._has_catalog_manager: 88 stream_names |= set(self._catalog_manager.stream_names) 89 for stream_name in stream_names: 90 result[stream_name] = CachedDataset(self, stream_name) 91 92 return result
Return a mapping of stream names to cached datasets.
Inherited Members
- pydantic.main.BaseModel
- BaseModel
- Config
- dict
- json
- parse_obj
- parse_raw
- parse_file
- from_orm
- construct
- copy
- schema
- schema_json
- validate
- update_forward_refs