airbyte.caches.base

A SQL Cache implementation.

  1# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
  2"""A SQL Cache implementation."""
  3
  4from __future__ import annotations
  5
  6import abc
  7from pathlib import Path
  8from typing import TYPE_CHECKING, Any, Optional, cast, final
  9
 10from pydantic import BaseModel, PrivateAttr
 11
 12from airbyte import exceptions as exc
 13from airbyte.caches._catalog_manager import CatalogManager
 14from airbyte.datasets._sql import CachedDataset
 15
 16
 17if TYPE_CHECKING:
 18    from collections.abc import Generator
 19
 20    from sqlalchemy.engine import Engine
 21
 22    from airbyte._processors.sql.base import SqlProcessorBase
 23    from airbyte.datasets._base import DatasetBase
 24
 25
 26# TODO: meta=EnforceOverrides (Pydantic doesn't like it currently.)
 27class CacheBase(BaseModel):
 28    """Base configuration for a cache."""
 29
 30    cache_dir: Path = Path(".cache")
 31    """The directory to store the cache in."""
 32
 33    cleanup: bool = True
 34    """Whether to clean up the cache after use."""
 35
 36    schema_name: str = "airbyte_raw"
 37    """The name of the schema to write to."""
 38
 39    table_prefix: Optional[str] = None
 40    """ A prefix to add to all table names.
 41    If 'None', a prefix will be created based on the source name.
 42    """
 43
 44    table_suffix: str = ""
 45    """A suffix to add to all table names."""
 46
 47    _deployed_api_root: Optional[str] = PrivateAttr(default=None)
 48    _deployed_workspace_id: Optional[str] = PrivateAttr(default=None)
 49    _deployed_destination_id: Optional[str] = PrivateAttr(default=None)
 50    _deployed_connection_id: Optional[str] = PrivateAttr(default=None)
 51
 52    _sql_processor_class: type[SqlProcessorBase] = PrivateAttr()
 53    _sql_processor: Optional[SqlProcessorBase] = PrivateAttr(default=None)
 54
 55    @final
 56    @property
 57    def processor(self) -> SqlProcessorBase:
 58        """Return the SQL processor instance."""
 59        if self._sql_processor is None:
 60            self._sql_processor = self._sql_processor_class(cache=self)
 61        return self._sql_processor
 62
 63    @final
 64    def get_sql_engine(self) -> Engine:
 65        """Return a new SQL engine to use."""
 66        return self.processor.get_sql_engine()
 67
 68    @abc.abstractmethod
 69    def get_sql_alchemy_url(self) -> str:
 70        """Returns a SQL Alchemy URL."""
 71        ...
 72
 73    @abc.abstractmethod
 74    def get_database_name(self) -> str:
 75        """Return the name of the database."""
 76        ...
 77
 78    @final
 79    @property
 80    def streams(
 81        self,
 82    ) -> dict[str, CachedDataset]:
 83        """Return a temporary table name."""
 84        result = {}
 85        stream_names = self.processor.expected_streams
 86        if self._has_catalog_manager:
 87            stream_names |= set(self._catalog_manager.stream_names)
 88        for stream_name in stream_names:
 89            result[stream_name] = CachedDataset(self, stream_name)
 90
 91        return result
 92
 93    def _get_state(
 94        self,
 95        source_name: str,
 96        streams: list[str] | None,
 97    ) -> list[dict[str, Any]] | None:
 98        return self._catalog_manager.get_state(
 99            source_name=source_name,
100            streams=streams,
101        )
102
103    @property
104    def _has_catalog_manager(
105        self,
106    ) -> bool:
107        """Return whether the cache has a catalog manager."""
108        # Member is private until we have a public API for it.
109        return self.processor._catalog_manager is not None  # noqa: SLF001
110
111    @property
112    def _catalog_manager(
113        self,
114    ) -> CatalogManager:
115        if not self._has_catalog_manager:
116            raise exc.PyAirbyteInternalError(
117                message="Catalog manager should exist but does not.",
118            )
119
120        # Member is private until we have a public API for it.
121        return cast(CatalogManager, self.processor._catalog_manager)  # noqa: SLF001
122
123    def __getitem__(self, stream: str) -> DatasetBase:
124        return self.streams[stream]
125
126    def __contains__(self, stream: str) -> bool:
127        return stream in (self.processor.expected_streams)
128
129    def __iter__(self) -> Generator[tuple[str, Any], None, None]:
130        return ((name, dataset) for name, dataset in self.streams.items())
class CacheBase(pydantic.main.BaseModel):
 28class CacheBase(BaseModel):
 29    """Base configuration for a cache."""
 30
 31    cache_dir: Path = Path(".cache")
 32    """The directory to store the cache in."""
 33
 34    cleanup: bool = True
 35    """Whether to clean up the cache after use."""
 36
 37    schema_name: str = "airbyte_raw"
 38    """The name of the schema to write to."""
 39
 40    table_prefix: Optional[str] = None
 41    """ A prefix to add to all table names.
 42    If 'None', a prefix will be created based on the source name.
 43    """
 44
 45    table_suffix: str = ""
 46    """A suffix to add to all table names."""
 47
 48    _deployed_api_root: Optional[str] = PrivateAttr(default=None)
 49    _deployed_workspace_id: Optional[str] = PrivateAttr(default=None)
 50    _deployed_destination_id: Optional[str] = PrivateAttr(default=None)
 51    _deployed_connection_id: Optional[str] = PrivateAttr(default=None)
 52
 53    _sql_processor_class: type[SqlProcessorBase] = PrivateAttr()
 54    _sql_processor: Optional[SqlProcessorBase] = PrivateAttr(default=None)
 55
 56    @final
 57    @property
 58    def processor(self) -> SqlProcessorBase:
 59        """Return the SQL processor instance."""
 60        if self._sql_processor is None:
 61            self._sql_processor = self._sql_processor_class(cache=self)
 62        return self._sql_processor
 63
 64    @final
 65    def get_sql_engine(self) -> Engine:
 66        """Return a new SQL engine to use."""
 67        return self.processor.get_sql_engine()
 68
 69    @abc.abstractmethod
 70    def get_sql_alchemy_url(self) -> str:
 71        """Returns a SQL Alchemy URL."""
 72        ...
 73
 74    @abc.abstractmethod
 75    def get_database_name(self) -> str:
 76        """Return the name of the database."""
 77        ...
 78
 79    @final
 80    @property
 81    def streams(
 82        self,
 83    ) -> dict[str, CachedDataset]:
 84        """Return a mapping of stream names to cached datasets."""
 85        result = {}
 86        stream_names = self.processor.expected_streams
 87        if self._has_catalog_manager:
 88            stream_names |= set(self._catalog_manager.stream_names)
 89        for stream_name in stream_names:
 90            result[stream_name] = CachedDataset(self, stream_name)
 91
 92        return result
 93
 94    def _get_state(
 95        self,
 96        source_name: str,
 97        streams: list[str] | None,
 98    ) -> list[dict[str, Any]] | None:
 99        return self._catalog_manager.get_state(
100            source_name=source_name,
101            streams=streams,
102        )
103
104    @property
105    def _has_catalog_manager(
106        self,
107    ) -> bool:
108        """Return whether the cache has a catalog manager."""
109        # Member is private until we have a public API for it.
110        return self.processor._catalog_manager is not None  # noqa: SLF001
111
112    @property
113    def _catalog_manager(
114        self,
115    ) -> CatalogManager:
116        if not self._has_catalog_manager:
117            raise exc.PyAirbyteInternalError(
118                message="Catalog manager should exist but does not.",
119            )
120
121        # Member is private until we have a public API for it.
122        return cast(CatalogManager, self.processor._catalog_manager)  # noqa: SLF001
123
124    def __getitem__(self, stream: str) -> DatasetBase:
125        return self.streams[stream]
126
127    def __contains__(self, stream: str) -> bool:
128        return stream in (self.processor.expected_streams)
129
130    def __iter__(self) -> Generator[tuple[str, Any], None, None]:
131        return ((name, dataset) for name, dataset in self.streams.items())

Base configuration for a cache.

cache_dir: pathlib.Path

The directory to store the cache in.

cleanup: bool

Whether to clean up the cache after use.

schema_name: str

The name of the schema to write to.

table_prefix: Optional[str]

A prefix to add to all table names. If 'None', a prefix will be created based on the source name.

table_suffix: str

A suffix to add to all table names.

processor: airbyte._processors.sql.base.SqlProcessorBase
56    @final
57    @property
58    def processor(self) -> SqlProcessorBase:
59        """Return the SQL processor instance."""
60        if self._sql_processor is None:
61            self._sql_processor = self._sql_processor_class(cache=self)
62        return self._sql_processor

Return the SQL processor instance.

@final
def get_sql_engine(self) -> sqlalchemy.engine.base.Engine:
64    @final
65    def get_sql_engine(self) -> Engine:
66        """Return a new SQL engine to use."""
67        return self.processor.get_sql_engine()

Return a new SQL engine to use.

@abc.abstractmethod
def get_sql_alchemy_url(self) -> str:
69    @abc.abstractmethod
70    def get_sql_alchemy_url(self) -> str:
71        """Returns a SQL Alchemy URL."""
72        ...

Returns a SQL Alchemy URL.

@abc.abstractmethod
def get_database_name(self) -> str:
74    @abc.abstractmethod
75    def get_database_name(self) -> str:
76        """Return the name of the database."""
77        ...

Return the name of the database.

streams: dict[str, airbyte.datasets._sql.CachedDataset]
79    @final
80    @property
81    def streams(
82        self,
83    ) -> dict[str, CachedDataset]:
84        """Return a mapping of stream names to cached datasets."""
85        result = {}
86        stream_names = self.processor.expected_streams
87        if self._has_catalog_manager:
88            stream_names |= set(self._catalog_manager.stream_names)
89        for stream_name in stream_names:
90            result[stream_name] = CachedDataset(self, stream_name)
91
92        return result

Return a mapping of stream names to cached datasets.

Inherited Members
pydantic.main.BaseModel
BaseModel
Config
dict
json
parse_obj
parse_raw
parse_file
from_orm
construct
copy
schema
schema_json
validate
update_forward_refs