airbyte.caches.duckdb

A DuckDB implementation of the PyAirbyte cache.

Usage Example

```python
import airbyte as ab
from airbyte.caches import DuckDBCache

cache = DuckDBCache(
    db_path="/path/to/my/database.duckdb",
    schema_name="myschema",
)
```

 1# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 2"""A DuckDB implementation of the PyAirbyte cache.
 3
 4## Usage Example
 5
 6```python
 7import airbyte as ab
 8from airbyte.caches import DuckDBCache
 9
10cache = DuckDBCache(
11    db_path="/path/to/my/database.duckdb",
12    schema_name="myschema",
13)
14"""
15
16from __future__ import annotations
17
18import warnings
19from pathlib import Path  # noqa: TCH003  # Used in Pydantic init
20from typing import Union
21
22from overrides import overrides
23from typing_extensions import Literal
24
25from airbyte._processors.sql.duckdb import DuckDBSqlProcessor
26from airbyte.caches.base import CacheBase
27
28
29# Suppress warnings from DuckDB about reflection on indices.
30# https://github.com/Mause/duckdb_engine/issues/905
31warnings.filterwarnings(
32    "ignore",
33    message="duckdb-engine doesn't yet support reflection on indices",
34)
35
36
class DuckDBCache(CacheBase):
    """A DuckDB cache.

    Stores the location of a DuckDB database (`db_path`) and the schema to
    write to (`schema_name`), and derives the SQLAlchemy URL and database
    name from them.
    """

    db_path: Union[Path, str]
    """Normally db_path is a Path object.

    The database name will be inferred from the file name. For example, given a `db_path` of
    `/path/to/my/my_db.duckdb`, the database name is `my_db`.
    """

    schema_name: str = "main"
    """The name of the schema to write to. Defaults to "main"."""

    _sql_processor_class = DuckDBSqlProcessor

    @overrides
    def get_sql_alchemy_url(self) -> str:
        """Return the SQLAlchemy URL to use.

        The URL is `duckdb:///` followed by the string form of `db_path`;
        the schema name is not encoded in the URL.
        """
        return f"duckdb:///{self.db_path!s}"

    @overrides
    def get_database_name(self) -> str:
        """Return the name of the database.

        Returns:
            `"memory"` when `db_path` is the special value `:memory:`
            (given either as a `str` or as a `Path`); otherwise the last
            path component of `db_path`, truncated at its first `.`.
        """
        # Normalize once: `db_path` may be a `Path`, and a `Path` never compares
        # equal to a `str`, so the in-memory check must use the string form.
        path_str = str(self.db_path)
        if path_str == ":memory:":
            return "memory"

        # Split the path on the appropriate separator ("/" or "\")
        split_on: Literal["/", "\\"] = "\\" if "\\" in path_str else "/"

        # Return the file name without the extension
        return path_str.split(sep=split_on)[-1].split(".")[0]
class DuckDBCache(airbyte.caches.base.CacheBase):
class DuckDBCache(CacheBase):
    """A DuckDB cache.

    Stores the location of a DuckDB database (`db_path`) and the schema to
    write to (`schema_name`), and derives the SQLAlchemy URL and database
    name from them.
    """

    db_path: Union[Path, str]
    """Normally db_path is a Path object.

    The database name will be inferred from the file name. For example, given a `db_path` of
    `/path/to/my/my_db.duckdb`, the database name is `my_db`.
    """

    schema_name: str = "main"
    """The name of the schema to write to. Defaults to "main"."""

    _sql_processor_class = DuckDBSqlProcessor

    @overrides
    def get_sql_alchemy_url(self) -> str:
        """Return the SQLAlchemy URL to use.

        The URL is `duckdb:///` followed by the string form of `db_path`;
        the schema name is not encoded in the URL.
        """
        return f"duckdb:///{self.db_path!s}"

    @overrides
    def get_database_name(self) -> str:
        """Return the name of the database.

        Returns:
            `"memory"` when `db_path` is the special value `:memory:`
            (given either as a `str` or as a `Path`); otherwise the last
            path component of `db_path`, truncated at its first `.`.
        """
        # Normalize once: `db_path` may be a `Path`, and a `Path` never compares
        # equal to a `str`, so the in-memory check must use the string form.
        path_str = str(self.db_path)
        if path_str == ":memory:":
            return "memory"

        # Split the path on the appropriate separator ("/" or "\")
        split_on: Literal["/", "\\"] = "\\" if "\\" in path_str else "/"

        # Return the file name without the extension
        return path_str.split(sep=split_on)[-1].split(".")[0]

A DuckDB cache.

db_path: Union[pathlib.Path, str]

Normally db_path is a Path object.

The database name will be inferred from the file name. For example, given a db_path of /path/to/my/my_db.duckdb, the database name is my_db.

schema_name: str

The name of the schema to write to. Defaults to "main".

@overrides
def get_sql_alchemy_url(self) -> str:
53    @overrides
54    def get_sql_alchemy_url(self) -> str:
55        """Return the SQLAlchemy URL to use."""
56        # return f"duckdb:///{self.db_path}?schema={self.schema_name}"
57        return f"duckdb:///{self.db_path!s}"

Return the SQLAlchemy URL to use.

@overrides
def get_database_name(self) -> str:
59    @overrides
60    def get_database_name(self) -> str:
61        """Return the name of the database."""
62        if self.db_path == ":memory:":
63            return "memory"
64
65        # Split the path on the appropriate separator ("/" or "\")
66        split_on: Literal["/", "\\"] = "\\" if "\\" in str(self.db_path) else "/"
67
68        # Return the file name without the extension
69        return str(self.db_path).split(sep=split_on)[-1].split(".")[0]

Return the name of the database.

Inherited Members
pydantic.main.BaseModel
BaseModel
Config
dict
json
parse_obj
parse_raw
parse_file
from_orm
construct
copy
schema
schema_json
validate
update_forward_refs
airbyte.caches.base.CacheBase
cache_dir
cleanup
table_prefix
table_suffix
processor
get_sql_engine
streams