airbyte.caches.bigquery

A BigQuery implementation of the cache.

## Usage Example

```python
import airbyte as ab
from airbyte.caches import BigQueryCache

cache = BigQueryCache(
    project_name="myproject",
    dataset_name="mydataset",
    credentials_path="path/to/credentials.json",
)
```
 1# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
 2"""A BigQuery implementation of the cache.
 3
 4## Usage Example
 5
 6```python
 7import airbyte as ab
 8from airbyte.caches import BigQueryCache
 9
10cache = BigQueryCache(
11    project_name="myproject",
12    dataset_name="mydataset",
13    credentials_path="path/to/credentials.json",
14)
15```
16"""
17
18from __future__ import annotations
19
20from typing import TYPE_CHECKING, Any, Optional
21
22from overrides import overrides
23from pydantic import root_validator
24from sqlalchemy.engine import make_url
25
26from airbyte._processors.sql.bigquery import BigQuerySqlProcessor
27from airbyte.caches.base import (
28    CacheBase,
29)
30
31
32if TYPE_CHECKING:
33    from sqlalchemy.engine.url import URL
34
35
class BigQueryCache(CacheBase):
    """The BigQuery cache implementation.

    BigQuery maps Airbyte's generic cache concepts onto its own naming:
    the *project* acts as the database and the *dataset* acts as the schema.
    """

    project_name: str
    """The name of the project to use. In BigQuery, this is equivalent to the database name."""

    dataset_name: str = "airbyte_raw"
    """The name of the dataset to use. In BigQuery, this is equivalent to the schema name."""

    credentials_path: Optional[str] = None
    """The path to the credentials file to use.
    If not passed, falls back to the default inferred from the environment."""

    _sql_processor_class: type[BigQuerySqlProcessor] = BigQuerySqlProcessor

    @root_validator(pre=True)
    @classmethod
    def set_schema_name(cls, values: dict[str, Any]) -> dict[str, Any]:
        """Mirror `dataset_name` into the base class's `schema_name` field.

        A `pre=True` root validator receives the raw input *before* pydantic
        applies field defaults, so an omitted `dataset_name` key must fall
        back to the declared default here rather than being treated as an
        error; an explicit `dataset_name=None` is still rejected.

        Raises:
            ValueError: If `dataset_name` is explicitly set to `None`.
        """
        dataset_name = values.get(
            "dataset_name",
            cls.__fields__["dataset_name"].default,  # "airbyte_raw" when omitted
        )
        if dataset_name is None:
            raise ValueError("dataset_name must be defined")  # noqa: TRY003
        values["schema_name"] = dataset_name
        return values

    @overrides
    def get_database_name(self) -> str:
        """Return the name of the database. For BigQuery, this is the project name."""
        return self.project_name

    @overrides
    def get_sql_alchemy_url(self) -> str:
        """Return the SQLAlchemy URL to use.

        Appends `credentials_path` as a query parameter when set; otherwise
        the BigQuery driver falls back to credentials inferred from the
        environment.
        """
        url: URL = make_url(f"bigquery://{self.project_name!s}")
        if self.credentials_path:
            url = url.update_query_dict({"credentials_path": self.credentials_path})

        return str(url)
class BigQueryCache(airbyte.caches.base.CacheBase):
class BigQueryCache(CacheBase):
    """The BigQuery cache implementation."""

    project_name: str
    """The name of the project to use. In BigQuery, this is equivalent to the database name."""

    dataset_name: str = "airbyte_raw"
    """The name of the dataset to use. In BigQuery, this is equivalent to the schema name."""

    credentials_path: Optional[str] = None
    """The path to the credentials file to use.
    If not passed, falls back to the default inferred from the environment."""

    _sql_processor_class: type[BigQuerySqlProcessor] = BigQuerySqlProcessor

    @root_validator(pre=True)
    @classmethod
    def set_schema_name(cls, values: dict[str, Any]) -> dict[str, Any]:
        """Copy the dataset name into the generic ``schema_name`` slot."""
        if values.get("dataset_name") is None:
            raise ValueError("dataset_name must be defined")  # noqa: TRY003
        values["schema_name"] = values["dataset_name"]
        return values

    @overrides
    def get_database_name(self) -> str:
        """Return the name of the database. For BigQuery, this is the project name."""
        return self.project_name

    @overrides
    def get_sql_alchemy_url(self) -> str:
        """Return the SQLAlchemy URL to use."""
        base_url: URL = make_url(f"bigquery://{self.project_name!s}")
        if not self.credentials_path:
            return str(base_url)
        return str(
            base_url.update_query_dict({"credentials_path": self.credentials_path})
        )

The BigQuery cache implementation.

project_name: str

The name of the project to use. In BigQuery, this is equivalent to the database name.

dataset_name: str

The name of the dataset to use (default: "airbyte_raw"). In BigQuery, this is equivalent to the schema name.

credentials_path: Optional[str]

The path to the credentials file to use. If not passed, falls back to the default inferred from the environment.

@root_validator(pre=True)
@classmethod
def set_schema_name(cls, values: dict[str, typing.Any]) -> dict[str, typing.Any]:
52    @root_validator(pre=True)
53    @classmethod
54    def set_schema_name(cls, values: dict[str, Any]) -> dict[str, Any]:
55        dataset_name = values.get("dataset_name")
56        if dataset_name is None:
57            raise ValueError("dataset_name must be defined")  # noqa: TRY003
58        values["schema_name"] = dataset_name
59        return values
@overrides
def get_database_name(self) -> str:
61    @overrides
62    def get_database_name(self) -> str:
63        """Return the name of the database. For BigQuery, this is the project name."""
64        return self.project_name

Return the name of the database. For BigQuery, this is the project name.

@overrides
def get_sql_alchemy_url(self) -> str:
66    @overrides
67    def get_sql_alchemy_url(self) -> str:
68        """Return the SQLAlchemy URL to use."""
69        url: URL = make_url(f"bigquery://{self.project_name!s}")
70        if self.credentials_path:
71            url = url.update_query_dict({"credentials_path": self.credentials_path})
72
73        return str(url)

Return the SQLAlchemy URL to use.

Inherited Members
pydantic.main.BaseModel
BaseModel
Config
dict
json
parse_obj
parse_raw
parse_file
from_orm
construct
copy
schema
schema_json
validate
update_forward_refs
airbyte.caches.base.CacheBase
cache_dir
cleanup
schema_name
table_prefix
table_suffix
processor
get_sql_engine
streams