airbyte.cloud

PyAirbyte classes and methods for interacting with the Airbyte Cloud API.

You can use this module to interact with Airbyte Cloud, OSS, and Enterprise.

Usage example:

import airbyte as ab
from airbyte import cloud

workspace = cloud.CloudWorkspace(
    workspace_id="123",
    api_key=ab.get_secret("AIRBYTE_CLOUD_API_KEY"),
)

source = ab.get_source("source-faker", config={})
source.check()

workspace.deploy_source(source)
 1# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
 2"""PyAirbyte classes and methods for interacting with the Airbyte Cloud API.
 3
 4You can use this module to interact with Airbyte Cloud, OSS, and Enterprise.
 5
 6Usage example:
 7
 8```python
 9import airbyte as ab
10from airbyte import cloud
11
12workspace = cloud.CloudWorkspace(
13    workspace_id="123",
14    api_key=ab.get_secret("AIRBYTE_CLOUD_API_KEY"),
15)
16
17source = ab.get_source("source-faker", config={})
18source.check()
19
20workspace.deploy_source(source)
21```
22"""
23
24from __future__ import annotations
25
26from airbyte.cloud import connections, sync_results, workspaces
27from airbyte.cloud.connections import CloudConnection
28from airbyte.cloud.sync_results import SyncResult
29from airbyte.cloud.workspaces import CloudWorkspace
30
31
32__all__ = [
33    # Submodules
34    "workspaces",
35    "connections",
36    "sync_results",
37    # Classes
38    "CloudWorkspace",
39    "CloudConnection",
40    "SyncResult",
41]
@dataclass
class CloudWorkspace:
    """A remote workspace on the Airbyte Cloud.

    By overriding `api_root`, you can use this class to interact with self-managed Airbyte
    instances, both OSS and Enterprise.
    """

    workspace_id: str
    api_key: str
    api_root: str = CLOUD_API_ROOT

    @property
    def workspace_url(self) -> str | None:
        """The URL of this workspace under the configured API root."""
        return f"{self.api_root}/workspaces/{self.workspace_id}"

    # Test connection and creds

    def connect(self) -> None:
        """Check that the workspace is reachable and raise an exception otherwise.

        Note: It is not necessary to call this method before calling other operations. It
              serves primarily as a simple check to ensure that the workspace is reachable
              and credentials are correct.
        """
        _ = get_workspace(
            api_root=self.api_root,
            api_key=self.api_key,
            workspace_id=self.workspace_id,
        )
        print(f"Successfully connected to workspace: {self.workspace_url}")

    # Deploy and delete sources

    def deploy_source(
        self,
        source: Source,
    ) -> str:
        """Deploy a source to the workspace.

        Returns the newly deployed source ID.
        """
        source_configuration = source.get_config().copy()
        # The Airbyte API expects a `sourceType` key derived from the connector name.
        source_configuration["sourceType"] = source.name.replace("source-", "")

        deployed_source = create_source(
            name=f"{source.name.replace('-', ' ').title()} (Deployed by PyAirbyte)",
            api_root=self.api_root,
            api_key=self.api_key,
            workspace_id=self.workspace_id,
            config=source_configuration,
        )

        # Set the deployment IDs on the source object
        source._deployed_api_root = self.api_root  # noqa: SLF001  # Accessing non-public API
        source._deployed_workspace_id = self.workspace_id  # noqa: SLF001  # Accessing non-public API
        source._deployed_source_id = deployed_source.source_id  # noqa: SLF001  # Accessing non-public API

        return deployed_source.source_id

    def delete_source(
        self,
        source: str | Source,
    ) -> None:
        """Delete a source from the workspace.

        You can pass either the source ID `str` or a deployed `Source` object.

        Raises:
            ValueError: If `source` is neither a `str` nor a `Source`, or if the
                `Source` object has not been deployed.
        """
        if not isinstance(source, (str, Source)):
            raise ValueError(f"Invalid source type: {type(source)}")  # noqa: TRY004, TRY003

        if isinstance(source, Source):
            if not source._deployed_source_id:  # noqa: SLF001
                raise ValueError("Source has not been deployed.")  # noqa: TRY003

            source_id = source._deployed_source_id  # noqa: SLF001
        else:
            source_id = source

        delete_source(
            source_id=source_id,
            api_root=self.api_root,
            api_key=self.api_key,
        )

    # Deploy and delete destinations

    def deploy_cache_as_destination(
        self,
        cache: CacheBase,
    ) -> str:
        """Deploy a cache to the workspace as a new destination.

        Returns the newly deployed destination ID.
        """
        # E.g. `SnowflakeCache` -> `Snowflake`
        cache_type_name = cache.__class__.__name__.replace("Cache", "")

        deployed_destination: DestinationResponse = create_destination(
            name=f"Destination {cache_type_name} (Deployed by PyAirbyte)",
            api_root=self.api_root,
            api_key=self.api_key,
            workspace_id=self.workspace_id,
            config=get_destination_config_from_cache(cache),
        )

        # Set the deployment IDs on the cache object
        cache._deployed_api_root = self.api_root  # noqa: SLF001  # Accessing non-public API
        cache._deployed_workspace_id = self.workspace_id  # noqa: SLF001  # Accessing non-public API
        cache._deployed_destination_id = deployed_destination.destination_id  # noqa: SLF001  # Accessing non-public API

        return deployed_destination.destination_id

    def delete_destination(
        self,
        *,
        destination: str | None = None,
        cache: CacheBase | None = None,
    ) -> None:
        """Delete a deployed destination from the workspace.

        You can pass either the `Cache` class or the deployed destination ID as a `str`.

        Raises:
            ValueError: If neither or both of `destination` and `cache` are provided,
                or if the cache has not been deployed.
        """
        if destination is None and cache is None:
            raise ValueError("You must provide either a destination ID or a cache object.")  # noqa: TRY003
        if destination is not None and cache is not None:
            raise ValueError(  # noqa: TRY003
                "You must provide either a destination ID or a cache object, not both."
            )

        if cache:
            if not cache._deployed_destination_id:  # noqa: SLF001
                raise ValueError("Cache has not been deployed.")  # noqa: TRY003

            destination = cache._deployed_destination_id  # noqa: SLF001

        if destination is None:
            raise ValueError("No destination ID provided.")  # noqa: TRY003

        delete_destination(
            destination_id=destination,
            api_root=self.api_root,
            api_key=self.api_key,
        )

    # Deploy and delete connections

    def deploy_connection(
        self,
        source: Source | str,
        cache: CacheBase | None = None,
        destination: str | None = None,
        table_prefix: str | None = None,
        selected_streams: list[str] | None = None,
    ) -> str:
        """Deploy a source and cache to the workspace as a new connection.

        Returns the newly deployed connection ID as a `str`.

        Args:
            source (Source | str): The source to deploy. You can pass either an already deployed
                source ID `str` or a PyAirbyte `Source` object. If you pass a `Source` object,
                it will be deployed automatically.
            cache (CacheBase, optional): The cache to deploy as a new destination. You can provide
                `cache` or `destination`, but not both.
            destination (str, optional): The destination ID to use. You can provide
                `cache` or `destination`, but not both.
            table_prefix (str, optional): The table prefix for the connection. When a cache is
                provided, defaults to the cache's table prefix (or `""`).
            selected_streams (list[str], optional): The stream names to sync. Required when
                `source` is a source ID; defaults to the source's selected streams when a
                `Source` object is provided.
        """
        # Resolve source ID
        source_id: str
        if isinstance(source, Source):
            selected_streams = selected_streams or source.get_selected_streams()
            if source._deployed_source_id:  # noqa: SLF001
                source_id = source._deployed_source_id  # noqa: SLF001
            else:
                source_id = self.deploy_source(source)
        else:
            source_id = source
            if not selected_streams:
                raise exc.PyAirbyteInputError(
                    guidance="You must provide `selected_streams` when deploying a source ID."
                )

        # Resolve destination ID
        destination_id: str
        if destination:
            destination_id = destination
        elif cache:
            table_prefix = table_prefix if table_prefix is not None else (cache.table_prefix or "")
            if not cache._deployed_destination_id:  # noqa: SLF001
                destination_id = self.deploy_cache_as_destination(cache)
            else:
                destination_id = cache._deployed_destination_id  # noqa: SLF001
        else:
            raise exc.PyAirbyteInputError(
                guidance="You must provide either a destination ID or a cache object."
            )

        deployed_connection = create_connection(
            name="Connection (Deployed by PyAirbyte)",
            source_id=source_id,
            destination_id=destination_id,
            api_root=self.api_root,
            api_key=self.api_key,
            workspace_id=self.workspace_id,
            selected_stream_names=selected_streams,
            prefix=table_prefix or "",
        )

        # Record the deployed connection ID on the provided objects.
        if isinstance(source, Source):
            source._deployed_connection_id = deployed_connection.connection_id  # noqa: SLF001
        if cache:
            cache._deployed_connection_id = deployed_connection.connection_id  # noqa: SLF001

        return deployed_connection.connection_id

    def get_connection(
        self,
        connection_id: str,
    ) -> CloudConnection:
        """Get a connection by ID.

        This method does not fetch data from the API. It returns a `CloudConnection` object,
        which will be loaded lazily as needed.
        """
        return CloudConnection(
            workspace=self,
            connection_id=connection_id,
        )

    def delete_connection(
        self,
        connection_id: str | None,
        *,
        delete_source: bool = False,
        delete_destination: bool = False,
    ) -> None:
        """Delete a deployed connection from the workspace.

        Args:
            connection_id: The ID of the connection to delete.
            delete_source: Also delete the connection's source.
            delete_destination: Also delete the connection's destination.

        Raises:
            ValueError: If `connection_id` is `None`.
        """
        if connection_id is None:
            raise ValueError("No connection ID provided.")  # noqa: TRY003

        # Fetch before deleting so the source/destination IDs remain available.
        connection: ConnectionResponse = get_connection(
            connection_id=connection_id,
            api_root=self.api_root,
            api_key=self.api_key,
            workspace_id=self.workspace_id,
        )
        delete_connection(
            connection_id=connection_id,
            api_root=self.api_root,
            api_key=self.api_key,
            workspace_id=self.workspace_id,
        )
        if delete_source:
            self.delete_source(source=connection.source_id)

        if delete_destination:
            self.delete_destination(destination=connection.destination_id)

    # Run syncs

    def run_sync(
        self,
        connection_id: str,
        *,
        wait: bool = True,
        wait_timeout: int = 300,
    ) -> SyncResult:
        """Run a sync on a deployed connection."""
        connection = CloudConnection(
            workspace=self,
            connection_id=connection_id,
        )
        return connection.run_sync(wait=wait, wait_timeout=wait_timeout)

    # Get sync results and previous sync logs

    def get_sync_result(
        self,
        connection_id: str,
        job_id: str | None = None,
    ) -> SyncResult | None:
        """Get the sync result for a connection job.

        If `job_id` is not provided, the most recent sync job will be used.

        Returns `None` if job_id is omitted and no previous jobs are found.
        """
        if job_id is None:
            # Fall back to the most recent sync job, if any.
            results = self.get_previous_sync_logs(
                connection_id=connection_id,
                limit=1,
            )
            return results[0] if results else None

        connection = CloudConnection(
            workspace=self,
            connection_id=connection_id,
        )
        return SyncResult(
            workspace=self,
            connection=connection,
            job_id=job_id,
        )

    def get_previous_sync_logs(
        self,
        connection_id: str,
        *,
        limit: int = 10,
    ) -> list[SyncResult]:
        """Get the previous sync logs for a connection."""
        connection = CloudConnection(
            workspace=self,
            connection_id=connection_id,
        )
        return connection.get_previous_sync_logs(
            limit=limit,
        )

A remote workspace on the Airbyte Cloud.

By overriding api_root, you can use this class to interact with self-managed Airbyte instances, both OSS and Enterprise.

CloudWorkspace( workspace_id: str, api_key: str, api_root: str = 'https://api.airbyte.com/v1')
workspace_id: str
api_key: str
api_root: str = 'https://api.airbyte.com/v1'
workspace_url: str | None
51    @property
52    def workspace_url(self) -> str | None:
53        return f"{self.api_root}/workspaces/{self.workspace_id}"
def connect(self) -> None:
57    def connect(self) -> None:
58        """Check that the workspace is reachable and raise an exception otherwise.
59
60        Note: It is not necessary to call this method before calling other operations. It
61              serves primarily as a simple check to ensure that the workspace is reachable
62              and credentials are correct.
63        """
64        _ = get_workspace(
65            api_root=self.api_root,
66            api_key=self.api_key,
67            workspace_id=self.workspace_id,
68        )
69        print(f"Successfully connected to workspace: {self.workspace_url}")

Check that the workspace is reachable and raise an exception otherwise.

Note: It is not necessary to call this method before calling other operations. It serves primarily as a simple check to ensure that the workspace is reachable and credentials are correct.

def deploy_source(self, source: airbyte.sources.base.Source) -> str:
73    def deploy_source(
74        self,
75        source: Source,
76    ) -> str:
77        """Deploy a source to the workspace.
78
79        Returns the newly deployed source ID.
80        """
81        source_configuration = source.get_config().copy()
82        source_configuration["sourceType"] = source.name.replace("source-", "")
83
84        deployed_source = create_source(
85            name=f"{source.name.replace('-', ' ').title()} (Deployed by PyAirbyte)",
86            api_root=self.api_root,
87            api_key=self.api_key,
88            workspace_id=self.workspace_id,
89            config=source_configuration,
90        )
91
 92        # Set the deployment IDs on the source object
 93        source._deployed_api_root = self.api_root  # noqa: SLF001  # Accessing non-public API
 94        source._deployed_workspace_id = self.workspace_id  # noqa: SLF001  # Accessing non-public API
 95        source._deployed_source_id = deployed_source.source_id  # noqa: SLF001  # Accessing non-public API
96
97        return deployed_source.source_id

Deploy a source to the workspace.

Returns the newly deployed source ID.

def delete_source(self, source: str | airbyte.sources.base.Source) -> None:
 99    def delete_source(
100        self,
101        source: str | Source,
102    ) -> None:
103        """Delete a source from the workspace.
104
105        You can pass either the source ID `str` or a deployed `Source` object.
106        """
107        if not isinstance(source, (str, Source)):
108            raise ValueError(f"Invalid source type: {type(source)}")  # noqa: TRY004, TRY003
109
110        if isinstance(source, Source):
111            if not source._deployed_source_id:  # noqa: SLF001
112                raise ValueError("Source has not been deployed.")  # noqa: TRY003
113
114            source_id = source._deployed_source_id  # noqa: SLF001
115
116        elif isinstance(source, str):
117            source_id = source
118
119        delete_source(
120            source_id=source_id,
121            api_root=self.api_root,
122            api_key=self.api_key,
123        )

Delete a source from the workspace.

You can pass either the source ID str or a deployed Source object.

def deploy_cache_as_destination(self, cache: airbyte.caches.base.CacheBase) -> str:
127    def deploy_cache_as_destination(
128        self,
129        cache: CacheBase,
130    ) -> str:
131        """Deploy a cache to the workspace as a new destination.
132
133        Returns the newly deployed destination ID.
134        """
135        cache_type_name = cache.__class__.__name__.replace("Cache", "")
136
137        deployed_destination: DestinationResponse = create_destination(
138            name=f"Destination {cache_type_name} (Deployed by PyAirbyte)",
139            api_root=self.api_root,
140            api_key=self.api_key,
141            workspace_id=self.workspace_id,
142            config=get_destination_config_from_cache(cache),
143        )
144
145        # Set the deployment IDs on the cache object
146        cache._deployed_api_root = self.api_root  # noqa: SLF001  # Accessing non-public API
147        cache._deployed_workspace_id = self.workspace_id  # noqa: SLF001  # Accessing non-public API
148        cache._deployed_destination_id = deployed_destination.destination_id  # noqa: SLF001  # Accessing non-public API
149
150        return deployed_destination.destination_id

Deploy a cache to the workspace as a new destination.

Returns the newly deployed destination ID.

def delete_destination( self, *, destination: str | None = None, cache: airbyte.caches.base.CacheBase | None = None) -> None:
152    def delete_destination(
153        self,
154        *,
155        destination: str | None = None,
156        cache: CacheBase | None = None,
157    ) -> None:
158        """Delete a deployed destination from the workspace.
159
160        You can pass either the `Cache` class or the deployed destination ID as a `str`.
161        """
162        if destination is None and cache is None:
163            raise ValueError("You must provide either a destination ID or a cache object.")  # noqa: TRY003
164        if destination is not None and cache is not None:
165            raise ValueError(  # noqa: TRY003
166                "You must provide either a destination ID or a cache object, not both."
167            )
168
169        if cache:
170            if not cache._deployed_destination_id:  # noqa: SLF001
171                raise ValueError("Cache has not been deployed.")  # noqa: TRY003
172
173            destination = cache._deployed_destination_id  # noqa: SLF001
174
175        if destination is None:
176            raise ValueError("No destination ID provided.")  # noqa: TRY003
177
178        delete_destination(
179            destination_id=destination,
180            api_root=self.api_root,
181            api_key=self.api_key,
182        )

Delete a deployed destination from the workspace.

You can pass either the Cache class or the deployed destination ID as a str.

def deploy_connection( self, source: airbyte.sources.base.Source | str, cache: airbyte.caches.base.CacheBase | None = None, destination: str | None = None, table_prefix: str | None = None, selected_streams: list[str] | None = None) -> str:
186    def deploy_connection(
187        self,
188        source: Source | str,
189        cache: CacheBase | None = None,
190        destination: str | None = None,
191        table_prefix: str | None = None,
192        selected_streams: list[str] | None = None,
193    ) -> str:
194        """Deploy a source and cache to the workspace as a new connection.
195
196        Returns the newly deployed connection ID as a `str`.
197
198        Args:
199            source (Source | str): The source to deploy. You can pass either an already deployed
200                source ID `str` or a PyAirbyte `Source` object. If you pass a `Source` object,
201                it will be deployed automatically.
202            cache (CacheBase, optional): The cache to deploy as a new destination. You can provide
203                `cache` or `destination`, but not both.
204            destination (str, optional): The destination ID to use. You can provide
205                `cache` or `destination`, but not both.
206        """
207        # Resolve source ID
208        source_id: str
209        if isinstance(source, Source):
210            selected_streams = selected_streams or source.get_selected_streams()
211            if source._deployed_source_id:  # noqa: SLF001
212                source_id = source._deployed_source_id  # noqa: SLF001
213            else:
214                source_id = self.deploy_source(source)
215        else:
216            source_id = source
217            if not selected_streams:
218                raise exc.PyAirbyteInputError(
219                    guidance="You must provide `selected_streams` when deploying a source ID."
220                )
221
222        # Resolve destination ID
223        destination_id: str
224        if destination:
225            destination_id = destination
226        elif cache:
227            table_prefix = table_prefix if table_prefix is not None else (cache.table_prefix or "")
228            if not cache._deployed_destination_id:  # noqa: SLF001
229                destination_id = self.deploy_cache_as_destination(cache)
230            else:
231                destination_id = cache._deployed_destination_id  # noqa: SLF001
232        else:
233            raise exc.PyAirbyteInputError(
234                guidance="You must provide either a destination ID or a cache object."
235            )
236
237        assert source_id is not None
238        assert destination_id is not None
239
240        deployed_connection = create_connection(
241            name="Connection (Deployed by PyAirbyte)",
242            source_id=source_id,
243            destination_id=destination_id,
244            api_root=self.api_root,
245            api_key=self.api_key,
246            workspace_id=self.workspace_id,
247            selected_stream_names=selected_streams,
248            prefix=table_prefix or "",
249        )
250
251        if isinstance(source, Source):
252            source._deployed_connection_id = deployed_connection.connection_id  # noqa: SLF001
253        if cache:
254            cache._deployed_connection_id = deployed_connection.connection_id  # noqa: SLF001
255
256        return deployed_connection.connection_id

Deploy a source and cache to the workspace as a new connection.

Returns the newly deployed connection ID as a str.

Arguments:
  • source (Source | str): The source to deploy. You can pass either an already deployed source ID str or a PyAirbyte Source object. If you pass a Source object, it will be deployed automatically.
  • cache (CacheBase, optional): The cache to deploy as a new destination. You can provide cache or destination, but not both.
  • destination (str, optional): The destination ID to use. You can provide cache or destination, but not both.
def get_connection(self, connection_id: str) -> CloudConnection:
258    def get_connection(
259        self,
260        connection_id: str,
261    ) -> CloudConnection:
262        """Get a connection by ID.
263
264        This method does not fetch data from the API. It returns a `CloudConnection` object,
265        which will be loaded lazily as needed.
266        """
267        return CloudConnection(
268            workspace=self,
269            connection_id=connection_id,
270        )

Get a connection by ID.

This method does not fetch data from the API. It returns a CloudConnection object, which will be loaded lazily as needed.

def delete_connection( self, connection_id: str | None, *, delete_source: bool = False, delete_destination: bool = False) -> None:
272    def delete_connection(
273        self,
274        connection_id: str | None,
275        *,
276        delete_source: bool = False,
277        delete_destination: bool = False,
278    ) -> None:
279        """Delete a deployed connection from the workspace."""
280        if connection_id is None:
281            raise ValueError("No connection ID provided.")  # noqa: TRY003
282
283        connection: ConnectionResponse = get_connection(
284            connection_id=connection_id,
285            api_root=self.api_root,
286            api_key=self.api_key,
287            workspace_id=self.workspace_id,
288        )
289        delete_connection(
290            connection_id=connection_id,
291            api_root=self.api_root,
292            api_key=self.api_key,
293            workspace_id=self.workspace_id,
294        )
295        if delete_source:
296            self.delete_source(source=connection.source_id)
297
298        if delete_destination:
299            self.delete_destination(destination=connection.destination_id)

Delete a deployed connection from the workspace.

def run_sync( self, connection_id: str, *, wait: bool = True, wait_timeout: int = 300) -> SyncResult:
303    def run_sync(
304        self,
305        connection_id: str,
306        *,
307        wait: bool = True,
308        wait_timeout: int = 300,
309    ) -> SyncResult:
310        """Run a sync on a deployed connection."""
311        connection = CloudConnection(
312            workspace=self,
313            connection_id=connection_id,
314        )
315        return connection.run_sync(wait=wait, wait_timeout=wait_timeout)

Run a sync on a deployed connection.

def get_sync_result( self, connection_id: str, job_id: str | None = None) -> SyncResult | None:
319    def get_sync_result(
320        self,
321        connection_id: str,
322        job_id: str | None = None,
323    ) -> SyncResult | None:
324        """Get the sync result for a connection job.
325
326        If `job_id` is not provided, the most recent sync job will be used.
327
328        Returns `None` if job_id is omitted and no previous jobs are found.
329        """
330        connection = CloudConnection(
331            workspace=self,
332            connection_id=connection_id,
333        )
334        if job_id is None:
335            results = self.get_previous_sync_logs(
336                connection_id=connection_id,
337                limit=1,
338            )
339            if results:
340                return results[0]
341
342            return None
343        connection = CloudConnection(
344            workspace=self,
345            connection_id=connection_id,
346        )
347        return SyncResult(
348            workspace=self,
349            connection=connection,
350            job_id=job_id,
351        )

Get the sync result for a connection job.

If job_id is not provided, the most recent sync job will be used.

Returns None if job_id is omitted and no previous jobs are found.

def get_previous_sync_logs( self, connection_id: str, *, limit: int = 10) -> list[SyncResult]:
353    def get_previous_sync_logs(
354        self,
355        connection_id: str,
356        *,
357        limit: int = 10,
358    ) -> list[SyncResult]:
359        """Get the previous sync logs for a connection."""
360        connection = CloudConnection(
361            workspace=self,
362            connection_id=connection_id,
363        )
364        return connection.get_previous_sync_logs(
365            limit=limit,
366        )

Get the previous sync logs for a connection.

class CloudConnection:
 20class CloudConnection:
 21    """A connection is a link between a source and a destination.
 22
 23    Do not instantiate this class directly. Instead, use
 24    `CloudWorkspace.deploy_connection`
 25    or `.CloudWorkspace.get_connection` methods.
 26    """
 27
 28    def __init__(
 29        self,
 30        workspace: CloudWorkspace,
 31        connection_id: str,
 32        source: str | None = None,
 33        destination: str | None = None,
 34    ) -> None:
 35        self.connection_id = connection_id
 36        """The ID of the connection."""
 37
 38        self.workspace = workspace
 39        """The workspace that the connection belongs to."""
 40
 41        self._source_id = source
 42        """The ID of the source."""
 43
 44        self._destination_id = destination
 45        """The ID of the destination."""
 46
 47        self._connection_info: ConnectionResponse | None = None
 48
 49    def _fetch_connection_info(self) -> ConnectionResponse:
 50        """Populate the connection with data from the API."""
 51        return api_util.get_connection(
 52            workspace_id=self.workspace.workspace_id,
 53            connection_id=self.connection_id,
 54            api_root=self.workspace.api_root,
 55            api_key=self.workspace.api_key,
 56        )
 57
 58    # Properties
 59
 60    @property
 61    def source_id(self) -> str:
 62        """The ID of the source."""
 63        if not self._source_id:
 64            if not self._connection_info:
 65                self._connection_info = self._fetch_connection_info()
 66
 67            self._source_id = self._connection_info.source_id
 68
 69        return cast(str, self._source_id)
 70
 71    @property
 72    def destination_id(self) -> str:
 73        """The ID of the destination."""
 74        if not self._destination_id:
 75            if not self._connection_info:
 76                self._connection_info = self._fetch_connection_info()
 77
 78        self._destination_id = self._connection_info.destination_id
 79
 80        return cast(str, self._destination_id)
 81
 82    @property
 83    def stream_names(self) -> list[str]:
 84        """The stream names."""
 85        if not self._connection_info:
 86            self._connection_info = self._fetch_connection_info()
 87
 88        return [stream.name for stream in self._connection_info.configurations.streams]
 89
 90    @property
 91    def table_prefix(self) -> str:
 92        """The table prefix."""
 93        if not self._connection_info:
 94            self._connection_info = self._fetch_connection_info()
 95
 96        return self._connection_info.configurations.prefix
 97
 98    @property
 99    def connection_url(self) -> str | None:
100        return f"{self.workspace.workspace_url}/connections/{self.connection_id}"
101
102    @property
103    def job_history_url(self) -> str | None:
104        return f"{self.connection_url}/job-history"
105
106    # Run Sync
107
108    def run_sync(
109        self,
110        *,
111        wait: bool = True,
112        wait_timeout: int = 300,
113    ) -> SyncResult:
114        """Run a sync."""
115        connection_response = api_util.run_connection(
116            connection_id=self.connection_id,
117            api_root=self.workspace.api_root,
118            api_key=self.workspace.api_key,
119            workspace_id=self.workspace.workspace_id,
120        )
121        sync_result = SyncResult(
122            workspace=self.workspace,
123            connection=self,
124            job_id=connection_response.job_id,
125        )
126
127        if wait:
128            sync_result.wait_for_completion(
129                wait_timeout=wait_timeout,
130                raise_failure=True,
131                raise_timeout=True,
132            )
133
134        return sync_result
135
136    # Logs
137
138    def get_previous_sync_logs(
139        self,
140        *,
141        limit: int = 10,
142    ) -> list[SyncResult]:
143        """Get the previous sync logs for a connection."""
144        sync_logs: list[JobResponse] = api_util.get_job_logs(
145            connection_id=self.connection_id,
146            api_root=self.workspace.api_root,
147            api_key=self.workspace.api_key,
148            workspace_id=self.workspace.workspace_id,
149            limit=limit,
150        )
151        return [
152            SyncResult(
153                workspace=self.workspace,
154                connection=self,
155                job_id=sync_log.job_id,
156                _latest_status=sync_log.status,
157            )
158            for sync_log in sync_logs
159        ]
160
161    def get_sync_result(
162        self,
163        job_id: str | None = None,
164    ) -> SyncResult | None:
165        """Get the sync result for the connection.
166
167        If `job_id` is not provided, the most recent sync job will be used.
168
169        Returns `None` if job_id is omitted and no previous jobs are found.
170        """
171        if job_id is None:
172            # Get the most recent sync job
173            results = self.get_previous_sync_logs(
174                limit=1,
175            )
176            if results:
177                return results[0]
178
179            return None
180
181        # Get the sync job by ID (lazy loaded)
182        return SyncResult(
183            workspace=self.workspace,
184            connection=self,
185            job_id=job_id,
186        )
187
188    # Deletions
189
190    def delete(
191        self,
192        *,
193        delete_source: bool = False,
194        delete_destination: bool = False,
195    ) -> None:
196        """Delete the connection.
197
198        Args:
199            delete_source: Whether to also delete the source.
200            delete_destination: Whether to also delete the destination.
201        """
202        self.workspace.delete_connection(connection_id=self.connection_id)
203
204        if delete_source:
205            self.workspace.delete_source(source=self.source_id)
206
207        if delete_destination:
208            self.workspace.delete_destination(destination=self.destination_id)

A connection is a link between a source and a destination.

Do not instantiate this class directly. Instead, use the CloudWorkspace.deploy_connection or CloudWorkspace.get_connection methods.

CloudConnection( workspace: CloudWorkspace, connection_id: str, source: str | None = None, destination: str | None = None)
28    def __init__(
29        self,
30        workspace: CloudWorkspace,
31        connection_id: str,
32        source: str | None = None,
33        destination: str | None = None,
34    ) -> None:
35        self.connection_id = connection_id
36        """The ID of the connection."""
37
38        self.workspace = workspace
39        """The workspace that the connection belongs to."""
40
41        self._source_id = source
42        """The ID of the source."""
43
44        self._destination_id = destination
45        """The ID of the destination."""
46
47        self._connection_info: ConnectionResponse | None = None
connection_id

The ID of the connection.

workspace

The workspace that the connection belongs to.

source_id: str
60    @property
61    def source_id(self) -> str:
62        """The ID of the source."""
63        if not self._source_id:
64            if not self._connection_info:
65                self._connection_info = self._fetch_connection_info()
66
67            self._source_id = self._connection_info.source_id
68
69        return cast(str, self._source_id)

The ID of the source.

destination_id: str
71    @property
72    def destination_id(self) -> str:
73        """The ID of the destination."""
74        if not self._destination_id:
75            if not self._connection_info:
76                self._connection_info = self._fetch_connection_info()
77
78        self._destination_id = self._connection_info.destination_id
79
80        return cast(str, self._destination_id)

The ID of the destination.

stream_names: list[str]
82    @property
83    def stream_names(self) -> list[str]:
84        """The stream names."""
85        if not self._connection_info:
86            self._connection_info = self._fetch_connection_info()
87
88        return [stream.name for stream in self._connection_info.configurations.streams]

The stream names.

table_prefix: str
90    @property
91    def table_prefix(self) -> str:
92        """The table prefix."""
93        if not self._connection_info:
94            self._connection_info = self._fetch_connection_info()
95
96        return self._connection_info.configurations.prefix

The table prefix.

connection_url: str | None
 98    @property
 99    def connection_url(self) -> str | None:
100        return f"{self.workspace.workspace_url}/connections/{self.connection_id}"
job_history_url: str | None
102    @property
103    def job_history_url(self) -> str | None:
104        return f"{self.connection_url}/job-history"
def run_sync( self, *, wait: bool = True, wait_timeout: int = 300) -> SyncResult:
108    def run_sync(
109        self,
110        *,
111        wait: bool = True,
112        wait_timeout: int = 300,
113    ) -> SyncResult:
114        """Run a sync."""
115        connection_response = api_util.run_connection(
116            connection_id=self.connection_id,
117            api_root=self.workspace.api_root,
118            api_key=self.workspace.api_key,
119            workspace_id=self.workspace.workspace_id,
120        )
121        sync_result = SyncResult(
122            workspace=self.workspace,
123            connection=self,
124            job_id=connection_response.job_id,
125        )
126
127        if wait:
128            sync_result.wait_for_completion(
129                wait_timeout=wait_timeout,
130                raise_failure=True,
131                raise_timeout=True,
132            )
133
134        return sync_result

Run a sync.

def get_previous_sync_logs(self, *, limit: int = 10) -> list[SyncResult]:
138    def get_previous_sync_logs(
139        self,
140        *,
141        limit: int = 10,
142    ) -> list[SyncResult]:
143        """Get the previous sync logs for a connection."""
144        sync_logs: list[JobResponse] = api_util.get_job_logs(
145            connection_id=self.connection_id,
146            api_root=self.workspace.api_root,
147            api_key=self.workspace.api_key,
148            workspace_id=self.workspace.workspace_id,
149            limit=limit,
150        )
151        return [
152            SyncResult(
153                workspace=self.workspace,
154                connection=self,
155                job_id=sync_log.job_id,
156                _latest_status=sync_log.status,
157            )
158            for sync_log in sync_logs
159        ]

Get the previous sync logs for a connection.

def get_sync_result( self, job_id: str | None = None) -> SyncResult | None:
161    def get_sync_result(
162        self,
163        job_id: str | None = None,
164    ) -> SyncResult | None:
165        """Get the sync result for the connection.
166
167        If `job_id` is not provided, the most recent sync job will be used.
168
169        Returns `None` if job_id is omitted and no previous jobs are found.
170        """
171        if job_id is None:
172            # Get the most recent sync job
173            results = self.get_previous_sync_logs(
174                limit=1,
175            )
176            if results:
177                return results[0]
178
179            return None
180
181        # Get the sync job by ID (lazy loaded)
182        return SyncResult(
183            workspace=self.workspace,
184            connection=self,
185            job_id=job_id,
186        )

Get the sync result for the connection.

If job_id is not provided, the most recent sync job will be used.

Returns None if job_id is omitted and no previous jobs are found.

def delete( self, *, delete_source: bool = False, delete_destination: bool = False) -> None:
190    def delete(
191        self,
192        *,
193        delete_source: bool = False,
194        delete_destination: bool = False,
195    ) -> None:
196        """Delete the connection.
197
198        Args:
199            delete_source: Whether to also delete the source.
200            delete_destination: Whether to also delete the destination.
201        """
202        self.workspace.delete_connection(connection_id=self.connection_id)
203
204        if delete_source:
205            self.workspace.delete_source(source=self.source_id)
206
207        if delete_destination:
208            self.workspace.delete_destination(destination=self.destination_id)

Delete the connection.

Arguments:
  • delete_source: Whether to also delete the source.
  • delete_destination: Whether to also delete the destination.
@dataclass
class SyncResult:
 42@dataclass
 43class SyncResult:
 44    """The result of a sync operation."""
 45
 46    workspace: CloudWorkspace
 47    connection: CloudConnection
 48    job_id: str
 49    table_name_prefix: str = ""
 50    table_name_suffix: str = ""
 51    _latest_status: JobStatusEnum | None = None
 52    _connection_response: ConnectionResponse | None = None
 53    _cache: CacheBase | None = None
 54
 55    @property
 56    def job_url(self) -> str:
 57        """Return the URL of the sync job."""
 58        return f"{self.connection.job_history_url}/{self.job_id}"
 59
 60    def _get_connection_info(self, *, force_refresh: bool = False) -> ConnectionResponse:
 61        """Return connection info for the sync job."""
 62        if self._connection_response and not force_refresh:
 63            return self._connection_response
 64
 65        self._connection_response = api_util.get_connection(
 66            workspace_id=self.workspace.workspace_id,
 67            api_root=self.workspace.api_root,
 68            api_key=self.workspace.api_key,
 69            connection_id=self.connection.connection_id,
 70        )
 71        return self._connection_response
 72
 73    def _get_destination_configuration(self, *, force_refresh: bool = False) -> dict[str, Any]:
 74        """Return the destination configuration for the sync job."""
 75        connection_info: ConnectionResponse = self._get_connection_info(force_refresh=force_refresh)
 76        destination_response = api_util.get_destination(
 77            destination_id=connection_info.destination_id,
 78            api_root=self.workspace.api_root,
 79            api_key=self.workspace.api_key,
 80        )
 81        return destination_response.configuration
 82
 83    def is_job_complete(self) -> bool:
 84        """Check if the sync job is complete."""
 85        return self.get_job_status() in FINAL_STATUSES
 86
 87    def get_job_status(self) -> JobStatusEnum:
 88        """Check if the sync job is still running."""
 89        if self._latest_status and self._latest_status in FINAL_STATUSES:
 90            return self._latest_status
 91
 92        job_info = api_util.get_job_info(
 93            job_id=self.job_id,
 94            api_root=self.workspace.api_root,
 95            api_key=self.workspace.api_key,
 96        )
 97        self._latest_status = job_info.status
 98
 99        return job_info.status
100
101    def raise_failure_status(
102        self,
103        *,
104        refresh_status: bool = False,
105    ) -> None:
106        """Raise an exception if the sync job failed.
107
108        By default, this method will use the latest status available. If you want to refresh the
109        status before checking for failure, set `refresh_status=True`. If the job has failed, this
110        method will raise a `AirbyteConnectionSyncError`.
111
112        Otherwise, do nothing.
113        """
114        latest_status = self._latest_status
115        if refresh_status:
116            latest_status = self.get_job_status()
117
118        if latest_status in FAILED_STATUSES:
119            raise AirbyteConnectionSyncError(
120                workspace=self.workspace,
121                connection_id=self.connection.connection_id,
122                job_id=self.job_id,
123                job_status=self._latest_status,
124            )
125
126    def wait_for_completion(
127        self,
128        *,
129        wait_timeout: int = DEFAULT_SYNC_TIMEOUT_SECONDS,
130        raise_timeout: bool = True,
131        raise_failure: bool = False,
132    ) -> JobStatusEnum:
133        """Wait for a job to finish running."""
134        start_time = time.time()
135        while True:
136            latest_status = self.get_job_status()
137            if latest_status in FINAL_STATUSES:
138                if raise_failure:
139                    # No-op if the job succeeded or is still running:
140                    self.raise_failure_status()
141
142                return latest_status
143
144            if time.time() - start_time > wait_timeout:
145                if raise_timeout:
146                    raise AirbyteConnectionSyncTimeoutError(
147                        workspace=self.workspace,
148                        connection_id=self.connection.connection_id,
149                        job_id=self.job_id,
150                        job_status=latest_status,
151                        timeout=wait_timeout,
152                    )
153
154                return latest_status  # This will be a non-final status
155
156            time.sleep(api_util.JOB_WAIT_INTERVAL_SECS)
157
158    def get_sql_cache(self) -> CacheBase:
159        """Return a SQL Cache object for working with the data in a SQL-based destination."""
160        if self._cache:
161            return self._cache
162
163        destination_configuration: dict[str, Any] = self._get_destination_configuration()
164        self._cache = create_cache_from_destination_config(
165            destination_configuration=destination_configuration
166        )
167        return self._cache
168
169    def get_sql_engine(self) -> sqlalchemy.engine.Engine:
170        """Return a SQL Engine for querying a SQL-based destination."""
171        return self.get_sql_cache().get_sql_engine()
172
173    def get_sql_table_name(self, stream_name: str) -> str:
174        """Return the SQL table name of the named stream."""
175        return self.get_sql_cache().processor.get_sql_table_name(stream_name=stream_name)
176
177    def get_sql_table(
178        self,
179        stream_name: str,
180    ) -> sqlalchemy.Table:
181        """Return a SQLAlchemy table object for the named stream."""
182        return self.get_sql_cache().processor.get_sql_table(stream_name)
183
184    def get_dataset(self, stream_name: str) -> CachedDataset:
185        """Return cached dataset."""
186        return CachedDataset(self.get_sql_cache(), stream_name=stream_name)
187
188    def get_sql_database_name(self) -> str:
189        """Return the SQL database name."""
190        cache = self.get_sql_cache()
191        return cache.get_database_name()
192
193    def get_sql_schema_name(self) -> str:
194        """Return the SQL schema name."""
195        cache = self.get_sql_cache()
196        return cache.schema_name
197
198    @property
199    def stream_names(self) -> set[str]:
200        """Return the set of stream names."""
201        return self.get_sql_cache().processor.expected_streams
202
203    @final
204    @property
205    def streams(
206        self,
207    ) -> SyncResultStreams:
208        """Return a mapping of stream names to cached datasets."""
209        return self.SyncResultStreams(self)
210
211    class SyncResultStreams(Mapping[str, CachedDataset]):
212        """A mapping of stream names to cached datasets."""
213
214        def __init__(
215            self,
216            parent: SyncResult,
217            /,
218        ) -> None:
219            self.parent: SyncResult = parent
220
221        def __getitem__(self, key: str) -> CachedDataset:
222            return self.parent.get_dataset(stream_name=key)
223
224        def __iter__(self) -> Iterator[str]:
225            """Iterate over the stream names."""
226            return iter(self.parent.stream_names)
227
228        def __len__(self) -> int:
229            return len(self.parent.stream_names)

The result of a sync operation.

SyncResult( workspace: CloudWorkspace, connection: CloudConnection, job_id: str, table_name_prefix: str = '', table_name_suffix: str = '', _latest_status: airbyte_api.models.shared.jobstatusenum.JobStatusEnum | None = None, _connection_response: airbyte_api.models.shared.connectionresponse.ConnectionResponse | None = None, _cache: airbyte.caches.base.CacheBase | None = None)
workspace: CloudWorkspace
connection: CloudConnection
job_id: str
table_name_prefix: str = ''
table_name_suffix: str = ''
job_url: str
55    @property
56    def job_url(self) -> str:
57        """Return the URL of the sync job."""
58        return f"{self.connection.job_history_url}/{self.job_id}"

Return the URL of the sync job.

def is_job_complete(self) -> bool:
83    def is_job_complete(self) -> bool:
84        """Check if the sync job is complete."""
85        return self.get_job_status() in FINAL_STATUSES

Check if the sync job is complete.

def get_job_status(self) -> airbyte_api.models.shared.jobstatusenum.JobStatusEnum:
87    def get_job_status(self) -> JobStatusEnum:
88        """Check if the sync job is still running."""
89        if self._latest_status and self._latest_status in FINAL_STATUSES:
90            return self._latest_status
91
92        job_info = api_util.get_job_info(
93            job_id=self.job_id,
94            api_root=self.workspace.api_root,
95            api_key=self.workspace.api_key,
96        )
97        self._latest_status = job_info.status
98
99        return job_info.status

Check if the sync job is still running.

def raise_failure_status(self, *, refresh_status: bool = False) -> None:
101    def raise_failure_status(
102        self,
103        *,
104        refresh_status: bool = False,
105    ) -> None:
106        """Raise an exception if the sync job failed.
107
108        By default, this method will use the latest status available. If you want to refresh the
109        status before checking for failure, set `refresh_status=True`. If the job has failed, this
110        method will raise a `AirbyteConnectionSyncError`.
111
112        Otherwise, do nothing.
113        """
114        latest_status = self._latest_status
115        if refresh_status:
116            latest_status = self.get_job_status()
117
118        if latest_status in FAILED_STATUSES:
119            raise AirbyteConnectionSyncError(
120                workspace=self.workspace,
121                connection_id=self.connection.connection_id,
122                job_id=self.job_id,
123                job_status=self._latest_status,
124            )

Raise an exception if the sync job failed.

By default, this method will use the latest status available. If you want to refresh the status before checking for failure, set refresh_status=True. If the job has failed, this method will raise a AirbyteConnectionSyncError.

Otherwise, do nothing.

def wait_for_completion( self, *, wait_timeout: int = 1800, raise_timeout: bool = True, raise_failure: bool = False) -> airbyte_api.models.shared.jobstatusenum.JobStatusEnum:
126    def wait_for_completion(
127        self,
128        *,
129        wait_timeout: int = DEFAULT_SYNC_TIMEOUT_SECONDS,
130        raise_timeout: bool = True,
131        raise_failure: bool = False,
132    ) -> JobStatusEnum:
133        """Wait for a job to finish running."""
134        start_time = time.time()
135        while True:
136            latest_status = self.get_job_status()
137            if latest_status in FINAL_STATUSES:
138                if raise_failure:
139                    # No-op if the job succeeded or is still running:
140                    self.raise_failure_status()
141
142                return latest_status
143
144            if time.time() - start_time > wait_timeout:
145                if raise_timeout:
146                    raise AirbyteConnectionSyncTimeoutError(
147                        workspace=self.workspace,
148                        connection_id=self.connection.connection_id,
149                        job_id=self.job_id,
150                        job_status=latest_status,
151                        timeout=wait_timeout,
152                    )
153
154                return latest_status  # This will be a non-final status
155
156            time.sleep(api_util.JOB_WAIT_INTERVAL_SECS)

Wait for a job to finish running.

def get_sql_cache(self) -> airbyte.caches.base.CacheBase:
158    def get_sql_cache(self) -> CacheBase:
159        """Return a SQL Cache object for working with the data in a SQL-based destination."""
160        if self._cache:
161            return self._cache
162
163        destination_configuration: dict[str, Any] = self._get_destination_configuration()
164        self._cache = create_cache_from_destination_config(
165            destination_configuration=destination_configuration
166        )
167        return self._cache

Return a SQL Cache object for working with the data in a SQL-based destination.

def get_sql_engine(self) -> sqlalchemy.engine.base.Engine:
169    def get_sql_engine(self) -> sqlalchemy.engine.Engine:
170        """Return a SQL Engine for querying a SQL-based destination."""
171        return self.get_sql_cache().get_sql_engine()

Return a SQL Engine for querying a SQL-based destination.

def get_sql_table_name(self, stream_name: str) -> str:
173    def get_sql_table_name(self, stream_name: str) -> str:
174        """Return the SQL table name of the named stream."""
175        return self.get_sql_cache().processor.get_sql_table_name(stream_name=stream_name)

Return the SQL table name of the named stream.

def get_sql_table(self, stream_name: str) -> sqlalchemy.sql.schema.Table:
177    def get_sql_table(
178        self,
179        stream_name: str,
180    ) -> sqlalchemy.Table:
181        """Return a SQLAlchemy table object for the named stream."""
182        return self.get_sql_cache().processor.get_sql_table(stream_name)

Return a SQLAlchemy table object for the named stream.

def get_dataset(self, stream_name: str) -> airbyte.datasets._sql.CachedDataset:
184    def get_dataset(self, stream_name: str) -> CachedDataset:
185        """Return cached dataset."""
186        return CachedDataset(self.get_sql_cache(), stream_name=stream_name)

Return cached dataset.

def get_sql_database_name(self) -> str:
188    def get_sql_database_name(self) -> str:
189        """Return the SQL database name."""
190        cache = self.get_sql_cache()
191        return cache.get_database_name()

Return the SQL database name.

def get_sql_schema_name(self) -> str:
193    def get_sql_schema_name(self) -> str:
194        """Return the SQL schema name."""
195        cache = self.get_sql_cache()
196        return cache.schema_name

Return the SQL schema name.

stream_names: set[str]
198    @property
199    def stream_names(self) -> set[str]:
200        """Return the set of stream names."""
201        return self.get_sql_cache().processor.expected_streams

Return the set of stream names.

streams: SyncResult.SyncResultStreams
203    @final
204    @property
205    def streams(
206        self,
207    ) -> SyncResultStreams:
208        """Return a mapping of stream names to cached datasets."""
209        return self.SyncResultStreams(self)

Return a mapping of stream names to cached datasets.

class SyncResult.SyncResultStreams(collections.abc.Mapping[str, airbyte.datasets._sql.CachedDataset]):
211    class SyncResultStreams(Mapping[str, CachedDataset]):
212        """A mapping of stream names to cached datasets."""
213
214        def __init__(
215            self,
216            parent: SyncResult,
217            /,
218        ) -> None:
219            self.parent: SyncResult = parent
220
221        def __getitem__(self, key: str) -> CachedDataset:
222            return self.parent.get_dataset(stream_name=key)
223
224        def __iter__(self) -> Iterator[str]:
225            """Iterate over the stream names."""
226            return iter(self.parent.stream_names)
227
228        def __len__(self) -> int:
229            return len(self.parent.stream_names)

A mapping of stream names to cached datasets.

SyncResult.SyncResultStreams(parent: SyncResult, /)
214        def __init__(
215            self,
216            parent: SyncResult,
217            /,
218        ) -> None:
219            self.parent: SyncResult = parent
parent: SyncResult
Inherited Members
collections.abc.Mapping
get
keys
items
values