airbyte.cloud
PyAirbyte classes and methods for interacting with the Airbyte Cloud API.
You can use this module to interact with Airbyte Cloud, OSS, and Enterprise.
Usage example:

```python
import airbyte as ab
from airbyte import cloud

workspace = cloud.CloudWorkspace(
    workspace_id="123",
    api_key=ab.get_secret("AIRBYTE_CLOUD_API_KEY"),
)

source = ab.get_source("source-faker", config={})
source.check()

workspace.deploy_source(source)
```
````python
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
"""PyAirbyte classes and methods for interacting with the Airbyte Cloud API.

You can use this module to interact with Airbyte Cloud, OSS, and Enterprise.

Usage example:

```python
import airbyte as ab
from airbyte import cloud

workspace = cloud.CloudWorkspace(
    workspace_id="123",
    api_key=ab.get_secret("AIRBYTE_CLOUD_API_KEY"),
)

source = ab.get_source("source-faker", config={})
source.check()

workspace.deploy_source(source)
```
"""

from __future__ import annotations

from airbyte.cloud import connections, sync_results, workspaces
from airbyte.cloud.connections import CloudConnection
from airbyte.cloud.sync_results import SyncResult
from airbyte.cloud.workspaces import CloudWorkspace


__all__ = [
    # Submodules
    "workspaces",
    "connections",
    "sync_results",
    # Classes
    "CloudWorkspace",
    "CloudConnection",
    "SyncResult",
]
````
```python
@dataclass
class CloudWorkspace:
    """A remote workspace on the Airbyte Cloud.

    By overriding `api_root`, you can use this class to interact with self-managed Airbyte
    instances, both OSS and Enterprise.
    """

    workspace_id: str
    api_key: str
    api_root: str = CLOUD_API_ROOT

    @property
    def workspace_url(self) -> str | None:
        return f"{self.api_root}/workspaces/{self.workspace_id}"

    # Test connection and creds

    def connect(self) -> None:
        """Check that the workspace is reachable and raise an exception otherwise.

        Note: It is not necessary to call this method before calling other operations. It
        serves primarily as a simple check to ensure that the workspace is reachable
        and credentials are correct.
        """
        _ = get_workspace(
            api_root=self.api_root,
            api_key=self.api_key,
            workspace_id=self.workspace_id,
        )
        print(f"Successfully connected to workspace: {self.workspace_url}")

    # Deploy and delete sources

    def deploy_source(
        self,
        source: Source,
    ) -> str:
        """Deploy a source to the workspace.

        Returns the newly deployed source ID.
        """
        source_configuration = source.get_config().copy()
        source_configuration["sourceType"] = source.name.replace("source-", "")

        deployed_source = create_source(
            name=f"{source.name.replace('-', ' ').title()} (Deployed by PyAirbyte)",
            api_root=self.api_root,
            api_key=self.api_key,
            workspace_id=self.workspace_id,
            config=source_configuration,
        )

        # Set the deployment IDs on the source object
        source._deployed_api_root = self.api_root  # noqa: SLF001  # Accessing non-public API
        source._deployed_workspace_id = self.workspace_id  # noqa: SLF001  # Accessing non-public API
        source._deployed_source_id = deployed_source.source_id  # noqa: SLF001  # Accessing non-public API

        return deployed_source.source_id

    def delete_source(
        self,
        source: str | Source,
    ) -> None:
        """Delete a source from the workspace.

        You can pass either the source ID `str` or a deployed `Source` object.
        """
        if not isinstance(source, (str, Source)):
            raise ValueError(f"Invalid source type: {type(source)}")  # noqa: TRY004, TRY003

        if isinstance(source, Source):
            if not source._deployed_source_id:  # noqa: SLF001
                raise ValueError("Source has not been deployed.")  # noqa: TRY003

            source_id = source._deployed_source_id  # noqa: SLF001

        elif isinstance(source, str):
            source_id = source

        delete_source(
            source_id=source_id,
            api_root=self.api_root,
            api_key=self.api_key,
        )

    # Deploy and delete destinations

    def deploy_cache_as_destination(
        self,
        cache: CacheBase,
    ) -> str:
        """Deploy a cache to the workspace as a new destination.

        Returns the newly deployed destination ID.
        """
        cache_type_name = cache.__class__.__name__.replace("Cache", "")

        deployed_destination: DestinationResponse = create_destination(
            name=f"Destination {cache_type_name} (Deployed by PyAirbyte)",
            api_root=self.api_root,
            api_key=self.api_key,
            workspace_id=self.workspace_id,
            config=get_destination_config_from_cache(cache),
        )

        # Set the deployment IDs on the cache object
        cache._deployed_api_root = self.api_root  # noqa: SLF001  # Accessing non-public API
        cache._deployed_workspace_id = self.workspace_id  # noqa: SLF001  # Accessing non-public API
        cache._deployed_destination_id = deployed_destination.destination_id  # noqa: SLF001  # Accessing non-public API

        return deployed_destination.destination_id

    def delete_destination(
        self,
        *,
        destination: str | None = None,
        cache: CacheBase | None = None,
    ) -> None:
        """Delete a deployed destination from the workspace.

        You can pass either the `Cache` class or the deployed destination ID as a `str`.
        """
        if destination is None and cache is None:
            raise ValueError("You must provide either a destination ID or a cache object.")  # noqa: TRY003
        if destination is not None and cache is not None:
            raise ValueError(  # noqa: TRY003
                "You must provide either a destination ID or a cache object, not both."
            )

        if cache:
            if not cache._deployed_destination_id:  # noqa: SLF001
                raise ValueError("Cache has not been deployed.")  # noqa: TRY003

            destination = cache._deployed_destination_id  # noqa: SLF001

        if destination is None:
            raise ValueError("No destination ID provided.")  # noqa: TRY003

        delete_destination(
            destination_id=destination,
            api_root=self.api_root,
            api_key=self.api_key,
        )

    # Deploy and delete connections

    def deploy_connection(
        self,
        source: Source | str,
        cache: CacheBase | None = None,
        destination: str | None = None,
        table_prefix: str | None = None,
        selected_streams: list[str] | None = None,
    ) -> str:
        """Deploy a source and cache to the workspace as a new connection.

        Returns the newly deployed connection ID as a `str`.

        Args:
            source (Source | str): The source to deploy. You can pass either an already deployed
                source ID `str` or a PyAirbyte `Source` object. If you pass a `Source` object,
                it will be deployed automatically.
            cache (CacheBase, optional): The cache to deploy as a new destination. You can provide
                `cache` or `destination`, but not both.
            destination (str, optional): The destination ID to use. You can provide
                `cache` or `destination`, but not both.
        """
        # Resolve source ID
        source_id: str
        if isinstance(source, Source):
            selected_streams = selected_streams or source.get_selected_streams()
            if source._deployed_source_id:  # noqa: SLF001
                source_id = source._deployed_source_id  # noqa: SLF001
            else:
                source_id = self.deploy_source(source)
        else:
            source_id = source
            if not selected_streams:
                raise exc.PyAirbyteInputError(
                    guidance="You must provide `selected_streams` when deploying a source ID."
                )

        # Resolve destination ID
        destination_id: str
        if destination:
            destination_id = destination
        elif cache:
            table_prefix = table_prefix if table_prefix is not None else (cache.table_prefix or "")
            if not cache._deployed_destination_id:  # noqa: SLF001
                destination_id = self.deploy_cache_as_destination(cache)
            else:
                destination_id = cache._deployed_destination_id  # noqa: SLF001
        else:
            raise exc.PyAirbyteInputError(
                guidance="You must provide either a destination ID or a cache object."
            )

        assert source_id is not None
        assert destination_id is not None

        deployed_connection = create_connection(
            name="Connection (Deployed by PyAirbyte)",
            source_id=source_id,
            destination_id=destination_id,
            api_root=self.api_root,
            api_key=self.api_key,
            workspace_id=self.workspace_id,
            selected_stream_names=selected_streams,
            prefix=table_prefix or "",
        )

        if isinstance(source, Source):
            source._deployed_connection_id = deployed_connection.connection_id  # noqa: SLF001
        if cache:
            cache._deployed_connection_id = deployed_connection.connection_id  # noqa: SLF001

        return deployed_connection.connection_id

    def get_connection(
        self,
        connection_id: str,
    ) -> CloudConnection:
        """Get a connection by ID.

        This method does not fetch data from the API. It returns a `CloudConnection` object,
        which will be loaded lazily as needed.
        """
        return CloudConnection(
            workspace=self,
            connection_id=connection_id,
        )

    def delete_connection(
        self,
        connection_id: str | None,
        *,
        delete_source: bool = False,
        delete_destination: bool = False,
    ) -> None:
        """Delete a deployed connection from the workspace."""
        if connection_id is None:
            raise ValueError("No connection ID provided.")  # noqa: TRY003

        connection: ConnectionResponse = get_connection(
            connection_id=connection_id,
            api_root=self.api_root,
            api_key=self.api_key,
            workspace_id=self.workspace_id,
        )
        delete_connection(
            connection_id=connection_id,
            api_root=self.api_root,
            api_key=self.api_key,
            workspace_id=self.workspace_id,
        )
        if delete_source:
            self.delete_source(source=connection.source_id)

        if delete_destination:
            self.delete_destination(destination=connection.destination_id)

    # Run syncs

    def run_sync(
        self,
        connection_id: str,
        *,
        wait: bool = True,
        wait_timeout: int = 300,
    ) -> SyncResult:
        """Run a sync on a deployed connection."""
        connection = CloudConnection(
            workspace=self,
            connection_id=connection_id,
        )
        return connection.run_sync(wait=wait, wait_timeout=wait_timeout)

    # Get sync results and previous sync logs

    def get_sync_result(
        self,
        connection_id: str,
        job_id: str | None = None,
    ) -> SyncResult | None:
        """Get the sync result for a connection job.

        If `job_id` is not provided, the most recent sync job will be used.

        Returns `None` if job_id is omitted and no previous jobs are found.
        """
        if job_id is None:
            results = self.get_previous_sync_logs(
                connection_id=connection_id,
                limit=1,
            )
            if results:
                return results[0]

            return None

        connection = CloudConnection(
            workspace=self,
            connection_id=connection_id,
        )
        return SyncResult(
            workspace=self,
            connection=connection,
            job_id=job_id,
        )

    def get_previous_sync_logs(
        self,
        connection_id: str,
        *,
        limit: int = 10,
    ) -> list[SyncResult]:
        """Get the previous sync logs for a connection."""
        connection = CloudConnection(
            workspace=self,
            connection_id=connection_id,
        )
        return connection.get_previous_sync_logs(
            limit=limit,
        )
```
A remote workspace on the Airbyte Cloud.
By overriding `api_root`, you can use this class to interact with self-managed Airbyte instances, both OSS and Enterprise.
```python
def connect(self) -> None:
    """Check that the workspace is reachable and raise an exception otherwise.

    Note: It is not necessary to call this method before calling other operations. It
    serves primarily as a simple check to ensure that the workspace is reachable
    and credentials are correct.
    """
    _ = get_workspace(
        api_root=self.api_root,
        api_key=self.api_key,
        workspace_id=self.workspace_id,
    )
    print(f"Successfully connected to workspace: {self.workspace_url}")
```
Check that the workspace is reachable and raise an exception otherwise.
Note: It is not necessary to call this method before calling other operations. It serves primarily as a simple check to ensure that the workspace is reachable and credentials are correct.
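A minimal sketch of the check, assuming a placeholder workspace ID and an API key stored in the `AIRBYTE_CLOUD_API_KEY` secret:

```python
import airbyte as ab
from airbyte import cloud

workspace = cloud.CloudWorkspace(
    workspace_id="...",  # Placeholder; substitute your own workspace ID.
    api_key=ab.get_secret("AIRBYTE_CLOUD_API_KEY"),
)

# Raises if the workspace is unreachable or credentials are wrong;
# prints the workspace URL on success.
workspace.connect()
```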
```python
def deploy_source(
    self,
    source: Source,
) -> str:
    """Deploy a source to the workspace.

    Returns the newly deployed source ID.
    """
    source_configuration = source.get_config().copy()
    source_configuration["sourceType"] = source.name.replace("source-", "")

    deployed_source = create_source(
        name=f"{source.name.replace('-', ' ').title()} (Deployed by PyAirbyte)",
        api_root=self.api_root,
        api_key=self.api_key,
        workspace_id=self.workspace_id,
        config=source_configuration,
    )

    # Set the deployment IDs on the source object
    source._deployed_api_root = self.api_root  # noqa: SLF001  # Accessing non-public API
    source._deployed_workspace_id = self.workspace_id  # noqa: SLF001  # Accessing non-public API
    source._deployed_source_id = deployed_source.source_id  # noqa: SLF001  # Accessing non-public API

    return deployed_source.source_id
```
Deploy a source to the workspace.
Returns the newly deployed source ID.
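A usage sketch, continuing from the `workspace` example above (the faker config is illustrative):

```python
import airbyte as ab

source = ab.get_source("source-faker", config={"count": 100})
source.check()  # Optional: validate the config locally before deploying.

# Deploys the source and returns its new ID; the ID is also stored on the object.
source_id = workspace.deploy_source(source)
print(f"Deployed source ID: {source_id}")
```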
```python
def delete_source(
    self,
    source: str | Source,
) -> None:
    """Delete a source from the workspace.

    You can pass either the source ID `str` or a deployed `Source` object.
    """
    if not isinstance(source, (str, Source)):
        raise ValueError(f"Invalid source type: {type(source)}")  # noqa: TRY004, TRY003

    if isinstance(source, Source):
        if not source._deployed_source_id:  # noqa: SLF001
            raise ValueError("Source has not been deployed.")  # noqa: TRY003

        source_id = source._deployed_source_id  # noqa: SLF001

    elif isinstance(source, str):
        source_id = source

    delete_source(
        source_id=source_id,
        api_root=self.api_root,
        api_key=self.api_key,
    )
```
Delete a source from the workspace.
You can pass either the source ID `str` or a deployed `Source` object.
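Both forms, continuing the sketch above (the ID string is a placeholder):

```python
workspace.delete_source(source)  # A Source object that has been deployed.
workspace.delete_source("...")   # Or: pass the source ID string directly.
```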
```python
def deploy_cache_as_destination(
    self,
    cache: CacheBase,
) -> str:
    """Deploy a cache to the workspace as a new destination.

    Returns the newly deployed destination ID.
    """
    cache_type_name = cache.__class__.__name__.replace("Cache", "")

    deployed_destination: DestinationResponse = create_destination(
        name=f"Destination {cache_type_name} (Deployed by PyAirbyte)",
        api_root=self.api_root,
        api_key=self.api_key,
        workspace_id=self.workspace_id,
        config=get_destination_config_from_cache(cache),
    )

    # Set the deployment IDs on the cache object
    cache._deployed_api_root = self.api_root  # noqa: SLF001  # Accessing non-public API
    cache._deployed_workspace_id = self.workspace_id  # noqa: SLF001  # Accessing non-public API
    cache._deployed_destination_id = deployed_destination.destination_id  # noqa: SLF001  # Accessing non-public API

    return deployed_destination.destination_id
```
Deploy a cache to the workspace as a new destination.
Returns the newly deployed destination ID.
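A short sketch, continuing from the `workspace` above; a local DuckDB cache is used here purely for illustration:

```python
import airbyte as ab

cache = ab.new_local_cache()  # Any PyAirbyte cache type should work.

# Creates a destination configured from the cache and returns its new ID.
destination_id = workspace.deploy_cache_as_destination(cache)
```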
```python
def delete_destination(
    self,
    *,
    destination: str | None = None,
    cache: CacheBase | None = None,
) -> None:
    """Delete a deployed destination from the workspace.

    You can pass either the `Cache` class or the deployed destination ID as a `str`.
    """
    if destination is None and cache is None:
        raise ValueError("You must provide either a destination ID or a cache object.")  # noqa: TRY003
    if destination is not None and cache is not None:
        raise ValueError(  # noqa: TRY003
            "You must provide either a destination ID or a cache object, not both."
        )

    if cache:
        if not cache._deployed_destination_id:  # noqa: SLF001
            raise ValueError("Cache has not been deployed.")  # noqa: TRY003

        destination = cache._deployed_destination_id  # noqa: SLF001

    if destination is None:
        raise ValueError("No destination ID provided.")  # noqa: TRY003

    delete_destination(
        destination_id=destination,
        api_root=self.api_root,
        api_key=self.api_key,
    )
```
Delete a deployed destination from the workspace.
You can pass either the `Cache` class or the deployed destination ID as a `str`.
```python
def deploy_connection(
    self,
    source: Source | str,
    cache: CacheBase | None = None,
    destination: str | None = None,
    table_prefix: str | None = None,
    selected_streams: list[str] | None = None,
) -> str:
    """Deploy a source and cache to the workspace as a new connection.

    Returns the newly deployed connection ID as a `str`.

    Args:
        source (Source | str): The source to deploy. You can pass either an already deployed
            source ID `str` or a PyAirbyte `Source` object. If you pass a `Source` object,
            it will be deployed automatically.
        cache (CacheBase, optional): The cache to deploy as a new destination. You can provide
            `cache` or `destination`, but not both.
        destination (str, optional): The destination ID to use. You can provide
            `cache` or `destination`, but not both.
    """
    # Resolve source ID
    source_id: str
    if isinstance(source, Source):
        selected_streams = selected_streams or source.get_selected_streams()
        if source._deployed_source_id:  # noqa: SLF001
            source_id = source._deployed_source_id  # noqa: SLF001
        else:
            source_id = self.deploy_source(source)
    else:
        source_id = source
        if not selected_streams:
            raise exc.PyAirbyteInputError(
                guidance="You must provide `selected_streams` when deploying a source ID."
            )

    # Resolve destination ID
    destination_id: str
    if destination:
        destination_id = destination
    elif cache:
        table_prefix = table_prefix if table_prefix is not None else (cache.table_prefix or "")
        if not cache._deployed_destination_id:  # noqa: SLF001
            destination_id = self.deploy_cache_as_destination(cache)
        else:
            destination_id = cache._deployed_destination_id  # noqa: SLF001
    else:
        raise exc.PyAirbyteInputError(
            guidance="You must provide either a destination ID or a cache object."
        )

    assert source_id is not None
    assert destination_id is not None

    deployed_connection = create_connection(
        name="Connection (Deployed by PyAirbyte)",
        source_id=source_id,
        destination_id=destination_id,
        api_root=self.api_root,
        api_key=self.api_key,
        workspace_id=self.workspace_id,
        selected_stream_names=selected_streams,
        prefix=table_prefix or "",
    )

    if isinstance(source, Source):
        source._deployed_connection_id = deployed_connection.connection_id  # noqa: SLF001
    if cache:
        cache._deployed_connection_id = deployed_connection.connection_id  # noqa: SLF001

    return deployed_connection.connection_id
```
Deploy a source and cache to the workspace as a new connection.
Returns the newly deployed connection ID as a `str`.

Arguments:

- source (Source | str): The source to deploy. You can pass either an already deployed source ID `str` or a PyAirbyte `Source` object. If you pass a `Source` object, it will be deployed automatically.
- cache (CacheBase, optional): The cache to deploy as a new destination. You can provide `cache` or `destination`, but not both.
- destination (str, optional): The destination ID to use. You can provide `cache` or `destination`, but not both.
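A sketch putting the pieces together, continuing from the `workspace` and `cache` examples above (the stream name is a placeholder):

```python
import airbyte as ab

source = ab.get_source("source-faker", config={})
source.select_streams(["users"])  # Stream selection is inferred from the Source object.

connection_id = workspace.deploy_connection(
    source=source,  # Deployed automatically if it has no deployed source ID yet.
    cache=cache,    # Deployed as a destination if not already deployed.
)
```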
```python
def get_connection(
    self,
    connection_id: str,
) -> CloudConnection:
    """Get a connection by ID.

    This method does not fetch data from the API. It returns a `CloudConnection` object,
    which will be loaded lazily as needed.
    """
    return CloudConnection(
        workspace=self,
        connection_id=connection_id,
    )
```
Get a connection by ID.
This method does not fetch data from the API. It returns a `CloudConnection` object, which will be loaded lazily as needed.
```python
def delete_connection(
    self,
    connection_id: str | None,
    *,
    delete_source: bool = False,
    delete_destination: bool = False,
) -> None:
    """Delete a deployed connection from the workspace."""
    if connection_id is None:
        raise ValueError("No connection ID provided.")  # noqa: TRY003

    connection: ConnectionResponse = get_connection(
        connection_id=connection_id,
        api_root=self.api_root,
        api_key=self.api_key,
        workspace_id=self.workspace_id,
    )
    delete_connection(
        connection_id=connection_id,
        api_root=self.api_root,
        api_key=self.api_key,
        workspace_id=self.workspace_id,
    )
    if delete_source:
        self.delete_source(source=connection.source_id)

    if delete_destination:
        self.delete_destination(destination=connection.destination_id)
```
Delete a deployed connection from the workspace.
```python
def run_sync(
    self,
    connection_id: str,
    *,
    wait: bool = True,
    wait_timeout: int = 300,
) -> SyncResult:
    """Run a sync on a deployed connection."""
    connection = CloudConnection(
        workspace=self,
        connection_id=connection_id,
    )
    return connection.run_sync(wait=wait, wait_timeout=wait_timeout)
```
Run a sync on a deployed connection.
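A short sketch, reusing `workspace` and `connection_id` from the examples above:

```python
# Trigger a sync and block until a final status (up to 300 seconds by default).
sync_result = workspace.run_sync(connection_id=connection_id)
print(sync_result.get_job_status())

# Or fire-and-forget, checking on the job later:
sync_result = workspace.run_sync(connection_id=connection_id, wait=False)
```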
```python
def get_sync_result(
    self,
    connection_id: str,
    job_id: str | None = None,
) -> SyncResult | None:
    """Get the sync result for a connection job.

    If `job_id` is not provided, the most recent sync job will be used.

    Returns `None` if job_id is omitted and no previous jobs are found.
    """
    if job_id is None:
        results = self.get_previous_sync_logs(
            connection_id=connection_id,
            limit=1,
        )
        if results:
            return results[0]

        return None

    connection = CloudConnection(
        workspace=self,
        connection_id=connection_id,
    )
    return SyncResult(
        workspace=self,
        connection=connection,
        job_id=job_id,
    )
```
Get the sync result for a connection job.
If `job_id` is not provided, the most recent sync job will be used.

Returns `None` if `job_id` is omitted and no previous jobs are found.
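A sketch of both lookups, continuing from the examples above:

```python
# Most recent job, if any (None when the connection has never synced):
latest = workspace.get_sync_result(connection_id=connection_id)
if latest is not None:
    print(latest.get_job_status())

# Or walk the recent job history:
for result in workspace.get_previous_sync_logs(connection_id=connection_id, limit=5):
    print(result.job_id, result.get_job_status())
```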
```python
def get_previous_sync_logs(
    self,
    connection_id: str,
    *,
    limit: int = 10,
) -> list[SyncResult]:
    """Get the previous sync logs for a connection."""
    connection = CloudConnection(
        workspace=self,
        connection_id=connection_id,
    )
    return connection.get_previous_sync_logs(
        limit=limit,
    )
```
Get the previous sync logs for a connection.
```python
class CloudConnection:
    """A connection is a link between a source and a destination.

    Do not instantiate this class directly. Instead, use the
    `CloudWorkspace.deploy_connection`
    or `CloudWorkspace.get_connection` methods.
    """

    def __init__(
        self,
        workspace: CloudWorkspace,
        connection_id: str,
        source: str | None = None,
        destination: str | None = None,
    ) -> None:
        self.connection_id = connection_id
        """The ID of the connection."""

        self.workspace = workspace
        """The workspace that the connection belongs to."""

        self._source_id = source
        """The ID of the source."""

        self._destination_id = destination
        """The ID of the destination."""

        self._connection_info: ConnectionResponse | None = None

    def _fetch_connection_info(self) -> ConnectionResponse:
        """Populate the connection with data from the API."""
        return api_util.get_connection(
            workspace_id=self.workspace.workspace_id,
            connection_id=self.connection_id,
            api_root=self.workspace.api_root,
            api_key=self.workspace.api_key,
        )

    # Properties

    @property
    def source_id(self) -> str:
        """The ID of the source."""
        if not self._source_id:
            if not self._connection_info:
                self._connection_info = self._fetch_connection_info()

            self._source_id = self._connection_info.source_id

        return cast(str, self._source_id)

    @property
    def destination_id(self) -> str:
        """The ID of the destination."""
        if not self._destination_id:
            if not self._connection_info:
                self._connection_info = self._fetch_connection_info()

            self._destination_id = self._connection_info.destination_id

        return cast(str, self._destination_id)

    @property
    def stream_names(self) -> list[str]:
        """The stream names."""
        if not self._connection_info:
            self._connection_info = self._fetch_connection_info()

        return [stream.name for stream in self._connection_info.configurations.streams]

    @property
    def table_prefix(self) -> str:
        """The table prefix."""
        if not self._connection_info:
            self._connection_info = self._fetch_connection_info()

        return self._connection_info.configurations.prefix

    @property
    def connection_url(self) -> str | None:
        return f"{self.workspace.workspace_url}/connections/{self.connection_id}"

    @property
    def job_history_url(self) -> str | None:
        return f"{self.connection_url}/job-history"

    # Run Sync

    def run_sync(
        self,
        *,
        wait: bool = True,
        wait_timeout: int = 300,
    ) -> SyncResult:
        """Run a sync."""
        connection_response = api_util.run_connection(
            connection_id=self.connection_id,
            api_root=self.workspace.api_root,
            api_key=self.workspace.api_key,
            workspace_id=self.workspace.workspace_id,
        )
        sync_result = SyncResult(
            workspace=self.workspace,
            connection=self,
            job_id=connection_response.job_id,
        )

        if wait:
            sync_result.wait_for_completion(
                wait_timeout=wait_timeout,
                raise_failure=True,
                raise_timeout=True,
            )

        return sync_result

    # Logs

    def get_previous_sync_logs(
        self,
        *,
        limit: int = 10,
    ) -> list[SyncResult]:
        """Get the previous sync logs for a connection."""
        sync_logs: list[JobResponse] = api_util.get_job_logs(
            connection_id=self.connection_id,
            api_root=self.workspace.api_root,
            api_key=self.workspace.api_key,
            workspace_id=self.workspace.workspace_id,
            limit=limit,
        )
        return [
            SyncResult(
                workspace=self.workspace,
                connection=self,
                job_id=sync_log.job_id,
                _latest_status=sync_log.status,
            )
            for sync_log in sync_logs
        ]

    def get_sync_result(
        self,
        job_id: str | None = None,
    ) -> SyncResult | None:
        """Get the sync result for the connection.

        If `job_id` is not provided, the most recent sync job will be used.

        Returns `None` if job_id is omitted and no previous jobs are found.
        """
        if job_id is None:
            # Get the most recent sync job
            results = self.get_previous_sync_logs(
                limit=1,
            )
            if results:
                return results[0]

            return None

        # Get the sync job by ID (lazy loaded)
        return SyncResult(
            workspace=self.workspace,
            connection=self,
            job_id=job_id,
        )

    # Deletions

    def delete(
        self,
        *,
        delete_source: bool = False,
        delete_destination: bool = False,
    ) -> None:
        """Delete the connection.

        Args:
            delete_source: Whether to also delete the source.
            delete_destination: Whether to also delete the destination.
        """
        self.workspace.delete_connection(connection_id=self.connection_id)

        if delete_source:
            self.workspace.delete_source(source=self.source_id)

        if delete_destination:
            self.workspace.delete_destination(destination=self.destination_id)
```
A connection is a link between a source and a destination.
Do not instantiate this class directly. Instead, use the `CloudWorkspace.deploy_connection` or `CloudWorkspace.get_connection` methods.
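A sketch of the lazy-loading behavior, reusing the `workspace` object from earlier (the connection ID is a placeholder):

```python
connection = workspace.get_connection(connection_id="...")  # No API call yet.

print(connection.source_id)       # First property access fetches and caches the info.
print(connection.stream_names)    # Served from the cached response.
print(connection.connection_url)  # Built locally from workspace_url; no API call.
```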
```python
def __init__(
    self,
    workspace: CloudWorkspace,
    connection_id: str,
    source: str | None = None,
    destination: str | None = None,
) -> None:
    self.connection_id = connection_id
    """The ID of the connection."""

    self.workspace = workspace
    """The workspace that the connection belongs to."""

    self._source_id = source
    """The ID of the source."""

    self._destination_id = destination
    """The ID of the destination."""

    self._connection_info: ConnectionResponse | None = None
```
```python
@property
def source_id(self) -> str:
    """The ID of the source."""
    if not self._source_id:
        if not self._connection_info:
            self._connection_info = self._fetch_connection_info()

        self._source_id = self._connection_info.source_id

    return cast(str, self._source_id)
```
The ID of the source.
```python
@property
def destination_id(self) -> str:
    """The ID of the destination."""
    if not self._destination_id:
        if not self._connection_info:
            self._connection_info = self._fetch_connection_info()

        self._destination_id = self._connection_info.destination_id

    return cast(str, self._destination_id)
```
The ID of the destination.
```python
@property
def stream_names(self) -> list[str]:
    """The stream names."""
    if not self._connection_info:
        self._connection_info = self._fetch_connection_info()

    return [stream.name for stream in self._connection_info.configurations.streams]
```
The stream names.
```python
@property
def table_prefix(self) -> str:
    """The table prefix."""
    if not self._connection_info:
        self._connection_info = self._fetch_connection_info()

    return self._connection_info.configurations.prefix
```
The table prefix.
```python
def run_sync(
    self,
    *,
    wait: bool = True,
    wait_timeout: int = 300,
) -> SyncResult:
    """Run a sync."""
    connection_response = api_util.run_connection(
        connection_id=self.connection_id,
        api_root=self.workspace.api_root,
        api_key=self.workspace.api_key,
        workspace_id=self.workspace.workspace_id,
    )
    sync_result = SyncResult(
        workspace=self.workspace,
        connection=self,
        job_id=connection_response.job_id,
    )

    if wait:
        sync_result.wait_for_completion(
            wait_timeout=wait_timeout,
            raise_failure=True,
            raise_timeout=True,
        )

    return sync_result
```
Run a sync.
```python
def get_previous_sync_logs(
    self,
    *,
    limit: int = 10,
) -> list[SyncResult]:
    """Get the previous sync logs for a connection."""
    sync_logs: list[JobResponse] = api_util.get_job_logs(
        connection_id=self.connection_id,
        api_root=self.workspace.api_root,
        api_key=self.workspace.api_key,
        workspace_id=self.workspace.workspace_id,
        limit=limit,
    )
    return [
        SyncResult(
            workspace=self.workspace,
            connection=self,
            job_id=sync_log.job_id,
            _latest_status=sync_log.status,
        )
        for sync_log in sync_logs
    ]
```
Get the previous sync logs for a connection.
```python
def get_sync_result(
    self,
    job_id: str | None = None,
) -> SyncResult | None:
    """Get the sync result for the connection.

    If `job_id` is not provided, the most recent sync job will be used.

    Returns `None` if job_id is omitted and no previous jobs are found.
    """
    if job_id is None:
        # Get the most recent sync job
        results = self.get_previous_sync_logs(
            limit=1,
        )
        if results:
            return results[0]

        return None

    # Get the sync job by ID (lazy loaded)
    return SyncResult(
        workspace=self.workspace,
        connection=self,
        job_id=job_id,
    )
```
Get the sync result for the connection.
If `job_id` is not provided, the most recent sync job will be used.

Returns `None` if `job_id` is omitted and no previous jobs are found.
```python
def delete(
    self,
    *,
    delete_source: bool = False,
    delete_destination: bool = False,
) -> None:
    """Delete the connection.

    Args:
        delete_source: Whether to also delete the source.
        delete_destination: Whether to also delete the destination.
    """
    self.workspace.delete_connection(connection_id=self.connection_id)

    if delete_source:
        self.workspace.delete_source(source=self.source_id)

    if delete_destination:
        self.workspace.delete_destination(destination=self.destination_id)
```
Delete the connection.
Arguments:
- delete_source: Whether to also delete the source.
- delete_destination: Whether to also delete the destination.
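A one-line teardown sketch, continuing from the `connection` example above:

```python
# Remove the connection and, optionally, the source and destination behind it.
connection.delete(delete_source=True, delete_destination=True)
```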
```python
@dataclass
class SyncResult:
    """The result of a sync operation."""

    workspace: CloudWorkspace
    connection: CloudConnection
    job_id: str
    table_name_prefix: str = ""
    table_name_suffix: str = ""
    _latest_status: JobStatusEnum | None = None
    _connection_response: ConnectionResponse | None = None
    _cache: CacheBase | None = None

    @property
    def job_url(self) -> str:
        """Return the URL of the sync job."""
        return f"{self.connection.job_history_url}/{self.job_id}"

    def _get_connection_info(self, *, force_refresh: bool = False) -> ConnectionResponse:
        """Return connection info for the sync job."""
        if self._connection_response and not force_refresh:
            return self._connection_response

        self._connection_response = api_util.get_connection(
            workspace_id=self.workspace.workspace_id,
            api_root=self.workspace.api_root,
            api_key=self.workspace.api_key,
            connection_id=self.connection.connection_id,
        )
        return self._connection_response

    def _get_destination_configuration(self, *, force_refresh: bool = False) -> dict[str, Any]:
        """Return the destination configuration for the sync job."""
        connection_info: ConnectionResponse = self._get_connection_info(force_refresh=force_refresh)
        destination_response = api_util.get_destination(
            destination_id=connection_info.destination_id,
            api_root=self.workspace.api_root,
            api_key=self.workspace.api_key,
        )
        return destination_response.configuration

    def is_job_complete(self) -> bool:
        """Check if the sync job is complete."""
        return self.get_job_status() in FINAL_STATUSES

    def get_job_status(self) -> JobStatusEnum:
        """Return the latest status of the sync job."""
        if self._latest_status and self._latest_status in FINAL_STATUSES:
            return self._latest_status

        job_info = api_util.get_job_info(
            job_id=self.job_id,
            api_root=self.workspace.api_root,
            api_key=self.workspace.api_key,
        )
        self._latest_status = job_info.status

        return job_info.status

    def raise_failure_status(
        self,
        *,
        refresh_status: bool = False,
    ) -> None:
        """Raise an exception if the sync job failed.

        By default, this method will use the latest status available. If you want to refresh the
        status before checking for failure, set `refresh_status=True`. If the job has failed, this
        method will raise an `AirbyteConnectionSyncError`.

        Otherwise, do nothing.
        """
        latest_status = self._latest_status
        if refresh_status:
            latest_status = self.get_job_status()

        if latest_status in FAILED_STATUSES:
            raise AirbyteConnectionSyncError(
                workspace=self.workspace,
                connection_id=self.connection.connection_id,
                job_id=self.job_id,
                job_status=self._latest_status,
            )

    def wait_for_completion(
        self,
        *,
        wait_timeout: int = DEFAULT_SYNC_TIMEOUT_SECONDS,
        raise_timeout: bool = True,
        raise_failure: bool = False,
    ) -> JobStatusEnum:
        """Wait for a job to finish running."""
        start_time = time.time()
        while True:
            latest_status = self.get_job_status()
            if latest_status in FINAL_STATUSES:
                if raise_failure:
                    # No-op if the job succeeded or is still running:
                    self.raise_failure_status()

                return latest_status

            if time.time() - start_time > wait_timeout:
                if raise_timeout:
                    raise AirbyteConnectionSyncTimeoutError(
                        workspace=self.workspace,
                        connection_id=self.connection.connection_id,
                        job_id=self.job_id,
                        job_status=latest_status,
                        timeout=wait_timeout,
                    )

                return latest_status  # This will be a non-final status

            time.sleep(api_util.JOB_WAIT_INTERVAL_SECS)

    def get_sql_cache(self) -> CacheBase:
        """Return a SQL Cache object for working with the data in a SQL-based destination."""
        if self._cache:
            return self._cache

        destination_configuration: dict[str, Any] = self._get_destination_configuration()
        self._cache = create_cache_from_destination_config(
            destination_configuration=destination_configuration
        )
        return self._cache

    def get_sql_engine(self) -> sqlalchemy.engine.Engine:
        """Return a SQL Engine for querying a SQL-based destination."""
        return self.get_sql_cache().get_sql_engine()

    def get_sql_table_name(self, stream_name: str) -> str:
        """Return the SQL table name of the named stream."""
        return self.get_sql_cache().processor.get_sql_table_name(stream_name=stream_name)

    def get_sql_table(
        self,
        stream_name: str,
    ) -> sqlalchemy.Table:
        """Return a SQLAlchemy table object for the named stream."""
        return self.get_sql_cache().processor.get_sql_table(stream_name)

    def get_dataset(self, stream_name: str) -> CachedDataset:
        """Return a cached dataset for the named stream."""
        return CachedDataset(self.get_sql_cache(), stream_name=stream_name)

    def get_sql_database_name(self) -> str:
        """Return the SQL database name."""
        cache = self.get_sql_cache()
        return cache.get_database_name()

    def get_sql_schema_name(self) -> str:
        """Return the SQL schema name."""
        cache = self.get_sql_cache()
        return cache.schema_name

    @property
    def stream_names(self) -> set[str]:
        """Return the set of stream names."""
        return self.get_sql_cache().processor.expected_streams

    @final
    @property
    def streams(
        self,
    ) -> SyncResultStreams:
        """Return a mapping of stream names to cached datasets."""
        return self.SyncResultStreams(self)

    class SyncResultStreams(Mapping[str, CachedDataset]):
        """A mapping of stream names to cached datasets."""

        def __init__(
            self,
            parent: SyncResult,
            /,
        ) -> None:
            self.parent: SyncResult = parent

        def __getitem__(self, key: str) -> CachedDataset:
            return self.parent.get_dataset(stream_name=key)

        def __iter__(self) -> Iterator[str]:
            """Iterate over the stream names."""
            return iter(self.parent.stream_names)

        def __len__(self) -> int:
            return len(self.parent.stream_names)
```
The result of a sync operation.
```python
@property
def job_url(self) -> str:
    """Return the URL of the sync job."""
    return f"{self.connection.job_history_url}/{self.job_id}"
```
Return the URL of the sync job.
```python
def is_job_complete(self) -> bool:
    """Check if the sync job is complete."""
    return self.get_job_status() in FINAL_STATUSES
```
Check if the sync job is complete.
```python
def get_job_status(self) -> JobStatusEnum:
    """Return the latest status of the sync job."""
    if self._latest_status and self._latest_status in FINAL_STATUSES:
        return self._latest_status

    job_info = api_util.get_job_info(
        job_id=self.job_id,
        api_root=self.workspace.api_root,
        api_key=self.workspace.api_key,
    )
    self._latest_status = job_info.status

    return job_info.status
```
Return the latest status of the sync job.
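A manual polling sketch, continuing from the `connection` example above (`wait_for_completion` below does this for you):

```python
import time

sync_result = connection.run_sync(wait=False)

# get_job_status() caches final statuses, so polling stops issuing API calls
# once the job reaches a terminal state.
while not sync_result.is_job_complete():
    time.sleep(10)

print(sync_result.get_job_status())
```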
```python
def raise_failure_status(
    self,
    *,
    refresh_status: bool = False,
) -> None:
    """Raise an exception if the sync job failed.

    By default, this method will use the latest status available. If you want to refresh the
    status before checking for failure, set `refresh_status=True`. If the job has failed, this
    method will raise an `AirbyteConnectionSyncError`.

    Otherwise, do nothing.
    """
    latest_status = self._latest_status
    if refresh_status:
        latest_status = self.get_job_status()

    if latest_status in FAILED_STATUSES:
        raise AirbyteConnectionSyncError(
            workspace=self.workspace,
            connection_id=self.connection.connection_id,
            job_id=self.job_id,
            job_status=self._latest_status,
        )
```
Raise an exception if the sync job failed.
By default, this method will use the latest status available. If you want to refresh the status before checking for failure, set `refresh_status=True`. If the job has failed, this method will raise an `AirbyteConnectionSyncError`.

Otherwise, do nothing.
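A sketch of handling the failure, assuming the exception is importable from `airbyte.exceptions` and continuing from the polling example above:

```python
from airbyte.exceptions import AirbyteConnectionSyncError

try:
    # refresh_status=True re-fetches the job status before the failure check.
    sync_result.raise_failure_status(refresh_status=True)
except AirbyteConnectionSyncError:
    print(f"Sync failed for job: {sync_result.job_id}")
```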
```python
def wait_for_completion(
    self,
    *,
    wait_timeout: int = DEFAULT_SYNC_TIMEOUT_SECONDS,
    raise_timeout: bool = True,
    raise_failure: bool = False,
) -> JobStatusEnum:
    """Wait for a job to finish running."""
    start_time = time.time()
    while True:
        latest_status = self.get_job_status()
        if latest_status in FINAL_STATUSES:
            if raise_failure:
                # No-op if the job succeeded or is still running:
                self.raise_failure_status()

            return latest_status

        if time.time() - start_time > wait_timeout:
            if raise_timeout:
                raise AirbyteConnectionSyncTimeoutError(
                    workspace=self.workspace,
                    connection_id=self.connection.connection_id,
                    job_id=self.job_id,
                    job_status=latest_status,
                    timeout=wait_timeout,
                )

            return latest_status  # This will be a non-final status

        time.sleep(api_util.JOB_WAIT_INTERVAL_SECS)
```
Wait for a job to finish running.
```python
def get_sql_cache(self) -> CacheBase:
    """Return a SQL Cache object for working with the data in a SQL-based destination."""
    if self._cache:
        return self._cache

    destination_configuration: dict[str, Any] = self._get_destination_configuration()
    self._cache = create_cache_from_destination_config(
        destination_configuration=destination_configuration
    )
    return self._cache
```
Return a SQL Cache object for working with the data in a SQL-based destination.
```python
def get_sql_engine(self) -> sqlalchemy.engine.Engine:
    """Return a SQL Engine for querying a SQL-based destination."""
    return self.get_sql_cache().get_sql_engine()
```
Return a SQL Engine for querying a SQL-based destination.
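A query sketch using the engine, continuing from `sync_result` above (the stream name is a placeholder):

```python
import sqlalchemy

engine = sync_result.get_sql_engine()
table_name = sync_result.get_sql_table_name("users")  # Placeholder stream name.

with engine.connect() as conn:
    count = conn.execute(sqlalchemy.text(f"SELECT COUNT(*) FROM {table_name}")).scalar_one()
    print(count)
```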
```python
def get_sql_table_name(self, stream_name: str) -> str:
    """Return the SQL table name of the named stream."""
    return self.get_sql_cache().processor.get_sql_table_name(stream_name=stream_name)
```
Return the SQL table name of the named stream.
```python
def get_sql_table(
    self,
    stream_name: str,
) -> sqlalchemy.Table:
    """Return a SQLAlchemy table object for the named stream."""
    return self.get_sql_cache().processor.get_sql_table(stream_name)
```
Return a SQLAlchemy table object for the named stream.
```python
def get_dataset(self, stream_name: str) -> CachedDataset:
    """Return a cached dataset for the named stream."""
    return CachedDataset(self.get_sql_cache(), stream_name=stream_name)
```
Return a cached dataset for the named stream.
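A short sketch, continuing from `sync_result` above (the stream name is a placeholder, and `to_pandas()` assumes the usual PyAirbyte dataset interface):

```python
dataset = sync_result.get_dataset("users")  # Placeholder stream name.

# CachedDataset can be iterated record by record or converted to pandas.
df = dataset.to_pandas()
print(df.head())
```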
```python
def get_sql_database_name(self) -> str:
    """Return the SQL database name."""
    cache = self.get_sql_cache()
    return cache.get_database_name()
```
Return the SQL database name.
```python
def get_sql_schema_name(self) -> str:
    """Return the SQL schema name."""
    cache = self.get_sql_cache()
    return cache.schema_name
```
Return the SQL schema name.
```python
class SyncResultStreams(Mapping[str, CachedDataset]):
    """A mapping of stream names to cached datasets."""

    def __init__(
        self,
        parent: SyncResult,
        /,
    ) -> None:
        self.parent: SyncResult = parent

    def __getitem__(self, key: str) -> CachedDataset:
        return self.parent.get_dataset(stream_name=key)

    def __iter__(self) -> Iterator[str]:
        """Iterate over the stream names."""
        return iter(self.parent.stream_names)

    def __len__(self) -> int:
        return len(self.parent.stream_names)
```
A mapping of stream names to cached datasets.
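A usage sketch, continuing from `sync_result` above:

```python
# `streams` behaves like a read-only dict keyed on stream name; lookups
# delegate to get_dataset(), so each value is a CachedDataset.
for stream_name, dataset in sync_result.streams.items():
    print(stream_name, dataset.to_pandas().shape)
```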