airbyte.sources.registry

  1# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
  2from __future__ import annotations
  3
  4import json
  5import os
  6from copy import copy
  7from dataclasses import dataclass
  8from pathlib import Path
  9
 10import requests
 11
 12from airbyte import exceptions as exc
 13from airbyte.version import get_version
 14
 15
# Module-level cache of registry entries, keyed by connector name. It stays
# None until the registry has been loaded successfully for the first time.
__cache: dict[str, ConnectorMetadata] | None = None


# Name of the environment variable that, when set, overrides the registry
# location. Its value may be an HTTP(S) URL or a path to a local JSON file.
_REGISTRY_ENV_VAR = "AIRBYTE_LOCAL_REGISTRY"
# Registry location used when the override variable is not set.
_REGISTRY_URL = "https://connectors.airbyte.com/files/registries/v0/oss_registry.json"
 21
 22
 23@dataclass
 24class ConnectorMetadata:
 25    """Metadata for a connector."""
 26
 27    name: str
 28    """Connector name. For example, "source-google-sheets"."""
 29
 30    latest_available_version: str
 31    """The latest available version of the connector."""
 32
 33    pypi_package_name: str | None
 34    """The name of the PyPI package for the connector, if it exists."""
 35
 36
 37def _get_registry_url() -> str:
 38    if _REGISTRY_ENV_VAR in os.environ:
 39        return str(os.environ.get(_REGISTRY_ENV_VAR))
 40
 41    return _REGISTRY_URL
 42
 43
 44def _registry_entry_to_connector_metadata(entry: dict) -> ConnectorMetadata:
 45    name = entry["dockerRepository"].replace("airbyte/", "")
 46    remote_registries: dict = entry.get("remoteRegistries", {})
 47    pypi_registry: dict = remote_registries.get("pypi", {})
 48    pypi_package_name: str = pypi_registry.get("packageName", None)
 49    pypi_enabled: bool = pypi_registry.get("enabled", False)
 50    return ConnectorMetadata(
 51        name=name,
 52        latest_available_version=entry["dockerImageTag"],
 53        pypi_package_name=pypi_package_name if pypi_enabled else None,
 54    )
 55
 56
 57def _get_registry_cache(*, force_refresh: bool = False) -> dict[str, ConnectorMetadata]:
 58    """Return the registry cache."""
 59    global __cache
 60    if __cache and not force_refresh:
 61        return __cache
 62
 63    registry_url = _get_registry_url()
 64    if registry_url.startswith("http"):
 65        response = requests.get(
 66            registry_url, headers={"User-Agent": f"airbyte-lib-{get_version()}"}
 67        )
 68        response.raise_for_status()
 69        data = response.json()
 70    else:
 71        # Assume local file
 72        with Path(registry_url).open() as f:
 73            data = json.load(f)
 74
 75    new_cache: dict[str, ConnectorMetadata] = {}
 76
 77    for connector in data["sources"]:
 78        connector_metadata = _registry_entry_to_connector_metadata(connector)
 79        new_cache[connector_metadata.name] = connector_metadata
 80
 81    if len(new_cache) == 0:
 82        raise exc.PyAirbyteInternalError(
 83            message="Connector registry is empty.",
 84            context={
 85                "registry_url": _get_registry_url(),
 86            },
 87        )
 88
 89    __cache = new_cache
 90    return __cache
 91
 92
 93def get_connector_metadata(name: str) -> ConnectorMetadata:
 94    """Check the cache for the connector.
 95
 96    If the cache is empty, populate by calling update_cache.
 97    """
 98    cache = copy(_get_registry_cache())
 99    if not cache:
100        raise exc.PyAirbyteInternalError(
101            message="Connector registry could not be loaded.",
102            context={
103                "registry_url": _get_registry_url(),
104            },
105        )
106    if name not in cache:
107        raise exc.AirbyteConnectorNotRegisteredError(
108            connector_name=name,
109            context={
110                "registry_url": _get_registry_url(),
111                "available_connectors": get_available_connectors(),
112            },
113        )
114    return cache[name]
115
116
def get_available_connectors() -> list[str]:
    """List the names of all connectors that have a PyPI package name.

    Registry entries without a PyPI package name are left out. The names are
    returned in ascending alphabetical order.
    """
    names = [
        metadata.name
        for metadata in _get_registry_cache().values()
        if metadata.pypi_package_name is not None
    ]
    names.sort()
    return names
@dataclass
class ConnectorMetadata:
@dataclass
class ConnectorMetadata:
    """Registry information describing a single connector."""

    name: str
    """Name of the connector, e.g. "source-google-sheets"."""

    latest_available_version: str
    """Most recent version of the connector listed in the registry."""

    pypi_package_name: str | None
    """PyPI package name of the connector, or None when there is none."""

Metadata for a connector.

ConnectorMetadata(name: str, latest_available_version: str, pypi_package_name: str | None)
name: str

Connector name. For example, "source-google-sheets".

latest_available_version: str

The latest available version of the connector.

pypi_package_name: str | None

The name of the PyPI package for the connector, if it exists.

def get_connector_metadata(name: str) -> ConnectorMetadata:
 94def get_connector_metadata(name: str) -> ConnectorMetadata:
 95    """Check the cache for the connector.
 96
 97    If the cache is empty, populate by calling update_cache.
 98    """
 99    cache = copy(_get_registry_cache())
100    if not cache:
101        raise exc.PyAirbyteInternalError(
102            message="Connector registry could not be loaded.",
103            context={
104                "registry_url": _get_registry_url(),
105            },
106        )
107    if name not in cache:
108        raise exc.AirbyteConnectorNotRegisteredError(
109            connector_name=name,
110            context={
111                "registry_url": _get_registry_url(),
112                "available_connectors": get_available_connectors(),
113            },
114        )
115    return cache[name]

Check the cache for the connector.

If the cache is empty, it is populated first by loading the connector registry.

def get_available_connectors() -> list[str]:
def get_available_connectors() -> list[str]:
    """List the names of all connectors that have a PyPI package name.

    Registry entries without a PyPI package name are left out. The names are
    returned in ascending alphabetical order.
    """
    names = [
        metadata.name
        for metadata in _get_registry_cache().values()
        if metadata.pypi_package_name is not None
    ]
    names.sort()
    return names

Return a list of all available connectors.

Connector names (for example, "source-google-sheets") are returned in alphabetical order. Only connectors that have a PyPI package name are included.