airbyte.validate

Defines the airbyte-lib-validate-source CLI.

This tool checks if connectors are compatible with PyAirbyte.

  1# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
  2"""Defines the `airbyte-lib-validate-source` CLI.
  3
  4This tool checks if connectors are compatible with PyAirbyte.
  5"""
  6from __future__ import annotations
  7
  8import argparse
  9import json
 10import os
 11import subprocess
 12import sys
 13import tempfile
 14from pathlib import Path
 15
 16import yaml
 17from rich import print
 18
 19import airbyte as ab
 20from airbyte import exceptions as exc
 21from airbyte._executor import _get_bin_dir
 22
 23
 24def _parse_args() -> argparse.Namespace:
 25    parser = argparse.ArgumentParser(description="Validate a connector")
 26    parser.add_argument(
 27        "--connector-dir",
 28        type=str,
 29        required=True,
 30        help="Path to the connector directory",
 31    )
 32    parser.add_argument(
 33        "--validate-install-only",
 34        action="store_true",
 35        help="Only validate that the connector can be installed and config can be validated.",
 36    )
 37    parser.add_argument(
 38        "--sample-config",
 39        type=str,
 40        required=False,
 41        help="Path to the sample config.json file. Required without --validate-install-only.",
 42    )
 43    return parser.parse_args()
 44
 45
 46def _run_subprocess_and_raise_on_failure(args: list[str]) -> None:
 47    result = subprocess.run(
 48        args,
 49        check=False,
 50        stderr=subprocess.PIPE,
 51    )
 52    if result.returncode != 0:
 53        raise exc.AirbyteSubprocessFailedError(
 54            run_args=args,
 55            exit_code=result.returncode,
 56            log_text=result.stderr.decode("utf-8"),
 57        )
 58
 59
 60def full_tests(connector_name: str, sample_config: str) -> None:
 61    print("Creating source and validating spec and version...")
 62    source = ab.get_source(
 63        # TODO: FIXME: noqa: SIM115, PTH123
 64        connector_name,
 65        config=json.load(open(sample_config)),  # noqa: SIM115, PTH123,
 66        install_if_missing=False,
 67    )
 68
 69    print("Running check...")
 70    source.check()
 71
 72    print("Fetching streams...")
 73    streams = source.get_available_streams()
 74
 75    # try to peek all streams - if one works, stop, if none works, throw exception
 76    for stream in streams:
 77        try:
 78            print(f"Trying to read from stream {stream}...")
 79            record = next(source.get_records(stream))
 80            assert record, "No record returned"
 81            break
 82        except exc.AirbyteError as e:
 83            print(f"Could not read from stream {stream}: {e}")
 84        except Exception as e:
 85            print(f"Unhandled error occurred when trying to read from {stream}: {e}")
 86    else:
 87        raise exc.AirbyteNoDataFromConnectorError(
 88            context={"selected_streams": streams},
 89        )
 90
 91
 92def install_only_test(connector_name: str) -> None:
 93    print("Creating source and validating spec is returned successfully...")
 94    source = ab.get_source(connector_name)
 95    source._get_spec(force_refresh=True)  # noqa: SLF001  # Member is private until we have a public API for it.
 96
 97
 98def run() -> None:
 99    """Handle CLI entrypoint for the `airbyte-lib-validate-source` command.
100
101    It's called like this:
102    > airbyte-lib-validate-source —connector-dir . -—sample-config secrets/config.json
103
104    It performs a basic smoke test to make sure the connector in question is PyAirbyte compliant:
105    * Can be installed into a venv
106    * Can be called via cli entrypoint
107    * Answers according to the Airbyte protocol when called with spec, check, discover and read.
108    """
109    # parse args
110    args = _parse_args()
111    connector_dir = args.connector_dir
112    sample_config = args.sample_config
113    validate_install_only = args.validate_install_only
114    validate(connector_dir, sample_config, validate_install_only=validate_install_only)
115
116
def validate(connector_dir: str, sample_config: str, *, validate_install_only: bool) -> None:
    """Install a connector into a local venv and smoke-test it.

    Reads `metadata.yaml` from the connector directory, creates a `.venv-<connector name>`
    virtual environment relative to the current working directory if it does not exist
    yet, pip-installs the connector into it, points the `AIRBYTE_LOCAL_REGISTRY`
    environment variable at a temporary single-entry registry file and then runs either
    the install-only test or the full tests.

    Args:
        connector_dir: Path to the connector directory containing `metadata.yaml`.
        sample_config: Path to a sample config JSON file. May be empty or `None` only
            when `validate_install_only` is set.
        validate_install_only: If true, only check that the source can be created and
            returns a spec; otherwise run the full tests.

    Raises:
        exc.PyAirbyteInputError: If the full tests are requested without `sample_config`.
        exc.AirbyteSubprocessFailedError: If creating the venv or installing the
            connector exits with a non-zero status.
    """
    # Fail fast: check the inputs before creating a venv or installing anything.
    if not validate_install_only and not sample_config:
        raise exc.PyAirbyteInputError(
            input_value="--sample-config is required without --validate-install-only set"
        )

    # read metadata.yaml
    metadata_path = Path(connector_dir) / "metadata.yaml"
    with metadata_path.open() as stream:
        metadata = yaml.safe_load(stream)["data"]

    # TODO: Use remoteRegistries.pypi.packageName once set for connectors
    connector_name = metadata["dockerRepository"].replace("airbyte/", "")

    # create a venv and install the connector
    venv_path = Path(f".venv-{connector_name}")
    if not venv_path.exists():
        _run_subprocess_and_raise_on_failure([sys.executable, "-m", "venv", str(venv_path)])

    pip_path = str(_get_bin_dir(venv_path) / "pip")

    _run_subprocess_and_raise_on_failure([pip_path, "install", connector_dir])

    # write basic registry to temp json file
    registry = {
        "sources": [
            {
                "dockerRepository": f"airbyte/{connector_name}",
                "dockerImageTag": "0.0.1",
                "remoteRegistries": {
                    # Must be an f-string so the real connector name is interpolated.
                    "pypi": {"packageName": f"airbyte-{connector_name}", "enabled": True}
                },
            },
        ],
    }

    with tempfile.NamedTemporaryFile(mode="w+t", delete=True) as temp_file:
        temp_file.write(json.dumps(registry))
        # Flush so the registry content is on disk before it is read via the file name.
        temp_file.flush()
        os.environ["AIRBYTE_LOCAL_REGISTRY"] = str(temp_file.name)
        # The tests run inside the `with` block because the file is deleted on exit.
        if validate_install_only:
            install_only_test(connector_name)
        else:
            full_tests(connector_name, sample_config)
def full_tests(connector_name: str, sample_config: str) -> None:
    """Run the full smoke test: create the source, check it and read one record.

    Streams are tried one after another; the test passes as soon as one stream
    yields a non-empty record.

    Args:
        connector_name: Name of the connector, passed to `ab.get_source`.
        sample_config: Path to a JSON file holding the connector config.

    Raises:
        exc.AirbyteNoDataFromConnectorError: If no available stream yields a
            non-empty record (including when there are no streams at all).
    """
    print("Creating source and validating spec and version...")
    # Use a context manager so the config file handle is closed deterministically.
    with Path(sample_config).open(encoding="utf-8") as config_file:
        config = json.load(config_file)

    source = ab.get_source(
        connector_name,
        config=config,
        install_if_missing=False,
    )

    print("Running check...")
    source.check()

    print("Fetching streams...")
    streams = source.get_available_streams()

    # try to peek all streams - if one works, stop, if none works, throw exception
    for stream in streams:
        print(f"Trying to read from stream {stream}...")
        try:
            # The default avoids a message-less StopIteration when the stream is empty.
            record = next(source.get_records(stream), None)
        except exc.AirbyteError as e:
            print(f"Could not read from stream {stream}: {e}")
            continue
        except Exception as e:
            # Deliberately broad: one failing stream must not abort the remaining attempts.
            print(f"Unhandled error occurred when trying to read from {stream}: {e}")
            continue

        # Explicit check instead of `assert`, which is stripped when running with -O.
        if record:
            break
        print(f"Could not read from stream {stream}: No record returned")
    else:
        raise exc.AirbyteNoDataFromConnectorError(
            context={"selected_streams": streams},
        )
def install_only_test(connector_name: str) -> None:
    """Check that the source can be created and that it returns its spec.

    Args:
        connector_name: Name of the connector, passed to `ab.get_source`.
    """
    print("Creating source and validating spec is returned successfully...")
    source = ab.get_source(connector_name)
    # Member is private until we have a public API for it.
    source._get_spec(force_refresh=True)  # noqa: SLF001
def run() -> None:
    """Handle CLI entrypoint for the `airbyte-lib-validate-source` command.

    It's called like this:
    > airbyte-lib-validate-source --connector-dir . --sample-config secrets/config.json

    It performs a basic smoke test to make sure the connector in question is PyAirbyte compliant:
    * Can be installed into a venv
    * Can be called via cli entrypoint
    * Answers according to the Airbyte protocol when called with spec, check, discover and read.
    """
    cli_args = _parse_args()
    validate(
        cli_args.connector_dir,
        cli_args.sample_config,
        validate_install_only=cli_args.validate_install_only,
    )

Handle CLI entrypoint for the airbyte-lib-validate-source command.

It's called like this:

airbyte-lib-validate-source --connector-dir . --sample-config secrets/config.json

It performs a basic smoke test to make sure the connector in question is PyAirbyte compliant:

  • Can be installed into a venv
  • Can be called via cli entrypoint
  • Answers according to the Airbyte protocol when called with spec, check, discover and read.
def validate(connector_dir: str, sample_config: str, *, validate_install_only: bool) -> None:
    """Install a connector into a local venv and smoke-test it.

    Reads `metadata.yaml` from the connector directory, creates a `.venv-<connector name>`
    virtual environment relative to the current working directory if it does not exist
    yet, pip-installs the connector into it, points the `AIRBYTE_LOCAL_REGISTRY`
    environment variable at a temporary single-entry registry file and then runs either
    the install-only test or the full tests.

    Args:
        connector_dir: Path to the connector directory containing `metadata.yaml`.
        sample_config: Path to a sample config JSON file. May be empty or `None` only
            when `validate_install_only` is set.
        validate_install_only: If true, only check that the source can be created and
            returns a spec; otherwise run the full tests.

    Raises:
        exc.PyAirbyteInputError: If the full tests are requested without `sample_config`.
        exc.AirbyteSubprocessFailedError: If creating the venv or installing the
            connector exits with a non-zero status.
    """
    # Fail fast: check the inputs before creating a venv or installing anything.
    if not validate_install_only and not sample_config:
        raise exc.PyAirbyteInputError(
            input_value="--sample-config is required without --validate-install-only set"
        )

    # read metadata.yaml
    metadata_path = Path(connector_dir) / "metadata.yaml"
    with metadata_path.open() as stream:
        metadata = yaml.safe_load(stream)["data"]

    # TODO: Use remoteRegistries.pypi.packageName once set for connectors
    connector_name = metadata["dockerRepository"].replace("airbyte/", "")

    # create a venv and install the connector
    venv_path = Path(f".venv-{connector_name}")
    if not venv_path.exists():
        _run_subprocess_and_raise_on_failure([sys.executable, "-m", "venv", str(venv_path)])

    pip_path = str(_get_bin_dir(venv_path) / "pip")

    _run_subprocess_and_raise_on_failure([pip_path, "install", connector_dir])

    # write basic registry to temp json file
    registry = {
        "sources": [
            {
                "dockerRepository": f"airbyte/{connector_name}",
                "dockerImageTag": "0.0.1",
                "remoteRegistries": {
                    # Must be an f-string so the real connector name is interpolated.
                    "pypi": {"packageName": f"airbyte-{connector_name}", "enabled": True}
                },
            },
        ],
    }

    with tempfile.NamedTemporaryFile(mode="w+t", delete=True) as temp_file:
        temp_file.write(json.dumps(registry))
        # Flush so the registry content is on disk before it is read via the file name.
        temp_file.flush()
        os.environ["AIRBYTE_LOCAL_REGISTRY"] = str(temp_file.name)
        # The tests run inside the `with` block because the file is deleted on exit.
        if validate_install_only:
            install_only_test(connector_name)
        else:
            full_tests(connector_name, sample_config)