From e4d3d60ca8b683c301dfd5408917e1bb5e261cc0 Mon Sep 17 00:00:00 2001 From: Alexander Marquardt Date: Wed, 22 Jun 2022 13:44:27 +0200 Subject: [PATCH] :tada: New Source: Webflow (#13617) * Added webflow code * Updated readme * Updated README * Added webflow to source_definitions.yaml * Enhanced documentation for the Webflow source connector * Improved webflow source connector instructions * Moved Site ID to before API token in Spec.yaml (for presentation in the UI) * Addressed comments in PR. * Changes to address requests in PR review * Removed version from config * Minor udpate to spec.yaml for clarity * Updated to pass the accept-version as a constant rather than parameter * Updated check_connection to hit the collections API that requires both site id and the authentication token. * Fixed the test_check_connection to use the new check_connection function * Added a streams test for generate_streams * Re-named "autentication" object to "auth" to be more consistent with the way it is created by the CDK * Added in an explict line to instantiante an "auth" object from WebflowTokenAuthenticator, to make it easier to describe in the blog * Fixed a typo in a comment * Renamed some classes to be more intuitive * Renamed class to be more intuitive * Minor change to an internal method name * Made _get_collection_name_to_id_dict staticmethod * Fixed a unit-test error that only appeared when running " python -m pytest -s unit_tests". 
This was caused by Mocked settings from test_source.py leaking into test_streams.py * format: add double quotes and remove unused import * readme: remove semantic version naming of connector in build commands * Updated spec.yaml * auto-bump connector version * format files * add changelog * update dockerfile * auto-bump connector version Co-authored-by: sajarin Co-authored-by: Octavia Squidington III Co-authored-by: marcosmarxm --- .../resources/seed/source_definitions.yaml | 7 + .../src/main/resources/seed/source_specs.yaml | 29 ++ .../connectors/source-webflow/.dockerignore | 6 + .../connectors/source-webflow/.gitignore | 1 + .../connectors/source-webflow/Dockerfile | 38 ++ .../connectors/source-webflow/README.md | 141 ++++++++ .../source-webflow/acceptance-test-config.yml | 19 + .../source-webflow/acceptance-test-docker.sh | 16 + .../connectors/source-webflow/build.gradle | 9 + .../integration_tests/__init__.py | 3 + .../integration_tests/abnormal_state.json | 5 + .../integration_tests/acceptance.py | 14 + .../integration_tests/catalog.json | 1 + .../integration_tests/configured_catalog.json | 12 + .../integration_tests/invalid_config.json | 4 + .../integration_tests/sample_config.json | 4 + .../integration_tests/sample_state.json | 5 + .../connectors/source-webflow/main.py | 13 + .../source-webflow/requirements.txt | 2 + .../sample_files/configured_catalog.json | 12 + .../connectors/source-webflow/setup.py | 29 ++ .../source-webflow/source_webflow/__init__.py | 8 + .../source-webflow/source_webflow/auth.py | 28 ++ .../source-webflow/source_webflow/source.py | 332 ++++++++++++++++++ .../source-webflow/source_webflow/spec.yaml | 23 ++ .../webflow_to_airbyte_mapping.py | 33 ++ .../source-webflow/unit_tests/__init__.py | 3 + .../source-webflow/unit_tests/test_source.py | 28 ++ .../source-webflow/unit_tests/test_streams.py | 78 ++++ docs/integrations/sources/webflow.md | 38 ++ 30 files changed, 941 insertions(+) create mode 100644 
airbyte-integrations/connectors/source-webflow/.dockerignore create mode 100644 airbyte-integrations/connectors/source-webflow/.gitignore create mode 100644 airbyte-integrations/connectors/source-webflow/Dockerfile create mode 100644 airbyte-integrations/connectors/source-webflow/README.md create mode 100644 airbyte-integrations/connectors/source-webflow/acceptance-test-config.yml create mode 100644 airbyte-integrations/connectors/source-webflow/acceptance-test-docker.sh create mode 100644 airbyte-integrations/connectors/source-webflow/build.gradle create mode 100644 airbyte-integrations/connectors/source-webflow/integration_tests/__init__.py create mode 100644 airbyte-integrations/connectors/source-webflow/integration_tests/abnormal_state.json create mode 100644 airbyte-integrations/connectors/source-webflow/integration_tests/acceptance.py create mode 100644 airbyte-integrations/connectors/source-webflow/integration_tests/catalog.json create mode 100644 airbyte-integrations/connectors/source-webflow/integration_tests/configured_catalog.json create mode 100644 airbyte-integrations/connectors/source-webflow/integration_tests/invalid_config.json create mode 100644 airbyte-integrations/connectors/source-webflow/integration_tests/sample_config.json create mode 100644 airbyte-integrations/connectors/source-webflow/integration_tests/sample_state.json create mode 100644 airbyte-integrations/connectors/source-webflow/main.py create mode 100644 airbyte-integrations/connectors/source-webflow/requirements.txt create mode 100644 airbyte-integrations/connectors/source-webflow/sample_files/configured_catalog.json create mode 100644 airbyte-integrations/connectors/source-webflow/setup.py create mode 100644 airbyte-integrations/connectors/source-webflow/source_webflow/__init__.py create mode 100644 airbyte-integrations/connectors/source-webflow/source_webflow/auth.py create mode 100644 airbyte-integrations/connectors/source-webflow/source_webflow/source.py create mode 100644 
airbyte-integrations/connectors/source-webflow/source_webflow/spec.yaml create mode 100644 airbyte-integrations/connectors/source-webflow/source_webflow/webflow_to_airbyte_mapping.py create mode 100644 airbyte-integrations/connectors/source-webflow/unit_tests/__init__.py create mode 100644 airbyte-integrations/connectors/source-webflow/unit_tests/test_source.py create mode 100644 airbyte-integrations/connectors/source-webflow/unit_tests/test_streams.py create mode 100644 docs/integrations/sources/webflow.md diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index efa01185a5b4c..ccc1ba314a063 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -990,6 +990,13 @@ icon: victorops.svg sourceType: api releaseStage: alpha +- name: Webflow + sourceDefinitionId: ef580275-d9a9-48bb-af5e-db0f5855be04 + dockerRepository: airbyte/source-webflow + dockerImageTag: 0.1.1 + documentationUrl: https://docs.airbyte.io/integrations/sources/webflow + sourceType: api + releaseStage: alpha - name: Zendesk Chat sourceDefinitionId: 40d24d0f-b8f9-4fe0-9e6c-b06c0f3f45e4 dockerRepository: airbyte/source-zendesk-chat diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 81382543dfa86..c76c177d3e3a7 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -9445,6 +9445,35 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-webflow:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/webflow" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Webflow Spec" + type: "object" + required: 
+ - "api_key" + - "site_id" + additionalProperties: false + properties: + site_id: + title: "Site id" + type: "string" + description: "The id of the Webflow site you are requesting data from. See\ + \ https://developers.webflow.com/#sites" + example: "a relatively long hex sequence" + order: 0 + api_key: + title: "API token" + type: "string" + description: "The API token for authenticating to Webflow. See https://university.webflow.com/lesson/intro-to-the-webflow-api" + example: "a very long hex sequence" + order: 1 + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] - dockerImage: "airbyte/source-zendesk-chat:0.1.7" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/zendesk-chat" diff --git a/airbyte-integrations/connectors/source-webflow/.dockerignore b/airbyte-integrations/connectors/source-webflow/.dockerignore new file mode 100644 index 0000000000000..f0867b6ea8b1f --- /dev/null +++ b/airbyte-integrations/connectors/source-webflow/.dockerignore @@ -0,0 +1,6 @@ +* +!Dockerfile +!main.py +!source_webflow +!setup.py +!secrets diff --git a/airbyte-integrations/connectors/source-webflow/.gitignore b/airbyte-integrations/connectors/source-webflow/.gitignore new file mode 100644 index 0000000000000..1d17dae13b53a --- /dev/null +++ b/airbyte-integrations/connectors/source-webflow/.gitignore @@ -0,0 +1 @@ +.venv diff --git a/airbyte-integrations/connectors/source-webflow/Dockerfile b/airbyte-integrations/connectors/source-webflow/Dockerfile new file mode 100644 index 0000000000000..d41f6e3e21ff7 --- /dev/null +++ b/airbyte-integrations/connectors/source-webflow/Dockerfile @@ -0,0 +1,38 @@ +FROM python:3.9.11-alpine3.15 as base + +# build and load all requirements +FROM base as builder +WORKDIR /airbyte/integration_code + +# upgrade pip to the latest version +RUN apk --no-cache upgrade \ + && pip install --upgrade pip \ + && apk --no-cache add tzdata build-base + + +COPY setup.py ./ +# 
install necessary packages to a temporary folder +RUN pip install --prefix=/install . + +# build a clean environment +FROM base +WORKDIR /airbyte/integration_code + +# copy all loaded and built libraries to a pure basic image +COPY --from=builder /install /usr/local +# add default timezone settings +COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime +RUN echo "Etc/UTC" > /etc/timezone + +# bash is installed for more convenient debugging. +RUN apk --no-cache add bash + +# copy payload code only +COPY main.py ./ +COPY source_webflow ./source_webflow + +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] + +LABEL io.airbyte.version=0.1.1 +LABEL io.airbyte.name=airbyte/source-webflow diff --git a/airbyte-integrations/connectors/source-webflow/README.md b/airbyte-integrations/connectors/source-webflow/README.md new file mode 100644 index 0000000000000..9fdf25dced574 --- /dev/null +++ b/airbyte-integrations/connectors/source-webflow/README.md @@ -0,0 +1,141 @@ +# Webflow Source + +This is the repository for the Webflow source connector, written in Python. +For information about how to use this connector within Airbyte, see [Webflow source documentation](https://docs.airbyte.io/integrations/sources/webflow). + +## Local development + +### Prerequisites +**To iterate on this connector, make sure to complete this prerequisites section.** + +#### Minimum Python version required `= 3.9.11` + +#### Build & Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: +``` +python -m venv .venv +``` + +This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your +development environment of choice. 
To activate it from the terminal, run: +``` +source .venv/bin/activate +pip install -r requirements.txt +pip install '.[tests]' +``` +If you are in an IDE, follow your IDE's instructions to activate the virtualenv. + +Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is +used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`. +If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything +should work as you expect. + +#### Building via Gradle +You can also build the connector in Gradle. This is typically used in CI and not needed for your development workflow. + +To build using Gradle, from the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:source-webflow:build +``` + +#### Create credentials +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.io/integrations/sources/webflow) +to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `source_webflow/spec.yaml` file. +Note that any directory named `secrets` is git-ignored across the entire Airbyte repo, so there is no danger of accidentally checking in sensitive information. +See `integration_tests/sample_config.json` for a sample config file. + +For more information about creating Webflow credentials, see [the documentation](https://docs.airbyte.io/integrations/sources/webflow). + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `source webflow test creds` +and place them into `secrets/config.json`. 
+ +### Locally running the connector +``` +python main.py spec +python main.py check --config secrets/config.json +python main.py discover --config secrets/config.json +python main.py read --config secrets/config.json --catalog integration_tests/configured_catalog.json +``` + +### Locally running the connector docker image + +#### Build +First, make sure you build the latest Docker image. Execute the following from +the source-webflow project directory (where Dockerfile can be found): +``` +docker build . -t airbyte/source-webflow:dev +``` + +You can also build the connector image via Gradle: +``` +./gradlew :airbyte-integrations:connectors:source-webflow:airbyteDocker +``` +When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in +the Dockerfile. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/source-webflow:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-webflow:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-webflow:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/source-webflow:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` +## Testing +Make sure to familiarize yourself with [pytest test discovery](https://docs.pytest.org/en/latest/goodpractices.html#test-discovery) to know how your test files and methods should be named. 
+First install test dependencies into your virtual environment: +``` +pip install .[tests] +``` + +Or if you are running in OSX with zsh, you may need to execute the following instead +``` +pip install .'[tests]' +``` +### Unit Tests +To run unit tests locally, from the connector directory run: +``` +python -m pytest unit_tests +``` + +### Integration Tests +There are two types of integration tests: Acceptance Tests (Airbyte's test suite for all source connectors) and custom integration tests (which are specific to this connector). +#### Custom Integration tests +Place custom tests inside `integration_tests/` folder, then, from the connector root, run +``` +python -m pytest integration_tests +``` +#### Acceptance Tests +Customize `acceptance-test-config.yml` file to configure tests. See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) for more information. +If your connector requires to create or destroy resources for use during acceptance tests create fixtures for it and place them inside integration_tests/acceptance.py. +To run your integration tests with acceptance tests, from the connector root, run +``` +python -m pytest integration_tests -p integration_tests.acceptance +``` +To run your integration tests with docker + +### Using gradle to run tests +All commands should be run from airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:source-webflow:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:source-webflow:integrationTest +``` + +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. 
+We split dependencies between two groups, dependencies that are: +* required for your connector to work need to go to `MAIN_REQUIREMENTS` list. +* required for the testing need to go to `TEST_REQUIREMENTS` list + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing unit and integration tests. +1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)). +1. Create a Pull Request. +1. Pat yourself on the back for being an awesome contributor. +1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. + diff --git a/airbyte-integrations/connectors/source-webflow/acceptance-test-config.yml b/airbyte-integrations/connectors/source-webflow/acceptance-test-config.yml new file mode 100644 index 0000000000000..8e6e6f5ae782a --- /dev/null +++ b/airbyte-integrations/connectors/source-webflow/acceptance-test-config.yml @@ -0,0 +1,19 @@ +# See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) +# for more information about how to configure these tests +connector_image: airbyte/source-webflow:dev +tests: + spec: + - spec_path: "source_webflow/spec.yaml" + connection: + - config_path: "secrets/config.json" + status: "succeed" + - config_path: "integration_tests/invalid_config.json" + status: "failed" + discovery: + - config_path: "secrets/config.json" + basic_read: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + empty_streams: [] + + diff --git a/airbyte-integrations/connectors/source-webflow/acceptance-test-docker.sh b/airbyte-integrations/connectors/source-webflow/acceptance-test-docker.sh new file mode 100644 index 
0000000000000..c51577d10690c --- /dev/null +++ b/airbyte-integrations/connectors/source-webflow/acceptance-test-docker.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env sh + +# Build latest connector image +docker build . -t $(cat acceptance-test-config.yml | grep "connector_image" | head -n 1 | cut -d: -f2-) + +# Pull latest acctest image +docker pull airbyte/source-acceptance-test:latest + +# Run +docker run --rm -it \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v /tmp:/tmp \ + -v $(pwd):/test_input \ + airbyte/source-acceptance-test \ + --acceptance-test-config /test_input + diff --git a/airbyte-integrations/connectors/source-webflow/build.gradle b/airbyte-integrations/connectors/source-webflow/build.gradle new file mode 100644 index 0000000000000..a35d8aee048e7 --- /dev/null +++ b/airbyte-integrations/connectors/source-webflow/build.gradle @@ -0,0 +1,9 @@ +plugins { + id 'airbyte-python' + id 'airbyte-docker' + id 'airbyte-source-acceptance-test' +} + +airbytePython { + moduleDirectory 'source_webflow' +} diff --git a/airbyte-integrations/connectors/source-webflow/integration_tests/__init__.py b/airbyte-integrations/connectors/source-webflow/integration_tests/__init__.py new file mode 100644 index 0000000000000..46b7376756ec6 --- /dev/null +++ b/airbyte-integrations/connectors/source-webflow/integration_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# diff --git a/airbyte-integrations/connectors/source-webflow/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-webflow/integration_tests/abnormal_state.json new file mode 100644 index 0000000000000..52b0f2c2118f4 --- /dev/null +++ b/airbyte-integrations/connectors/source-webflow/integration_tests/abnormal_state.json @@ -0,0 +1,5 @@ +{ + "todo-stream-name": { + "todo-field-name": "todo-abnormal-value" + } +} diff --git a/airbyte-integrations/connectors/source-webflow/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-webflow/integration_tests/acceptance.py new file mode 100644 index 0000000000000..950b53b59d416 --- /dev/null +++ b/airbyte-integrations/connectors/source-webflow/integration_tests/acceptance.py @@ -0,0 +1,14 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. +# + + +import pytest + +pytest_plugins = ("source_acceptance_test.plugin",) + + +@pytest.fixture(scope="session", autouse=True) +def connector_setup(): + """This fixture is a placeholder for external resources that acceptance test might require.""" + yield diff --git a/airbyte-integrations/connectors/source-webflow/integration_tests/catalog.json b/airbyte-integrations/connectors/source-webflow/integration_tests/catalog.json new file mode 100644 index 0000000000000..0967ef424bce6 --- /dev/null +++ b/airbyte-integrations/connectors/source-webflow/integration_tests/catalog.json @@ -0,0 +1 @@ +{} diff --git a/airbyte-integrations/connectors/source-webflow/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-webflow/integration_tests/configured_catalog.json new file mode 100644 index 0000000000000..c2887e81f620a --- /dev/null +++ b/airbyte-integrations/connectors/source-webflow/integration_tests/configured_catalog.json @@ -0,0 +1,12 @@ +{ + "streams": [ + { + "stream": { + "name": "Blog Authors", + "json_schema": {} + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff 
--git a/airbyte-integrations/connectors/source-webflow/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-webflow/integration_tests/invalid_config.json new file mode 100644 index 0000000000000..cdb9bc2f275b6 --- /dev/null +++ b/airbyte-integrations/connectors/source-webflow/integration_tests/invalid_config.json @@ -0,0 +1,4 @@ +{ + "site_id": "wrong data", + "api_key": "wrong data" +} diff --git a/airbyte-integrations/connectors/source-webflow/integration_tests/sample_config.json b/airbyte-integrations/connectors/source-webflow/integration_tests/sample_config.json new file mode 100644 index 0000000000000..2e7ab495a80cb --- /dev/null +++ b/airbyte-integrations/connectors/source-webflow/integration_tests/sample_config.json @@ -0,0 +1,4 @@ +{ + "site_id": "your-webflow-site-id", + "api_key": "your-webflow-token" +} diff --git a/airbyte-integrations/connectors/source-webflow/integration_tests/sample_state.json b/airbyte-integrations/connectors/source-webflow/integration_tests/sample_state.json new file mode 100644 index 0000000000000..3587e579822d0 --- /dev/null +++ b/airbyte-integrations/connectors/source-webflow/integration_tests/sample_state.json @@ -0,0 +1,5 @@ +{ + "todo-stream-name": { + "todo-field-name": "value" + } +} diff --git a/airbyte-integrations/connectors/source-webflow/main.py b/airbyte-integrations/connectors/source-webflow/main.py new file mode 100644 index 0000000000000..de73919146892 --- /dev/null +++ b/airbyte-integrations/connectors/source-webflow/main.py @@ -0,0 +1,13 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+# + + +import sys + +from airbyte_cdk.entrypoint import launch +from source_webflow import SourceWebflow + +if __name__ == "__main__": + source = SourceWebflow() + launch(source, sys.argv[1:]) diff --git a/airbyte-integrations/connectors/source-webflow/requirements.txt b/airbyte-integrations/connectors/source-webflow/requirements.txt new file mode 100644 index 0000000000000..0411042aa0911 --- /dev/null +++ b/airbyte-integrations/connectors/source-webflow/requirements.txt @@ -0,0 +1,2 @@ +-e ../../bases/source-acceptance-test +-e . diff --git a/airbyte-integrations/connectors/source-webflow/sample_files/configured_catalog.json b/airbyte-integrations/connectors/source-webflow/sample_files/configured_catalog.json new file mode 100644 index 0000000000000..c2887e81f620a --- /dev/null +++ b/airbyte-integrations/connectors/source-webflow/sample_files/configured_catalog.json @@ -0,0 +1,12 @@ +{ + "streams": [ + { + "stream": { + "name": "Blog Authors", + "json_schema": {} + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-webflow/setup.py b/airbyte-integrations/connectors/source-webflow/setup.py new file mode 100644 index 0000000000000..168f25863cf43 --- /dev/null +++ b/airbyte-integrations/connectors/source-webflow/setup.py @@ -0,0 +1,29 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = [ + "airbyte-cdk~=0.1", +] + +TEST_REQUIREMENTS = [ + "pytest~=6.1", + "pytest-mock~=3.6.1", + "source-acceptance-test", +] + +setup( + name="source_webflow", + description="Source implementation for Webflow.", + author="Airbyte", + author_email="contact@airbyte.io", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json", "*.yaml", "schemas/*.json", "schemas/shared/*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/source-webflow/source_webflow/__init__.py b/airbyte-integrations/connectors/source-webflow/source_webflow/__init__.py new file mode 100644 index 0000000000000..2f6bd6e79775e --- /dev/null +++ b/airbyte-integrations/connectors/source-webflow/source_webflow/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +from .source import SourceWebflow + +__all__ = ["SourceWebflow"] diff --git a/airbyte-integrations/connectors/source-webflow/source_webflow/auth.py b/airbyte-integrations/connectors/source-webflow/source_webflow/auth.py new file mode 100644 index 0000000000000..59951ca45c951 --- /dev/null +++ b/airbyte-integrations/connectors/source-webflow/source_webflow/auth.py @@ -0,0 +1,28 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+# + +from typing import Any, Mapping + +from airbyte_cdk.sources.streams.http.requests_native_auth import TokenAuthenticator + + +class WebflowAuthMixin: + """ + Mixin class for providing additional HTTP header for specifying the "accept-version" + """ + + def __init__(self, *, accept_version_header: str = "accept-version", accept_version: str, **kwargs): + super().__init__(**kwargs) + self.accept_version = accept_version + self.accept_version_header = accept_version_header + + def get_auth_header(self) -> Mapping[str, Any]: + return {**super().get_auth_header(), self.accept_version_header: self.accept_version} + + +class WebflowTokenAuthenticator(WebflowAuthMixin, TokenAuthenticator): + """ + Auth class for Personal Access Token + https://help.getharvest.com/api-v2/authentication-api/authentication/authentication/#personal-access-tokens + """ diff --git a/airbyte-integrations/connectors/source-webflow/source_webflow/source.py b/airbyte-integrations/connectors/source-webflow/source_webflow/source.py new file mode 100644 index 0000000000000..2dad6eb5671fa --- /dev/null +++ b/airbyte-integrations/connectors/source-webflow/source_webflow/source.py @@ -0,0 +1,332 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. +# + + +import logging +from abc import ABC +from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple + +import requests +from airbyte_cdk.sources import AbstractSource +from airbyte_cdk.sources.streams import Stream +from airbyte_cdk.sources.streams.http import HttpStream + +from .auth import WebflowTokenAuthenticator +from .webflow_to_airbyte_mapping import WebflowToAirbyteMapping + +""" +This module is used for pulling the contents of "collections" out of Webflow, which is a CMS for hosting websites. +A Webflow collection may be a group of items such as "Blog Posts", "Blog Authors", etc. +There may be many collections, each of which can have its own distinct schema. 
This module will dynamically figure out +which collections are available, and will dynamically create the schema for each collection based on information +extracted from Webflow. It will then download all of the items from all of the selected collections. + +Because the amount of data is expected to be "small" (not TB of data), we have not implemented any kind of +incremental downloading of data from Webflow. Each time this code is executed, it will pull back all of the items +that are contained in each of the desired collections. +""" + + +# Webflow expects an 'accept-version' header with a value of '1.0.0' (as of May 2022) +WEBFLOW_ACCEPT_VERSION = "1.0.0" + + +# Basic full refresh stream +class WebflowStream(HttpStream, ABC): + """ + This class represents a stream output by the connector. + This is an abstract base class meant to contain all the common functionality at the API level e.g: the API base URL, + pagination strategy, parsing responses etc. + + Each stream should extend this class (or another abstract subclass of it) to specify behavior unique to that stream. + """ + + url_base = "https://api.webflow.com/" + + # The following call is needed to fix what appears to be a bug in http.py line 119 + # Bug reported at: https://github.com/airbytehq/airbyte/issues/13283 + @property + def authenticator(self) -> WebflowTokenAuthenticator: + return self._session.auth + + def request_params( + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None + ) -> MutableMapping[str, Any]: + """ + Common params e.g. pagination size etc. + """ + return {} + + +class CollectionSchema(WebflowStream): + """ + Gets the schema of the current collection - see: https://developers.webflow.com/#get-collection-with-full-schema, and + then converts that schema to a json-schema.org-compatible schema that uses supported Airbyte types. 
+ + More info about Webflow schema: https://developers.webflow.com/#get-collection-with-full-schema + Airbyte data types: https://docs.airbyte.com/understanding-airbyte/supported-data-types/ + """ + + # primary_key is not used as we don't do incremental syncs - https://docs.airbyte.com/understanding-airbyte/connections/ + primary_key = None + + def __init__(self, collection_id: str = None, **kwargs): + self.collection_id = collection_id + super().__init__(**kwargs) + + def path(self, **kwargs) -> str: + """ + See: https://developers.webflow.com/#get-collection-with-full-schema + Returns the path for fetching a single collection together with its full schema. + """ + + path = f"collections/{self.collection_id}" + return path + + def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + """ + Converts the webflow schema into an Airbyte-compatible schema + + Webflow schema API returns an array of fields contained in the "fields" field. + Get field name and field type from this array, and then map it to an airbyte-supported type + """ + + response_json = response.json() + for field in response_json["fields"]: + try: + field_name = field["slug"] + field_type = field["type"] + field_schema = {field_name: WebflowToAirbyteMapping.webflow_to_airbyte_mapping[field_type]} + yield field_schema # get records from the "fields" array + except Exception as e: + msg = f"""Encountered an exception parsing schema for Webflow type: {field_type}. +Is "{field_type}" defined in the mapping between Webflow and json schma ? """ + self.logger.exception(msg) + + # Don't eat the exception, raise it again as this needs to be fixed + raise e + + def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + """This API does not return any information to support pagination""" + return {} + + +class CollectionsList(WebflowStream): + """ + The data that we are generally interested in pulling from Webflow is stored in "Collections". 
class CollectionsList(WebflowStream):
    """
    Stream that lists the metadata of every "Collection" of a Webflow site.

    Collections are where the data of interest lives in Webflow, for instance "Blog Posts" or "Blog Authors".
    More info https://developers.webflow.com/#list-collections
    """

    # primary_key is not used as we don't do incremental syncs - https://docs.airbyte.com/understanding-airbyte/connections/
    primary_key = None

    def __init__(self, site_id: str = None, **kwargs):
        """Remember the id of the site to inspect, then initialise the parent stream with the remaining arguments."""
        self.site_id = site_id
        super().__init__(**kwargs)

    def path(self, **kwargs) -> str:
        """
        Endpoint that returns high-level information about each collection of the site.
        See: https://developers.webflow.com/#list-collections
        """
        return f"sites/{self.site_id}/collections"

    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
        """
        The API answers with a JSON list of collection objects; emit them one at a time.
        """
        for collection in response.json():
            yield collection

    def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
        """This API does not return any information to support pagination"""
        return {}
class CollectionContents(WebflowStream):
    """
    This stream is used for pulling "items" out of a given Webflow collection. Because there is not a fixed number of
    collections with pre-defined names, each stream is an object that uses the passed-in collection name for the stream name.

    Note that because the Webflow API works with collection ids rather than collection names, the collection id is
    used for hitting the Webflow API.

    An example of a collection is "Blog Posts", which contains a list of items, where each item is a
    JSON-representation of a blog article.
    """

    # primary_key is not used as we don't do incremental syncs - https://docs.airbyte.com/understanding-airbyte/connections/
    primary_key = None

    def __init__(self, site_id: str = None, collection_id: str = None, collection_name: str = None, **kwargs):
        """
        Override __init__ to add collection-related variables.

        :param site_id: id of the Webflow site that owns the collection
        :param collection_id: Webflow "_id" of the collection, used to build the request path
        :param collection_name: human readable collection name, used as the stream name
        :param kwargs: forwarded untouched to the parent stream (e.g. the authenticator)
        """
        self.site_id = site_id
        super().__init__(**kwargs)
        self.collection_name = collection_name
        self.collection_id = collection_id

    @property
    def name(self) -> str:
        """The stream is named after the collection it reads."""
        return self.collection_name

    def path(self, **kwargs) -> str:
        """
        The path to get the "items" in the requested collection uses the "_id" of the collection in the URL.
        See: https://developers.webflow.com/#items

        :return: collections/{collection_id}/items
        """
        return f"collections/{self.collection_id}/items"

    def next_page_token(self, response: requests.Response) -> Mapping[str, Any]:
        """
        Compute the offset of the next page, or return an empty mapping when pagination is finished.

        Webflow uses an offset for pagination https://developers.webflow.com/#item-model

        :param response: the HTTP response of the page that was just read
        :return: {"offset": <next offset>} while more items may exist, {} otherwise
        """
        decoded_response = response.json()
        page_count = decoded_response.get("count", 0)
        if not page_count or not decoded_response.get("items"):
            # An empty page means that everything has been read
            return {}

        next_offset = decoded_response["offset"] + page_count

        # When the response reports the total number of items, stop as soon as it has been reached.
        # This avoids issuing one extra request whose only purpose is to receive an empty page.
        total = decoded_response.get("total")
        if isinstance(total, int) and next_offset >= total:
            return {}

        return {"offset": next_offset}

    def request_params(
        self,
        stream_state: Mapping[str, Any],
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> MutableMapping[str, Any]:
        """
        Build the query parameters of an items request.

        :param stream_state: unused, the connector only performs full refreshes
        :param stream_slice: unused
        :param next_page_token: the mapping returned by next_page_token for the previous page, if any
        :return: the page size, plus the offset of the page to fetch when paginating
        """

        # Webflow's default page size is 100 items. Lower this value only to debug pagination.
        params = {"limit": 100}

        # Handle pagination by inserting the next page's token in the request parameters
        if next_page_token:
            params.update(next_page_token)

        return params

    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
        """
        Webflow items API returns an array of items contained in the "items" field.
        """

        response_json = response.json()
        # The items API returns records inside a container list called "items"
        yield from response_json["items"]

    def get_json_schema(self) -> Mapping[str, Any]:
        """
        Webflow has a schema API, but it is not consistent with json-schema.org schemas. We use the CollectionSchema
        stream to get these schemas and to also map them to json-schema format.

        :return: a draft-07 json schema whose properties are the fields of the collection
        """

        schema_stream = CollectionSchema(authenticator=self.authenticator, collection_id=self.collection_id)

        # Each record corresponds to a single property of the json schema, so merge them all together.
        properties = {}
        for schema_property in schema_stream.read_records(sync_mode="full_refresh"):
            properties.update(schema_property)

        return {
            "$schema": "http://json-schema.org/draft-07/schema#",
            "additionalProperties": True,
            "type": "object",
            "properties": properties,
        }
class SourceWebflow(AbstractSource):

    """This is the main class that defines the methods that will be called by Airbyte infrastructure"""

    @staticmethod
    def _get_collection_name_to_id_dict(authenticator: WebflowTokenAuthenticator = None, site_id: str = None) -> Mapping[str, str]:
        """
        Most of the Webflow APIs require the collection id, but the streams that we are generating use the collection name.
        This function will return a dictionary containing collection_name: collection_id entries.

        :param authenticator: authenticator used for calling the Webflow API
        :param site_id: id of the Webflow site whose collections are listed
        :return: a dictionary with the collection name as key and the collection "_id" as value
        """

        collections_stream = CollectionsList(authenticator=authenticator, site_id=site_id)
        collections_records = collections_stream.read_records(sync_mode="full_refresh")

        return {collection_obj["name"]: collection_obj["_id"] for collection_obj in collections_records}

    @staticmethod
    def get_authenticator(config):
        """
        Verifies that the information for setting the header has been set, and returns a class
        which overloads that standard authentication to include additional headers that are required by Webflow.

        :param config: the user-input config object conforming to the connector's spec.yaml
        :return: a WebflowTokenAuthenticator built from the configured API key
        :raises Exception: when the config does not contain a (non-empty) "api_key"
        """
        api_key = config.get("api_key", None)
        if not api_key:
            raise Exception("Config validation error: 'api_key' is a required property")

        auth = WebflowTokenAuthenticator(token=api_key, accept_version=WEBFLOW_ACCEPT_VERSION)
        return auth

    def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Any]:
        """
        A check to validate that the user-provided config can be used to connect to the underlying API

        :param config: the user-input config object conforming to the connector's spec.yaml
        :param logger: logger object
        :return Tuple[bool, Any]: (True, None) if the input config can be used to connect to the API successfully, (False, error) otherwise.
        """

        try:
            # Check that authenticator can be retrieved
            auth = self.get_authenticator(config)
            site_id = config.get("site_id")
            collections_stream = CollectionsList(authenticator=auth, site_id=site_id)
            collections_records = collections_stream.read_records(sync_mode="full_refresh")

            # A site without any collection is still a working connection. Use a default value so that an
            # empty result is not reported as a failure carrying a message-less StopIteration.
            record = next(iter(collections_records), None)
            if record is None:
                logger.info("Successfully connected to CollectionsList stream. The site does not contain any collection.")
            else:
                logger.info(f"Successfully connected to CollectionsList stream. Pulled one record: {record}")
            return True, None
        except Exception as e:
            return False, e

    def generate_streams(self, authenticator: WebflowTokenAuthenticator, site_id: str) -> Iterable[Stream]:
        """
        Lazily generates one stream per Webflow collection, named after the collection.

        :param authenticator: authenticator used for calling the Webflow API
        :param site_id: id of the Webflow site whose collections become streams
        """

        collection_name_to_id_dict = self._get_collection_name_to_id_dict(authenticator=authenticator, site_id=site_id)

        for collection_name, collection_id in collection_name_to_id_dict.items():
            yield CollectionContents(
                authenticator=authenticator,
                site_id=site_id,
                collection_id=collection_id,
                collection_name=collection_name,
            )

    def streams(self, config: Mapping[str, Any]) -> List[Stream]:
        """
        :param config: A Mapping of the user input configuration as defined in the connector spec.
        :return List[Stream]: A list of the streams that Airbyte can pull data from.
        """

        auth = self.get_authenticator(config)
        site_id = config.get("site_id")

        # We _dynamically_ generate streams that correspond to Webflow collections (eg. Blog Authors, Blog Posts, etc.)
        # The generator is materialized so that callers get the list promised by the signature, which can be
        # iterated several times and supports len().
        return list(self.generate_streams(authenticator=auth, site_id=site_id))
#
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
#


class WebflowToAirbyteMapping:

    """
    Lookup table used while dynamically pulling a schema from Webflow: it translates each Webflow field type into
    the Airbyte-compatible json-schema fragment that describes it.
    Webflow: https://developers.webflow.com/#get-collection-with-full-schema
    Airbyte/json-schema: https://docs.airbyte.com/understanding-airbyte/supported-data-types/
    """

    # Every type is nullable. Keys are the Webflow type names, listed in alphabetical order.
    webflow_to_airbyte_mapping = dict(
        Bool={"type": ["null", "boolean"]},
        Date={"type": ["null", "string"], "format": "date-time"},
        Email={"type": ["null", "string"]},
        ImageRef={"type": ["null", "object"], "additionalProperties": True},
        ItemRef={"type": ["null", "string"]},
        ItemRefSet={"type": ["null", "array"]},
        Link={"type": ["null", "string"]},
        Number={"type": ["null", "number"]},
        Option={"type": ["null", "string"]},
        PlainText={"type": ["null", "string"]},
        RichText={"type": ["null", "string"]},
        User={"type": ["null", "string"]},
        Video={"type": ["null", "string"]},
    )
#
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
#

from unittest import TestCase
from unittest.mock import MagicMock, patch

from source_webflow.source import SourceWebflow


def test_check_connection(mocker):
    """check_connection reports success and logs the first record pulled from the CollectionsList stream."""
    sample_record = {"collection": "is_mocked"}
    read_records_stub = MagicMock(return_value=iter([sample_record]))
    webflow_source = SourceWebflow()

    # The patch is scoped by the "with" block so that it cannot impact other tests
    with patch("source_webflow.source.CollectionsList.read_records", read_records_stub):
        logger, config = MagicMock(), MagicMock()
        outcome = webflow_source.check_connection(logger, config)
        assert outcome == (True, None)
        logger.info.assert_called_once()
        logged_message = logger.method_calls[0].args[0]
        expected_pattern = r"Successfully connected.*" + str(sample_record)
        TestCase().assertRegex(logged_message, expected_pattern)


def test_streams(mocker):
    """streams() hands back whatever generate_streams produced."""
    generate_streams_stub = MagicMock(return_value=["This would be a stream"])

    # use the "with" to prevent the patch from impacting other tests
    with patch("source_webflow.source.SourceWebflow.generate_streams", generate_streams_stub):
        produced_streams = SourceWebflow().streams(MagicMock())
        assert len(produced_streams) == 1
#
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
#

from http import HTTPStatus
from unittest.mock import MagicMock

import pytest
from source_webflow.source import CollectionContents, SourceWebflow, WebflowStream


@pytest.fixture
def patch_base_class(mocker):
    """Make the abstract WebflowStream instantiable for the duration of a single test."""
    # Mock abstract methods to enable instantiating abstract class
    mocker.patch.object(WebflowStream, "path", "v0/example_endpoint")
    mocker.patch.object(WebflowStream, "primary_key", "test_primary_key")
    mocker.patch.object(WebflowStream, "__abstractmethods__", set())


def test_request_params_of_collection_items(patch_base_class):
    """The page size is always sent, and the pagination token is merged into the parameters."""
    stream = CollectionContents()
    inputs = {"stream_slice": None, "stream_state": None, "next_page_token": {"offset": 1}}
    expected_params = {"limit": 100, "offset": 1}
    assert stream.request_params(**inputs) == expected_params


def test_next_page_token_of_collection_items(patch_base_class):
    """The next offset is the current offset plus the number of items that were just received."""
    stream = CollectionContents()
    response_data = {"items": [{"item1_key": "item1_val"}], "count": 10, "offset": 100}
    inputs = {"response": MagicMock(json=lambda: response_data)}
    expected_token = {"offset": 110}
    assert stream.next_page_token(**inputs) == expected_token


def test_parse_response_of_collection_items(patch_base_class):
    """Records are read from the "items" container of the response."""
    stream = CollectionContents()
    mock_record = {"item1_key": "item1_val"}
    response_data = {"items": [mock_record]}
    inputs = {"response": MagicMock(json=lambda: response_data)}
    parsed_item = next(stream.parse_response(**inputs))
    assert parsed_item == mock_record


def test_generate_streams(patch_base_class, mocker):
    """One stream is generated per collection returned by the name-to-id lookup."""
    # Patch through the mocker fixture so that the original static method is restored when the test ends.
    # Assigning a MagicMock directly on the class would leak into every test that runs afterwards.
    mocker.patch.object(
        SourceWebflow,
        "_get_collection_name_to_id_dict",
        return_value={"name-1": "id-1", "name-2": "id-2"},
    )
    source = SourceWebflow()
    authenticator_mock = MagicMock()
    streams = source.generate_streams(authenticator_mock, "fake site id")
    assert len(list(streams)) == 2


def test_http_method(patch_base_class):
    """Webflow streams only read data, hence they use GET."""
    stream = WebflowStream()
    expected_method = "GET"
    assert stream.http_method == expected_method


@pytest.mark.parametrize(
    ("http_status", "should_retry"),
    [
        (HTTPStatus.OK, False),
        (HTTPStatus.BAD_REQUEST, False),
        (HTTPStatus.TOO_MANY_REQUESTS, True),
        (HTTPStatus.INTERNAL_SERVER_ERROR, True),
    ],
)
def test_should_retry(patch_base_class, http_status, should_retry):
    """Only rate limiting and server errors are retried."""
    response_mock = MagicMock()
    response_mock.status_code = http_status
    stream = WebflowStream()
    assert stream.should_retry(response_mock) == should_retry


def test_backoff_time(patch_base_class):
    """No custom backoff is defined, so the default backoff strategy applies."""
    response_mock = MagicMock()
    stream = WebflowStream()
    expected_backoff_time = None
    assert stream.backoff_time(response_mock) == expected_backoff_time
+ +Once you have the `API Key`/`API token`, you can retrieve a [list of available sites](https://developers.webflow.com/#sites) and get their `_id` by executing the following: + +``` +curl https://api.webflow.com/sites \ + -H "Authorization: Bearer YOUR_API_TOKEN" \ + -H "accept-version: 1.0.0" +``` + +Which should respond with something similar to: + +``` +[{"_id":"YOUR_SITE_ID","createdOn":"2021-03-26T15:46:04.032Z","name":"Airbyte","shortName":"airbyte-dev","lastPublished":"2022-06-09T12:55:52.533Z","previewUrl":"https://screenshots.webflow.com/sites/","timezone":"America/Los_Angeles","database":""}] +``` + +After retrieving your `site id`, you can create a file `secrets/config.json` conforming to the fields expected in the `source_webflow/spec.yaml` file. +(Note that any directory named `secrets` is git-ignored across the entire Airbyte repo, so there is no danger of accidentally checking in sensitive information in this folder). + +See `integration_tests/sample_config.json` for a sample config file that you can use as a template for entering your `site id` and your `Webflow API Key`. + +| Version | Date | Pull Request | Subject | +| :--- | :--- | :--- | :--- | +| 0.1.0 | 2022-06-22 | [13617](https://github.com/airbytehq/airbyte/pull/13617) | Initial release | +| 0.1.1 | 2022-06-22 | [13617](https://github.com/airbytehq/airbyte/pull/13617) | Update Spec Documentation URL | + +