Skip to content

Commit

Permalink
🎉 New Source: The Guardian API [low-code CDK] (#18654)
Browse files Browse the repository at this point in the history
* Add new source: The Guardian API

* Add documentation

* Fix custom paginator, it now stops without throwing an error

* Update the-guardian-api.md with PR number and link

* Remove catalog file, add titles to all properties in spec.yaml

* Add incremental sync, change parameter names

* format

* remove order from spec

* add guardian to source def

* auto-bump connector version

Co-authored-by: Vincent Koc <koconder@users.noreply.github.com>
Co-authored-by: marcosmarxm <marcosmarxm@gmail.com>
Co-authored-by: Octavia Squidington III <octavia-squidington-iii@users.noreply.github.com>
Co-authored-by: Marcos Marx <marcosmarxm@users.noreply.github.com>
  • Loading branch information
5 people committed Nov 9, 2022
1 parent 1403c1b commit 538a420
Show file tree
Hide file tree
Showing 28 changed files with 754 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1334,6 +1334,13 @@
icon: timely.svg
sourceType: api
releaseStage: alpha
# The Guardian API source connector (alpha). The image tag below matches the
# `io.airbyte.version` label in the connector's Dockerfile (0.1.0).
- name: The Guardian API
  sourceDefinitionId: d42bd69f-6bf0-4d0b-9209-16231af07a92
  dockerRepository: airbyte/source-the-guardian-api
  dockerImageTag: 0.1.0
  documentationUrl: https://docs.airbyte.com/integrations/sources/the-guardian-api
  sourceType: api
  releaseStage: alpha
- name: Trello
sourceDefinitionId: 8da67652-004c-11ec-9a03-0242ac130003
dockerRepository: airbyte/source-trello
Expand Down
72 changes: 72 additions & 0 deletions airbyte-config/init/src/main/resources/seed/source_specs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12858,6 +12858,78 @@
supportsNormalization: false
supportsDBT: false
supported_destination_sync_modes: []
# Connector specification for The Guardian API source (image tag 0.1.0).
# Only `api_key` and `start_date` are required; every other property is
# optional.
- dockerImage: "airbyte/source-the-guardian-api:0.1.0"
  spec:
    documentationUrl: "https://docs.airbyte.com/integrations/sources/the-guardian-api"
    connectionSpecification:
      $schema: "http://json-schema.org/draft-07/schema#"
      title: "The Guardian Api Spec"
      type: "object"
      required:
      - "api_key"
      - "start_date"
      additionalProperties: true
      properties:
        api_key:
          title: "API Key"
          type: "string"
          description: "Your API Key. See <a href=\"https://open-platform.theguardian.com/access/\"\
            >here</a>. The key is case sensitive."
          airbyte_secret: true
        start_date:
          title: "Start Date"
          type: "string"
          description: "Use this to set the minimum date (YYYY-MM-DD) of the results.\
            \ Results older than the start_date will not be shown."
          pattern: "^([1-9][0-9]{3})\\-(0?[1-9]|1[012])\\-(0?[1-9]|[12][0-9]|3[01])$"
          # The example is a real date so that it validates against `pattern`.
          examples:
          - "2022-10-25"
        query:
          title: "Query"
          type: "string"
          description: "(Optional) The query (q) parameter filters the results to\
            \ only those that include that search term. The q parameter supports AND,\
            \ OR and NOT operators."
          examples:
          - "environment AND NOT water"
          - "environment AND political"
          - "amusement park"
          - "political"
        tag:
          title: "Tag"
          type: "string"
          description: "(Optional) A tag is a piece of data that is used by The Guardian\
            \ to categorise content. Use this parameter to filter results by showing\
            \ only the ones matching the entered tag. See <a href=\"https://content.guardianapis.com/tags?api-key=test\"\
            >here</a> for a list of all tags, and <a href=\"https://open-platform.theguardian.com/documentation/tag\"\
            >here</a> for the tags endpoint documentation."
          examples:
          - "environment/recycling"
          - "environment/plasticbags"
          - "environment/energyefficiency"
        section:
          title: "Section"
          type: "string"
          description: "(Optional) Use this to filter the results by a particular\
            \ section. See <a href=\"https://content.guardianapis.com/sections?api-key=test\"\
            >here</a> for a list of all sections, and <a href=\"https://open-platform.theguardian.com/documentation/section\"\
            >here</a> for the sections endpoint documentation."
          examples:
          - "media"
          - "technology"
          - "housing-network"
        end_date:
          title: "End Date"
          type: "string"
          description: "(Optional) Use this to set the maximum date (YYYY-MM-DD) of\
            \ the results. Results newer than the end_date will not be shown. Default\
            \ is set to the current date (today) for incremental syncs."
          pattern: "^([1-9][0-9]{3})\\-(0?[1-9]|1[012])\\-(0?[1-9]|[12][0-9]|3[01])$"
          # The example is a real date so that it validates against `pattern`.
          examples:
          - "2022-10-30"
    supportsNormalization: false
    supportsDBT: false
    supported_destination_sync_modes: []
- dockerImage: "airbyte/source-trello:0.1.6"
spec:
documentationUrl: "https://docs.airbyte.com/integrations/sources/trello"
Expand Down
1 change: 1 addition & 0 deletions airbyte-integrations/builds.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@
| Strava | [![source-strava](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-strava%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-strava) |
| Stripe | [![source-stripe](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-stripe%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-stripe) |
| Tempo | [![source-tempo](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-tempo%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-tempo) |
| The Guardian API | [![source-the-guardian-api](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-the-guardian-api%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-the-guardian-api) |
| TikTok Marketing | [![source-tiktok-marketing](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-tiktok-marketing%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-tiktok-marketing) |
| Trello | [![source-trello](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-trello%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-trello) |
| Twilio | [![source-twilio](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-twilio%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-twilio) |
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Exclude everything from the Docker build context by default ...
*
# ... then re-include only the paths listed below.
!Dockerfile
!main.py
!source_the_guardian_api
!setup.py
# NOTE(review): `secrets` is re-included, but the connector Dockerfile only
# copies setup.py, main.py and source_the_guardian_api — confirm it is needed.
!secrets
38 changes: 38 additions & 0 deletions airbyte-integrations/connectors/source-the-guardian-api/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Multi-stage build for the source-the-guardian-api connector image.
FROM python:3.9.11-alpine3.15 AS base

# Builder stage: install the connector's Python requirements into /install.
FROM base AS builder
WORKDIR /airbyte/integration_code

# Upgrade system packages and pip, then add tzdata (source of the UTC zoneinfo
# file copied into the final image) and the build-base toolchain.
RUN apk --no-cache upgrade \
    && pip install --upgrade pip \
    && apk --no-cache add tzdata build-base


COPY setup.py ./
# Install the package and its requirements under a temporary prefix.
RUN pip install --prefix=/install .

# Final stage: start again from the clean base image.
FROM base
WORKDIR /airbyte/integration_code

# Copy the libraries installed by the builder stage into the final image.
COPY --from=builder /install /usr/local
# Default the container's timezone to UTC.
COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime
RUN echo "Etc/UTC" > /etc/timezone

# bash is installed for more convenient debugging.
RUN apk --no-cache add bash

# Copy the connector's own code only.
COPY main.py ./
COPY source_the_guardian_api ./source_the_guardian_api

# key=value form; the space-separated `ENV key value` form is legacy syntax.
ENV AIRBYTE_ENTRYPOINT="python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=0.1.0
LABEL io.airbyte.name=airbyte/source-the-guardian-api
79 changes: 79 additions & 0 deletions airbyte-integrations/connectors/source-the-guardian-api/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# The Guardian Api Source

This is the repository for The Guardian API configuration-based source connector.
For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.io/integrations/sources/the-guardian-api).

## Local development

#### Building via Gradle
You can also build the connector in Gradle. This is typically used in CI and not needed for your development workflow.

To build using Gradle, from the Airbyte repository root, run:
```
./gradlew :airbyte-integrations:connectors:source-the-guardian-api:build
```

#### Create credentials
**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.io/integrations/sources/the-guardian-api)
to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `source_the_guardian_api/spec.yaml` file.
Note that any directory named `secrets` is gitignored across the entire Airbyte repo, so there is no danger of accidentally checking in sensitive information.
See `integration_tests/sample_config.json` for a sample config file.

**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `source the-guardian-api test creds`
and place them into `secrets/config.json`.

### Locally running the connector docker image

#### Build
First, make sure you build the latest Docker image:
```
docker build . -t airbyte/source-the-guardian-api:dev
```

You can also build the connector image via Gradle:
```
./gradlew :airbyte-integrations:connectors:source-the-guardian-api:airbyteDocker
```
When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in
the Dockerfile.

#### Run
Then run any of the connector commands as follows:
```
docker run --rm airbyte/source-the-guardian-api:dev spec
docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-the-guardian-api:dev check --config /secrets/config.json
docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-the-guardian-api:dev discover --config /secrets/config.json
docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/source-the-guardian-api:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json
```
## Testing

#### Acceptance Tests
Customize `acceptance-test-config.yml` file to configure tests. See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) for more information.
If your connector needs to create or destroy resources for use during acceptance tests, create fixtures for them and place them inside `integration_tests/acceptance.py`.

To run your integration tests with docker

### Using gradle to run tests
All commands should be run from airbyte project root.
To run unit tests:
```
./gradlew :airbyte-integrations:connectors:source-the-guardian-api:unitTest
```
To run acceptance and custom integration tests:
```
./gradlew :airbyte-integrations:connectors:source-the-guardian-api:integrationTest
```

## Dependency Management
All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development.
We split dependencies between two groups, dependencies that are:
* required for your connector to work need to go to `MAIN_REQUIREMENTS` list.
* required for the testing need to go to `TEST_REQUIREMENTS` list

### Publishing a new version of the connector
You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what?
1. Make sure your changes are passing unit and integration tests.
1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)).
1. Create a Pull Request.
1. Pat yourself on the back for being an awesome contributor.
1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
#
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Acceptance-test configuration for this connector. Reference:
# [Source Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/source-acceptance-tests-reference)
connector_image: airbyte/source-the-guardian-api:dev
# Keep the image name above unquoted and on a single line: the acceptance-test
# shell script extracts it from this file with grep and cut.
acceptance_tests:
  # The connector must emit the spec stored in the package.
  spec:
    tests:
      - spec_path: 'source_the_guardian_api/spec.yaml'
  # `check` is expected to pass with the real config and fail with the invalid one.
  connection:
    tests:
      - config_path: 'secrets/config.json'
        status: 'succeed'
      - config_path: 'integration_tests/invalid_config.json'
        status: 'failed'
  discovery:
    tests:
      - config_path: 'secrets/config.json'
  basic_read:
    tests:
      - config_path: 'secrets/config.json'
        configured_catalog_path: 'integration_tests/configured_catalog.json'
        empty_streams: []
  # Incremental test, including a state file dated in the future.
  incremental:
    tests:
      - config_path: 'secrets/config.json'
        configured_catalog_path: 'integration_tests/configured_catalog.json'
        future_state_path: 'integration_tests/abnormal_state.json'
  full_refresh:
    tests:
      - config_path: 'secrets/config.json'
        configured_catalog_path: 'integration_tests/configured_catalog.json'
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/usr/bin/env sh

# Builds the connector image named in acceptance-test-config.yml, pulls the
# latest acceptance-test image and runs it against the current directory.

# Build latest connector image
# The command substitution is intentionally left unquoted: `cut` keeps the
# space that follows "connector_image:", and word splitting strips it.
docker build . -t $(grep "connector_image" acceptance-test-config.yml | head -n 1 | cut -d: -f2-)

# Pull latest acctest image
docker pull airbyte/source-acceptance-test:latest

# Run
# "$(pwd)" is quoted so that a checkout path containing whitespace is still
# passed to docker as a single volume argument.
docker run --rm -it \
    -v /var/run/docker.sock:/var/run/docker.sock \
    -v /tmp:/tmp \
    -v "$(pwd)":/test_input \
    airbyte/source-acceptance-test \
    --acceptance-test-config /test_input

Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# The Guardian API

## Overview

[The Guardian Open Platform](https://open-platform.theguardian.com/) is a public web service for accessing all the content the Guardian creates, categorised by tags and section. To get started, you need an API key to authenticate against the API. The Guardian API Connector is implemented with the [Airbyte Low-Code CDK](https://docs.airbyte.com/connector-development/config-based/low-code-cdk-overview).

## Output Format

#### Each content item has the following structure:

```json
{
  "id": "string",
  "type": "string",
  "sectionId": "string",
  "sectionName": "string",
  "webPublicationDate": "string",
  "webTitle": "string",
  "webUrl": "string",
  "apiUrl": "string",
  "isHosted": "boolean",
  "pillarId": "string",
  "pillarName": "string"
}
```

**Description:**

- **webPublicationDate**: The combined date and time of publication
- **webUrl**: The URL of the HTML content
- **apiUrl**: The URL of the raw content

## Core Streams

Connector supports the `content` stream that returns all pieces of content in the API.

## Rate Limiting

The key that you are assigned is rate-limited and as such any applications that depend on making large numbers of requests on a polling basis are likely to exceed their daily quota and thus be prevented from making further requests until the next period begins.

## Authentication and Permissions

To access the API, you will need to sign up for an API key, which should be sent with every request. Visit [this](https://open-platform.theguardian.com/access) link to get an API key.
The easiest way to see what data is included is to explore the data. You can build complex queries quickly and browse the results. Visit [this](https://open-platform.theguardian.com/explore) link to explore the data.

See [this](https://docs.airbyte.io/integrations/sources/the-guardian-api) link for the connector docs.
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// Gradle build for the source-the-guardian-api connector: applies the Airbyte
// Python, Docker and source-acceptance-test plugins.
plugins {
    id 'airbyte-python'
    id 'airbyte-docker'
    id 'airbyte-source-acceptance-test'
}

// Sets the plugin's moduleDirectory to the connector's package directory.
airbytePython {
    moduleDirectory 'source_the_guardian_api'
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
#
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"content": {
"webPublicationDate": "2123-10-31T10:10:10Z"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
#


import pytest

pytest_plugins = ("source_acceptance_test.plugin",)


@pytest.fixture(scope="session", autouse=True)
def connector_setup():
    """Session-scoped, auto-used placeholder fixture for the acceptance tests.

    It currently creates and destroys nothing: control passes straight to the
    test session at the ``yield``.
    """
    # Set up external test resources here, before the yield, if any are ever needed.
    yield
    # Tear down those resources here, after the yield.
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"streams": [
{
"stream": {
"name": "content",
"json_schema": {},
"supported_sync_modes": ["full_refresh", "incremental"]
},
"sync_mode": "incremental",
"destination_sync_mode": "overwrite"
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"api_key": "<invalid api_key>",
"query": "water OR rain",
"start_date": "2022-10-25"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"api_key": "<valid api_key>",
"query": "water OR rain OR thunder",
"start_date": "2022-10-25"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"content": {
"webPublicationDate": "2022-10-25T10:10:10Z"
}
}
Loading

0 comments on commit 538a420

Please sign in to comment.