Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🎉 Source File: Migrate File source to CDK structure #7387

Merged
merged 9 commits into from
Oct 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions airbyte-integrations/connectors/source-file/.dockerignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,7 @@
build

*
!Dockerfile
!main.py
!source_file
!setup.py
!integration_tests
!secrets
15 changes: 7 additions & 8 deletions airbyte-integrations/connectors/source-file/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
FROM airbyte/integration-base-python:0.1.1
FROM python:3.7-slim

# Bash is installed for more convenient debugging.
RUN apt-get update && apt-get install -y jq curl bash && rm -rf /var/lib/apt/lists/*

ENV CODE_PATH="source_file"
ENV AIRBYTE_IMPL_MODULE="source_file"
ENV AIRBYTE_IMPL_PATH="SourceFile"

WORKDIR /airbyte/integration_code
COPY $CODE_PATH ./$CODE_PATH
COPY source_file ./source_file
COPY main.py ./
COPY setup.py ./
RUN pip install .

ENV AIRBYTE_ENTRYPOINT "/airbyte/base.sh"
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=0.2.6
LABEL io.airbyte.version=0.2.7
LABEL io.airbyte.name=airbyte/source-file
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference)
# for more information about how to configure these tests
#
# Each key under `tests` names one acceptance test; its value is a list of
# input sets, and the test is run once per list entry.
connector_image: airbyte/source-file:dev
tests:
  spec:
    - spec_path: "source_file/spec.json"
  connection:
    # A valid config must pass the connection check...
    - config_path: "integration_tests/config.json"
      status: "succeed"
    # ...and a deliberately broken one must be reported as failed.
    - config_path: "integration_tests/invalid_config.json"
      status: "failed"
  discovery:
    - config_path: "integration_tests/config.json"
  basic_read:
    - config_path: "integration_tests/config.json"
      configured_catalog_path: "integration_tests/configured_catalog.json"
  full_refresh:
    - config_path: "integration_tests/config.json"
      configured_catalog_path: "integration_tests/configured_catalog.json"
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/usr/bin/env sh

# Builds the connector image under the name declared in
# acceptance-test-config.yml, then runs the Source Acceptance Test container
# against the current directory.

# Read the image reference from the first `connector_image:` line.
# `-f2-` keeps every field after the first colon, so the tag (e.g. `:dev`) is
# preserved; plain `-f2` would cut it off and the build would be tagged
# `:latest` while the tests run against `:dev`. `tr` strips the whitespace
# that follows the YAML key.
CONNECTOR_IMAGE=$(grep "connector_image" acceptance-test-config.yml | head -n 1 | cut -d: -f2- | tr -d '[:space:]')

# Build latest connector image
docker build . -t "$CONNECTOR_IMAGE"

# Pull latest acctest image
docker pull airbyte/source-acceptance-test:latest

# Run
# The Docker socket is mounted so the test container can start the connector
# image; the working directory is quoted so paths containing spaces still mount.
docker run --rm -it \
    -v /var/run/docker.sock:/var/run/docker.sock \
    -v /tmp:/tmp \
    -v "$(pwd)":/test_input \
    airbyte/source-acceptance-test \
    --acceptance-test-config /test_input
24 changes: 1 addition & 23 deletions airbyte-integrations/connectors/source-file/build.gradle
Original file line number Diff line number Diff line change
@@ -1,31 +1,9 @@
import ru.vyarus.gradle.plugin.python.task.PythonTask

plugins {
id 'airbyte-python'
id 'airbyte-docker'
id 'airbyte-standard-source-test-file'
id 'airbyte-source-acceptance-test'
}

airbytePython {
moduleDirectory 'source_file'
}


airbyteStandardSourceTestFile {
specPath = "source_file/spec.json"
configPath = "integration_tests/config.json"
configuredCatalogPath = "integration_tests/configured_catalog.json"
}

task("customIntegrationTestPython", type: PythonTask, dependsOn: installTestReqs){
module = "pytest"
command = "-s integration_tests"
}

integrationTest.dependsOn("customIntegrationTestPython")


dependencies {
implementation files(project(':airbyte-integrations:bases:base-standard-source-test-file').airbyteDocker.outputs)
implementation files(project(':airbyte-integrations:bases:base-python').airbyteDocker.outputs)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#
# Copyright (c) 2021 Airbyte, Inc., all rights reserved.
#
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#
# Copyright (c) 2021 Airbyte, Inc., all rights reserved.
#


import pytest

pytest_plugins = ("source_acceptance_test.plugin",)


@pytest.fixture(scope="session", autouse=True)
def connector_setup():
    """Placeholder for external resources that the acceptance tests might require.

    Declared session-scoped and autouse, so it applies to the test session
    without being requested explicitly. Code placed before the ``yield`` acts
    as setup and code placed after it acts as teardown; both are currently
    empty.
    """
    # TODO: setup test dependencies
    yield
    # TODO: clean up test dependencies
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,10 @@ def test__streams_from_ssh_providers(provider_config, provider_name, file_path,
streams = list(client.streams)
assert len(streams) == 1
assert streams[0].json_schema["properties"] == {
"header1": {"type": "string"},
"header2": {"type": "number"},
"header3": {"type": "number"},
"header4": {"type": "boolean"},
"header1": {"type": ["string", "null"]},
"header2": {"type": ["number", "null"]},
"header3": {"type": ["number", "null"]},
"header4": {"type": ["boolean", "null"]},
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,44 @@
"streams": [
{
"stream": {
"name": "my_own_data_sample/my_file.csv",
"name": "integrationTestFile",
"json_schema": {
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"properties": {
"date": {
"type": "string"
"type": ["string", "null"]
},
"key": {
"type": "string"
"type": ["string", "null"]
},
"total_confirmed": {
"type": "number"
"new_confirmed": {
"type": ["number", "null"]
},
"new_deceased": {
"type": ["number", "null"]
},
"new_recovered": {
"type": ["number", "null"]
},
"total_healed": {
"type": "number"
"new_tested": {
"type": ["number", "null"]
},
"total_confirmed": {
"type": ["number", "null"]
},
"total_deceased": {
"type": "number"
"type": ["number", "null"]
},
"total_recovered": {
"type": "number"
"type": ["number", "null"]
},
"total_tested": {
"type": "number"
"type": ["number", "null"]
}
}
}
},
"supported_sync_modes": ["full_refresh"]
},
"sync_mode": "full_refresh",
"destination_sync_mode": "overwrite"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pathlib import Path

import pytest
from base_python import AirbyteLogger
from airbyte_cdk import AirbyteLogger
from source_file import SourceFile
from source_file.client import Client

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"dataset_name": "fake_csv",
"format": "csv",
"reader_options": "{\"sep\": \",\", \"nrows\": 20}",
"url": "https://test.fakr.com/cfake_data.csv",
"provider": {
"storage": "HTTPS",
"reader_impl": "fake"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import sys

from base_python.entrypoint import launch
from airbyte_cdk.entrypoint import launch
from source_file import SourceFile

if __name__ == "__main__":
Expand Down
3 changes: 1 addition & 2 deletions airbyte-integrations/connectors/source-file/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# This file is autogenerated -- only edit if you know what you are doing. Use setup.py for declaring dependencies.
-e ../../bases/airbyte-protocol
-e ../../bases/base-python
-e ../../bases/source-acceptance-test
-e .
9 changes: 2 additions & 7 deletions airbyte-integrations/connectors/source-file/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@
from setuptools import find_packages, setup

MAIN_REQUIREMENTS = [
"airbyte-protocol",
"base-python",
"airbyte-cdk~=0.1",
"gcsfs==0.7.1",
"genson==1.2.2",
"google-cloud-storage==1.35.0",
Expand All @@ -24,11 +23,7 @@
"pyxlsb==1.0.8",
]

TEST_REQUIREMENTS = [
"boto3==1.16.57",
"pytest==6.1.2",
"pytest-docker==0.10.1",
]
TEST_REQUIREMENTS = ["boto3==1.16.57", "pytest==6.1.2", "pytest-docker==0.10.1"]

setup(
name="source_file",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,11 @@
from urllib.parse import urlparse

import google
import numpy as np
import pandas as pd
import smart_open
from airbyte_protocol import AirbyteStream
from airbyte_cdk.entrypoint import logger
from airbyte_cdk.models import AirbyteStream, SyncMode
from azure.storage.blob import BlobServiceClient
from base_python.entrypoint import logger
from botocore import UNSIGNED
from botocore.config import Config
from genson import SchemaBuilder
Expand Down Expand Up @@ -339,7 +338,7 @@ def read(self, fields: Iterable = None) -> Iterable[dict]:
fields = set(fields) if fields else None
for df in self.load_dataframes(fp):
columns = fields.intersection(set(df.columns)) if fields else df.columns
df = df.replace(np.nan, "NaN", regex=True)
df = df.where(pd.notnull(df), None)
yield from df[columns].to_dict(orient="records")

def _stream_properties(self):
Expand All @@ -352,7 +351,7 @@ def _stream_properties(self):
for df in df_list:
for col in df.columns:
fields[col] = self.dtype_to_json_type(df[col].dtype)
return {field: {"type": fields[field]} for field in fields}
return {field: {"type": [fields[field], "null"]} for field in fields}

@property
def streams(self) -> Iterable:
Expand All @@ -363,4 +362,4 @@ def streams(self) -> Iterable:
"type": "object",
"properties": self._stream_properties(),
}
yield AirbyteStream(name=self.stream_name, json_schema=json_schema)
yield AirbyteStream(name=self.stream_name, json_schema=json_schema, supported_sync_modes=[SyncMode.full_refresh])
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
from datetime import datetime
from typing import Generator, Iterable, Mapping

from airbyte_protocol import (
from airbyte_cdk import AirbyteLogger
from airbyte_cdk.models import (
AirbyteCatalog,
AirbyteConnectionStatus,
AirbyteMessage,
Expand All @@ -16,7 +17,7 @@
Status,
Type,
)
from base_python import AirbyteLogger, Source
from airbyte_cdk.sources import Source

from .client import Client

Expand Down
24 changes: 16 additions & 8 deletions airbyte-integrations/connectors/source-file/source_file/spec.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@
"storage": {
"type": "string",
"enum": ["HTTPS"],
"default": "HTTPS"
"default": "HTTPS",
"const": "HTTPS"
}
}
},
Expand All @@ -50,7 +51,8 @@
"storage": {
"type": "string",
"enum": ["GCS"],
"default": "GCS"
"default": "GCS",
"const": "GCS"
},
"service_account_json": {
"type": "string",
Expand All @@ -65,7 +67,8 @@
"storage": {
"type": "string",
"enum": ["S3"],
"default": "S3"
"default": "S3",
"const": "S3"
},
"aws_access_key_id": {
"type": "string",
Expand All @@ -85,7 +88,8 @@
"storage": {
"type": "string",
"enum": ["AzBlob"],
"default": "AzBlob"
"default": "AzBlob",
"const": "AzBlob"
},
"storage_account": {
"type": "string",
Expand All @@ -110,7 +114,8 @@
"storage": {
"type": "string",
"enum": ["SSH"],
"default": "SSH"
"default": "SSH",
"const": "SSH"
},
"user": {
"type": "string"
Expand All @@ -135,7 +140,8 @@
"storage": {
"type": "string",
"enum": ["SCP"],
"default": "SCP"
"default": "SCP",
"const": "SCP"
},
"user": {
"type": "string"
Expand All @@ -160,7 +166,8 @@
"storage": {
"type": "string",
"enum": ["SFTP"],
"default": "SFTP"
"default": "SFTP",
"const": "SFTP"
},
"user": {
"type": "string"
Expand All @@ -186,7 +193,8 @@
"type": "string",
"description": "WARNING: Note that local storage URL available for read must start with the local mount \"/local/\" at the moment until we implement more advanced docker mounting options...",
"enum": ["local"],
"default": "local"
"default": "local",
"const": "local"
}
}
}
Expand Down
1 change: 1 addition & 0 deletions docs/integrations/sources/file.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ In order to read large files from a remote location, this connector uses the [sm

| Version | Date | Pull Request | Subject |
| :--- | :--- | :--- | :--- |
| 0.2.7 | 2021-10-28 | [7387](https://github.com/airbytehq/airbyte/pull/7387) | Migrate source to CDK structure, add SAT testing. |
| 0.2.6 | 2021-08-26 | [5613](https://github.com/airbytehq/airbyte/pull/5613) | Add support for xlsb format |
| 0.2.5 | 2021-07-26 | [4953](https://github.com/airbytehq/airbyte/pull/4953) | Allow non-default port for SFTP type |
| 0.2.4 | 2021-06-09 | [3973](https://github.com/airbytehq/airbyte/pull/3973) | Add AIRBYTE\_ENTRYPOINT for Kubernetes support |
Expand Down
Loading
Loading