Skip to content

Commit

Permalink
🎉 Source File: Migrate File source to CDK structure (#7387)
Browse files Browse the repository at this point in the history
* Migrate File source to CDK structure

* fix .dockerignore file

* remove SAT requirements

* update Dockerfile

* change Dockerfile base image to python:3.7-slim

* add SAT tests

* update tests

* add secret/config.json for source-file

* update changelogs
  • Loading branch information
yevhenii-ldv committed Oct 29, 2021
1 parent cdb80f4 commit 269298c
Show file tree
Hide file tree
Showing 19 changed files with 134 additions and 75 deletions.
9 changes: 7 additions & 2 deletions airbyte-integrations/connectors/source-file/.dockerignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,7 @@
build

*
!Dockerfile
!main.py
!source_file
!setup.py
!integration_tests
!secrets
15 changes: 7 additions & 8 deletions airbyte-integrations/connectors/source-file/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
FROM airbyte/integration-base-python:0.1.1
FROM python:3.7-slim

# Bash is installed for more convenient debugging.
RUN apt-get update && apt-get install -y jq curl bash && rm -rf /var/lib/apt/lists/*

ENV CODE_PATH="source_file"
ENV AIRBYTE_IMPL_MODULE="source_file"
ENV AIRBYTE_IMPL_PATH="SourceFile"

WORKDIR /airbyte/integration_code
COPY $CODE_PATH ./$CODE_PATH
COPY source_file ./source_file
COPY main.py ./
COPY setup.py ./
RUN pip install .

ENV AIRBYTE_ENTRYPOINT "/airbyte/base.sh"
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=0.2.6
LABEL io.airbyte.version=0.2.7
LABEL io.airbyte.name=airbyte/source-file
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference)
# for more information about how to configure these tests
# Name (including tag) of the connector image under test.
connector_image: airbyte/source-file:dev
tests:
# Connector specification file used by the spec test.
spec:
- spec_path: "source_file/spec.json"
# Connection checks: the valid config is expected to succeed,
# the invalid config is expected to fail.
connection:
- config_path: "integration_tests/config.json"
status: "succeed"
- config_path: "integration_tests/invalid_config.json"
status: "failed"
# Discovery uses the valid config.
discovery:
- config_path: "integration_tests/config.json"
# Read tests pair the valid config with the configured catalog.
basic_read:
- config_path: "integration_tests/config.json"
configured_catalog_path: "integration_tests/configured_catalog.json"
full_refresh:
- config_path: "integration_tests/config.json"
configured_catalog_path: "integration_tests/configured_catalog.json"
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/usr/bin/env sh

# Build the latest connector image, tagged with the first "connector_image"
# value declared in acceptance-test-config.yml.
# `cut -d: -f2-` keeps everything after the first colon, so the image tag
# (e.g. ":dev") is preserved; `-f2` alone would drop it and the build would be
# tagged without the tag the acceptance-test config refers to.
# The command substitution is intentionally unquoted: word splitting strips
# the leading space that follows "connector_image:".
docker build . -t $(grep "connector_image" acceptance-test-config.yml | head -n 1 | cut -d: -f2-)

# Pull latest acctest image
docker pull airbyte/source-acceptance-test:latest

# Run the acceptance tests with the current directory mounted as /test_input.
# "$(pwd)" is quoted so a working directory containing spaces still mounts.
docker run --rm -it \
    -v /var/run/docker.sock:/var/run/docker.sock \
    -v /tmp:/tmp \
    -v "$(pwd)":/test_input \
    airbyte/source-acceptance-test \
    --acceptance-test-config /test_input
24 changes: 1 addition & 23 deletions airbyte-integrations/connectors/source-file/build.gradle
Original file line number Diff line number Diff line change
@@ -1,31 +1,9 @@
import ru.vyarus.gradle.plugin.python.task.PythonTask

plugins {
id 'airbyte-python'
id 'airbyte-docker'
id 'airbyte-standard-source-test-file'
id 'airbyte-source-acceptance-test'
}

airbytePython {
moduleDirectory 'source_file'
}


airbyteStandardSourceTestFile {
specPath = "source_file/spec.json"
configPath = "integration_tests/config.json"
configuredCatalogPath = "integration_tests/configured_catalog.json"
}

task("customIntegrationTestPython", type: PythonTask, dependsOn: installTestReqs){
module = "pytest"
command = "-s integration_tests"
}

integrationTest.dependsOn("customIntegrationTestPython")


dependencies {
implementation files(project(':airbyte-integrations:bases:base-standard-source-test-file').airbyteDocker.outputs)
implementation files(project(':airbyte-integrations:bases:base-python').airbyteDocker.outputs)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#
# Copyright (c) 2021 Airbyte, Inc., all rights reserved.
#
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#
# Copyright (c) 2021 Airbyte, Inc., all rights reserved.
#


import pytest

pytest_plugins = ("source_acceptance_test.plugin",)


@pytest.fixture(scope="session", autouse=True)
def connector_setup():
    """Session-scoped, auto-used placeholder for external resources the acceptance tests might require.

    Code before the ``yield`` is the setup phase and code after it is the
    teardown phase; both are currently empty.
    """
    # TODO: setup test dependencies
    yield
    # TODO: clean up test dependencies
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,10 @@ def test__streams_from_ssh_providers(provider_config, provider_name, file_path,
streams = list(client.streams)
assert len(streams) == 1
assert streams[0].json_schema["properties"] == {
"header1": {"type": "string"},
"header2": {"type": "number"},
"header3": {"type": "number"},
"header4": {"type": "boolean"},
"header1": {"type": ["string", "null"]},
"header2": {"type": ["number", "null"]},
"header3": {"type": ["number", "null"]},
"header4": {"type": ["boolean", "null"]},
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,44 @@
"streams": [
{
"stream": {
"name": "my_own_data_sample/my_file.csv",
"name": "integrationTestFile",
"json_schema": {
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"properties": {
"date": {
"type": "string"
"type": ["string", "null"]
},
"key": {
"type": "string"
"type": ["string", "null"]
},
"total_confirmed": {
"type": "number"
"new_confirmed": {
"type": ["number", "null"]
},
"new_deceased": {
"type": ["number", "null"]
},
"new_recovered": {
"type": ["number", "null"]
},
"total_healed": {
"type": "number"
"new_tested": {
"type": ["number", "null"]
},
"total_confirmed": {
"type": ["number", "null"]
},
"total_deceased": {
"type": "number"
"type": ["number", "null"]
},
"total_recovered": {
"type": "number"
"type": ["number", "null"]
},
"total_tested": {
"type": "number"
"type": ["number", "null"]
}
}
}
},
"supported_sync_modes": ["full_refresh"]
},
"sync_mode": "full_refresh",
"destination_sync_mode": "overwrite"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pathlib import Path

import pytest
from base_python import AirbyteLogger
from airbyte_cdk import AirbyteLogger
from source_file import SourceFile
from source_file.client import Client

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"dataset_name": "fake_csv",
"format": "csv",
"reader_options": "{\"sep\": \",\", \"nrows\": 20}",
"url": "https://test.fakr.com/cfake_data.csv",
"provider": {
"storage": "HTTPS",
"reader_impl": "fake"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import sys

from base_python.entrypoint import launch
from airbyte_cdk.entrypoint import launch
from source_file import SourceFile

if __name__ == "__main__":
Expand Down
3 changes: 1 addition & 2 deletions airbyte-integrations/connectors/source-file/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# This file is autogenerated -- only edit if you know what you are doing. Use setup.py for declaring dependencies.
-e ../../bases/airbyte-protocol
-e ../../bases/base-python
-e ../../bases/source-acceptance-test
-e .
9 changes: 2 additions & 7 deletions airbyte-integrations/connectors/source-file/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@
from setuptools import find_packages, setup

MAIN_REQUIREMENTS = [
"airbyte-protocol",
"base-python",
"airbyte-cdk~=0.1",
"gcsfs==0.7.1",
"genson==1.2.2",
"google-cloud-storage==1.35.0",
Expand All @@ -24,11 +23,7 @@
"pyxlsb==1.0.8",
]

TEST_REQUIREMENTS = [
"boto3==1.16.57",
"pytest==6.1.2",
"pytest-docker==0.10.1",
]
TEST_REQUIREMENTS = ["boto3==1.16.57", "pytest==6.1.2", "pytest-docker==0.10.1"]

setup(
name="source_file",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,11 @@
from urllib.parse import urlparse

import google
import numpy as np
import pandas as pd
import smart_open
from airbyte_protocol import AirbyteStream
from airbyte_cdk.entrypoint import logger
from airbyte_cdk.models import AirbyteStream, SyncMode
from azure.storage.blob import BlobServiceClient
from base_python.entrypoint import logger
from botocore import UNSIGNED
from botocore.config import Config
from genson import SchemaBuilder
Expand Down Expand Up @@ -339,7 +338,7 @@ def read(self, fields: Iterable = None) -> Iterable[dict]:
fields = set(fields) if fields else None
for df in self.load_dataframes(fp):
columns = fields.intersection(set(df.columns)) if fields else df.columns
df = df.replace(np.nan, "NaN", regex=True)
df = df.where(pd.notnull(df), None)
yield from df[columns].to_dict(orient="records")

def _stream_properties(self):
Expand All @@ -352,7 +351,7 @@ def _stream_properties(self):
for df in df_list:
for col in df.columns:
fields[col] = self.dtype_to_json_type(df[col].dtype)
return {field: {"type": fields[field]} for field in fields}
return {field: {"type": [fields[field], "null"]} for field in fields}

@property
def streams(self) -> Iterable:
Expand All @@ -363,4 +362,4 @@ def streams(self) -> Iterable:
"type": "object",
"properties": self._stream_properties(),
}
yield AirbyteStream(name=self.stream_name, json_schema=json_schema)
yield AirbyteStream(name=self.stream_name, json_schema=json_schema, supported_sync_modes=[SyncMode.full_refresh])
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
from datetime import datetime
from typing import Generator, Iterable, Mapping

from airbyte_protocol import (
from airbyte_cdk import AirbyteLogger
from airbyte_cdk.models import (
AirbyteCatalog,
AirbyteConnectionStatus,
AirbyteMessage,
Expand All @@ -16,7 +17,7 @@
Status,
Type,
)
from base_python import AirbyteLogger, Source
from airbyte_cdk.sources import Source

from .client import Client

Expand Down
24 changes: 16 additions & 8 deletions airbyte-integrations/connectors/source-file/source_file/spec.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@
"storage": {
"type": "string",
"enum": ["HTTPS"],
"default": "HTTPS"
"default": "HTTPS",
"const": "HTTPS"
}
}
},
Expand All @@ -50,7 +51,8 @@
"storage": {
"type": "string",
"enum": ["GCS"],
"default": "GCS"
"default": "GCS",
"const": "GCS"
},
"service_account_json": {
"type": "string",
Expand All @@ -65,7 +67,8 @@
"storage": {
"type": "string",
"enum": ["S3"],
"default": "S3"
"default": "S3",
"const": "S3"
},
"aws_access_key_id": {
"type": "string",
Expand All @@ -85,7 +88,8 @@
"storage": {
"type": "string",
"enum": ["AzBlob"],
"default": "AzBlob"
"default": "AzBlob",
"const": "AzBlob"
},
"storage_account": {
"type": "string",
Expand All @@ -110,7 +114,8 @@
"storage": {
"type": "string",
"enum": ["SSH"],
"default": "SSH"
"default": "SSH",
"const": "SSH"
},
"user": {
"type": "string"
Expand All @@ -135,7 +140,8 @@
"storage": {
"type": "string",
"enum": ["SCP"],
"default": "SCP"
"default": "SCP",
"const": "SCP"
},
"user": {
"type": "string"
Expand All @@ -160,7 +166,8 @@
"storage": {
"type": "string",
"enum": ["SFTP"],
"default": "SFTP"
"default": "SFTP",
"const": "SFTP"
},
"user": {
"type": "string"
Expand All @@ -186,7 +193,8 @@
"type": "string",
"description": "WARNING: Note that local storage URL available for read must start with the local mount \"/local/\" at the moment until we implement more advanced docker mounting options...",
"enum": ["local"],
"default": "local"
"default": "local",
"const": "local"
}
}
}
Expand Down
1 change: 1 addition & 0 deletions docs/integrations/sources/file.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ In order to read large files from a remote location, this connector uses the [sm

| Version | Date | Pull Request | Subject |
| :--- | :--- | :--- | :--- |
| 0.2.7 | 2021-10-28 | [7387](https://github.com/airbytehq/airbyte/pull/7387) | Migrate source to CDK structure, add SAT testing. |
| 0.2.6 | 2021-08-26 | [5613](https://github.com/airbytehq/airbyte/pull/5613) | Add support for xlsb format |
| 0.2.5 | 2021-07-26 | [4953](https://github.com/airbytehq/airbyte/pull/4953) | Allow non-default port for SFTP type |
| 0.2.4 | 2021-06-09 | [3973](https://github.com/airbytehq/airbyte/pull/3973) | Add AIRBYTE\_ENTRYPOINT for Kubernetes support |
Expand Down

0 comments on commit 269298c

Please sign in to comment.