diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/778daa7c-feaf-4db6-96f3-70fd645acc77.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/778daa7c-feaf-4db6-96f3-70fd645acc77.json
index 21c7e9f5d006c9..3fd159d8ec1bae 100644
--- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/778daa7c-feaf-4db6-96f3-70fd645acc77.json
+++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/778daa7c-feaf-4db6-96f3-70fd645acc77.json
@@ -2,7 +2,7 @@
   "sourceDefinitionId": "778daa7c-feaf-4db6-96f3-70fd645acc77",
   "name": "File",
   "dockerRepository": "airbyte/source-file",
-  "dockerImageTag": "0.2.2",
+  "dockerImageTag": "0.2.3",
   "documentationUrl": "https://hub.docker.com/r/airbyte/source-file",
   "icon": "file.svg"
 }
diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml
index 9e7c89faf1ff1a..986db7daa2898a 100644
--- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml
+++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml
@@ -12,7 +12,7 @@
 - sourceDefinitionId: 778daa7c-feaf-4db6-96f3-70fd645acc77
   name: File
   dockerRepository: airbyte/source-file
-  dockerImageTag: 0.2.2
+  dockerImageTag: 0.2.3
   documentationUrl: https://hub.docker.com/r/airbyte/source-file
   icon: file.svg
 - sourceDefinitionId: fdc8b827-3257-4b33-83cc-106d234c34d4
diff --git a/airbyte-integrations/connectors/source-file/Dockerfile b/airbyte-integrations/connectors/source-file/Dockerfile
index 99e06e72481109..548c24b4ab9f74 100644
--- a/airbyte-integrations/connectors/source-file/Dockerfile
+++ b/airbyte-integrations/connectors/source-file/Dockerfile
@@ -11,5 +11,5 @@ COPY $CODE_PATH ./$CODE_PATH
 COPY setup.py ./
 RUN pip install .
 
-LABEL io.airbyte.version=0.2.2
+LABEL io.airbyte.version=0.2.3
 LABEL io.airbyte.name=airbyte/source-file
diff --git a/airbyte-integrations/connectors/source-file/integration_tests/client_storage_providers_test.py b/airbyte-integrations/connectors/source-file/integration_tests/client_storage_providers_test.py
index 441cf59c8300eb..2b58231e9c23e4 100644
--- a/airbyte-integrations/connectors/source-file/integration_tests/client_storage_providers_test.py
+++ b/airbyte-integrations/connectors/source-file/integration_tests/client_storage_providers_test.py
@@ -146,19 +146,18 @@ def test__read_from_private_aws(aws_credentials, private_aws_file):
     }
     check_read(config)
 
+
 def test__read_from_public_azblob(azblob_credentials, public_azblob_file):
     config = {
         "dataset_name": "output",
         "format": "csv",
         "url": public_azblob_file,
         "reader_options": json.dumps({"sep": ",", "nrows": 42}),
-        "provider": {
-            "storage": "AzBlob",
-            "storage_account": azblob_credentials["storage_account"]
-        },
+        "provider": {"storage": "AzBlob", "storage_account": azblob_credentials["storage_account"]},
     }
     check_read(config)
 
+
 def test__read_from_private_azblob_shared_key(azblob_credentials, private_azblob_file):
     config = {
         "dataset_name": "output",
@@ -168,11 +167,12 @@ def test__read_from_private_azblob_shared_key(azblob_credentials, private_azblob_file):
         "provider": {
             "storage": "AzBlob",
             "storage_account": azblob_credentials["storage_account"],
-            "shared_key": azblob_credentials["shared_key"]
+            "shared_key": azblob_credentials["shared_key"],
         },
     }
     check_read(config)
 
+
 def test__read_from_private_azblob_sas_token(azblob_credentials, private_azblob_file):
     config = {
         "dataset_name": "output",
@@ -182,7 +182,7 @@ def test__read_from_private_azblob_sas_token(azblob_credentials, private_azblob_file):
         "provider": {
             "storage": "AzBlob",
             "storage_account": azblob_credentials["storage_account"],
-            "sas_token": azblob_credentials["sas_token"]
+            "sas_token": azblob_credentials["sas_token"],
         },
     }
     check_read(config)
diff --git a/airbyte-integrations/connectors/source-file/integration_tests/conftest.py b/airbyte-integrations/connectors/source-file/integration_tests/conftest.py
index 18d025b11f51b1..bbc0d1828b02b2 100644
--- a/airbyte-integrations/connectors/source-file/integration_tests/conftest.py
+++ b/airbyte-integrations/connectors/source-file/integration_tests/conftest.py
@@ -34,12 +34,12 @@
 import boto3
 import pandas
 import pytest
+from azure.storage.blob import BlobServiceClient
 from botocore.errorfactory import ClientError
 from google.api_core.exceptions import Conflict
 from google.cloud import storage
 from paramiko.client import AutoAddPolicy, SSHClient
 from paramiko.ssh_exception import SSHException
-from azure.storage.blob import BlobServiceClient
 
 
 HERE = Path(__file__).parent.absolute()
@@ -192,32 +192,35 @@ def private_aws_file(aws_credentials, cloud_bucket_name, download_gcs_public_data):
         bucket.objects.all().delete()
         print(f"\nS3 Bucket {bucket_name} is now deleted")
 
+
 def azblob_file(azblob_credentials, cloud_bucket_name, download_gcs_public_data, public=False):
     acc_url = f"https://{azblob_credentials['storage_account']}.blob.core.windows.net"
-    azblob_client = BlobServiceClient(account_url=acc_url, credential=azblob_credentials['shared_key'])
-    container_name = cloud_bucket_name
+    azblob_client = BlobServiceClient(account_url=acc_url, credential=azblob_credentials["shared_key"])
+    container_name = cloud_bucket_name
     if public:
         container_name += "public"
     print(f"\nUpload dataset to private azure blob container {container_name}")
-    if container_name not in [cntr['name'] for cntr in azblob_client.list_containers()]:
+    if container_name not in [cntr["name"] for cntr in azblob_client.list_containers()]:
         if public:
-            azblob_client.create_container(name=container_name, metadata=None, public_access='container')
+            azblob_client.create_container(name=container_name, metadata=None, public_access="container")
         else:
             azblob_client.create_container(name=container_name, metadata=None, public_access=None)
     blob_client = azblob_client.get_blob_client(container_name, "myfile.csv")
     with open(download_gcs_public_data, "r") as f:
-        blob_client.upload_blob(f.read(), blob_type='BlockBlob', overwrite=True)
+        blob_client.upload_blob(f.read(), blob_type="BlockBlob", overwrite=True)
     yield f"{container_name}/myfile.csv"
     azblob_client.delete_container(container_name)
     print(f"\nAzure Blob Container {container_name} is now marked for deletion")
 
+
 @pytest.fixture(scope="session")
 def private_azblob_file(azblob_credentials, cloud_bucket_name, download_gcs_public_data):
     for yld in azblob_file(azblob_credentials, cloud_bucket_name, download_gcs_public_data, public=False):
         yield yld
 
+
 @pytest.fixture(scope="session")
 def public_azblob_file(azblob_credentials, cloud_bucket_name, download_gcs_public_data):
     for yld in azblob_file(azblob_credentials, cloud_bucket_name, download_gcs_public_data, public=True):
diff --git a/airbyte-integrations/connectors/source-file/source_file/client.py b/airbyte-integrations/connectors/source-file/source_file/client.py
index f9e7bdb83dbe3f..08b7f3ff6bc257 100644
--- a/airbyte-integrations/connectors/source-file/source_file/client.py
+++ b/airbyte-integrations/connectors/source-file/source_file/client.py
@@ -33,13 +33,13 @@
 import pandas as pd
 import smart_open
 from airbyte_protocol import AirbyteStream
+from azure.storage.blob import BlobServiceClient
 from base_python.entrypoint import logger
 from botocore import UNSIGNED
 from botocore.config import Config
 from genson import SchemaBuilder
 from google.cloud.storage import Client as GCSClient
 from google.oauth2 import service_account
-from azure.storage.blob import BlobServiceClient
 
 
 class ConfigurationError(Exception):
@@ -223,7 +223,7 @@ def _open_azblob_url(self, binary):
         else:
             # assuming anonymous public read access given no credential
             client = BlobServiceClient(account_url=storage_acc_url)
-
+
         result = smart_open.open(f"{self.storage_scheme}{self.url}", transport_params=dict(client=client), mode=mode)
         return result
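
For reviewers, a minimal sketch of the read path that the `_open_azblob_url` change enables, outside the connector. The account name, container, and blob below are hypothetical placeholders, not taken from the diff; the sketch assumes the connector's `storage_scheme` for AzBlob is smart_open's `azure://` scheme, so that `f"{self.storage_scheme}{self.url}"` resolves through the `BlobServiceClient` passed in `transport_params`.

import smart_open
from azure.storage.blob import BlobServiceClient

# Hypothetical storage account; passing no credential relies on anonymous
# public read access, mirroring the connector's fallback branch above.
account_url = "https://mystorageaccount.blob.core.windows.net"
client = BlobServiceClient(account_url=account_url)

# For private containers, the provider config's shared_key or sas_token
# would be passed as credential=..., matching the two new private tests.
with smart_open.open("azure://mycontainer/myfile.csv", transport_params=dict(client=client), mode="r") as f:
    print(f.readline())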