Skip to content

Commit

Permalink
Release Source File (#3771)
Browse files Browse the repository at this point in the history
  • Loading branch information
davinchia committed Jun 1, 2021
1 parent 9360ed0 commit cd24ccd
Show file tree
Hide file tree
Showing 6 changed files with 20 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"sourceDefinitionId": "778daa7c-feaf-4db6-96f3-70fd645acc77",
"name": "File",
"dockerRepository": "airbyte/source-file",
"dockerImageTag": "0.2.2",
"dockerImageTag": "0.2.3",
"documentationUrl": "https://hub.docker.com/r/airbyte/source-file",
"icon": "file.svg"
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
- sourceDefinitionId: 778daa7c-feaf-4db6-96f3-70fd645acc77
name: File
dockerRepository: airbyte/source-file
dockerImageTag: 0.2.2
dockerImageTag: 0.2.3
documentationUrl: https://hub.docker.com/r/airbyte/source-file
icon: file.svg
- sourceDefinitionId: fdc8b827-3257-4b33-83cc-106d234c34d4
Expand Down
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-file/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,5 @@ COPY $CODE_PATH ./$CODE_PATH
COPY setup.py ./
RUN pip install .

LABEL io.airbyte.version=0.2.2
LABEL io.airbyte.version=0.2.3
LABEL io.airbyte.name=airbyte/source-file
Original file line number Diff line number Diff line change
Expand Up @@ -146,19 +146,18 @@ def test__read_from_private_aws(aws_credentials, private_aws_file):
}
check_read(config)


def test__read_from_public_azblob(azblob_credentials, public_azblob_file):
    """Read a CSV dataset from a publicly readable Azure Blob container.

    Builds a source-file connector config pointing at the public blob and
    delegates record validation to ``check_read``.
    """
    config = {
        "dataset_name": "output",
        "format": "csv",
        "url": public_azblob_file,
        # reader_options are forwarded to pandas; nrows caps the rows read.
        "reader_options": json.dumps({"sep": ",", "nrows": 42}),
        # Public container: anonymous read access, so only the storage
        # account name is required — no shared key or SAS token.
        "provider": {"storage": "AzBlob", "storage_account": azblob_credentials["storage_account"]},
    }
    check_read(config)


def test__read_from_private_azblob_shared_key(azblob_credentials, private_azblob_file):
config = {
"dataset_name": "output",
Expand All @@ -168,11 +167,12 @@ def test__read_from_private_azblob_shared_key(azblob_credentials, private_azblob
"provider": {
"storage": "AzBlob",
"storage_account": azblob_credentials["storage_account"],
"shared_key": azblob_credentials["shared_key"]
"shared_key": azblob_credentials["shared_key"],
},
}
check_read(config)


def test__read_from_private_azblob_sas_token(azblob_credentials, private_azblob_file):
config = {
"dataset_name": "output",
Expand All @@ -182,7 +182,7 @@ def test__read_from_private_azblob_sas_token(azblob_credentials, private_azblob_
"provider": {
"storage": "AzBlob",
"storage_account": azblob_credentials["storage_account"],
"sas_token": azblob_credentials["sas_token"]
"sas_token": azblob_credentials["sas_token"],
},
}
check_read(config)
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,12 @@
import boto3
import pandas
import pytest
from azure.storage.blob import BlobServiceClient
from botocore.errorfactory import ClientError
from google.api_core.exceptions import Conflict
from google.cloud import storage
from paramiko.client import AutoAddPolicy, SSHClient
from paramiko.ssh_exception import SSHException
from azure.storage.blob import BlobServiceClient

HERE = Path(__file__).parent.absolute()

Expand Down Expand Up @@ -192,32 +192,35 @@ def private_aws_file(aws_credentials, cloud_bucket_name, download_gcs_public_dat
bucket.objects.all().delete()
print(f"\nS3 Bucket {bucket_name} is now deleted")


def azblob_file(azblob_credentials, cloud_bucket_name, download_gcs_public_data, public=False):
    """Upload the test dataset to an Azure Blob container and yield its path.

    Creates the container if it does not already exist (with anonymous
    container-level read access when ``public=True``), uploads the CSV file
    downloaded by ``download_gcs_public_data`` as ``myfile.csv``, yields the
    ``<container>/myfile.csv`` path for the test, and finally deletes the
    container on teardown.

    :param azblob_credentials: dict with at least ``storage_account`` and
        ``shared_key`` entries used to authenticate the BlobServiceClient.
    :param cloud_bucket_name: base container name; ``"public"`` is appended
        for the public variant so the two containers never collide.
    :param download_gcs_public_data: local path of the CSV fixture to upload.
    :param public: when True, the container is created with anonymous
        container-level read access.
    """
    acc_url = f"https://{azblob_credentials['storage_account']}.blob.core.windows.net"
    azblob_client = BlobServiceClient(account_url=acc_url, credential=azblob_credentials["shared_key"])
    container_name = cloud_bucket_name
    if public:
        container_name += "public"
    print(f"\nUpload dataset to private azure blob container {container_name}")
    # Create the container only if it is not already present.
    if container_name not in [cntr["name"] for cntr in azblob_client.list_containers()]:
        if public:
            # 'container' grants anonymous read access to blobs and container metadata.
            azblob_client.create_container(name=container_name, metadata=None, public_access="container")
        else:
            azblob_client.create_container(name=container_name, metadata=None, public_access=None)
    blob_client = azblob_client.get_blob_client(container_name, "myfile.csv")
    with open(download_gcs_public_data, "r") as f:
        # overwrite=True makes repeated test runs idempotent.
        blob_client.upload_blob(f.read(), blob_type="BlockBlob", overwrite=True)

    yield f"{container_name}/myfile.csv"

    # Teardown: remove the whole container (and the uploaded blob with it).
    azblob_client.delete_container(container_name)
    print(f"\nAzure Blob Container {container_name} is now marked for deletion")


@pytest.fixture(scope="session")
def private_azblob_file(azblob_credentials, cloud_bucket_name, download_gcs_public_data):
    """Session-scoped fixture: dataset path in a private Azure Blob container.

    Delegates setup/teardown entirely to the ``azblob_file`` generator with
    ``public=False``.
    """
    yield from azblob_file(azblob_credentials, cloud_bucket_name, download_gcs_public_data, public=False)


@pytest.fixture(scope="session")
def public_azblob_file(azblob_credentials, cloud_bucket_name, download_gcs_public_data):
for yld in azblob_file(azblob_credentials, cloud_bucket_name, download_gcs_public_data, public=True):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,13 @@
import pandas as pd
import smart_open
from airbyte_protocol import AirbyteStream
from azure.storage.blob import BlobServiceClient
from base_python.entrypoint import logger
from botocore import UNSIGNED
from botocore.config import Config
from genson import SchemaBuilder
from google.cloud.storage import Client as GCSClient
from google.oauth2 import service_account
from azure.storage.blob import BlobServiceClient


class ConfigurationError(Exception):
Expand Down Expand Up @@ -223,7 +223,7 @@ def _open_azblob_url(self, binary):
else:
# assuming anonymous public read access given no credential
client = BlobServiceClient(account_url=storage_acc_url)

result = smart_open.open(f"{self.storage_scheme}{self.url}", transport_params=dict(client=client), mode=mode)
return result

Expand Down

0 comments on commit cd24ccd

Please sign in to comment.