From 7208973cac39a40e9d65ba03b232862b9f9d5649 Mon Sep 17 00:00:00 2001 From: Torsten Kilias Date: Tue, 16 Feb 2021 20:11:34 +0100 Subject: [PATCH 01/24] Add upload and download functions --- .../bucketfs_config.py | 14 +++++++ .../bucketfs_udf_utils.py | 27 ++++++++++++++ exasol_bucketfs_utils_python/download.py | 37 +++++++++++++++++++ exasol_bucketfs_utils_python/upload.py | 37 +++++++++++++++++++ pyproject.toml | 1 + setup.py | 2 +- 6 files changed, 117 insertions(+), 1 deletion(-) create mode 100644 exasol_bucketfs_utils_python/bucketfs_config.py create mode 100644 exasol_bucketfs_utils_python/bucketfs_udf_utils.py create mode 100644 exasol_bucketfs_utils_python/download.py create mode 100644 exasol_bucketfs_utils_python/upload.py diff --git a/exasol_bucketfs_utils_python/bucketfs_config.py b/exasol_bucketfs_utils_python/bucketfs_config.py new file mode 100644 index 00000000..f0a338fc --- /dev/null +++ b/exasol_bucketfs_utils_python/bucketfs_config.py @@ -0,0 +1,14 @@ +class BucketFSCredentials: + def __init__(self, host="localhost", port=6666, user="w", pwd="write"): + self.host = host + self.port = port + self.user = user + self.pwd = pwd + + +class BucketFsConfig: + def __init__(self, credentials: BucketFSCredentials, bucket="default", bucketfs_name="bfsdefault", is_https=False): + self.is_https = is_https + self.credentials = credentials + self.bucket = bucket + self.bucketfs_name = bucketfs_name \ No newline at end of file diff --git a/exasol_bucketfs_utils_python/bucketfs_udf_utils.py b/exasol_bucketfs_utils_python/bucketfs_udf_utils.py new file mode 100644 index 00000000..7c90e438 --- /dev/null +++ b/exasol_bucketfs_utils_python/bucketfs_udf_utils.py @@ -0,0 +1,27 @@ +from exasol_bucketfs_utils_python.bucketfs_config import BucketFsConfig + + +def get_bucketfs_udf_path(bucketfs_config: BucketFsConfig, file_name: str): + archive_extensions = [".tar.gz", ".tar.bz2", ".zip", ".tar"] + for extension in archive_extensions: + if file_name.endswith(extension): + file_name = file_name[:-len(extension)] + break + path = f"/buckets/{bucketfs_config.bucketfs_name}/{bucketfs_config.bucket}/{file_name}" + return path + + +def generate_bucketfs_url(bucketfs_config: BucketFsConfig, file_name: str, with_credentials: bool = True): + if with_credentials: + credentials = f"{bucketfs_config.credentials.user}:{bucketfs_config.credentials.pwd}@" + else: + credentials = "" + if bucketfs_config.is_https: + protocol = "https" + else: + protocol = "http" + url = f"{protocol}://{credentials}" \ + f"{bucketfs_config.credentials.host}:{bucketfs_config.credentials.port}/{bucketfs_config.bucket}/" + if file_name is not None: + url += f"{file_name}" + return url \ No newline at end of file diff --git a/exasol_bucketfs_utils_python/download.py b/exasol_bucketfs_utils_python/download.py new file mode 100644 index 00000000..15304f48 --- /dev/null +++ b/exasol_bucketfs_utils_python/download.py @@ -0,0 +1,37 @@ +import os +import uuid +from pathlib import Path + +import requests +import joblib + +from exasol_bucketfs_utils_python.bucketfs_config import BucketFsConfig +from exasol_bucketfs_utils_python.bucketfs_udf_utils import generate_bucketfs_url + + +def download_from_bucketfs_to_file(bucketfs_config: BucketFsConfig, file_name: str, file_path: Path): + url = generate_bucketfs_url(bucketfs_config, file_name) + with requests.get(url, stream=True) as response: + response.raise_for_status() + with file_path.open("wb") as f: + for chunk in response.iter_content(chunk_size=8192): + f.write(chunk) + +def download_from_bucketfs_to_string(bucketfs_config: BucketFsConfig, file_name: str)->str: + url = generate_bucketfs_url(bucketfs_config, file_name) + response = requests.get(url) + response.raise_for_status() + return response.text + +def download_object_from_bucketfs_via_joblib(object, bucketfs_config: BucketFsConfig, file_name: str, compress=True): + temp_file = Path("/tmp/" + str(uuid.uuid4().hex + ".pkl")) + try: + joblib.dump(object, str(temp_file), compress=compress) + download_from_bucketfs_to_file(bucketfs_config, file_name, temp_file) + object = joblib.load(temp_file) + return object + finally: + try: + os.remove(temp_file) + except OSError: + pass \ No newline at end of file diff --git a/exasol_bucketfs_utils_python/upload.py b/exasol_bucketfs_utils_python/upload.py new file mode 100644 index 00000000..1539fa5d --- /dev/null +++ b/exasol_bucketfs_utils_python/upload.py @@ -0,0 +1,37 @@ +import os +import uuid +from pathlib import Path + +import joblib +import requests + +from exasol_bucketfs_utils_python.bucketfs_config import BucketFsConfig +from exasol_bucketfs_utils_python.bucketfs_udf_utils import generate_bucketfs_url, get_bucketfs_udf_path + + +def upload_file_to_bucketfs(bucketfs_config: BucketFsConfig, file_name: str, file_path: Path): + with file_path.open("rb") as f: + url = generate_bucketfs_url(bucketfs_config, file_name) + response = requests.put(url, data=f) + response.raise_for_status() + path = get_bucketfs_udf_path(bucketfs_config, file_name) + return url, path + + +def upload_string_to_bucketfs(bucketfs_config: BucketFsConfig, file_name: str, string: str): + url = generate_bucketfs_url(bucketfs_config, file_name) + response = requests.put(url, data=string.encode("UTF-8")) + response.raise_for_status() + path = get_bucketfs_udf_path(bucketfs_config, file_name) + return url, path + +def upload_object_to_bucketfs_via_joblib(object, bucketfs_config: BucketFsConfig, file_name: str, compress=True): + temp_file = Path("/tmp/" + str(uuid.uuid4().hex + ".pkl")) + try: + joblib.dump(object, str(temp_file), compress=compress) + upload_file_to_bucketfs(bucketfs_config, file_name, temp_file) + finally: + try: + os.remove(temp_file) + except OSError: + pass \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 7a40e031..3e8ab0dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,7 @@ keywords = ['exasol', 'bucketfs'] [tool.poetry.dependencies] python = ">=3.6.1" requests = "^2.24.0" +joblib="^1.0.1" [tool.poetry.dev-dependencies] pytest = "^6.1.1" diff --git a/setup.py b/setup.py index 6a9bc01d..f4f0fe12 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ {'': ['*']} install_requires = \ -['requests>=2.24.0,<3.0.0'] +['joblib>=1.0.1,<2.0.0', 'requests>=2.24.0,<3.0.0'] setup_kwargs = { 'name': 'exasol-bucketfs-utils-python', From 536798788ca05033aab41e2efadef717f0adf808 Mon Sep 17 00:00:00 2001 From: Torsten Kilias Date: Tue, 16 Feb 2021 21:09:58 +0100 Subject: [PATCH 02/24] Refactor out upload and download on fileobj and use NamedTemporaryFile for temp files --- exasol_bucketfs_utils_python/download.py | 33 +++++++++++----------- exasol_bucketfs_utils_python/upload.py | 36 +++++++++++++++--------- 2 files changed, 39 insertions(+), 30 deletions(-) diff --git a/exasol_bucketfs_utils_python/download.py b/exasol_bucketfs_utils_python/download.py index 15304f48..452775fe 100644 --- a/exasol_bucketfs_utils_python/download.py +++ b/exasol_bucketfs_utils_python/download.py @@ -1,37 +1,38 @@ -import os -import uuid +import typing from pathlib import Path +from tempfile import NamedTemporaryFile -import requests import joblib +import requests from exasol_bucketfs_utils_python.bucketfs_config import BucketFsConfig from exasol_bucketfs_utils_python.bucketfs_udf_utils import generate_bucketfs_url def download_from_bucketfs_to_file(bucketfs_config: BucketFsConfig, file_name: str, file_path: Path): + with file_path.open("wb") as f: + download_from_bucketfs_to_fileobj(bucketfs_config, file_name, f) + + +def download_from_bucketfs_to_fileobj(bucketfs_config: BucketFsConfig, file_name: str, fileobj: typing.IO): url = generate_bucketfs_url(bucketfs_config, file_name) with requests.get(url, stream=True) as response: response.raise_for_status() - with file_path.open("wb") as f: - for chunk in response.iter_content(chunk_size=8192): - f.write(chunk) + for chunk in response.iter_content(chunk_size=8192): + fileobj.write(chunk) + -def download_from_bucketfs_to_string(bucketfs_config: BucketFsConfig, file_name: str)->str: +def download_from_bucketfs_to_string(bucketfs_config: BucketFsConfig, file_name: str) -> str: url = generate_bucketfs_url(bucketfs_config, file_name) response = requests.get(url) response.raise_for_status() return response.text + def download_object_from_bucketfs_via_joblib(object, bucketfs_config: BucketFsConfig, file_name: str, compress=True): - temp_file = Path("/tmp/" + str(uuid.uuid4().hex + ".pkl")) - try: - joblib.dump(object, str(temp_file), compress=compress) - download_from_bucketfs_to_file(bucketfs_config, file_name, temp_file) + with NamedTemporaryFile() as temp_file: + download_from_bucketfs_to_fileobj(bucketfs_config, file_name, temp_file) + temp_file.flush() + temp_file.seek(0) object = joblib.load(temp_file) return object - finally: - try: - os.remove(temp_file) - except OSError: - pass \ No newline at end of file diff --git a/exasol_bucketfs_utils_python/upload.py b/exasol_bucketfs_utils_python/upload.py index 1539fa5d..1561b217 100644 --- a/exasol_bucketfs_utils_python/upload.py +++ b/exasol_bucketfs_utils_python/upload.py @@ -1,6 +1,7 @@ import os -import uuid +import typing from pathlib import Path +from tempfile import NamedTemporaryFile import joblib import requests @@ -11,11 +12,15 @@ def upload_file_to_bucketfs(bucketfs_config: BucketFsConfig, file_name: str, file_path: Path): with file_path.open("rb") as f: - url = generate_bucketfs_url(bucketfs_config, file_name) - response = requests.put(url, data=f) - response.raise_for_status() - path = get_bucketfs_udf_path(bucketfs_config, file_name) - return url, path + upload_fileobj_to_bucketfs(bucketfs_config, file_name, f) + + +def upload_fileobj_to_bucketfs(bucketfs_config: BucketFsConfig, file_name: str, fileobj: typing.IO): + url = generate_bucketfs_url(bucketfs_config, file_name) + response = requests.put(url, data=fileobj) + response.raise_for_status() + path = get_bucketfs_udf_path(bucketfs_config, file_name) + return url, path def upload_string_to_bucketfs(bucketfs_config: BucketFsConfig, file_name: str, string: str): @@ -25,13 +30,16 @@ def upload_string_to_bucketfs(bucketfs_config: BucketFsConfig, file_name: str, s path = get_bucketfs_udf_path(bucketfs_config, file_name) return url, path + def upload_object_to_bucketfs_via_joblib(object, bucketfs_config: BucketFsConfig, file_name: str, compress=True): - temp_file = Path("/tmp/" + str(uuid.uuid4().hex + ".pkl")) - try: - joblib.dump(object, str(temp_file), compress=compress) - upload_file_to_bucketfs(bucketfs_config, file_name, temp_file) - finally: + with NamedTemporaryFile() as temp_file: try: - os.remove(temp_file) - except OSError: - pass \ No newline at end of file + joblib.dump(object, temp_file.name, compress=compress) + temp_file.flush() + temp_file.seek(0) + upload_fileobj_to_bucketfs(bucketfs_config, file_name, temp_file) + finally: + try: + os.remove(temp_file.name) + except OSError: + pass From 69803502c6ebbbffab8e09aca27e07551fbb02f2 Mon Sep 17 00:00:00 2001 From: Torsten Kilias Date: Tue, 16 Feb 2021 21:12:48 +0100 Subject: [PATCH 03/24] Cleanup --- exasol_bucketfs_utils_python/upload.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/exasol_bucketfs_utils_python/upload.py b/exasol_bucketfs_utils_python/upload.py index 1561b217..4128c5db 100644 --- a/exasol_bucketfs_utils_python/upload.py +++ b/exasol_bucketfs_utils_python/upload.py @@ -33,13 +33,7 @@ def upload_string_to_bucketfs(bucketfs_config: BucketFsConfig, file_name: str, s def upload_object_to_bucketfs_via_joblib(object, bucketfs_config: BucketFsConfig, file_name: str, compress=True): with NamedTemporaryFile() as temp_file: - try: - joblib.dump(object, temp_file.name, compress=compress) - temp_file.flush() - temp_file.seek(0) - upload_fileobj_to_bucketfs(bucketfs_config, file_name, temp_file) - finally: - try: - os.remove(temp_file.name) - except OSError: - pass + joblib.dump(object, temp_file.name, compress=compress) + temp_file.flush() + temp_file.seek(0) + upload_fileobj_to_bucketfs(bucketfs_config, file_name, temp_file) From 49301066e7cd103aa1987e55314c5c9b75b1716b Mon Sep 17 00:00:00 2001 From: Torsten Kilias Date: Wed, 17 Feb 2021 09:24:30 +0100 Subject: [PATCH 04/24] Improve parameter names and add tests --- .../bucketfs_config.py | 4 +- exasol_bucketfs_utils_python/download.py | 14 ++-- exasol_bucketfs_utils_python/upload.py | 22 ++--- tests/test_upload_download.py | 80 +++++++++++++++++++ 4 files changed, 100 insertions(+), 20 deletions(-) create mode 100644 tests/test_upload_download.py diff --git a/exasol_bucketfs_utils_python/bucketfs_config.py b/exasol_bucketfs_utils_python/bucketfs_config.py index f0a338fc..e8dcea38 100644 --- a/exasol_bucketfs_utils_python/bucketfs_config.py +++ b/exasol_bucketfs_utils_python/bucketfs_config.py @@ -1,5 +1,5 @@ class BucketFSCredentials: - def __init__(self, host="localhost", port=6666, user="w", pwd="write"): + def __init__(self, host, port, user, pwd): self.host = host self.port = port self.user = user @@ -7,7 +7,7 @@ def __init__(self, host="localhost", port=6666, user="w", pwd="write"): class BucketFsConfig: - def __init__(self, credentials: BucketFSCredentials, bucket="default", bucketfs_name="bfsdefault", is_https=False): + def __init__(self, credentials: BucketFSCredentials, bucket, bucketfs_name, is_https=False): self.is_https = is_https self.credentials = credentials self.bucket = bucket diff --git a/exasol_bucketfs_utils_python/download.py b/exasol_bucketfs_utils_python/download.py index 452775fe..0b793e56 100644 --- a/exasol_bucketfs_utils_python/download.py +++ b/exasol_bucketfs_utils_python/download.py @@ -9,21 +9,21 @@ from exasol_bucketfs_utils_python.bucketfs_udf_utils import generate_bucketfs_url -def download_from_bucketfs_to_file(bucketfs_config: BucketFsConfig, file_name: str, file_path: Path): - with file_path.open("wb") as f: - download_from_bucketfs_to_fileobj(bucketfs_config, file_name, f) +def download_from_bucketfs_to_file(bucketfs_config: BucketFsConfig, bucket_file_path: str, local_file_path: Path): + with local_file_path.open("wb") as f: + download_from_bucketfs_to_fileobj(bucketfs_config, bucket_file_path, f) -def download_from_bucketfs_to_fileobj(bucketfs_config: BucketFsConfig, file_name: str, fileobj: typing.IO): - url = generate_bucketfs_url(bucketfs_config, file_name) +def download_from_bucketfs_to_fileobj(bucketfs_config: BucketFsConfig, bucket_file_path: str, fileobj: typing.IO): + url = generate_bucketfs_url(bucketfs_config, bucket_file_path) with requests.get(url, stream=True) as response: response.raise_for_status() for chunk in response.iter_content(chunk_size=8192): fileobj.write(chunk) -def download_from_bucketfs_to_string(bucketfs_config: BucketFsConfig, file_name: str) -> str: - url = generate_bucketfs_url(bucketfs_config, file_name) +def download_from_bucketfs_to_string(bucketfs_config: BucketFsConfig, bucket_file_path: str) -> str: + url = generate_bucketfs_url(bucketfs_config, bucket_file_path) response = requests.get(url) response.raise_for_status() return response.text diff --git a/exasol_bucketfs_utils_python/upload.py b/exasol_bucketfs_utils_python/upload.py index 4128c5db..f47f78ac 100644 --- a/exasol_bucketfs_utils_python/upload.py +++ b/exasol_bucketfs_utils_python/upload.py @@ -10,30 +10,30 @@ from exasol_bucketfs_utils_python.bucketfs_udf_utils import generate_bucketfs_url, get_bucketfs_udf_path -def upload_file_to_bucketfs(bucketfs_config: BucketFsConfig, file_name: str, file_path: Path): - with file_path.open("rb") as f: - upload_fileobj_to_bucketfs(bucketfs_config, file_name, f) +def upload_file_to_bucketfs(bucketfs_config: BucketFsConfig, bucket_file_path: str, local_file_path: Path): + with local_file_path.open("rb") as f: + upload_fileobj_to_bucketfs(bucketfs_config, bucket_file_path, f) -def upload_fileobj_to_bucketfs(bucketfs_config: BucketFsConfig, file_name: str, fileobj: typing.IO): - url = generate_bucketfs_url(bucketfs_config, file_name) +def upload_fileobj_to_bucketfs(bucketfs_config: BucketFsConfig, bucket_file_path: str, fileobj: typing.IO): + url = generate_bucketfs_url(bucketfs_config, bucket_file_path) response = requests.put(url, data=fileobj) response.raise_for_status() - path = get_bucketfs_udf_path(bucketfs_config, file_name) + path = get_bucketfs_udf_path(bucketfs_config, bucket_file_path) return url, path -def upload_string_to_bucketfs(bucketfs_config: BucketFsConfig, file_name: str, string: str): - url = generate_bucketfs_url(bucketfs_config, file_name) +def upload_string_to_bucketfs(bucketfs_config: BucketFsConfig, bucket_file_path: str, string: str): + url = generate_bucketfs_url(bucketfs_config, bucket_file_path) response = requests.put(url, data=string.encode("UTF-8")) response.raise_for_status() - path = get_bucketfs_udf_path(bucketfs_config, file_name) + path = get_bucketfs_udf_path(bucketfs_config, bucket_file_path) return url, path -def upload_object_to_bucketfs_via_joblib(object, bucketfs_config: BucketFsConfig, file_name: str, compress=True): +def upload_object_to_bucketfs_via_joblib(object, bucketfs_config: BucketFsConfig, bucket_file_path: str, compress=True): with NamedTemporaryFile() as temp_file: joblib.dump(object, temp_file.name, compress=compress) temp_file.flush() temp_file.seek(0) - upload_fileobj_to_bucketfs(bucketfs_config, file_name, temp_file) + upload_fileobj_to_bucketfs(bucketfs_config, bucket_file_path, temp_file) diff --git a/tests/test_upload_download.py b/tests/test_upload_download.py new file mode 100644 index 00000000..02b61c12 --- /dev/null +++ b/tests/test_upload_download.py @@ -0,0 +1,80 @@ +from pathlib import Path +from tempfile import NamedTemporaryFile + +from exasol_bucketfs_utils_python import upload, download +from exasol_bucketfs_utils_python.bucketfs_config import BucketFsConfig, BucketFSCredentials + + +def test_file_upload_download(): + bucketfs_credentials = BucketFSCredentials(host="localhost", port="6666", user="w", pwd="write") + bucketfs_config = BucketFsConfig(credentials=bucketfs_credentials, + bucket="default", + bucketfs_name="bfsdefault", + is_https=False) + with NamedTemporaryFile() as input_temp_file: + test_byte_string = b"test_byte_string" + input_temp_file.write(test_byte_string) + input_temp_file.flush() + + path_in_bucket = "path/in/bucket/file.txt" + upload.upload_file_to_bucketfs( + bucketfs_config=bucketfs_config, + bucket_file_path=path_in_bucket, + local_file_path=Path(input_temp_file.name)) + + with NamedTemporaryFile() as output_temp_file: + download.download_from_bucketfs_to_file( + bucketfs_config=bucketfs_config, + bucket_file_path=path_in_bucket, + local_file_path=Path(output_temp_file.name)) + output_test_byte_string = output_temp_file.read() + assert test_byte_string == output_test_byte_string + + +def test_fileobj_upload_download(): + bucketfs_credentials = BucketFSCredentials(host="localhost", port="6666", user="w", pwd="write") + bucketfs_config = BucketFsConfig(credentials=bucketfs_credentials, + bucket="default", + bucketfs_name="bfsdefault", + is_https=False) + with NamedTemporaryFile() as input_temp_file: + test_byte_string = b"test_byte_string" + input_temp_file.write(test_byte_string) + input_temp_file.flush() + input_temp_file.seek(0) + path_in_bucket = "path/in/bucket/file.txt" + upload.upload_fileobj_to_bucketfs( + bucketfs_config=bucketfs_config, + bucket_file_path=path_in_bucket, + fileobj=input_temp_file) + + with NamedTemporaryFile() as output_temp_file: + download.download_from_bucketfs_to_fileobj( + bucketfs_config=bucketfs_config, + bucket_file_path=path_in_bucket, + fileobj=output_temp_file) + output_temp_file.flush() + output_temp_file.seek(0) + output_test_byte_string = output_temp_file.read() + assert test_byte_string == output_test_byte_string + + +def test_string_upload_download(): + bucketfs_credentials = BucketFSCredentials(host="localhost", port="6666", user="w", pwd="write") + bucketfs_config = BucketFsConfig(credentials=bucketfs_credentials, + bucket="default", + bucketfs_name="bfsdefault", + is_https=False) + test_string = "test_string" + path_in_bucket = "path/in/bucket/file.txt" + upload.upload_string_to_bucketfs( + bucketfs_config=bucketfs_config, + bucket_file_path=path_in_bucket, + string=test_string) + + output_test_string = \ + download.download_from_bucketfs_to_string( + bucketfs_config=bucketfs_config, + bucket_file_path=path_in_bucket) + + assert test_string == output_test_string \ No newline at end of file From c02a252b56a2bbe150f4b751e4616d0d9a809f59 Mon Sep 17 00:00:00 2001 From: Torsten Kilias Date: Wed, 17 Feb 2021 10:14:44 +0100 Subject: [PATCH 05/24] Add tests for bucketfs_utils.py and some fixes --- ...ucketfs_udf_utils.py => bucketfs_utils.py} | 20 +++-- exasol_bucketfs_utils_python/download.py | 13 ++- exasol_bucketfs_utils_python/upload.py | 14 +++- tests/test_bucketfs_utils.py | 79 +++++++++++++++++++ 4 files changed, 112 insertions(+), 14 deletions(-) rename exasol_bucketfs_utils_python/{bucketfs_udf_utils.py => bucketfs_utils.py} (51%) create mode 100644 tests/test_bucketfs_utils.py diff --git a/exasol_bucketfs_utils_python/bucketfs_udf_utils.py b/exasol_bucketfs_utils_python/bucketfs_utils.py similarity index 51% rename from exasol_bucketfs_utils_python/bucketfs_udf_utils.py rename to exasol_bucketfs_utils_python/bucketfs_utils.py index 7c90e438..d35b0cbc 100644 --- a/exasol_bucketfs_utils_python/bucketfs_udf_utils.py +++ b/exasol_bucketfs_utils_python/bucketfs_utils.py @@ -1,17 +1,21 @@ from exasol_bucketfs_utils_python.bucketfs_config import BucketFsConfig -def get_bucketfs_udf_path(bucketfs_config: BucketFsConfig, file_name: str): +def get_bucketfs_udf_path(bucketfs_config: BucketFsConfig, path_in_bucket: str): archive_extensions = [".tar.gz", ".tar.bz2", ".zip", ".tar"] + path_prefix = "" for extension in archive_extensions: - if file_name.endswith(extension): - file_name = file_name[:-len(extension)] + if path_in_bucket.endswith(extension): + path_in_bucket = path_in_bucket[:-len(extension)] + path_prefix = "/" break - path = f"/buckets/{bucketfs_config.bucketfs_name}/{bucketfs_config.bucket}/{file_name}" + if path_in_bucket.startswith("/"): + path_in_bucket = path_in_bucket[1:] + path = f"/buckets/{bucketfs_config.bucketfs_name}/{bucketfs_config.bucket}/{path_in_bucket}{path_prefix}" return path -def generate_bucketfs_url(bucketfs_config: BucketFsConfig, file_name: str, with_credentials: bool = True): +def generate_bucketfs_url(bucketfs_config: BucketFsConfig, path_in_bucket: str, with_credentials: bool = False): if with_credentials: credentials = f"{bucketfs_config.credentials.user}:{bucketfs_config.credentials.pwd}@" else: @@ -22,6 +26,8 @@ def generate_bucketfs_url(bucketfs_config: BucketFsConfig, file_name: str, with_ protocol = "http" url = f"{protocol}://{credentials}" \ f"{bucketfs_config.credentials.host}:{bucketfs_config.credentials.port}/{bucketfs_config.bucket}/" - if file_name is not None: - url += f"{file_name}" + if path_in_bucket is not None: + if path_in_bucket.startswith("/"): + path_in_bucket = path_in_bucket[1:] + url += f"{path_in_bucket}" return url \ No newline at end of file diff --git a/exasol_bucketfs_utils_python/download.py b/exasol_bucketfs_utils_python/download.py index 0b793e56..15dabad4 100644 --- a/exasol_bucketfs_utils_python/download.py +++ b/exasol_bucketfs_utils_python/download.py @@ -4,9 +4,10 @@ import joblib import requests +from requests.auth import HTTPBasicAuth from exasol_bucketfs_utils_python.bucketfs_config import BucketFsConfig -from exasol_bucketfs_utils_python.bucketfs_udf_utils import generate_bucketfs_url +from exasol_bucketfs_utils_python.bucketfs_utils import generate_bucketfs_url def download_from_bucketfs_to_file(bucketfs_config: BucketFsConfig, bucket_file_path: str, local_file_path: Path): @@ -16,7 +17,10 @@ def download_from_bucketfs_to_file(bucketfs_config: BucketFsConfig, bucket_file_ def download_from_bucketfs_to_fileobj(bucketfs_config: BucketFsConfig, bucket_file_path: str, fileobj: typing.IO): url = generate_bucketfs_url(bucketfs_config, bucket_file_path) - with requests.get(url, stream=True) as response: + auth = HTTPBasicAuth( + bucketfs_config.credentials.user, + bucketfs_config.credentials.pwd) + with requests.get(url, stream=True, auth=auth) as response: response.raise_for_status() for chunk in response.iter_content(chunk_size=8192): fileobj.write(chunk) @@ -24,7 +28,10 @@ def download_from_bucketfs_to_fileobj(bucketfs_config: BucketFsConfig, bucket_fi def download_from_bucketfs_to_string(bucketfs_config: BucketFsConfig, bucket_file_path: str) -> str: url = generate_bucketfs_url(bucketfs_config, bucket_file_path) - response = requests.get(url) + auth = HTTPBasicAuth( + bucketfs_config.credentials.user, + bucketfs_config.credentials.pwd) + response = requests.get(url,auth=auth) response.raise_for_status() return response.text diff --git a/exasol_bucketfs_utils_python/upload.py b/exasol_bucketfs_utils_python/upload.py index f47f78ac..17337723 100644 --- a/exasol_bucketfs_utils_python/upload.py +++ b/exasol_bucketfs_utils_python/upload.py @@ -1,13 +1,13 @@ -import os import typing from pathlib import Path from tempfile import NamedTemporaryFile import joblib import requests +from requests.auth import HTTPBasicAuth from exasol_bucketfs_utils_python.bucketfs_config import BucketFsConfig -from exasol_bucketfs_utils_python.bucketfs_udf_utils import generate_bucketfs_url, get_bucketfs_udf_path +from exasol_bucketfs_utils_python.bucketfs_utils import generate_bucketfs_url, get_bucketfs_udf_path def upload_file_to_bucketfs(bucketfs_config: BucketFsConfig, bucket_file_path: str, local_file_path: Path): @@ -17,7 +17,10 @@ def upload_file_to_bucketfs(bucketfs_config: BucketFsConfig, bucket_file_path: s def upload_fileobj_to_bucketfs(bucketfs_config: BucketFsConfig, bucket_file_path: str, fileobj: typing.IO): url = generate_bucketfs_url(bucketfs_config, bucket_file_path) - response = requests.put(url, data=fileobj) + auth = HTTPBasicAuth( + bucketfs_config.credentials.user, + bucketfs_config.credentials.pwd) + response = requests.put(url, data=fileobj, auth=auth) response.raise_for_status() path = get_bucketfs_udf_path(bucketfs_config, bucket_file_path) return url, path @@ -25,7 +28,10 @@ def upload_fileobj_to_bucketfs(bucketfs_config: BucketFsConfig, bucket_file_path def upload_string_to_bucketfs(bucketfs_config: BucketFsConfig, bucket_file_path: str, string: str): url = generate_bucketfs_url(bucketfs_config, bucket_file_path) - response = requests.put(url, data=string.encode("UTF-8")) + auth = HTTPBasicAuth( + bucketfs_config.credentials.user, + bucketfs_config.credentials.pwd) + response = requests.put(url, data=string.encode("UTF-8"), auth=auth) response.raise_for_status() path = get_bucketfs_udf_path(bucketfs_config, bucket_file_path) return url, path diff --git a/tests/test_bucketfs_utils.py b/tests/test_bucketfs_utils.py new file mode 100644 index 00000000..8b18e1a4 --- /dev/null +++ b/tests/test_bucketfs_utils.py @@ -0,0 +1,79 @@ +import pytest + +from exasol_bucketfs_utils_python import bucketfs_utils +from exasol_bucketfs_utils_python.bucketfs_config import BucketFSCredentials, BucketFsConfig + + +def test_get_bucketfs_udf_path_non_archive_file(): + bucketfs_credentials = BucketFSCredentials(host="localhost", port="6666", user="w", pwd="write") + bucketfs_config = BucketFsConfig(credentials=bucketfs_credentials, + bucket="default", + bucketfs_name="bfsdefault", + is_https=False) + udf_path = bucketfs_utils.get_bucketfs_udf_path( + bucketfs_config=bucketfs_config, + path_in_bucket="path/in/bucket/test_file.txt" + ) + assert udf_path == "/buckets/bfsdefault/default/path/in/bucket/test_file.txt" + +def test_get_bucketfs_udf_path_trailing_slash(): + bucketfs_credentials = BucketFSCredentials(host="localhost", port="6666", user="w", pwd="write") + bucketfs_config = BucketFsConfig(credentials=bucketfs_credentials, + bucket="default", + bucketfs_name="bfsdefault", + is_https=False) + udf_path = bucketfs_utils.get_bucketfs_udf_path( + bucketfs_config=bucketfs_config, + path_in_bucket="/path/in/bucket/test_file.txt" + ) + assert udf_path == "/buckets/bfsdefault/default/path/in/bucket/test_file.txt" + +@pytest.mark.parametrize("extension",["tar.gz","zip","tar.bz2","tar"]) +def test_get_bucketfs_udf_path_archive_tar(extension): + bucketfs_credentials = BucketFSCredentials(host="localhost", port="6666", user="w", pwd="write") + bucketfs_config = BucketFsConfig(credentials=bucketfs_credentials, + bucket="default", + bucketfs_name="bfsdefault", + is_https=False) + udf_path = bucketfs_utils.get_bucketfs_udf_path( + bucketfs_config=bucketfs_config, + path_in_bucket=f"path/in/bucket/test_file.{extension}" + ) + assert udf_path == "/buckets/bfsdefault/default/path/in/bucket/test_file/" + +def test_generate_bucketfs_url_file(): + bucketfs_credentials = BucketFSCredentials(host="localhost", port="6666", user="w", pwd="write") + bucketfs_config = BucketFsConfig(credentials=bucketfs_credentials, + bucket="default", + bucketfs_name="bfsdefault", + is_https=False) + udf_path = bucketfs_utils.generate_bucketfs_url( + bucketfs_config=bucketfs_config, + path_in_bucket="path/in/bucket/test_file.txt" + ) + assert udf_path == "http://localhost:6666/default/path/in/bucket/test_file.txt" + +def test_generate_bucketfs_url_file_trailing_slash(): + bucketfs_credentials = BucketFSCredentials(host="localhost", port="6666", user="w", pwd="write") + bucketfs_config = BucketFsConfig(credentials=bucketfs_credentials, + bucket="default", + bucketfs_name="bfsdefault", + is_https=False) + udf_path = bucketfs_utils.generate_bucketfs_url( + bucketfs_config=bucketfs_config, + path_in_bucket="/path/in/bucket/test_file.txt" + ) + assert udf_path == "http://localhost:6666/default/path/in/bucket/test_file.txt" + +def test_generate_bucketfs_url_file_with_credentialsh(): + bucketfs_credentials = BucketFSCredentials(host="localhost", port="6666", user="w", pwd="write") + bucketfs_config = BucketFsConfig(credentials=bucketfs_credentials, + bucket="default", + bucketfs_name="bfsdefault", + is_https=False) + udf_path = bucketfs_utils.generate_bucketfs_url( + bucketfs_config=bucketfs_config, + path_in_bucket="path/in/bucket/test_file.txt", + with_credentials=True + ) + assert udf_path == "http://w:write@localhost:6666/default/path/in/bucket/test_file.txt" \ No newline at end of file From 5e625b40bb78ad7b2be9d31c003fa5ca5c5c0b8d Mon Sep 17 00:00:00 2001 From: Torsten Kilias Date: Tue, 2 Mar 2021 13:26:26 +0100 Subject: [PATCH 06/24] Refactoring --- .../bucketfs_config.py | 24 ++-- .../bucketfs_utils.py | 64 ++++++++--- exasol_bucketfs_utils_python/download.py | 36 +++--- exasol_bucketfs_utils_python/upload.py | 34 +++--- tests/test_bucketfs_utils.py | 103 +++++++++--------- tests/test_upload_download.py | 43 ++++---- 6 files changed, 168 insertions(+), 136 deletions(-) diff --git a/exasol_bucketfs_utils_python/bucketfs_config.py b/exasol_bucketfs_utils_python/bucketfs_config.py index e8dcea38..55da62ae 100644 --- a/exasol_bucketfs_utils_python/bucketfs_config.py +++ b/exasol_bucketfs_utils_python/bucketfs_config.py @@ -1,5 +1,9 @@ -class BucketFSCredentials: - def __init__(self, host, port, user, pwd): +from typing import Union + + +class BucketFSConnectionConfig: + def __init__(self, host: str, port: str, user: str, pwd: str, is_https=False): + self.is_https = is_https self.host = host self.port = port self.user = user @@ -7,8 +11,14 @@ def __init__(self, host, port, user, pwd): class BucketFsConfig: - def __init__(self, credentials: BucketFSCredentials, bucket, bucketfs_name, is_https=False): - self.is_https = is_https - self.credentials = credentials - self.bucket = bucket - self.bucketfs_name = bucketfs_name \ No newline at end of file + def __init__(self, bucketfs_name: str, connection_config: Union[BucketFSConnectionConfig, None] = None): + self.connection_config = connection_config + self.bucketfs_name = bucketfs_name + + +class BucketConfig: + def __init__(self, bucket_name: str, bucketfs_config: BucketFsConfig): + if bucketfs_config is None: + raise TypeError("bucketfs_config can't be None") + self.bucket_name = bucket_name + self.bucketfs_config = bucketfs_config diff --git a/exasol_bucketfs_utils_python/bucketfs_utils.py b/exasol_bucketfs_utils_python/bucketfs_utils.py index d35b0cbc..b5966cca 100644 --- a/exasol_bucketfs_utils_python/bucketfs_utils.py +++ b/exasol_bucketfs_utils_python/bucketfs_utils.py @@ -1,33 +1,67 @@ -from exasol_bucketfs_utils_python.bucketfs_config import BucketFsConfig +from typing import Union +from requests.auth import HTTPBasicAuth -def get_bucketfs_udf_path(bucketfs_config: BucketFsConfig, path_in_bucket: str): - archive_extensions = [".tar.gz", ".tar.bz2", ".zip", ".tar"] - path_prefix = "" - for extension in archive_extensions: +from exasol_bucketfs_utils_python.bucketfs_config import BucketConfig, BucketFsConfig + +ARCHIVE_EXTENSIONS = [".tar.gz", ".tar.bz2", ".zip", ".tar"] + + +def _correct_path_in_bucket_for_archives(path_in_bucket: str) -> str: + for extension in ARCHIVE_EXTENSIONS: if path_in_bucket.endswith(extension): path_in_bucket = path_in_bucket[:-len(extension)] - path_prefix = "/" break - if path_in_bucket.startswith("/"): - path_in_bucket = path_in_bucket[1:] - path = f"/buckets/{bucketfs_config.bucketfs_name}/{bucketfs_config.bucket}/{path_in_bucket}{path_prefix}" + return path_in_bucket + + +def get_bucketfs_udf_path(bucketfs_config: BucketFsConfig) -> str: + path = f"/buckets/{bucketfs_config.bucketfs_name}" + return path + + +def get_bucket_udf_path(bucket_config: BucketConfig, path_in_bucket: str) -> str: + bucketfs_path = get_bucketfs_udf_path(bucket_config.bucketfs_config) + path = f"{bucketfs_path}/{bucket_config.bucket_name}" + + if path_in_bucket is not None: + path_in_bucket = _correct_path_in_bucket_for_archives(path_in_bucket) + if path_in_bucket.startswith("/"): + path_in_bucket = path_in_bucket[1:] + path = f"{path}/{path_in_bucket}" return path -def generate_bucketfs_url(bucketfs_config: BucketFsConfig, path_in_bucket: str, with_credentials: bool = False): +def generate_bucketfs_url(bucketfs_config: BucketFsConfig, with_credentials: bool = False) -> str: + if bucketfs_config.connection_config is None: + raise TypeError("bucket_config.bucketfs_config.connection_config can't be none for this operations") if with_credentials: - credentials = f"{bucketfs_config.credentials.user}:{bucketfs_config.credentials.pwd}@" + credentials = f"{bucketfs_config.connection_config.user}:{bucketfs_config.connection_config.pwd}@" else: credentials = "" - if bucketfs_config.is_https: + if bucketfs_config.connection_config.is_https: protocol = "https" else: protocol = "http" url = f"{protocol}://{credentials}" \ - f"{bucketfs_config.credentials.host}:{bucketfs_config.credentials.port}/{bucketfs_config.bucket}/" + f"{bucketfs_config.connection_config.host}:{bucketfs_config.connection_config.port}" + return url + + +def generate_bucket_url(bucket_config: BucketConfig, path_in_bucket: Union[None, str], with_credentials: bool = False): + url = generate_bucketfs_url(bucket_config.bucketfs_config, with_credentials) + url = url + f"/{bucket_config.bucket_name}" if path_in_bucket is not None: if path_in_bucket.startswith("/"): path_in_bucket = path_in_bucket[1:] - url += f"{path_in_bucket}" - return url \ No newline at end of file + url += f"/{path_in_bucket}" + return url + + +def create_auth_object(bucket_config): + if bucket_config.bucketfs_config.connection_config is None: + raise TypeError("bucket_config.bucketfs_config.connection_config can't be none for this operations") + auth = HTTPBasicAuth( + bucket_config.bucketfs_config.connection_config.user, + bucket_config.bucketfs_config.connection_config.pwd) + return auth diff --git a/exasol_bucketfs_utils_python/download.py b/exasol_bucketfs_utils_python/download.py index 15dabad4..ef7d8c94 100644 --- a/exasol_bucketfs_utils_python/download.py +++ b/exasol_bucketfs_utils_python/download.py @@ -4,42 +4,38 @@ import joblib import requests -from requests.auth import HTTPBasicAuth -from exasol_bucketfs_utils_python.bucketfs_config import BucketFsConfig -from exasol_bucketfs_utils_python.bucketfs_utils import generate_bucketfs_url +from exasol_bucketfs_utils_python import bucketfs_utils +from exasol_bucketfs_utils_python.bucketfs_config import BucketConfig +from exasol_bucketfs_utils_python.bucketfs_utils import generate_bucket_url -def download_from_bucketfs_to_file(bucketfs_config: BucketFsConfig, bucket_file_path: str, local_file_path: Path): +def download_from_bucketfs_to_file(bucket_config: BucketConfig, bucket_file_path: str, local_file_path: Path): with local_file_path.open("wb") as f: - download_from_bucketfs_to_fileobj(bucketfs_config, bucket_file_path, f) + download_from_bucketfs_to_fileobj(bucket_config, bucket_file_path, f) -def download_from_bucketfs_to_fileobj(bucketfs_config: BucketFsConfig, bucket_file_path: str, fileobj: typing.IO): - url = generate_bucketfs_url(bucketfs_config, bucket_file_path) - auth = HTTPBasicAuth( - bucketfs_config.credentials.user, - bucketfs_config.credentials.pwd) +def download_from_bucketfs_to_fileobj(bucket_config: BucketConfig, bucket_file_path: str, fileobj: typing.IO): + url = generate_bucket_url(bucket_config, bucket_file_path) + auth = bucketfs_utils.create_auth_object(bucket_config) with requests.get(url, stream=True, auth=auth) as response: response.raise_for_status() for chunk in response.iter_content(chunk_size=8192): fileobj.write(chunk) -def download_from_bucketfs_to_string(bucketfs_config: BucketFsConfig, bucket_file_path: str) -> str: - url = generate_bucketfs_url(bucketfs_config, bucket_file_path) - auth = HTTPBasicAuth( - bucketfs_config.credentials.user, - bucketfs_config.credentials.pwd) - response = requests.get(url,auth=auth) +def download_from_bucketfs_to_string(bucket_config: BucketConfig, bucket_file_path: str) -> str: + url = generate_bucket_url(bucket_config, bucket_file_path) + auth = bucketfs_utils.create_auth_object(bucket_config) + response = requests.get(url, auth=auth) response.raise_for_status() return response.text -def download_object_from_bucketfs_via_joblib(object, bucketfs_config: BucketFsConfig, file_name: str, compress=True): +def download_object_from_bucketfs_via_joblib(bucket_config: BucketConfig, bucket_file_path: str): with NamedTemporaryFile() as temp_file: - download_from_bucketfs_to_fileobj(bucketfs_config, file_name, temp_file) + download_from_bucketfs_to_fileobj(bucket_config, bucket_file_path, temp_file) temp_file.flush() temp_file.seek(0) - object = joblib.load(temp_file) - return object + obj = joblib.load(temp_file) + return obj diff --git a/exasol_bucketfs_utils_python/upload.py b/exasol_bucketfs_utils_python/upload.py index 17337723..5ad74c4c 100644 --- a/exasol_bucketfs_utils_python/upload.py +++ b/exasol_bucketfs_utils_python/upload.py @@ -4,42 +4,38 @@ import joblib import requests -from requests.auth import HTTPBasicAuth -from exasol_bucketfs_utils_python.bucketfs_config import BucketFsConfig -from exasol_bucketfs_utils_python.bucketfs_utils import generate_bucketfs_url, get_bucketfs_udf_path +from exasol_bucketfs_utils_python import bucketfs_utils +from exasol_bucketfs_utils_python.bucketfs_config import BucketConfig +from exasol_bucketfs_utils_python.bucketfs_utils import generate_bucket_url, get_bucket_udf_path -def upload_file_to_bucketfs(bucketfs_config: BucketFsConfig, bucket_file_path: str, local_file_path: Path): +def upload_file_to_bucketfs(bucket_config: BucketConfig, bucket_file_path: str, local_file_path: Path): with local_file_path.open("rb") as f: - upload_fileobj_to_bucketfs(bucketfs_config, bucket_file_path, f) + upload_fileobj_to_bucketfs(bucket_config, bucket_file_path, f) -def upload_fileobj_to_bucketfs(bucketfs_config: BucketFsConfig, bucket_file_path: str, fileobj: typing.IO): - url = generate_bucketfs_url(bucketfs_config, bucket_file_path) - auth = HTTPBasicAuth( - bucketfs_config.credentials.user, - bucketfs_config.credentials.pwd) +def upload_fileobj_to_bucketfs(bucket_config: BucketConfig, bucket_file_path: str, fileobj: typing.IO): + url = generate_bucket_url(bucket_config, bucket_file_path) + auth = bucketfs_utils.create_auth_object(bucket_config) response = requests.put(url, data=fileobj, auth=auth) response.raise_for_status() - path = get_bucketfs_udf_path(bucketfs_config, bucket_file_path) + path = get_bucket_udf_path(bucket_config, bucket_file_path) return url, path -def upload_string_to_bucketfs(bucketfs_config: BucketFsConfig, bucket_file_path: str, string: str): - url = generate_bucketfs_url(bucketfs_config, bucket_file_path) - auth = HTTPBasicAuth( - bucketfs_config.credentials.user, - bucketfs_config.credentials.pwd) +def upload_string_to_bucketfs(bucket_config: BucketConfig, bucket_file_path: str, string: str): + url = generate_bucket_url(bucket_config, bucket_file_path) + auth = bucketfs_utils.create_auth_object(bucket_config) response = requests.put(url, data=string.encode("UTF-8"), auth=auth) response.raise_for_status() - path = get_bucketfs_udf_path(bucketfs_config, bucket_file_path) + path = get_bucket_udf_path(bucket_config, bucket_file_path) return url, path -def upload_object_to_bucketfs_via_joblib(object, bucketfs_config: BucketFsConfig, bucket_file_path: str, compress=True): +def upload_object_to_bucketfs_via_joblib(object, bucket_config: BucketConfig, bucket_file_path: str, compress=True): with NamedTemporaryFile() as temp_file: joblib.dump(object, temp_file.name, compress=compress) temp_file.flush() temp_file.seek(0) - upload_fileobj_to_bucketfs(bucketfs_config, bucket_file_path, temp_file) + upload_fileobj_to_bucketfs(bucket_config, bucket_file_path, temp_file) diff --git a/tests/test_bucketfs_utils.py b/tests/test_bucketfs_utils.py index 8b18e1a4..df06c0a9 100644 --- a/tests/test_bucketfs_utils.py +++ b/tests/test_bucketfs_utils.py @@ -1,79 +1,78 @@ import pytest from exasol_bucketfs_utils_python import bucketfs_utils -from exasol_bucketfs_utils_python.bucketfs_config import BucketFSCredentials, BucketFsConfig +from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConnectionConfig, BucketFsConfig, BucketConfig -def test_get_bucketfs_udf_path_non_archive_file(): - bucketfs_credentials = BucketFSCredentials(host="localhost", port="6666", user="w", pwd="write") - bucketfs_config = BucketFsConfig(credentials=bucketfs_credentials, - bucket="default", - bucketfs_name="bfsdefault", - is_https=False) - udf_path = bucketfs_utils.get_bucketfs_udf_path( - bucketfs_config=bucketfs_config, +def test_get_bucket_udf_path_non_archive_file(): + connection_config = BucketFSConnectionConfig(host="localhost", port="6666", user="w", pwd="write", is_https=False) + bucketfs_config = BucketFsConfig(connection_config=connection_config, + bucketfs_name="bfsdefault") + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) + udf_path = bucketfs_utils.get_bucket_udf_path( + bucket_config=bucket_config, path_in_bucket="path/in/bucket/test_file.txt" ) assert udf_path == "/buckets/bfsdefault/default/path/in/bucket/test_file.txt" -def test_get_bucketfs_udf_path_trailing_slash(): - bucketfs_credentials = BucketFSCredentials(host="localhost", port="6666", user="w", pwd="write") - bucketfs_config = BucketFsConfig(credentials=bucketfs_credentials, - bucket="default", - bucketfs_name="bfsdefault", - is_https=False) - udf_path = bucketfs_utils.get_bucketfs_udf_path( - bucketfs_config=bucketfs_config, + +def test_get_bucket_udf_path_trailing_slash(): + connection_config = BucketFSConnectionConfig(host="localhost", port="6666", user="w", pwd="write", is_https=False) + bucketfs_config = BucketFsConfig(connection_config=connection_config, + bucketfs_name="bfsdefault") + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) + udf_path = bucketfs_utils.get_bucket_udf_path( + bucket_config=bucket_config, path_in_bucket="/path/in/bucket/test_file.txt" ) assert udf_path == "/buckets/bfsdefault/default/path/in/bucket/test_file.txt" -@pytest.mark.parametrize("extension",["tar.gz","zip","tar.bz2","tar"]) -def test_get_bucketfs_udf_path_archive_tar(extension): - bucketfs_credentials = BucketFSCredentials(host="localhost", port="6666", user="w", pwd="write") - bucketfs_config = BucketFsConfig(credentials=bucketfs_credentials, - bucket="default", - bucketfs_name="bfsdefault", - is_https=False) - udf_path = bucketfs_utils.get_bucketfs_udf_path( - bucketfs_config=bucketfs_config, + +@pytest.mark.parametrize("extension", ["tar.gz", "zip", "tar.bz2", "tar"]) +def test_get_bucket_udf_path_archive_tar(extension): + connection_config = BucketFSConnectionConfig(host="localhost", port="6666", user="w", pwd="write", is_https=False) + bucketfs_config = BucketFsConfig(connection_config=connection_config, + bucketfs_name="bfsdefault") + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) + udf_path = bucketfs_utils.get_bucket_udf_path( + bucket_config=bucket_config, path_in_bucket=f"path/in/bucket/test_file.{extension}" ) - assert udf_path == "/buckets/bfsdefault/default/path/in/bucket/test_file/" + assert udf_path == "/buckets/bfsdefault/default/path/in/bucket/test_file" + -def test_generate_bucketfs_url_file(): - bucketfs_credentials = BucketFSCredentials(host="localhost", port="6666", user="w", pwd="write") - bucketfs_config = BucketFsConfig(credentials=bucketfs_credentials, - bucket="default", - bucketfs_name="bfsdefault", - is_https=False) - udf_path = bucketfs_utils.generate_bucketfs_url( - bucketfs_config=bucketfs_config, +def test_generate_bucket_url_file(): + connection_config = BucketFSConnectionConfig(host="localhost", port="6666", user="w", pwd="write", is_https=False) + bucketfs_config = BucketFsConfig(connection_config=connection_config, + bucketfs_name="bfsdefault") + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) + udf_path = bucketfs_utils.generate_bucket_url( + bucket_config=bucket_config, path_in_bucket="path/in/bucket/test_file.txt" ) assert udf_path == "http://localhost:6666/default/path/in/bucket/test_file.txt" -def test_generate_bucketfs_url_file_trailing_slash(): - bucketfs_credentials = BucketFSCredentials(host="localhost", port="6666", user="w", pwd="write") - bucketfs_config = BucketFsConfig(credentials=bucketfs_credentials, - bucket="default", - bucketfs_name="bfsdefault", - is_https=False) - udf_path = bucketfs_utils.generate_bucketfs_url( - bucketfs_config=bucketfs_config, + +def test_generate_bucket_url_file_trailing_slash(): + connection_config = BucketFSConnectionConfig(host="localhost", port="6666", user="w", pwd="write", is_https=False) + bucketfs_config = BucketFsConfig(connection_config=connection_config, + bucketfs_name="bfsdefault") + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) + udf_path = bucketfs_utils.generate_bucket_url( + bucket_config=bucket_config, path_in_bucket="/path/in/bucket/test_file.txt" ) assert udf_path == "http://localhost:6666/default/path/in/bucket/test_file.txt" -def test_generate_bucketfs_url_file_with_credentialsh(): - bucketfs_credentials = BucketFSCredentials(host="localhost", port="6666", user="w", pwd="write") - bucketfs_config = BucketFsConfig(credentials=bucketfs_credentials, - bucket="default", - bucketfs_name="bfsdefault", - is_https=False) - udf_path = bucketfs_utils.generate_bucketfs_url( - bucketfs_config=bucketfs_config, + +def test_generate_bucket_url_file_with_credentialsh(): + connection_config = BucketFSConnectionConfig(host="localhost", port="6666", user="w", pwd="write", is_https=False) + bucketfs_config = BucketFsConfig(connection_config=connection_config, + bucketfs_name="bfsdefault") + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) + udf_path = bucketfs_utils.generate_bucket_url( + bucket_config=bucket_config, path_in_bucket="path/in/bucket/test_file.txt", with_credentials=True ) - assert udf_path == "http://w:write@localhost:6666/default/path/in/bucket/test_file.txt" \ No newline at end of file + assert udf_path == "http://w:write@localhost:6666/default/path/in/bucket/test_file.txt" diff --git a/tests/test_upload_download.py b/tests/test_upload_download.py index 02b61c12..ecd0ecf3 100644 --- a/tests/test_upload_download.py +++ b/tests/test_upload_download.py @@ -2,15 +2,14 @@ from tempfile import NamedTemporaryFile from exasol_bucketfs_utils_python import upload, download -from exasol_bucketfs_utils_python.bucketfs_config import BucketFsConfig, BucketFSCredentials +from exasol_bucketfs_utils_python.bucketfs_config import BucketFsConfig, BucketFSConnectionConfig, BucketConfig def test_file_upload_download(): - bucketfs_credentials = BucketFSCredentials(host="localhost", port="6666", user="w", pwd="write") - bucketfs_config = BucketFsConfig(credentials=bucketfs_credentials, - bucket="default", - bucketfs_name="bfsdefault", - is_https=False) + connection_config = BucketFSConnectionConfig(host="localhost", port="6666", user="w", pwd="write", is_https=False) + bucketfs_config = BucketFsConfig(connection_config=connection_config, + bucketfs_name="bfsdefault") + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) with NamedTemporaryFile() as input_temp_file: test_byte_string = b"test_byte_string" input_temp_file.write(test_byte_string) @@ -18,13 +17,13 @@ def test_file_upload_download(): path_in_bucket = "path/in/bucket/file.txt" upload.upload_file_to_bucketfs( - bucketfs_config=bucketfs_config, + bucket_config=bucket_config, bucket_file_path=path_in_bucket, local_file_path=Path(input_temp_file.name)) with NamedTemporaryFile() as output_temp_file: download.download_from_bucketfs_to_file( - bucketfs_config=bucketfs_config, + bucket_config=bucket_config, bucket_file_path=path_in_bucket, local_file_path=Path(output_temp_file.name)) output_test_byte_string = output_temp_file.read() @@ -32,11 +31,10 @@ def test_file_upload_download(): def test_fileobj_upload_download(): - bucketfs_credentials = BucketFSCredentials(host="localhost", port="6666", user="w", pwd="write") - bucketfs_config = BucketFsConfig(credentials=bucketfs_credentials, - bucket="default", - bucketfs_name="bfsdefault", - is_https=False) + connection_config = BucketFSConnectionConfig(host="localhost", port="6666", user="w", pwd="write", is_https=False) + bucketfs_config = BucketFsConfig(connection_config=connection_config, + bucketfs_name="bfsdefault") + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) with NamedTemporaryFile() as input_temp_file: test_byte_string = b"test_byte_string" input_temp_file.write(test_byte_string) @@ -44,13 +42,13 @@ def test_fileobj_upload_download(): input_temp_file.seek(0) path_in_bucket = "path/in/bucket/file.txt" upload.upload_fileobj_to_bucketfs( - bucketfs_config=bucketfs_config, + bucket_config=bucket_config, bucket_file_path=path_in_bucket, fileobj=input_temp_file) with NamedTemporaryFile() as output_temp_file: download.download_from_bucketfs_to_fileobj( - bucketfs_config=bucketfs_config, + bucket_config=bucket_config, bucket_file_path=path_in_bucket, fileobj=output_temp_file) output_temp_file.flush() @@ -60,21 +58,20 @@ def test_fileobj_upload_download(): def test_string_upload_download(): - bucketfs_credentials = BucketFSCredentials(host="localhost", port="6666", user="w", pwd="write") - bucketfs_config = BucketFsConfig(credentials=bucketfs_credentials, - bucket="default", - bucketfs_name="bfsdefault", - is_https=False) + connection_config = BucketFSConnectionConfig(host="localhost", port="6666", user="w", pwd="write", is_https=False) + bucketfs_config = BucketFsConfig(connection_config=connection_config, + bucketfs_name="bfsdefault") + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) test_string = "test_string" path_in_bucket = "path/in/bucket/file.txt" upload.upload_string_to_bucketfs( - bucketfs_config=bucketfs_config, + bucket_config=bucket_config, bucket_file_path=path_in_bucket, string=test_string) output_test_string = \ download.download_from_bucketfs_to_string( - bucketfs_config=bucketfs_config, + bucket_config=bucket_config, bucket_file_path=path_in_bucket) - assert test_string == output_test_string \ No newline at end of file + assert test_string == output_test_string From 0fb0e9d3feacc66ca95d3f2c986d7b61d2e8c4d0 Mon Sep 17 00:00:00 2001 From: Torsten Kilias Date: Tue, 2 Mar 2021 19:34:55 +0100 Subject: [PATCH 07/24] Add docstrings to some classes and functions --- .../bucketfs_config.py | 14 ++++++++++++++ exasol_bucketfs_utils_python/bucketfs_utils.py | 18 +++++++++++++++--- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/exasol_bucketfs_utils_python/bucketfs_config.py b/exasol_bucketfs_utils_python/bucketfs_config.py index 55da62ae..44c4fcdb 100644 --- a/exasol_bucketfs_utils_python/bucketfs_config.py +++ b/exasol_bucketfs_utils_python/bucketfs_config.py @@ -2,6 +2,10 @@ class BucketFSConnectionConfig: + """ + The BucketFSConnectionConfig contains all necessary information + to connect to the BucketFS Server via HTTP[s] + """ def __init__(self, host: str, port: str, user: str, pwd: str, is_https=False): self.is_https = is_https self.host = host @@ -11,12 +15,22 @@ def __init__(self, host: str, port: str, user: str, pwd: str, is_https=False): class BucketFsConfig: + """ + The BucketFSConfig contains all required information + to access it either via HTTP[S] or in the file system inside of UDFs. + The BucketFSConnectionConfig is here by optional, + because in UDF we sometimes don't want to use HTTP[S]. + """ def __init__(self, bucketfs_name: str, connection_config: Union[BucketFSConnectionConfig, None] = None): self.connection_config = connection_config self.bucketfs_name = bucketfs_name class BucketConfig: + """ + The BucketConfig contains all required information about a BucketFS + to access it either via HTTP[S] or in the file system inside of UDFs. + """ def __init__(self, bucket_name: str, bucketfs_config: BucketFsConfig): if bucketfs_config is None: raise TypeError("bucketfs_config can't be None") diff --git a/exasol_bucketfs_utils_python/bucketfs_utils.py b/exasol_bucketfs_utils_python/bucketfs_utils.py index b5966cca..d661e57f 100644 --- a/exasol_bucketfs_utils_python/bucketfs_utils.py +++ b/exasol_bucketfs_utils_python/bucketfs_utils.py @@ -16,11 +16,23 @@ def _correct_path_in_bucket_for_archives(path_in_bucket: str) -> str: def get_bucketfs_udf_path(bucketfs_config: BucketFsConfig) -> str: + """ + This function generates the path where UDFs can access the content of a BucketFS in there file system + :param bucketfs_config: Config of the BucketFS, the BucketFSConnectionConfig in the BucketFSConfig can None + :return: Path of the given BucketFS in the file system of UDFs + """ path = f"/buckets/{bucketfs_config.bucketfs_name}" return path -def get_bucket_udf_path(bucket_config: BucketConfig, path_in_bucket: str) -> str: +def get_bucket_udf_path(bucket_config: BucketConfig, path_in_bucket: Union[None, str]) -> str: + """ + This function generates the path where UDFs can access the content of a Bucket or + the given Path in a Bucket in there file system + :param bucket_config: Config of the Bucket, the BucketFSConnectionConfig in the BucketFSConfig can be None + :param path_in_bucket: If not None, path_in_bucket gets concatenated to the path of the bucket + :return: + """ bucketfs_path = get_bucketfs_udf_path(bucket_config.bucketfs_config) path = f"{bucketfs_path}/{bucket_config.bucket_name}" @@ -34,7 +46,7 @@ def get_bucket_udf_path(bucket_config: BucketConfig, path_in_bucket: str) -> str def generate_bucketfs_url(bucketfs_config: BucketFsConfig, with_credentials: bool = False) -> str: if bucketfs_config.connection_config is None: - raise TypeError("bucket_config.bucketfs_config.connection_config can't be none for this operations") + raise TypeError("bucket_config.bucketfs_config.connection_config can't be None for this operations") if with_credentials: credentials = f"{bucketfs_config.connection_config.user}:{bucketfs_config.connection_config.pwd}@" else: @@ -60,7 +72,7 @@ def generate_bucket_url(bucket_config: BucketConfig, path_in_bucket: Union[None, def create_auth_object(bucket_config): if bucket_config.bucketfs_config.connection_config is None: - raise TypeError("bucket_config.bucketfs_config.connection_config can't be none for this operations") + raise TypeError("bucket_config.bucketfs_config.connection_config can't be None for this operations") auth = HTTPBasicAuth( bucket_config.bucketfs_config.connection_config.user, bucket_config.bucketfs_config.connection_config.pwd) From 342d1c5a4e5ca660d837f776c50c24155b1bb64b Mon Sep 17 00:00:00 2001 From: Torsten Kilias Date: Wed, 3 Mar 2021 12:37:23 +0100 Subject: [PATCH 08/24] Refactoring and more docstrings --- .../bucketfs_utils.py | 38 +++++++++++++------ exasol_bucketfs_utils_python/download.py | 6 +-- exasol_bucketfs_utils_python/upload.py | 10 ++--- tests/test_bucketfs_utils.py | 18 ++++----- 4 files changed, 44 insertions(+), 28 deletions(-) diff --git a/exasol_bucketfs_utils_python/bucketfs_utils.py b/exasol_bucketfs_utils_python/bucketfs_utils.py index d661e57f..00ce5d07 100644 --- a/exasol_bucketfs_utils_python/bucketfs_utils.py +++ b/exasol_bucketfs_utils_python/bucketfs_utils.py @@ -15,25 +15,25 @@ def _correct_path_in_bucket_for_archives(path_in_bucket: str) -> str: return path_in_bucket -def get_bucketfs_udf_path(bucketfs_config: BucketFsConfig) -> str: +def generate_bucketfs_udf_path(bucketfs_config: BucketFsConfig) -> str: """ This function generates the path where UDFs can access the content of a BucketFS in there file system :param bucketfs_config: Config of the BucketFS, the BucketFSConnectionConfig in the BucketFSConfig can None - :return: Path of the given BucketFS in the file system of UDFs + :return: Path of the given BucketFS in the file system of UDFs as string """ path = f"/buckets/{bucketfs_config.bucketfs_name}" return path -def get_bucket_udf_path(bucket_config: BucketConfig, path_in_bucket: Union[None, str]) -> str: +def generate_bucket_udf_path(bucket_config: BucketConfig, path_in_bucket: Union[None, str]) -> str: """ - This function generates the path where UDFs can access the content of a Bucket or - the given Path in a Bucket in there file system + This function generates the path where UDFs can access the content of a bucket or + the given Path in a bucket in there file system :param bucket_config: Config of the Bucket, the BucketFSConnectionConfig in the BucketFSConfig can be None :param path_in_bucket: If not None, path_in_bucket gets concatenated to the path of the bucket - :return: + :return: Path of the bucket or the file in the Bucket in the file system of UDFs as string """ - bucketfs_path = get_bucketfs_udf_path(bucket_config.bucketfs_config) + bucketfs_path = generate_bucketfs_udf_path(bucket_config.bucketfs_config) path = f"{bucketfs_path}/{bucket_config.bucket_name}" if path_in_bucket is not None: @@ -44,7 +44,14 @@ def get_bucket_udf_path(bucket_config: BucketConfig, path_in_bucket: Union[None, return path -def generate_bucketfs_url(bucketfs_config: BucketFsConfig, with_credentials: bool = False) -> str: +def generate_bucketfs_http_url(bucketfs_config: BucketFsConfig, with_credentials: bool = False) -> str: + """ + This function generates the HTTP[s] url for the given BucketFSConfig + with or without basic authentication (http[s]://user:password@host:port) + :param bucketfs_config: A BucketFSConfig with a non None BucketFSConnectionConfig + :param with_credentials: If True, this function generates a url with basic authentication, default False + :return: HTTP[S] URL of the BucketFS as string + """ if bucketfs_config.connection_config is None: raise TypeError("bucket_config.bucketfs_config.connection_config can't be None for this operations") if with_credentials: @@ -60,8 +67,17 @@ def generate_bucketfs_url(bucketfs_config: BucketFsConfig, with_credentials: boo return url -def generate_bucket_url(bucket_config: BucketConfig, path_in_bucket: Union[None, str], with_credentials: bool = False): - url = generate_bucketfs_url(bucket_config.bucketfs_config, with_credentials) +def generate_bucket_http_url(bucket_config: BucketConfig, path_in_bucket: Union[None, str], + with_credentials: bool = False): + """ + This function generates the HTTP[s] url for the given bucket ot the path in the bucket + with or without basic authentication (http[s]://user:password@host:port) + :param bucket_config: Config of the Bucket, the BucketFSConnectionConfig in the BucketFSConfig must be not None + :param path_in_bucket: If not None, path_in_bucket gets concatenated to the path of the bucket + :param with_credentials: If True, this function generates a url with basic authentication, default False + :return: HTTP[S] URL of the bucket or the path in the bucket as string + """ + url = generate_bucketfs_http_url(bucket_config.bucketfs_config, with_credentials) url = url + f"/{bucket_config.bucket_name}" if path_in_bucket is not None: if path_in_bucket.startswith("/"): @@ -70,7 +86,7 @@ def generate_bucket_url(bucket_config: BucketConfig, path_in_bucket: Union[None, return url -def create_auth_object(bucket_config): +def create_auth_object(bucket_config: BucketConfig) -> HTTPBasicAuth: if bucket_config.bucketfs_config.connection_config is None: raise TypeError("bucket_config.bucketfs_config.connection_config can't be None for this operations") auth = HTTPBasicAuth( diff --git a/exasol_bucketfs_utils_python/download.py b/exasol_bucketfs_utils_python/download.py index ef7d8c94..a4ac28c7 100644 --- a/exasol_bucketfs_utils_python/download.py +++ b/exasol_bucketfs_utils_python/download.py @@ -7,7 +7,7 @@ from exasol_bucketfs_utils_python import bucketfs_utils from exasol_bucketfs_utils_python.bucketfs_config import BucketConfig -from exasol_bucketfs_utils_python.bucketfs_utils import generate_bucket_url +from exasol_bucketfs_utils_python.bucketfs_utils import generate_bucket_http_url def download_from_bucketfs_to_file(bucket_config: BucketConfig, bucket_file_path: str, local_file_path: Path): @@ -16,7 +16,7 @@ def download_from_bucketfs_to_file(bucket_config: BucketConfig, bucket_file_path def download_from_bucketfs_to_fileobj(bucket_config: BucketConfig, bucket_file_path: str, fileobj: typing.IO): - url = generate_bucket_url(bucket_config, bucket_file_path) + url = generate_bucket_http_url(bucket_config, bucket_file_path) auth = bucketfs_utils.create_auth_object(bucket_config) with requests.get(url, stream=True, auth=auth) as response: response.raise_for_status() @@ -25,7 +25,7 @@ def download_from_bucketfs_to_fileobj(bucket_config: BucketConfig, bucket_file_p def download_from_bucketfs_to_string(bucket_config: BucketConfig, bucket_file_path: str) -> str: - url = generate_bucket_url(bucket_config, bucket_file_path) + url = generate_bucket_http_url(bucket_config, bucket_file_path) auth = bucketfs_utils.create_auth_object(bucket_config) response = requests.get(url, auth=auth) response.raise_for_status() diff --git a/exasol_bucketfs_utils_python/upload.py b/exasol_bucketfs_utils_python/upload.py index 5ad74c4c..bb5747df 100644 --- a/exasol_bucketfs_utils_python/upload.py +++ b/exasol_bucketfs_utils_python/upload.py @@ -7,7 +7,7 @@ from exasol_bucketfs_utils_python import bucketfs_utils from exasol_bucketfs_utils_python.bucketfs_config import BucketConfig -from exasol_bucketfs_utils_python.bucketfs_utils import generate_bucket_url, get_bucket_udf_path +from exasol_bucketfs_utils_python.bucketfs_utils import generate_bucket_http_url, generate_bucket_udf_path def upload_file_to_bucketfs(bucket_config: BucketConfig, bucket_file_path: str, local_file_path: Path): @@ -16,20 +16,20 @@ def upload_file_to_bucketfs(bucket_config: BucketConfig, bucket_file_path: str, def upload_fileobj_to_bucketfs(bucket_config: BucketConfig, bucket_file_path: str, fileobj: typing.IO): - url = generate_bucket_url(bucket_config, bucket_file_path) + url = generate_bucket_http_url(bucket_config, bucket_file_path) auth = bucketfs_utils.create_auth_object(bucket_config) response = requests.put(url, data=fileobj, auth=auth) response.raise_for_status() - path = get_bucket_udf_path(bucket_config, bucket_file_path) + path = generate_bucket_udf_path(bucket_config, bucket_file_path) return url, path def upload_string_to_bucketfs(bucket_config: BucketConfig, bucket_file_path: str, string: str): - url = generate_bucket_url(bucket_config, bucket_file_path) + url = generate_bucket_http_url(bucket_config, bucket_file_path) auth = bucketfs_utils.create_auth_object(bucket_config) response = requests.put(url, data=string.encode("UTF-8"), auth=auth) response.raise_for_status() - path = get_bucket_udf_path(bucket_config, bucket_file_path) + path = generate_bucket_udf_path(bucket_config, bucket_file_path) return url, path diff --git a/tests/test_bucketfs_utils.py b/tests/test_bucketfs_utils.py index df06c0a9..a2177d9b 100644 --- a/tests/test_bucketfs_utils.py +++ b/tests/test_bucketfs_utils.py @@ -4,24 +4,24 @@ from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConnectionConfig, BucketFsConfig, BucketConfig -def test_get_bucket_udf_path_non_archive_file(): +def test_generate_bucket_udf_path_non_archive_file(): connection_config = BucketFSConnectionConfig(host="localhost", port="6666", user="w", pwd="write", is_https=False) bucketfs_config = BucketFsConfig(connection_config=connection_config, bucketfs_name="bfsdefault") bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) - udf_path = bucketfs_utils.get_bucket_udf_path( + udf_path = bucketfs_utils.generate_bucket_udf_path( bucket_config=bucket_config, path_in_bucket="path/in/bucket/test_file.txt" ) assert udf_path == "/buckets/bfsdefault/default/path/in/bucket/test_file.txt" -def test_get_bucket_udf_path_trailing_slash(): +def test_generate_bucket_udf_path_trailing_slash(): connection_config = BucketFSConnectionConfig(host="localhost", port="6666", user="w", pwd="write", is_https=False) bucketfs_config = BucketFsConfig(connection_config=connection_config, bucketfs_name="bfsdefault") bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) - udf_path = bucketfs_utils.get_bucket_udf_path( + udf_path = bucketfs_utils.generate_bucket_udf_path( bucket_config=bucket_config, path_in_bucket="/path/in/bucket/test_file.txt" ) @@ -29,12 +29,12 @@ def test_get_bucket_udf_path_trailing_slash(): @pytest.mark.parametrize("extension", ["tar.gz", "zip", "tar.bz2", "tar"]) -def test_get_bucket_udf_path_archive_tar(extension): +def test_generate_bucket_udf_path_archive_tar(extension): connection_config = BucketFSConnectionConfig(host="localhost", port="6666", user="w", pwd="write", is_https=False) bucketfs_config = BucketFsConfig(connection_config=connection_config, bucketfs_name="bfsdefault") bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) - udf_path = bucketfs_utils.get_bucket_udf_path( + udf_path = bucketfs_utils.generate_bucket_udf_path( bucket_config=bucket_config, path_in_bucket=f"path/in/bucket/test_file.{extension}" ) @@ -46,7 +46,7 @@ def test_generate_bucket_url_file(): bucketfs_config = BucketFsConfig(connection_config=connection_config, bucketfs_name="bfsdefault") bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) - udf_path = bucketfs_utils.generate_bucket_url( + udf_path = bucketfs_utils.generate_bucket_http_url( bucket_config=bucket_config, path_in_bucket="path/in/bucket/test_file.txt" ) @@ -58,7 +58,7 @@ def test_generate_bucket_url_file_trailing_slash(): bucketfs_config = BucketFsConfig(connection_config=connection_config, bucketfs_name="bfsdefault") bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) - udf_path = bucketfs_utils.generate_bucket_url( + udf_path = bucketfs_utils.generate_bucket_http_url( bucket_config=bucket_config, path_in_bucket="/path/in/bucket/test_file.txt" ) @@ -70,7 +70,7 @@ def test_generate_bucket_url_file_with_credentialsh(): bucketfs_config = BucketFsConfig(connection_config=connection_config, bucketfs_name="bfsdefault") bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) - udf_path = bucketfs_utils.generate_bucket_url( + udf_path = bucketfs_utils.generate_bucket_http_url( bucket_config=bucket_config, path_in_bucket="path/in/bucket/test_file.txt", with_credentials=True From 4b9263297544ad9aed14e52eadd4355923efb7fa Mon Sep 17 00:00:00 2001 From: Torsten Kilias Date: Wed, 3 Mar 2021 14:37:04 +0100 Subject: [PATCH 09/24] Harden url generation with url encoding and more tests --- .../bucketfs_config.py | 7 +- .../bucketfs_utils.py | 35 ++++-- exasol_bucketfs_utils_python/download.py | 8 +- exasol_bucketfs_utils_python/upload.py | 10 +- tests/test_bucketfs_utils.py | 104 ++++++++++++++++-- 5 files changed, 139 insertions(+), 25 deletions(-) diff --git a/exasol_bucketfs_utils_python/bucketfs_config.py b/exasol_bucketfs_utils_python/bucketfs_config.py index 44c4fcdb..0d179528 100644 --- a/exasol_bucketfs_utils_python/bucketfs_config.py +++ b/exasol_bucketfs_utils_python/bucketfs_config.py @@ -6,10 +6,13 @@ class BucketFSConnectionConfig: The BucketFSConnectionConfig contains all necessary information to connect to the BucketFS Server via HTTP[s] """ - def __init__(self, host: str, port: str, user: str, pwd: str, is_https=False): + + def __init__(self, host: str, port: int, user: str, pwd: str, is_https=False): self.is_https = is_https self.host = host self.port = port + if user not in ["w", "r"]: # The BucketFs currently supports only these two users + raise ValueError(f"User can only be, 'w' (read-write access) or 'r' (read-only access), but got {user}") self.user = user self.pwd = pwd @@ -21,6 +24,7 @@ class BucketFsConfig: The BucketFSConnectionConfig is here by optional, because in UDF we sometimes don't want to use HTTP[S]. """ + def __init__(self, bucketfs_name: str, connection_config: Union[BucketFSConnectionConfig, None] = None): self.connection_config = connection_config self.bucketfs_name = bucketfs_name @@ -31,6 +35,7 @@ class BucketConfig: The BucketConfig contains all required information about a BucketFS to access it either via HTTP[S] or in the file system inside of UDFs. """ + def __init__(self, bucket_name: str, bucketfs_config: BucketFsConfig): if bucketfs_config is None: raise TypeError("bucketfs_config can't be None") diff --git a/exasol_bucketfs_utils_python/bucketfs_utils.py b/exasol_bucketfs_utils_python/bucketfs_utils.py index 00ce5d07..2d0e6d87 100644 --- a/exasol_bucketfs_utils_python/bucketfs_utils.py +++ b/exasol_bucketfs_utils_python/bucketfs_utils.py @@ -1,3 +1,5 @@ +import urllib.parse +from pathlib import Path, PurePosixPath from typing import Union from requests.auth import HTTPBasicAuth @@ -7,6 +9,11 @@ ARCHIVE_EXTENSIONS = [".tar.gz", ".tar.bz2", ".zip", ".tar"] +def _encode_url_part(part: str) -> str: + urlencoded = urllib.parse.quote(part) + return urlencoded + + def _correct_path_in_bucket_for_archives(path_in_bucket: str) -> str: for extension in ARCHIVE_EXTENSIONS: if path_in_bucket.endswith(extension): @@ -44,7 +51,8 @@ def generate_bucket_udf_path(bucket_config: BucketConfig, path_in_bucket: Union[ return path -def generate_bucketfs_http_url(bucketfs_config: BucketFsConfig, with_credentials: bool = False) -> str: +def generate_bucketfs_http_url(bucketfs_config: BucketFsConfig, + with_credentials: bool = False) -> urllib.parse.ParseResult: """ This function generates the HTTP[s] url for the given BucketFSConfig with or without basic authentication (http[s]://user:password@host:port) @@ -53,22 +61,26 @@ def generate_bucketfs_http_url(bucketfs_config: BucketFsConfig, with_credentials :return: HTTP[S] URL of the BucketFS as string """ if bucketfs_config.connection_config is None: - raise TypeError("bucket_config.bucketfs_config.connection_config can't be None for this operations") + raise ValueError("bucket_config.bucketfs_config.connection_config can't be None for this operations") if with_credentials: - credentials = f"{bucketfs_config.connection_config.user}:{bucketfs_config.connection_config.pwd}@" + encoded_password = _encode_url_part(bucketfs_config.connection_config.pwd) + encoded_user = _encode_url_part(bucketfs_config.connection_config.user) + credentials = f"{encoded_user}:{encoded_password}@" else: credentials = "" if bucketfs_config.connection_config.is_https: protocol = "https" else: protocol = "http" + encoded_host = _encode_url_part(bucketfs_config.connection_config.host) url = f"{protocol}://{credentials}" \ - f"{bucketfs_config.connection_config.host}:{bucketfs_config.connection_config.port}" - return url + f"{encoded_host}:{bucketfs_config.connection_config.port}" + urlparse = urllib.parse.urlparse(url) + return urlparse def generate_bucket_http_url(bucket_config: BucketConfig, path_in_bucket: Union[None, str], - with_credentials: bool = False): + with_credentials: bool = False) -> urllib.parse.ParseResult: """ This function generates the HTTP[s] url for the given bucket ot the path in the bucket with or without basic authentication (http[s]://user:password@host:port) @@ -78,12 +90,17 @@ def generate_bucket_http_url(bucket_config: BucketConfig, path_in_bucket: Union[ :return: HTTP[S] URL of the bucket or the path in the bucket as string """ url = generate_bucketfs_http_url(bucket_config.bucketfs_config, with_credentials) - url = url + f"/{bucket_config.bucket_name}" if path_in_bucket is not None: if path_in_bucket.startswith("/"): path_in_bucket = path_in_bucket[1:] - url += f"/{path_in_bucket}" - return url + encoded_bucket_and_path_in_bucket = \ + "/".join( + _encode_url_part(part) + for part in + PurePosixPath(bucket_config.bucket_name, path_in_bucket).parts) + url = urllib.parse.urljoin(url.geturl(), encoded_bucket_and_path_in_bucket) + urlparse = urllib.parse.urlparse(url) + return urlparse def create_auth_object(bucket_config: BucketConfig) -> HTTPBasicAuth: diff --git a/exasol_bucketfs_utils_python/download.py b/exasol_bucketfs_utils_python/download.py index a4ac28c7..f8066cc4 100644 --- a/exasol_bucketfs_utils_python/download.py +++ b/exasol_bucketfs_utils_python/download.py @@ -16,18 +16,22 @@ def download_from_bucketfs_to_file(bucket_config: BucketConfig, bucket_file_path def download_from_bucketfs_to_fileobj(bucket_config: BucketConfig, bucket_file_path: str, fileobj: typing.IO): + if bucket_file_path is None: + raise ValueError("bucket_file_path can't be None") url = generate_bucket_http_url(bucket_config, bucket_file_path) auth = bucketfs_utils.create_auth_object(bucket_config) - with requests.get(url, stream=True, auth=auth) as response: + with requests.get(url.geturl(), stream=True, auth=auth) as response: response.raise_for_status() for chunk in response.iter_content(chunk_size=8192): fileobj.write(chunk) def download_from_bucketfs_to_string(bucket_config: BucketConfig, bucket_file_path: str) -> str: + if bucket_file_path is None: + raise ValueError("bucket_file_path can't be None") url = generate_bucket_http_url(bucket_config, bucket_file_path) auth = bucketfs_utils.create_auth_object(bucket_config) - response = requests.get(url, auth=auth) + response = requests.get(url.geturl(), auth=auth) response.raise_for_status() return response.text diff --git a/exasol_bucketfs_utils_python/upload.py b/exasol_bucketfs_utils_python/upload.py index bb5747df..4f082bfb 100644 --- a/exasol_bucketfs_utils_python/upload.py +++ b/exasol_bucketfs_utils_python/upload.py @@ -12,22 +12,26 @@ def upload_file_to_bucketfs(bucket_config: BucketConfig, bucket_file_path: str, local_file_path: Path): with local_file_path.open("rb") as f: - upload_fileobj_to_bucketfs(bucket_config, bucket_file_path, f) + return upload_fileobj_to_bucketfs(bucket_config, bucket_file_path, f) def upload_fileobj_to_bucketfs(bucket_config: BucketConfig, bucket_file_path: str, fileobj: typing.IO): + if bucket_file_path is None: + raise ValueError("bucket_file_path can't be None") url = generate_bucket_http_url(bucket_config, bucket_file_path) auth = bucketfs_utils.create_auth_object(bucket_config) - response = requests.put(url, data=fileobj, auth=auth) + response = requests.put(url.geturl(), data=fileobj, auth=auth) response.raise_for_status() path = generate_bucket_udf_path(bucket_config, bucket_file_path) return url, path def upload_string_to_bucketfs(bucket_config: BucketConfig, bucket_file_path: str, string: str): + if bucket_file_path is None: + raise ValueError("bucket_file_path can't be None") url = generate_bucket_http_url(bucket_config, bucket_file_path) auth = bucketfs_utils.create_auth_object(bucket_config) - response = requests.put(url, data=string.encode("UTF-8"), auth=auth) + response = requests.put(url.geturl(), data=string.encode("UTF-8"), auth=auth) response.raise_for_status() path = generate_bucket_udf_path(bucket_config, bucket_file_path) return url, path diff --git a/tests/test_bucketfs_utils.py b/tests/test_bucketfs_utils.py index a2177d9b..4299efe0 100644 --- a/tests/test_bucketfs_utils.py +++ b/tests/test_bucketfs_utils.py @@ -5,7 +5,7 @@ def test_generate_bucket_udf_path_non_archive_file(): - connection_config = BucketFSConnectionConfig(host="localhost", port="6666", user="w", pwd="write", is_https=False) + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False) bucketfs_config = BucketFsConfig(connection_config=connection_config, bucketfs_name="bfsdefault") bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) @@ -30,7 +30,7 @@ def test_generate_bucket_udf_path_trailing_slash(): @pytest.mark.parametrize("extension", ["tar.gz", "zip", "tar.bz2", "tar"]) def test_generate_bucket_udf_path_archive_tar(extension): - connection_config = BucketFSConnectionConfig(host="localhost", port="6666", user="w", pwd="write", is_https=False) + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False) bucketfs_config = BucketFsConfig(connection_config=connection_config, bucketfs_name="bfsdefault") bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) @@ -41,8 +41,8 @@ def test_generate_bucket_udf_path_archive_tar(extension): assert udf_path == "/buckets/bfsdefault/default/path/in/bucket/test_file" -def test_generate_bucket_url_file(): - connection_config = BucketFSConnectionConfig(host="localhost", port="6666", user="w", pwd="write", is_https=False) +def test_generate_bucket_url_file_write_access(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False) bucketfs_config = BucketFsConfig(connection_config=connection_config, bucketfs_name="bfsdefault") bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) @@ -50,11 +50,11 @@ def test_generate_bucket_url_file(): bucket_config=bucket_config, path_in_bucket="path/in/bucket/test_file.txt" ) - assert udf_path == "http://localhost:6666/default/path/in/bucket/test_file.txt" + assert udf_path.geturl() == "http://localhost:6666/default/path/in/bucket/test_file.txt" def test_generate_bucket_url_file_trailing_slash(): - connection_config = BucketFSConnectionConfig(host="localhost", port="6666", user="w", pwd="write", is_https=False) + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False) bucketfs_config = BucketFsConfig(connection_config=connection_config, bucketfs_name="bfsdefault") bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) @@ -62,11 +62,51 @@ def test_generate_bucket_url_file_trailing_slash(): bucket_config=bucket_config, path_in_bucket="/path/in/bucket/test_file.txt" ) - assert udf_path == "http://localhost:6666/default/path/in/bucket/test_file.txt" + assert udf_path.geturl() == "http://localhost:6666/default/path/in/bucket/test_file.txt" -def test_generate_bucket_url_file_with_credentialsh(): - connection_config = BucketFSConnectionConfig(host="localhost", port="6666", user="w", pwd="write", is_https=False) +def test_generate_bucket_url_file_with_credentials(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False) + bucketfs_config = BucketFsConfig(connection_config=connection_config, + bucketfs_name="bfsdefault") + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) + udf_path = bucketfs_utils.generate_bucket_http_url( + bucket_config=bucket_config, + path_in_bucket="path/in/bucket/test_file.txt", + with_credentials=True + ) + assert udf_path.geturl() == "http://w:write@localhost:6666/default/path/in/bucket/test_file.txt" + + +def test_generate_bucket_url_file_with_ip(): + connection_config = BucketFSConnectionConfig(host="127.0.0.1", port=6666, user="w", pwd="write", is_https=False) + bucketfs_config = BucketFsConfig(connection_config=connection_config, + bucketfs_name="bfsdefault") + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) + udf_path = bucketfs_utils.generate_bucket_http_url( + bucket_config=bucket_config, + path_in_bucket="path/in/bucket/test_file.txt", + with_credentials=True + ) + assert udf_path.geturl() == "http://w:write@127.0.0.1:6666/default/path/in/bucket/test_file.txt" + + +def test_generate_bucket_url_file_with_whitespace_in_host(): + connection_config = BucketFSConnectionConfig(host="local host", port=6666, user="w", pwd="write", is_https=False) + bucketfs_config = BucketFsConfig(connection_config=connection_config, + bucketfs_name="bfsdefault") + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) + udf_path = bucketfs_utils.generate_bucket_http_url( + bucket_config=bucket_config, + path_in_bucket="path/in/bucket/test_file.txt", + with_credentials=True + ) + assert udf_path.geturl() == "http://w:write@local%20host:6666/default/path/in/bucket/test_file.txt" + + +def test_generate_bucket_url_file_with_whitespace_in_password(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write write", + is_https=False) bucketfs_config = BucketFsConfig(connection_config=connection_config, bucketfs_name="bfsdefault") bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) @@ -75,4 +115,48 @@ def test_generate_bucket_url_file_with_credentialsh(): path_in_bucket="path/in/bucket/test_file.txt", with_credentials=True ) - assert udf_path == "http://w:write@localhost:6666/default/path/in/bucket/test_file.txt" + assert udf_path.geturl() == "http://w:write%20write@localhost:6666/default/path/in/bucket/test_file.txt" + +def test_generate_bucket_url_file_with_whitespace_in_bucket_name(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", + is_https=False) + bucketfs_config = BucketFsConfig(connection_config=connection_config, + bucketfs_name="bfsdefault") + bucket_config = BucketConfig(bucket_name="default default", bucketfs_config=bucketfs_config) + udf_path = bucketfs_utils.generate_bucket_http_url( + bucket_config=bucket_config, + path_in_bucket="path/in/bucket/test_file.txt", + with_credentials=True + ) + assert udf_path.geturl() == "http://w:write@localhost:6666/default%20default/path/in/bucket/test_file.txt" + +def test_generate_bucket_url_file_with_whitespace_in_path_in_bucket(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", + is_https=False) + bucketfs_config = BucketFsConfig(connection_config=connection_config, + bucketfs_name="bfsdefault") + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) + udf_path = bucketfs_utils.generate_bucket_http_url( + bucket_config=bucket_config, + path_in_bucket="path/in/bucket/test file.txt", + with_credentials=True + ) + assert udf_path.geturl() == "http://w:write@localhost:6666/default/path/in/bucket/test%20file.txt" + +def test_generate_bucket_url_file_read_only_access(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="r", pwd="read", is_https=False) + bucketfs_config = BucketFsConfig(connection_config=connection_config, + bucketfs_name="bfsdefault") + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) + udf_path = bucketfs_utils.generate_bucket_http_url( + bucket_config=bucket_config, + path_in_bucket="path/in/bucket/test_file.txt", + with_credentials=True + ) + assert udf_path.geturl() == "http://r:read@localhost:6666/default/path/in/bucket/test_file.txt" + + +def test_generate_bucket_url_file_with_not_allowed_user(): + with pytest.raises(ValueError): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="abc", pwd="write", + is_https=False) From a8ea397d610c5142597e29cdcadc466647eb2880 Mon Sep 17 00:00:00 2001 From: Torsten Kilias Date: Wed, 3 Mar 2021 15:04:45 +0100 Subject: [PATCH 10/24] Harden path generation using pathlib and add more checks for Config initialization --- .../bucketfs_config.py | 10 +++- .../bucketfs_utils.py | 49 ++++++++++++------- tests/test_bucketfs_utils.py | 11 +++-- 3 files changed, 47 insertions(+), 23 deletions(-) diff --git a/exasol_bucketfs_utils_python/bucketfs_config.py b/exasol_bucketfs_utils_python/bucketfs_config.py index 0d179528..5b2ef9cb 100644 --- a/exasol_bucketfs_utils_python/bucketfs_config.py +++ b/exasol_bucketfs_utils_python/bucketfs_config.py @@ -9,11 +9,15 @@ class BucketFSConnectionConfig: def __init__(self, host: str, port: int, user: str, pwd: str, is_https=False): self.is_https = is_https + if host == "": + raise ValueError("Host can't be an empty string") self.host = host self.port = port - if user not in ["w", "r"]: # The BucketFs currently supports only these two users + if user not in ["w", "r"]: # The BucketFs currently supports only these two users raise ValueError(f"User can only be, 'w' (read-write access) or 'r' (read-only access), but got {user}") self.user = user + if pwd == "": + raise ValueError("Password can't be an empty string") self.pwd = pwd @@ -27,6 +31,8 @@ class BucketFsConfig: def __init__(self, bucketfs_name: str, connection_config: Union[BucketFSConnectionConfig, None] = None): self.connection_config = connection_config + if bucketfs_name == "": + raise ValueError("BucketFS name can't be an empty string") self.bucketfs_name = bucketfs_name @@ -39,5 +45,7 @@ class BucketConfig: def __init__(self, bucket_name: str, bucketfs_config: BucketFsConfig): if bucketfs_config is None: raise TypeError("bucketfs_config can't be None") + if bucket_name == "": + raise ValueError("Bucket name can't be an empty string") self.bucket_name = bucket_name self.bucketfs_config = bucketfs_config diff --git a/exasol_bucketfs_utils_python/bucketfs_utils.py b/exasol_bucketfs_utils_python/bucketfs_utils.py index 2d0e6d87..b025c94b 100644 --- a/exasol_bucketfs_utils_python/bucketfs_utils.py +++ b/exasol_bucketfs_utils_python/bucketfs_utils.py @@ -1,5 +1,5 @@ import urllib.parse -from pathlib import Path, PurePosixPath +from pathlib import PurePosixPath from typing import Union from requests.auth import HTTPBasicAuth @@ -14,40 +14,52 @@ def _encode_url_part(part: str) -> str: return urlencoded -def _correct_path_in_bucket_for_archives(path_in_bucket: str) -> str: +def _correct_path_in_bucket_for_archives(path_in_bucket: PurePosixPath) -> PurePosixPath: for extension in ARCHIVE_EXTENSIONS: - if path_in_bucket.endswith(extension): - path_in_bucket = path_in_bucket[:-len(extension)] + print(path_in_bucket.name) + if path_in_bucket.name.endswith(extension): + path_in_bucket = PurePosixPath(path_in_bucket.parent, + path_in_bucket.name[:-len(extension)]) + print(path_in_bucket) break return path_in_bucket -def generate_bucketfs_udf_path(bucketfs_config: BucketFsConfig) -> str: +def _make_path_relative(path_in_bucket: Union[None, str, PurePosixPath]) -> PurePosixPath: + path_in_bucket = PurePosixPath(path_in_bucket) + if path_in_bucket.is_absolute(): + path_in_bucket = path_in_bucket.relative_to(PurePosixPath("/")) + return path_in_bucket + + +def generate_bucketfs_udf_path(bucketfs_config: BucketFsConfig) -> PurePosixPath: """ This function generates the path where UDFs can access the content of a BucketFS in there file system :param bucketfs_config: Config of the BucketFS, the BucketFSConnectionConfig in the BucketFSConfig can None - :return: Path of the given BucketFS in the file system of UDFs as string + :return: Path of the given BucketFS in the file system of UDFs """ - path = f"/buckets/{bucketfs_config.bucketfs_name}" + path = PurePosixPath("/buckets/", bucketfs_config.bucketfs_name) return path -def generate_bucket_udf_path(bucket_config: BucketConfig, path_in_bucket: Union[None, str]) -> str: +def generate_bucket_udf_path(bucket_config: BucketConfig, + path_in_bucket: Union[None, str, PurePosixPath]) -> PurePosixPath: """ This function generates the path where UDFs can access the content of a bucket or the given Path in a bucket in there file system :param bucket_config: Config of the Bucket, the BucketFSConnectionConfig in the BucketFSConfig can be None :param path_in_bucket: If not None, path_in_bucket gets concatenated to the path of the bucket - :return: Path of the bucket or the file in the Bucket in the file system of UDFs as string + :return: Path of the bucket or the file in the Bucket in the file system of UDFs """ bucketfs_path = generate_bucketfs_udf_path(bucket_config.bucketfs_config) - path = f"{bucketfs_path}/{bucket_config.bucket_name}" + path = PurePosixPath(bucketfs_path, bucket_config.bucket_name) if path_in_bucket is not None: + path_in_bucket = _make_path_relative(path_in_bucket) path_in_bucket = _correct_path_in_bucket_for_archives(path_in_bucket) - if path_in_bucket.startswith("/"): - path_in_bucket = path_in_bucket[1:] - path = f"{path}/{path_in_bucket}" + else: + path_in_bucket = "" + path = PurePosixPath(path, path_in_bucket) return path @@ -58,7 +70,7 @@ def generate_bucketfs_http_url(bucketfs_config: BucketFsConfig, with or without basic authentication (http[s]://user:password@host:port) :param bucketfs_config: A BucketFSConfig with a non None BucketFSConnectionConfig :param with_credentials: If True, this function generates a url with basic authentication, default False - :return: HTTP[S] URL of the BucketFS as string + :return: HTTP[S] URL of the BucketFS """ if bucketfs_config.connection_config is None: raise ValueError("bucket_config.bucketfs_config.connection_config can't be None for this operations") @@ -79,7 +91,7 @@ def generate_bucketfs_http_url(bucketfs_config: BucketFsConfig, return urlparse -def generate_bucket_http_url(bucket_config: BucketConfig, path_in_bucket: Union[None, str], +def generate_bucket_http_url(bucket_config: BucketConfig, path_in_bucket: Union[None, str, PurePosixPath], with_credentials: bool = False) -> urllib.parse.ParseResult: """ This function generates the HTTP[s] url for the given bucket ot the path in the bucket @@ -87,12 +99,13 @@ def generate_bucket_http_url(bucket_config: BucketConfig, path_in_bucket: Union[ :param bucket_config: Config of the Bucket, the BucketFSConnectionConfig in the BucketFSConfig must be not None :param path_in_bucket: If not None, path_in_bucket gets concatenated to the path of the bucket :param with_credentials: If True, this function generates a url with basic authentication, default False - :return: HTTP[S] URL of the bucket or the path in the bucket as string + :return: HTTP[S] URL of the bucket or the path in the bucket """ url = generate_bucketfs_http_url(bucket_config.bucketfs_config, with_credentials) if path_in_bucket is not None: - if path_in_bucket.startswith("/"): - path_in_bucket = path_in_bucket[1:] + path_in_bucket = _make_path_relative(path_in_bucket) + else: + path_in_bucket = "" encoded_bucket_and_path_in_bucket = \ "/".join( _encode_url_part(part) diff --git a/tests/test_bucketfs_utils.py b/tests/test_bucketfs_utils.py index 4299efe0..85982d69 100644 --- a/tests/test_bucketfs_utils.py +++ b/tests/test_bucketfs_utils.py @@ -13,11 +13,11 @@ def test_generate_bucket_udf_path_non_archive_file(): bucket_config=bucket_config, path_in_bucket="path/in/bucket/test_file.txt" ) - assert udf_path == "/buckets/bfsdefault/default/path/in/bucket/test_file.txt" + assert str(udf_path) == "/buckets/bfsdefault/default/path/in/bucket/test_file.txt" def test_generate_bucket_udf_path_trailing_slash(): - connection_config = BucketFSConnectionConfig(host="localhost", port="6666", user="w", pwd="write", is_https=False) + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False) bucketfs_config = BucketFsConfig(connection_config=connection_config, bucketfs_name="bfsdefault") bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) @@ -25,7 +25,7 @@ def test_generate_bucket_udf_path_trailing_slash(): bucket_config=bucket_config, path_in_bucket="/path/in/bucket/test_file.txt" ) - assert udf_path == "/buckets/bfsdefault/default/path/in/bucket/test_file.txt" + assert str(udf_path) == "/buckets/bfsdefault/default/path/in/bucket/test_file.txt" @pytest.mark.parametrize("extension", ["tar.gz", "zip", "tar.bz2", "tar"]) @@ -38,7 +38,7 @@ def test_generate_bucket_udf_path_archive_tar(extension): bucket_config=bucket_config, path_in_bucket=f"path/in/bucket/test_file.{extension}" ) - assert udf_path == "/buckets/bfsdefault/default/path/in/bucket/test_file" + assert str(udf_path) == "/buckets/bfsdefault/default/path/in/bucket/test_file" def test_generate_bucket_url_file_write_access(): @@ -117,6 +117,7 @@ def test_generate_bucket_url_file_with_whitespace_in_password(): ) assert udf_path.geturl() == "http://w:write%20write@localhost:6666/default/path/in/bucket/test_file.txt" + def test_generate_bucket_url_file_with_whitespace_in_bucket_name(): connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False) @@ -130,6 +131,7 @@ def test_generate_bucket_url_file_with_whitespace_in_bucket_name(): ) assert udf_path.geturl() == "http://w:write@localhost:6666/default%20default/path/in/bucket/test_file.txt" + def test_generate_bucket_url_file_with_whitespace_in_path_in_bucket(): connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False) @@ -143,6 +145,7 @@ def test_generate_bucket_url_file_with_whitespace_in_path_in_bucket(): ) assert udf_path.geturl() == "http://w:write@localhost:6666/default/path/in/bucket/test%20file.txt" + def test_generate_bucket_url_file_read_only_access(): connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="r", pwd="read", is_https=False) bucketfs_config = BucketFsConfig(connection_config=connection_config, From a7517f2ea4a7ff3efeaa532a140af68359e28e0a Mon Sep 17 00:00:00 2001 From: Torsten Kilias Date: Wed, 3 Mar 2021 16:22:05 +0100 Subject: [PATCH 11/24] Hardening Config objects by runtime type checking and read only properties --- .../bucketfs_config.py | 65 ++++- .../bucketfs_utils.py | 6 +- pyproject.toml | 1 + tests/test_bucketfs_config.py | 270 ++++++++++++++++++ tests/test_bucketfs_utils.py | 42 +-- tests/test_upload_download.py | 8 +- 6 files changed, 354 insertions(+), 38 deletions(-) create mode 100644 tests/test_bucketfs_config.py diff --git a/exasol_bucketfs_utils_python/bucketfs_config.py b/exasol_bucketfs_utils_python/bucketfs_config.py index 5b2ef9cb..26881e01 100644 --- a/exasol_bucketfs_utils_python/bucketfs_config.py +++ b/exasol_bucketfs_utils_python/bucketfs_config.py @@ -1,5 +1,7 @@ from typing import Union +from typeguard import typechecked + class BucketFSConnectionConfig: """ @@ -7,21 +9,42 @@ class BucketFSConnectionConfig: to connect to the BucketFS Server via HTTP[s] """ + @typechecked(always=True) def __init__(self, host: str, port: int, user: str, pwd: str, is_https=False): - self.is_https = is_https + self._is_https = is_https if host == "": raise ValueError("Host can't be an empty string") - self.host = host - self.port = port + self._host = host + self._port = port if user not in ["w", "r"]: # The BucketFs currently supports only these two users raise ValueError(f"User can only be, 'w' (read-write access) or 'r' (read-only access), but got {user}") - self.user = user + self._user = user if pwd == "": raise ValueError("Password can't be an empty string") - self.pwd = pwd + self._pwd = pwd + + @property + def is_https(self) -> bool: + return self._is_https + + @property + def host(self) -> str: + return self._host + + @property + def port(self) -> int: + return self._port + + @property + def user(self) -> str: + return self._user + @property + def pwd(self) -> str: + return self._pwd -class BucketFsConfig: + +class BucketFSConfig: """ The BucketFSConfig contains all required information to access it either via HTTP[S] or in the file system inside of UDFs. @@ -29,11 +52,20 @@ class BucketFsConfig: because in UDF we sometimes don't want to use HTTP[S]. """ + @typechecked(always=True) def __init__(self, bucketfs_name: str, connection_config: Union[BucketFSConnectionConfig, None] = None): - self.connection_config = connection_config + self._connection_config = connection_config if bucketfs_name == "": raise ValueError("BucketFS name can't be an empty string") - self.bucketfs_name = bucketfs_name + self._bucketfs_name = bucketfs_name + + @property + def bucketfs_name(self) -> str: + return self._bucketfs_name + + @property + def connection_config(self) -> Union[BucketFSConnectionConfig, None]: + return self._connection_config class BucketConfig: @@ -42,10 +74,17 @@ class BucketConfig: to access it either via HTTP[S] or in the file system inside of UDFs. """ - def __init__(self, bucket_name: str, bucketfs_config: BucketFsConfig): - if bucketfs_config is None: - raise TypeError("bucketfs_config can't be None") + @typechecked(always=True) + def __init__(self, bucket_name: str, bucketfs_config: BucketFSConfig): if bucket_name == "": raise ValueError("Bucket name can't be an empty string") - self.bucket_name = bucket_name - self.bucketfs_config = bucketfs_config + self._bucket_name = bucket_name + self._bucketfs_config = bucketfs_config + + @property + def bucket_name(self) -> str: + return self._bucket_name + + @property + def bucketfs_config(self) -> BucketFSConfig: + return self._bucketfs_config diff --git a/exasol_bucketfs_utils_python/bucketfs_utils.py b/exasol_bucketfs_utils_python/bucketfs_utils.py index b025c94b..18aacd26 100644 --- a/exasol_bucketfs_utils_python/bucketfs_utils.py +++ b/exasol_bucketfs_utils_python/bucketfs_utils.py @@ -4,7 +4,7 @@ from requests.auth import HTTPBasicAuth -from exasol_bucketfs_utils_python.bucketfs_config import BucketConfig, BucketFsConfig +from exasol_bucketfs_utils_python.bucketfs_config import BucketConfig, BucketFSConfig ARCHIVE_EXTENSIONS = [".tar.gz", ".tar.bz2", ".zip", ".tar"] @@ -32,7 +32,7 @@ def _make_path_relative(path_in_bucket: Union[None, str, PurePosixPath]) -> Pure return path_in_bucket -def generate_bucketfs_udf_path(bucketfs_config: BucketFsConfig) -> PurePosixPath: +def generate_bucketfs_udf_path(bucketfs_config: BucketFSConfig) -> PurePosixPath: """ This function generates the path where UDFs can access the content of a BucketFS in there file system :param bucketfs_config: Config of the BucketFS, the BucketFSConnectionConfig in the BucketFSConfig can None @@ -63,7 +63,7 @@ def generate_bucket_udf_path(bucket_config: BucketConfig, return path -def generate_bucketfs_http_url(bucketfs_config: BucketFsConfig, +def generate_bucketfs_http_url(bucketfs_config: BucketFSConfig, with_credentials: bool = False) -> urllib.parse.ParseResult: """ This function generates the HTTP[s] url for the given BucketFSConfig diff --git a/pyproject.toml b/pyproject.toml index 3e8ab0dc..382e9766 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,7 @@ keywords = ['exasol', 'bucketfs'] python = ">=3.6.1" requests = "^2.24.0" joblib="^1.0.1" +typeguard = "^2.11.1" [tool.poetry.dev-dependencies] pytest = "^6.1.1" diff --git a/tests/test_bucketfs_config.py b/tests/test_bucketfs_config.py new file mode 100644 index 00000000..eae0a5d8 --- /dev/null +++ b/tests/test_bucketfs_config.py @@ -0,0 +1,270 @@ +import pytest + +from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConnectionConfig, BucketFSConfig, BucketConfig + + +def test_bucketfs_connection_config_with_read_user(): + host = "localhost" + port = 6666 + user = "r" + pwd = "read" + is_https = False + connection_config = BucketFSConnectionConfig(host=host, port=port, user=user, pwd=pwd, + is_https=is_https) + assert connection_config.host == host and \ + connection_config.port == port and \ + connection_config.user == user and \ + connection_config.pwd == pwd and \ + connection_config.is_https == is_https + + +def test_bucketfs_connection_config_with_write_user(): + host = "localhost" + port = 6666 + user = "w" + pwd = "write" + is_https = False + connection_config = BucketFSConnectionConfig(host=host, port=port, user=user, pwd=pwd, + is_https=is_https) + assert connection_config.host == host and \ + connection_config.port == port and \ + connection_config.user == user and \ + connection_config.pwd == pwd and \ + connection_config.is_https == is_https + + +def test_bucketfs_connection_config_with_https(): + host = "localhost" + port = 6666 + user = "w" + pwd = "write" + is_https = True + connection_config = BucketFSConnectionConfig(host=host, port=port, user=user, pwd=pwd, + is_https=is_https) + assert connection_config.host == host and \ + connection_config.port == port and \ + connection_config.user == user and \ + connection_config.pwd == pwd and \ + connection_config.is_https == is_https + + +def test_bucketfs_connection_config_set_https(): + host = "localhost" + port = 6666 + user = "w" + pwd = "write" + is_https = True + connection_config = BucketFSConnectionConfig(host=host, port=port, user=user, pwd=pwd, + is_https=is_https) + with pytest.raises(AttributeError): + connection_config.is_https = False + + +def test_bucketfs_connection_config_set_host(): + host = "localhost" + port = 6666 + user = "w" + pwd = "write" + is_https = True + connection_config = BucketFSConnectionConfig(host=host, port=port, user=user, pwd=pwd, + is_https=is_https) + with pytest.raises(AttributeError): + connection_config.host = "testhost" + + +def test_bucketfs_connection_config_set_port(): + host = "localhost" + port = 6666 + user = "w" + pwd = "write" + is_https = True + connection_config = BucketFSConnectionConfig(host=host, port=port, user=user, pwd=pwd, + is_https=is_https) + with pytest.raises(AttributeError): + connection_config.port = 7777 + + +def test_bucketfs_connection_config_set_user(): + host = "localhost" + port = 6666 + user = "w" + pwd = "write" + is_https = True + connection_config = BucketFSConnectionConfig(host=host, port=port, user=user, pwd=pwd, + is_https=is_https) + with pytest.raises(AttributeError): + connection_config.user = "r" + + +def test_bucketfs_connection_config_set_pwd(): + host = "localhost" + port = 6666 + user = "w" + pwd = "write" + is_https = True + connection_config = BucketFSConnectionConfig(host=host, port=port, user=user, pwd=pwd, + is_https=is_https) + with pytest.raises(AttributeError): + connection_config.pwd = "abc" + + +def test_bucketfs_connection_config_with_not_allowed_user(): + with pytest.raises(ValueError): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="abc", pwd="write", + is_https=False) + + +def test_bucketfs_connection_config_with_empty_host(): + with pytest.raises(ValueError): + connection_config = BucketFSConnectionConfig(host="", port=6666, user="w", pwd="write", + is_https=False) + + +def test_bucketfs_connection_config_with_empty_user(): + with pytest.raises(ValueError): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="", pwd="write", + is_https=False) + + +def test_bucketfs_connection_config_with_empty_password(): + with pytest.raises(ValueError): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="", + is_https=False) + + +def test_bucketfs_connection_config_with_none_as_host(): + with pytest.raises(TypeError): + connection_config = BucketFSConnectionConfig(host=None, port=6666, user="w", pwd="write", + is_https=False) + + +def test_bucketfs_connection_config_with_none_as_port(): + with pytest.raises(TypeError): + connection_config = BucketFSConnectionConfig(host="localhost", port=None, user="w", pwd="write", + is_https=False) + + +def test_bucketfs_connection_config_with_none_as_user(): + with pytest.raises(TypeError): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user=None, pwd="write", + is_https=False) + + +def test_bucketfs_connection_config_with_none_as_password(): + with pytest.raises(TypeError): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd=None, + is_https=False) + + +def test_bucketfs_config_with_empty_bucketfs_name(): + with pytest.raises(ValueError): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", + is_https=False) + + bucketfs_config = BucketFSConfig(bucketfs_name="", connection_config=connection_config) + + +def test_bucketfs_config_with_bucketfs_connection_config(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", + is_https=False) + + bucketfs_name = "bfsdefault" + bucketfs_config = BucketFSConfig(bucketfs_name=bucketfs_name, connection_config=connection_config) + + assert bucketfs_config.bucketfs_name == bucketfs_name and \ + bucketfs_config.connection_config == connection_config + + +def test_bucketfs_config_without_bucketfs_connection_config(): + bucketfs_name = "bfsdefault" + bucketfs_config = BucketFSConfig(bucketfs_name=bucketfs_name) + assert bucketfs_config.bucketfs_name == bucketfs_name and \ + bucketfs_config.connection_config == None + + +def test_bucketfs_config_with_none_as_bucketfs_name(): + with pytest.raises(TypeError): + bucketfs_name = None + bucketfs_config = BucketFSConfig(bucketfs_name=bucketfs_name) + + +def test_bucket_config_with_bucketfs_config(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", + is_https=False) + + bucketfs_config = BucketFSConfig(bucketfs_name="bfsdefault", connection_config=connection_config) + + bucket_name = "default" + bucket_config = BucketConfig(bucket_name=bucket_name, bucketfs_config=bucketfs_config) + + assert bucket_config.bucket_name == bucket_name and \ + bucket_config.bucketfs_config == bucketfs_config + + +def test_bucketfs_config_set_bucketfs_name(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", + is_https=False) + + bucketfs_name = "bfsdefault" + bucketfs_config = BucketFSConfig(bucketfs_name=bucketfs_name, connection_config=connection_config) + + with pytest.raises(AttributeError): + bucketfs_config.bucketfs_name = "test" + + +def test_bucketfs_config_set_bucketfs_connection_config(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", + is_https=False) + + bucketfs_name = "bfsdefault" + bucketfs_config = BucketFSConfig(bucketfs_name=bucketfs_name, connection_config=connection_config) + + with pytest.raises(AttributeError): + bucketfs_config.connection_config = None + + +def test_bucket_config_with_empty_bucket_name(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", + is_https=False) + + bucketfs_config = BucketFSConfig(bucketfs_name="bfsdefault", connection_config=connection_config) + + with pytest.raises(ValueError): + bucket_config = BucketConfig(bucket_name="", bucketfs_config=bucketfs_config) + + +def test_bucket_config_set_bucket_name(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", + is_https=False) + + bucketfs_config = BucketFSConfig(bucketfs_name="bfsdefault", connection_config=connection_config) + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) + + with pytest.raises(AttributeError): + bucket_config.bucket_name = "test" + + +def test_bucket_config_set_bucketfs_config(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", + is_https=False) + + bucketfs_config = BucketFSConfig(bucketfs_name="bfsdefault", connection_config=connection_config) + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) + + with pytest.raises(AttributeError): + bucket_config.bucketfs_config = bucketfs_config + + +def test_bucket_config_with_empty_bucketfs_config(): + with pytest.raises(TypeError): + bucket_config = BucketConfig(bucket_name="", bucketfs_config=None) + + +def test_bucket_config_with_None_as_bucket_name(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", + is_https=False) + + bucketfs_config = BucketFSConfig(bucketfs_name="bfsdefault", connection_config=connection_config) + + with pytest.raises(TypeError): + bucket_config = BucketConfig(bucket_name=None, bucketfs_config=bucketfs_config) diff --git a/tests/test_bucketfs_utils.py b/tests/test_bucketfs_utils.py index 85982d69..6efc0790 100644 --- a/tests/test_bucketfs_utils.py +++ b/tests/test_bucketfs_utils.py @@ -1,12 +1,12 @@ import pytest from exasol_bucketfs_utils_python import bucketfs_utils -from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConnectionConfig, BucketFsConfig, BucketConfig +from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConnectionConfig, BucketFSConfig, BucketConfig def test_generate_bucket_udf_path_non_archive_file(): connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False) - bucketfs_config = BucketFsConfig(connection_config=connection_config, + bucketfs_config = BucketFSConfig(connection_config=connection_config, bucketfs_name="bfsdefault") bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) udf_path = bucketfs_utils.generate_bucket_udf_path( @@ -18,7 +18,7 @@ def test_generate_bucket_udf_path_non_archive_file(): def test_generate_bucket_udf_path_trailing_slash(): connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False) - bucketfs_config = BucketFsConfig(connection_config=connection_config, + bucketfs_config = BucketFSConfig(connection_config=connection_config, bucketfs_name="bfsdefault") bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) udf_path = bucketfs_utils.generate_bucket_udf_path( @@ -31,7 +31,7 @@ def test_generate_bucket_udf_path_trailing_slash(): @pytest.mark.parametrize("extension", ["tar.gz", "zip", "tar.bz2", "tar"]) def test_generate_bucket_udf_path_archive_tar(extension): connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False) - bucketfs_config = BucketFsConfig(connection_config=connection_config, + bucketfs_config = BucketFSConfig(connection_config=connection_config, bucketfs_name="bfsdefault") bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) udf_path = bucketfs_utils.generate_bucket_udf_path( @@ -43,7 +43,7 @@ def test_generate_bucket_udf_path_archive_tar(extension): def test_generate_bucket_url_file_write_access(): connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False) - bucketfs_config = BucketFsConfig(connection_config=connection_config, + bucketfs_config = BucketFSConfig(connection_config=connection_config, bucketfs_name="bfsdefault") bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) udf_path = bucketfs_utils.generate_bucket_http_url( @@ -55,7 +55,7 @@ def test_generate_bucket_url_file_write_access(): def test_generate_bucket_url_file_trailing_slash(): connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False) - bucketfs_config = BucketFsConfig(connection_config=connection_config, + bucketfs_config = BucketFSConfig(connection_config=connection_config, bucketfs_name="bfsdefault") bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) udf_path = bucketfs_utils.generate_bucket_http_url( @@ -67,7 +67,7 @@ def test_generate_bucket_url_file_trailing_slash(): def test_generate_bucket_url_file_with_credentials(): connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False) - bucketfs_config = BucketFsConfig(connection_config=connection_config, + bucketfs_config = BucketFSConfig(connection_config=connection_config, bucketfs_name="bfsdefault") bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) udf_path = bucketfs_utils.generate_bucket_http_url( @@ -80,7 +80,7 @@ def test_generate_bucket_url_file_with_credentials(): def test_generate_bucket_url_file_with_ip(): connection_config = BucketFSConnectionConfig(host="127.0.0.1", port=6666, user="w", pwd="write", is_https=False) - bucketfs_config = BucketFsConfig(connection_config=connection_config, + bucketfs_config = BucketFSConfig(connection_config=connection_config, bucketfs_name="bfsdefault") bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) udf_path = bucketfs_utils.generate_bucket_http_url( @@ -93,7 +93,7 @@ def test_generate_bucket_url_file_with_ip(): def test_generate_bucket_url_file_with_whitespace_in_host(): connection_config = BucketFSConnectionConfig(host="local host", port=6666, user="w", pwd="write", is_https=False) - bucketfs_config = BucketFsConfig(connection_config=connection_config, + bucketfs_config = BucketFSConfig(connection_config=connection_config, bucketfs_name="bfsdefault") bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) udf_path = bucketfs_utils.generate_bucket_http_url( @@ -107,7 +107,7 @@ def test_generate_bucket_url_file_with_whitespace_in_host(): def test_generate_bucket_url_file_with_whitespace_in_password(): connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write write", is_https=False) - bucketfs_config = BucketFsConfig(connection_config=connection_config, + bucketfs_config = BucketFSConfig(connection_config=connection_config, bucketfs_name="bfsdefault") bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) udf_path = bucketfs_utils.generate_bucket_http_url( @@ -121,7 +121,7 @@ def test_generate_bucket_url_file_with_whitespace_in_password(): def test_generate_bucket_url_file_with_whitespace_in_bucket_name(): connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False) - bucketfs_config = BucketFsConfig(connection_config=connection_config, + bucketfs_config = BucketFSConfig(connection_config=connection_config, bucketfs_name="bfsdefault") bucket_config = BucketConfig(bucket_name="default default", bucketfs_config=bucketfs_config) udf_path = bucketfs_utils.generate_bucket_http_url( @@ -135,7 +135,7 @@ def test_generate_bucket_url_file_with_whitespace_in_bucket_name(): def test_generate_bucket_url_file_with_whitespace_in_path_in_bucket(): connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False) - bucketfs_config = BucketFsConfig(connection_config=connection_config, + bucketfs_config = BucketFSConfig(connection_config=connection_config, bucketfs_name="bfsdefault") bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) udf_path = bucketfs_utils.generate_bucket_http_url( @@ -148,7 +148,7 @@ def test_generate_bucket_url_file_with_whitespace_in_path_in_bucket(): def test_generate_bucket_url_file_read_only_access(): connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="r", pwd="read", is_https=False) - bucketfs_config = BucketFsConfig(connection_config=connection_config, + bucketfs_config = BucketFSConfig(connection_config=connection_config, bucketfs_name="bfsdefault") bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) udf_path = bucketfs_utils.generate_bucket_http_url( @@ -158,8 +158,14 @@ def test_generate_bucket_url_file_read_only_access(): ) assert udf_path.geturl() == "http://r:read@localhost:6666/default/path/in/bucket/test_file.txt" - -def test_generate_bucket_url_file_with_not_allowed_user(): - with pytest.raises(ValueError): - connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="abc", pwd="write", - is_https=False) +def test_generate_bucket_url_file_https(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="r", pwd="read", is_https=True) + bucketfs_config = BucketFSConfig(connection_config=connection_config, + bucketfs_name="bfsdefault") + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) + udf_path = bucketfs_utils.generate_bucket_http_url( + bucket_config=bucket_config, + path_in_bucket="path/in/bucket/test_file.txt", + with_credentials=True + ) + assert udf_path.geturl() == "https://r:read@localhost:6666/default/path/in/bucket/test_file.txt" diff --git a/tests/test_upload_download.py b/tests/test_upload_download.py index ecd0ecf3..c79e3fd7 100644 --- a/tests/test_upload_download.py +++ b/tests/test_upload_download.py @@ -2,12 +2,12 @@ from tempfile import NamedTemporaryFile from exasol_bucketfs_utils_python import upload, download -from exasol_bucketfs_utils_python.bucketfs_config import BucketFsConfig, BucketFSConnectionConfig, BucketConfig +from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConfig, BucketFSConnectionConfig, BucketConfig def test_file_upload_download(): connection_config = BucketFSConnectionConfig(host="localhost", port="6666", user="w", pwd="write", is_https=False) - bucketfs_config = BucketFsConfig(connection_config=connection_config, + bucketfs_config = BucketFSConfig(connection_config=connection_config, bucketfs_name="bfsdefault") bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) with NamedTemporaryFile() as input_temp_file: @@ -32,7 +32,7 @@ def test_file_upload_download(): def test_fileobj_upload_download(): connection_config = BucketFSConnectionConfig(host="localhost", port="6666", user="w", pwd="write", is_https=False) - bucketfs_config = BucketFsConfig(connection_config=connection_config, + bucketfs_config = BucketFSConfig(connection_config=connection_config, bucketfs_name="bfsdefault") bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) with NamedTemporaryFile() as input_temp_file: @@ -59,7 +59,7 @@ def test_fileobj_upload_download(): def test_string_upload_download(): connection_config = BucketFSConnectionConfig(host="localhost", port="6666", user="w", pwd="write", is_https=False) - bucketfs_config = BucketFsConfig(connection_config=connection_config, + bucketfs_config = BucketFSConfig(connection_config=connection_config, bucketfs_name="bfsdefault") bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) test_string = "test_string" From 342af5dbcc325a7c30179d377e6c09e7cf441208 Mon Sep 17 00:00:00 2001 From: Torsten Kilias Date: Wed, 3 Mar 2021 16:39:17 +0100 Subject: [PATCH 12/24] Introduce runtime type checking to bucketfs_utils.py --- exasol_bucketfs_utils_python/bucketfs_utils.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/exasol_bucketfs_utils_python/bucketfs_utils.py b/exasol_bucketfs_utils_python/bucketfs_utils.py index 18aacd26..82336a79 100644 --- a/exasol_bucketfs_utils_python/bucketfs_utils.py +++ b/exasol_bucketfs_utils_python/bucketfs_utils.py @@ -3,6 +3,7 @@ from typing import Union from requests.auth import HTTPBasicAuth +from typeguard import typechecked from exasol_bucketfs_utils_python.bucketfs_config import BucketConfig, BucketFSConfig @@ -31,7 +32,7 @@ def _make_path_relative(path_in_bucket: Union[None, str, PurePosixPath]) -> Pure path_in_bucket = path_in_bucket.relative_to(PurePosixPath("/")) return path_in_bucket - +@typechecked(always=True) def generate_bucketfs_udf_path(bucketfs_config: BucketFSConfig) -> PurePosixPath: """ This function generates the path where UDFs can access the content of a BucketFS in there file system @@ -41,7 +42,7 @@ def generate_bucketfs_udf_path(bucketfs_config: BucketFSConfig) -> PurePosixPath path = PurePosixPath("/buckets/", bucketfs_config.bucketfs_name) return path - +@typechecked(always=True) def generate_bucket_udf_path(bucket_config: BucketConfig, path_in_bucket: Union[None, str, PurePosixPath]) -> PurePosixPath: """ @@ -62,7 +63,7 @@ def generate_bucket_udf_path(bucket_config: BucketConfig, path = PurePosixPath(path, path_in_bucket) return path - +@typechecked(always=True) def generate_bucketfs_http_url(bucketfs_config: BucketFSConfig, with_credentials: bool = False) -> urllib.parse.ParseResult: """ @@ -73,7 +74,7 @@ def generate_bucketfs_http_url(bucketfs_config: BucketFSConfig, :return: HTTP[S] URL of the BucketFS """ if bucketfs_config.connection_config is None: - raise ValueError("bucket_config.bucketfs_config.connection_config can't be None for this operations") + raise ValueError("bucket_config.bucketfs_config.connection_config can't be None for this operation") if with_credentials: encoded_password = _encode_url_part(bucketfs_config.connection_config.pwd) encoded_user = _encode_url_part(bucketfs_config.connection_config.user) @@ -90,7 +91,7 @@ def generate_bucketfs_http_url(bucketfs_config: BucketFSConfig, urlparse = urllib.parse.urlparse(url) return urlparse - +@typechecked(always=True) def generate_bucket_http_url(bucket_config: BucketConfig, path_in_bucket: Union[None, str, PurePosixPath], with_credentials: bool = False) -> urllib.parse.ParseResult: """ @@ -118,7 +119,7 @@ def generate_bucket_http_url(bucket_config: BucketConfig, path_in_bucket: Union[ def create_auth_object(bucket_config: BucketConfig) -> HTTPBasicAuth: if bucket_config.bucketfs_config.connection_config is None: - raise TypeError("bucket_config.bucketfs_config.connection_config can't be None for this operations") + raise TypeError("bucket_config.bucketfs_config.connection_config can't be None for this operation") auth = HTTPBasicAuth( bucket_config.bucketfs_config.connection_config.user, bucket_config.bucketfs_config.connection_config.pwd) From 76ef23b4364a8d8a3c81de7a1873d7a89e941904 Mon Sep 17 00:00:00 2001 From: Torsten Kilias Date: Wed, 3 Mar 2021 17:21:10 +0100 Subject: [PATCH 13/24] Fix port type in test_upload_download.py --- tests/test_upload_download.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_upload_download.py b/tests/test_upload_download.py index c79e3fd7..202df299 100644 --- a/tests/test_upload_download.py +++ b/tests/test_upload_download.py @@ -6,7 +6,7 @@ def test_file_upload_download(): - connection_config = BucketFSConnectionConfig(host="localhost", port="6666", user="w", pwd="write", is_https=False) + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False) bucketfs_config = BucketFSConfig(connection_config=connection_config, bucketfs_name="bfsdefault") bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) @@ -31,7 +31,7 @@ def test_file_upload_download(): def test_fileobj_upload_download(): - connection_config = BucketFSConnectionConfig(host="localhost", port="6666", user="w", pwd="write", is_https=False) + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False) bucketfs_config = BucketFSConfig(connection_config=connection_config, bucketfs_name="bfsdefault") bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) @@ -58,7 +58,7 @@ def test_fileobj_upload_download(): def test_string_upload_download(): - connection_config = BucketFSConnectionConfig(host="localhost", port="6666", user="w", pwd="write", is_https=False) + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False) bucketfs_config = BucketFSConfig(connection_config=connection_config, bucketfs_name="bfsdefault") bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) From b9a222082df9a0096be9cb8439ec7e7cdcfa621b Mon Sep 17 00:00:00 2001 From: Torsten Kilias Date: Wed, 3 Mar 2021 17:28:06 +0100 Subject: [PATCH 14/24] Add --cached to git diff of setup.py in check_setup_py.yaml --- .github/workflows/check_setup_py.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/check_setup_py.yaml b/.github/workflows/check_setup_py.yaml index b512a1b5..eec2d710 100644 --- a/.github/workflows/check_setup_py.yaml +++ b/.github/workflows/check_setup_py.yaml @@ -21,7 +21,7 @@ jobs: - name: Show changes on working copy run: git status --porcelain=v1 -uno - name: Show diff on working copy - run: git diff + run: git diff --cached; cat setup.py - name: Check if setup.py changed run: | [ -z "$(git status --porcelain=v1 -uno 2>/dev/null)" ] From b1b4ce073671ccf1609fd75225168d05632a5ccd Mon Sep 17 00:00:00 2001 From: Torsten Kilias Date: Wed, 3 Mar 2021 17:32:07 +0100 Subject: [PATCH 15/24] Update setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index f4f0fe12..cbd1a2c9 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ {'': ['*']} install_requires = \ -['joblib>=1.0.1,<2.0.0', 'requests>=2.24.0,<3.0.0'] +['joblib>=1.0.1,<2.0.0', 'requests>=2.24.0,<3.0.0', 'typeguard>=2.11.1,<3.0.0'] setup_kwargs = { 'name': 'exasol-bucketfs-utils-python', From 9ee3f224b19fda27a84c4c86f9c13d1186d155d8 Mon Sep 17 00:00:00 2001 From: Torsten Kilias Date: Wed, 3 Mar 2021 17:38:07 +0100 Subject: [PATCH 16/24] Extract BucketConfig and BucketFSConnectionConfig from bucket_config.py --- exasol_bucketfs_utils_python/bucket_config.py | 25 ++ .../bucketfs_config.py | 61 +---- .../bucketfs_connection_config.py | 42 ++++ .../bucketfs_utils.py | 3 +- exasol_bucketfs_utils_python/download.py | 2 +- exasol_bucketfs_utils_python/upload.py | 2 +- tests/test_bucket_config.py | 65 ++++++ tests/test_bucketfs_config.py | 216 +----------------- tests/test_bucketfs_connection_config.py | 156 +++++++++++++ tests/test_bucketfs_utils.py | 4 +- tests/test_upload_download.py | 4 +- 11 files changed, 301 insertions(+), 279 deletions(-) create mode 100644 exasol_bucketfs_utils_python/bucket_config.py create mode 100644 exasol_bucketfs_utils_python/bucketfs_connection_config.py create mode 100644 tests/test_bucket_config.py create mode 100644 tests/test_bucketfs_connection_config.py diff --git a/exasol_bucketfs_utils_python/bucket_config.py b/exasol_bucketfs_utils_python/bucket_config.py new file mode 100644 index 00000000..df0e62bd --- /dev/null +++ b/exasol_bucketfs_utils_python/bucket_config.py @@ -0,0 +1,25 @@ +from typeguard import typechecked + +from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConfig + + +class BucketConfig: + """ + The BucketConfig contains all required information about a BucketFS + to access it either via HTTP[S] or in the file system inside of UDFs. + """ + + @typechecked(always=True) + def __init__(self, bucket_name: str, bucketfs_config: BucketFSConfig): + if bucket_name == "": + raise ValueError("Bucket name can't be an empty string") + self._bucket_name = bucket_name + self._bucketfs_config = bucketfs_config + + @property + def bucket_name(self) -> str: + return self._bucket_name + + @property + def bucketfs_config(self) -> BucketFSConfig: + return self._bucketfs_config \ No newline at end of file diff --git a/exasol_bucketfs_utils_python/bucketfs_config.py b/exasol_bucketfs_utils_python/bucketfs_config.py index 26881e01..95876cea 100644 --- a/exasol_bucketfs_utils_python/bucketfs_config.py +++ b/exasol_bucketfs_utils_python/bucketfs_config.py @@ -2,46 +2,7 @@ from typeguard import typechecked - -class BucketFSConnectionConfig: - """ - The BucketFSConnectionConfig contains all necessary information - to connect to the BucketFS Server via HTTP[s] - """ - - @typechecked(always=True) - def __init__(self, host: str, port: int, user: str, pwd: str, is_https=False): - self._is_https = is_https - if host == "": - raise ValueError("Host can't be an empty string") - self._host = host - self._port = port - if user not in ["w", "r"]: # The BucketFs currently supports only these two users - raise ValueError(f"User can only be, 'w' (read-write access) or 'r' (read-only access), but got {user}") - self._user = user - if pwd == "": - raise ValueError("Password can't be an empty string") - self._pwd = pwd - - @property - def is_https(self) -> bool: - return self._is_https - - @property - def host(self) -> str: - return self._host - - @property - def port(self) -> int: - return self._port - - @property - def user(self) -> str: - return self._user - - @property - def pwd(self) -> str: - return self._pwd +from exasol_bucketfs_utils_python.bucketfs_connection_config import BucketFSConnectionConfig class BucketFSConfig: @@ -68,23 +29,3 @@ def connection_config(self) -> Union[BucketFSConnectionConfig, None]: return self._connection_config -class BucketConfig: - """ - The BucketConfig contains all required information about a BucketFS - to access it either via HTTP[S] or in the file system inside of UDFs. - """ - - @typechecked(always=True) - def __init__(self, bucket_name: str, bucketfs_config: BucketFSConfig): - if bucket_name == "": - raise ValueError("Bucket name can't be an empty string") - self._bucket_name = bucket_name - self._bucketfs_config = bucketfs_config - - @property - def bucket_name(self) -> str: - return self._bucket_name - - @property - def bucketfs_config(self) -> BucketFSConfig: - return self._bucketfs_config diff --git a/exasol_bucketfs_utils_python/bucketfs_connection_config.py b/exasol_bucketfs_utils_python/bucketfs_connection_config.py new file mode 100644 index 00000000..1b679f3f --- /dev/null +++ b/exasol_bucketfs_utils_python/bucketfs_connection_config.py @@ -0,0 +1,42 @@ +from typeguard import typechecked + + +class BucketFSConnectionConfig: + """ + The BucketFSConnectionConfig contains all necessary information + to connect to the BucketFS Server via HTTP[s] + """ + + @typechecked(always=True) + def __init__(self, host: str, port: int, user: str, pwd: str, is_https=False): + self._is_https = is_https + if host == "": + raise ValueError("Host can't be an empty string") + self._host = host + self._port = port + if user not in ["w", "r"]: # The BucketFs currently supports only these two users + raise ValueError(f"User can only be, 'w' (read-write access) or 'r' (read-only access), but got {user}") + self._user = user + if pwd == "": + raise ValueError("Password can't be an empty string") + self._pwd = pwd + + @property + def is_https(self) -> bool: + return self._is_https + + @property + def host(self) -> str: + return self._host + + @property + def port(self) -> int: + return self._port + + @property + def user(self) -> str: + return self._user + + @property + def pwd(self) -> str: + return self._pwd \ No newline at end of file diff --git a/exasol_bucketfs_utils_python/bucketfs_utils.py b/exasol_bucketfs_utils_python/bucketfs_utils.py index 82336a79..a9c3a93b 100644 --- a/exasol_bucketfs_utils_python/bucketfs_utils.py +++ b/exasol_bucketfs_utils_python/bucketfs_utils.py @@ -5,7 +5,8 @@ from requests.auth import HTTPBasicAuth from typeguard import typechecked -from exasol_bucketfs_utils_python.bucketfs_config import BucketConfig, BucketFSConfig +from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConfig +from exasol_bucketfs_utils_python.bucket_config import BucketConfig ARCHIVE_EXTENSIONS = [".tar.gz", ".tar.bz2", ".zip", ".tar"] diff --git a/exasol_bucketfs_utils_python/download.py b/exasol_bucketfs_utils_python/download.py index f8066cc4..daa5866a 100644 --- a/exasol_bucketfs_utils_python/download.py +++ b/exasol_bucketfs_utils_python/download.py @@ -6,7 +6,7 @@ import requests from exasol_bucketfs_utils_python import bucketfs_utils -from exasol_bucketfs_utils_python.bucketfs_config import BucketConfig +from exasol_bucketfs_utils_python.bucket_config import BucketConfig from exasol_bucketfs_utils_python.bucketfs_utils import generate_bucket_http_url diff --git a/exasol_bucketfs_utils_python/upload.py b/exasol_bucketfs_utils_python/upload.py index 4f082bfb..be80cbbf 100644 --- a/exasol_bucketfs_utils_python/upload.py +++ b/exasol_bucketfs_utils_python/upload.py @@ -6,7 +6,7 @@ import requests from exasol_bucketfs_utils_python import bucketfs_utils -from exasol_bucketfs_utils_python.bucketfs_config import BucketConfig +from exasol_bucketfs_utils_python.bucket_config import BucketConfig from exasol_bucketfs_utils_python.bucketfs_utils import generate_bucket_http_url, generate_bucket_udf_path diff --git a/tests/test_bucket_config.py b/tests/test_bucket_config.py new file mode 100644 index 00000000..ad07dfc6 --- /dev/null +++ b/tests/test_bucket_config.py @@ -0,0 +1,65 @@ +import pytest + +from exasol_bucketfs_utils_python.bucket_config import BucketConfig +from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConfig +from exasol_bucketfs_utils_python.bucketfs_connection_config import BucketFSConnectionConfig + + +def test_bucket_config_with_bucketfs_config(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", + is_https=False) + + bucketfs_config = BucketFSConfig(bucketfs_name="bfsdefault", connection_config=connection_config) + + bucket_name = "default" + bucket_config = BucketConfig(bucket_name=bucket_name, bucketfs_config=bucketfs_config) + + assert bucket_config.bucket_name == bucket_name and \ + bucket_config.bucketfs_config == bucketfs_config + + +def test_bucket_config_with_empty_bucket_name(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", + is_https=False) + + bucketfs_config = BucketFSConfig(bucketfs_name="bfsdefault", connection_config=connection_config) + + with pytest.raises(ValueError): + bucket_config = BucketConfig(bucket_name="", bucketfs_config=bucketfs_config) + + +def test_bucket_config_set_bucket_name(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", + is_https=False) + + bucketfs_config = BucketFSConfig(bucketfs_name="bfsdefault", connection_config=connection_config) + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) + + with pytest.raises(AttributeError): + bucket_config.bucket_name = "test" + + +def test_bucket_config_set_bucketfs_config(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", + is_https=False) + + bucketfs_config = BucketFSConfig(bucketfs_name="bfsdefault", connection_config=connection_config) + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) + + with pytest.raises(AttributeError): + bucket_config.bucketfs_config = bucketfs_config + + +def test_bucket_config_with_empty_bucketfs_config(): + with pytest.raises(TypeError): + bucket_config = BucketConfig(bucket_name="", bucketfs_config=None) + + +def test_bucket_config_with_None_as_bucket_name(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", + is_https=False) + + bucketfs_config = BucketFSConfig(bucketfs_name="bfsdefault", connection_config=connection_config) + + with pytest.raises(TypeError): + bucket_config = BucketConfig(bucket_name=None, bucketfs_config=bucketfs_config) diff --git a/tests/test_bucketfs_config.py b/tests/test_bucketfs_config.py index eae0a5d8..b05e8949 100644 --- a/tests/test_bucketfs_config.py +++ b/tests/test_bucketfs_config.py @@ -1,159 +1,7 @@ import pytest -from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConnectionConfig, BucketFSConfig, BucketConfig - - -def test_bucketfs_connection_config_with_read_user(): - host = "localhost" - port = 6666 - user = "r" - pwd = "read" - is_https = False - connection_config = BucketFSConnectionConfig(host=host, port=port, user=user, pwd=pwd, - is_https=is_https) - assert connection_config.host == host and \ - connection_config.port == port and \ - connection_config.user == user and \ - connection_config.pwd == pwd and \ - connection_config.is_https == is_https - - -def test_bucketfs_connection_config_with_write_user(): - host = "localhost" - port = 6666 - user = "w" - pwd = "write" - is_https = False - connection_config = BucketFSConnectionConfig(host=host, port=port, user=user, pwd=pwd, - is_https=is_https) - assert connection_config.host == host and \ - connection_config.port == port and \ - connection_config.user == user and \ - connection_config.pwd == pwd and \ - connection_config.is_https == is_https - - -def test_bucketfs_connection_config_with_https(): - host = "localhost" - port = 6666 - user = "w" - pwd = "write" - is_https = True - connection_config = BucketFSConnectionConfig(host=host, port=port, user=user, pwd=pwd, - is_https=is_https) - assert connection_config.host == host and \ - connection_config.port == port and \ - connection_config.user == user and \ - connection_config.pwd == pwd and \ - connection_config.is_https == is_https - - -def test_bucketfs_connection_config_set_https(): - host = "localhost" - port = 6666 - user = "w" - pwd = "write" - is_https = True - connection_config = BucketFSConnectionConfig(host=host, port=port, user=user, pwd=pwd, - is_https=is_https) - with pytest.raises(AttributeError): - connection_config.is_https = False - - -def test_bucketfs_connection_config_set_host(): - host = "localhost" - port = 6666 - user = "w" - pwd = "write" - is_https = True - connection_config = BucketFSConnectionConfig(host=host, port=port, user=user, pwd=pwd, - is_https=is_https) - with pytest.raises(AttributeError): - connection_config.host = "testhost" - - -def test_bucketfs_connection_config_set_port(): - host = "localhost" - port = 6666 - user = "w" - pwd = "write" - is_https = True - connection_config = BucketFSConnectionConfig(host=host, port=port, user=user, pwd=pwd, - is_https=is_https) - with pytest.raises(AttributeError): - connection_config.port = 7777 - - -def test_bucketfs_connection_config_set_user(): - host = "localhost" - port = 6666 - user = "w" - pwd = "write" - is_https = True - connection_config = BucketFSConnectionConfig(host=host, port=port, user=user, pwd=pwd, - is_https=is_https) - with pytest.raises(AttributeError): - connection_config.user = "r" - - -def test_bucketfs_connection_config_set_pwd(): - host = "localhost" - port = 6666 - user = "w" - pwd = "write" - is_https = True - connection_config = BucketFSConnectionConfig(host=host, port=port, user=user, pwd=pwd, - is_https=is_https) - with pytest.raises(AttributeError): - connection_config.pwd = "abc" - - -def test_bucketfs_connection_config_with_not_allowed_user(): - with pytest.raises(ValueError): - connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="abc", pwd="write", - is_https=False) - - -def test_bucketfs_connection_config_with_empty_host(): - with pytest.raises(ValueError): - connection_config = BucketFSConnectionConfig(host="", port=6666, user="w", pwd="write", - is_https=False) - - -def test_bucketfs_connection_config_with_empty_user(): - with pytest.raises(ValueError): - connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="", pwd="write", - is_https=False) - - -def test_bucketfs_connection_config_with_empty_password(): - with pytest.raises(ValueError): - connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="", - is_https=False) - - -def test_bucketfs_connection_config_with_none_as_host(): - with pytest.raises(TypeError): - connection_config = BucketFSConnectionConfig(host=None, port=6666, user="w", pwd="write", - is_https=False) - - -def test_bucketfs_connection_config_with_none_as_port(): - with pytest.raises(TypeError): - connection_config = BucketFSConnectionConfig(host="localhost", port=None, user="w", pwd="write", - is_https=False) - - -def test_bucketfs_connection_config_with_none_as_user(): - with pytest.raises(TypeError): - connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user=None, pwd="write", - is_https=False) - - -def test_bucketfs_connection_config_with_none_as_password(): - with pytest.raises(TypeError): - connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd=None, - is_https=False) +from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConfig +from exasol_bucketfs_utils_python.bucketfs_connection_config import BucketFSConnectionConfig def test_bucketfs_config_with_empty_bucketfs_name(): @@ -188,19 +36,6 @@ def test_bucketfs_config_with_none_as_bucketfs_name(): bucketfs_config = BucketFSConfig(bucketfs_name=bucketfs_name) -def test_bucket_config_with_bucketfs_config(): - connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", - is_https=False) - - bucketfs_config = BucketFSConfig(bucketfs_name="bfsdefault", connection_config=connection_config) - - bucket_name = "default" - bucket_config = BucketConfig(bucket_name=bucket_name, bucketfs_config=bucketfs_config) - - assert bucket_config.bucket_name == bucket_name and \ - bucket_config.bucketfs_config == bucketfs_config - - def test_bucketfs_config_set_bucketfs_name(): connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False) @@ -221,50 +56,3 @@ def test_bucketfs_config_set_bucketfs_connection_config(): with pytest.raises(AttributeError): bucketfs_config.connection_config = None - - -def test_bucket_config_with_empty_bucket_name(): - connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", - is_https=False) - - bucketfs_config = BucketFSConfig(bucketfs_name="bfsdefault", connection_config=connection_config) - - with pytest.raises(ValueError): - bucket_config = BucketConfig(bucket_name="", bucketfs_config=bucketfs_config) - - -def test_bucket_config_set_bucket_name(): - connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", - is_https=False) - - bucketfs_config = BucketFSConfig(bucketfs_name="bfsdefault", connection_config=connection_config) - bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) - - with pytest.raises(AttributeError): - bucket_config.bucket_name = "test" - - -def test_bucket_config_set_bucketfs_config(): - connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", - is_https=False) - - bucketfs_config = BucketFSConfig(bucketfs_name="bfsdefault", connection_config=connection_config) - bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) - - with pytest.raises(AttributeError): - bucket_config.bucketfs_config = bucketfs_config - - -def test_bucket_config_with_empty_bucketfs_config(): - with pytest.raises(TypeError): - bucket_config = BucketConfig(bucket_name="", bucketfs_config=None) - - -def test_bucket_config_with_None_as_bucket_name(): - connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", - is_https=False) - - bucketfs_config = BucketFSConfig(bucketfs_name="bfsdefault", connection_config=connection_config) - - with pytest.raises(TypeError): - bucket_config = BucketConfig(bucket_name=None, bucketfs_config=bucketfs_config) diff --git a/tests/test_bucketfs_connection_config.py b/tests/test_bucketfs_connection_config.py new file mode 100644 index 00000000..b1eca01b --- /dev/null +++ b/tests/test_bucketfs_connection_config.py @@ -0,0 +1,156 @@ +import pytest + +from exasol_bucketfs_utils_python.bucketfs_connection_config import BucketFSConnectionConfig + + +def test_bucketfs_connection_config_with_read_user(): + host = "localhost" + port = 6666 + user = "r" + pwd = "read" + is_https = False + connection_config = BucketFSConnectionConfig(host=host, port=port, user=user, pwd=pwd, + is_https=is_https) + assert connection_config.host == host and \ + connection_config.port == port and \ + connection_config.user == user and \ + connection_config.pwd == pwd and \ + connection_config.is_https == is_https + + +def test_bucketfs_connection_config_with_write_user(): + host = "localhost" + port = 6666 + user = "w" + pwd = "write" + is_https = False + connection_config = BucketFSConnectionConfig(host=host, port=port, user=user, pwd=pwd, + is_https=is_https) + assert connection_config.host == host and \ + connection_config.port == port and \ + connection_config.user == user and \ + connection_config.pwd == pwd and \ + connection_config.is_https == is_https + + +def test_bucketfs_connection_config_with_https(): + host = "localhost" + port = 6666 + user = "w" + pwd = "write" + is_https = True + connection_config = BucketFSConnectionConfig(host=host, port=port, user=user, pwd=pwd, + is_https=is_https) + assert connection_config.host == host and \ + connection_config.port == port and \ + connection_config.user == user and \ + connection_config.pwd == pwd and \ + connection_config.is_https == is_https + + +def test_bucketfs_connection_config_set_https(): + host = "localhost" + port = 6666 + user = "w" + pwd = "write" + is_https = True + connection_config = BucketFSConnectionConfig(host=host, port=port, user=user, pwd=pwd, + is_https=is_https) + with pytest.raises(AttributeError): + connection_config.is_https = False + + +def test_bucketfs_connection_config_set_host(): + host = "localhost" + port = 6666 + user = "w" + pwd = "write" + is_https = True + connection_config = BucketFSConnectionConfig(host=host, port=port, user=user, pwd=pwd, + is_https=is_https) + with pytest.raises(AttributeError): + connection_config.host = "testhost" + + +def test_bucketfs_connection_config_set_port(): + host = "localhost" + port = 6666 + user = "w" + pwd = "write" + is_https = True + connection_config = BucketFSConnectionConfig(host=host, port=port, user=user, pwd=pwd, + is_https=is_https) + with pytest.raises(AttributeError): + connection_config.port = 7777 + + +def test_bucketfs_connection_config_set_user(): + host = "localhost" + port = 6666 + user = "w" + pwd = "write" + is_https = True + connection_config = BucketFSConnectionConfig(host=host, port=port, user=user, pwd=pwd, + is_https=is_https) + with pytest.raises(AttributeError): + connection_config.user = "r" + + +def test_bucketfs_connection_config_set_pwd(): + host = "localhost" + port = 6666 + user = "w" + pwd = "write" + is_https = True + connection_config = BucketFSConnectionConfig(host=host, port=port, user=user, pwd=pwd, + is_https=is_https) + with pytest.raises(AttributeError): + connection_config.pwd = "abc" + + +def test_bucketfs_connection_config_with_not_allowed_user(): + with pytest.raises(ValueError): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="abc", pwd="write", + is_https=False) + + +def test_bucketfs_connection_config_with_empty_host(): + with pytest.raises(ValueError): + connection_config = BucketFSConnectionConfig(host="", port=6666, user="w", pwd="write", + is_https=False) + + +def test_bucketfs_connection_config_with_empty_user(): + with pytest.raises(ValueError): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="", pwd="write", + is_https=False) + + +def test_bucketfs_connection_config_with_empty_password(): + with pytest.raises(ValueError): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="", + is_https=False) + + +def test_bucketfs_connection_config_with_none_as_host(): + with pytest.raises(TypeError): + connection_config = BucketFSConnectionConfig(host=None, port=6666, user="w", pwd="write", + is_https=False) + + +def test_bucketfs_connection_config_with_none_as_port(): + with pytest.raises(TypeError): + connection_config = BucketFSConnectionConfig(host="localhost", port=None, user="w", pwd="write", + is_https=False) + + +def test_bucketfs_connection_config_with_none_as_user(): + with pytest.raises(TypeError): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user=None, pwd="write", + is_https=False) + + +def test_bucketfs_connection_config_with_none_as_password(): + with pytest.raises(TypeError): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd=None, + is_https=False) diff --git a/tests/test_bucketfs_utils.py b/tests/test_bucketfs_utils.py index 6efc0790..6413bf76 100644 --- a/tests/test_bucketfs_utils.py +++ b/tests/test_bucketfs_utils.py @@ -1,7 +1,9 @@ import pytest from exasol_bucketfs_utils_python import bucketfs_utils -from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConnectionConfig, BucketFSConfig, BucketConfig +from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConfig +from exasol_bucketfs_utils_python.bucketfs_connection_config import BucketFSConnectionConfig +from exasol_bucketfs_utils_python.bucket_config import BucketConfig def test_generate_bucket_udf_path_non_archive_file(): diff --git a/tests/test_upload_download.py b/tests/test_upload_download.py index 202df299..de3b5111 100644 --- a/tests/test_upload_download.py +++ b/tests/test_upload_download.py @@ -2,7 +2,9 @@ from tempfile import NamedTemporaryFile from exasol_bucketfs_utils_python import upload, download -from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConfig, BucketFSConnectionConfig, BucketConfig +from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConfig +from exasol_bucketfs_utils_python.bucketfs_connection_config import BucketFSConnectionConfig +from exasol_bucketfs_utils_python.bucket_config import BucketConfig def test_file_upload_download(): From 20254914baa4ce5765225a8c5b5dae53a65cbe3b Mon Sep 17 00:00:00 2001 From: Torsten Kilias Date: Tue, 9 Mar 2021 15:44:12 +0100 Subject: [PATCH 17/24] Apply suggestions from code review Co-authored-by: Anastasiia Sergienko <46891819+AnastasiiaSergienko@users.noreply.github.com> --- exasol_bucketfs_utils_python/bucketfs_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exasol_bucketfs_utils_python/bucketfs_utils.py b/exasol_bucketfs_utils_python/bucketfs_utils.py index a9c3a93b..7f7ed695 100644 --- a/exasol_bucketfs_utils_python/bucketfs_utils.py +++ b/exasol_bucketfs_utils_python/bucketfs_utils.py @@ -37,7 +37,7 @@ def _make_path_relative(path_in_bucket: Union[None, str, PurePosixPath]) -> Pure def generate_bucketfs_udf_path(bucketfs_config: BucketFSConfig) -> PurePosixPath: """ This function generates the path where UDFs can access the content of a BucketFS in there file system - :param bucketfs_config: Config of the BucketFS, the BucketFSConnectionConfig in the BucketFSConfig can None + :param bucketfs_config: Config of the BucketFS, the BucketFSConnectionConfig in the BucketFSConfig can be None :return: Path of the given BucketFS in the file system of UDFs """ path = PurePosixPath("/buckets/", bucketfs_config.bucketfs_name) From 8eb56bacc313676d29cd30c201639b4fa01830cd Mon Sep 17 00:00:00 2001 From: Torsten Kilias Date: Tue, 9 Mar 2021 15:58:40 +0100 Subject: [PATCH 18/24] Fix bucketfs archive file extensions --- exasol_bucketfs_utils_python/bucketfs_utils.py | 2 +- tests/test_bucketfs_utils.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/exasol_bucketfs_utils_python/bucketfs_utils.py b/exasol_bucketfs_utils_python/bucketfs_utils.py index 7f7ed695..bbe9e807 100644 --- a/exasol_bucketfs_utils_python/bucketfs_utils.py +++ b/exasol_bucketfs_utils_python/bucketfs_utils.py @@ -8,7 +8,7 @@ from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConfig from exasol_bucketfs_utils_python.bucket_config import BucketConfig -ARCHIVE_EXTENSIONS = [".tar.gz", ".tar.bz2", ".zip", ".tar"] +ARCHIVE_EXTENSIONS = [".tar.gz", ".tgz", ".zip", ".tar"] def _encode_url_part(part: str) -> str: diff --git a/tests/test_bucketfs_utils.py b/tests/test_bucketfs_utils.py index 6413bf76..0fba077d 100644 --- a/tests/test_bucketfs_utils.py +++ b/tests/test_bucketfs_utils.py @@ -30,8 +30,8 @@ def test_generate_bucket_udf_path_trailing_slash(): assert str(udf_path) == "/buckets/bfsdefault/default/path/in/bucket/test_file.txt" -@pytest.mark.parametrize("extension", ["tar.gz", "zip", "tar.bz2", "tar"]) -def test_generate_bucket_udf_path_archive_tar(extension): +@pytest.mark.parametrize("extension", ["tar.gz", "zip", "tgz", "tar"]) +def test_generate_bucket_udf_path_archive(extension): connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False) bucketfs_config = BucketFSConfig(connection_config=connection_config, bucketfs_name="bfsdefault") From 4f64bbeb28ffdc2f4f7745877c5d6a87de4a5f31 Mon Sep 17 00:00:00 2001 From: Torsten Kilias Date: Tue, 9 Mar 2021 16:12:36 +0100 Subject: [PATCH 19/24] Refactor tests --- tests/test_bucket_config.py | 26 ++++++++++---------------- tests/test_bucketfs_config.py | 21 +++++++++++---------- 2 files changed, 21 insertions(+), 26 deletions(-) diff --git a/tests/test_bucket_config.py b/tests/test_bucket_config.py index ad07dfc6..28ab476e 100644 --- a/tests/test_bucket_config.py +++ b/tests/test_bucket_config.py @@ -5,11 +5,15 @@ from exasol_bucketfs_utils_python.bucketfs_connection_config import BucketFSConnectionConfig -def test_bucket_config_with_bucketfs_config(): +def create_test_bucketfs_config(): connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False) - bucketfs_config = BucketFSConfig(bucketfs_name="bfsdefault", connection_config=connection_config) + return bucketfs_config + + +def test_bucket_config_with_bucketfs_config(): + bucketfs_config = create_test_bucketfs_config() bucket_name = "default" bucket_config = BucketConfig(bucket_name=bucket_name, bucketfs_config=bucketfs_config) @@ -19,20 +23,15 @@ def test_bucket_config_with_bucketfs_config(): def test_bucket_config_with_empty_bucket_name(): - connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", - is_https=False) - - bucketfs_config = BucketFSConfig(bucketfs_name="bfsdefault", connection_config=connection_config) + bucketfs_config = create_test_bucketfs_config() with pytest.raises(ValueError): bucket_config = BucketConfig(bucket_name="", bucketfs_config=bucketfs_config) def test_bucket_config_set_bucket_name(): - connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", - is_https=False) + bucketfs_config = create_test_bucketfs_config() - bucketfs_config = BucketFSConfig(bucketfs_name="bfsdefault", connection_config=connection_config) bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) with pytest.raises(AttributeError): @@ -40,10 +39,8 @@ def test_bucket_config_set_bucket_name(): def test_bucket_config_set_bucketfs_config(): - connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", - is_https=False) + bucketfs_config = create_test_bucketfs_config() - bucketfs_config = BucketFSConfig(bucketfs_name="bfsdefault", connection_config=connection_config) bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) with pytest.raises(AttributeError): @@ -56,10 +53,7 @@ def test_bucket_config_with_empty_bucketfs_config(): def test_bucket_config_with_None_as_bucket_name(): - connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", - is_https=False) - - bucketfs_config = BucketFSConfig(bucketfs_name="bfsdefault", connection_config=connection_config) + bucketfs_config = create_test_bucketfs_config() with pytest.raises(TypeError): bucket_config = BucketConfig(bucket_name=None, bucketfs_config=bucketfs_config) diff --git a/tests/test_bucketfs_config.py b/tests/test_bucketfs_config.py index b05e8949..d5c51481 100644 --- a/tests/test_bucketfs_config.py +++ b/tests/test_bucketfs_config.py @@ -4,17 +4,20 @@ from exasol_bucketfs_utils_python.bucketfs_connection_config import BucketFSConnectionConfig +def create_test_connection_config(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", + is_https=False) + return connection_config + + def test_bucketfs_config_with_empty_bucketfs_name(): + connection_config = create_test_connection_config() with pytest.raises(ValueError): - connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", - is_https=False) - bucketfs_config = BucketFSConfig(bucketfs_name="", connection_config=connection_config) def test_bucketfs_config_with_bucketfs_connection_config(): - connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", - is_https=False) + connection_config = create_test_connection_config() bucketfs_name = "bfsdefault" bucketfs_config = BucketFSConfig(bucketfs_name=bucketfs_name, connection_config=connection_config) @@ -31,14 +34,13 @@ def test_bucketfs_config_without_bucketfs_connection_config(): def test_bucketfs_config_with_none_as_bucketfs_name(): + bucketfs_name = None with pytest.raises(TypeError): - bucketfs_name = None bucketfs_config = BucketFSConfig(bucketfs_name=bucketfs_name) def test_bucketfs_config_set_bucketfs_name(): - connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", - is_https=False) + connection_config = create_test_connection_config() bucketfs_name = "bfsdefault" bucketfs_config = BucketFSConfig(bucketfs_name=bucketfs_name, connection_config=connection_config) @@ -48,8 +50,7 @@ def test_bucketfs_config_set_bucketfs_name(): def test_bucketfs_config_set_bucketfs_connection_config(): - connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", - is_https=False) + connection_config = create_test_connection_config() bucketfs_name = "bfsdefault" bucketfs_config = BucketFSConfig(bucketfs_name=bucketfs_name, connection_config=connection_config) From 1e5386248230187d6b843a6c99c6db7867846f50 Mon Sep 17 00:00:00 2001 From: Torsten Kilias Date: Tue, 9 Mar 2021 16:16:22 +0100 Subject: [PATCH 20/24] Fix review suggestions in bucketfs_utils.py --- exasol_bucketfs_utils_python/bucketfs_utils.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/exasol_bucketfs_utils_python/bucketfs_utils.py b/exasol_bucketfs_utils_python/bucketfs_utils.py index bbe9e807..d79fe5e9 100644 --- a/exasol_bucketfs_utils_python/bucketfs_utils.py +++ b/exasol_bucketfs_utils_python/bucketfs_utils.py @@ -5,8 +5,8 @@ from requests.auth import HTTPBasicAuth from typeguard import typechecked -from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConfig from exasol_bucketfs_utils_python.bucket_config import BucketConfig +from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConfig ARCHIVE_EXTENSIONS = [".tar.gz", ".tgz", ".zip", ".tar"] @@ -18,11 +18,9 @@ def _encode_url_part(part: str) -> str: def _correct_path_in_bucket_for_archives(path_in_bucket: PurePosixPath) -> PurePosixPath: for extension in ARCHIVE_EXTENSIONS: - print(path_in_bucket.name) if path_in_bucket.name.endswith(extension): path_in_bucket = PurePosixPath(path_in_bucket.parent, path_in_bucket.name[:-len(extension)]) - print(path_in_bucket) break return path_in_bucket @@ -33,6 +31,7 @@ def _make_path_relative(path_in_bucket: Union[None, str, PurePosixPath]) -> Pure path_in_bucket = path_in_bucket.relative_to(PurePosixPath("/")) return path_in_bucket + @typechecked(always=True) def generate_bucketfs_udf_path(bucketfs_config: BucketFSConfig) -> PurePosixPath: """ @@ -43,6 +42,7 @@ def generate_bucketfs_udf_path(bucketfs_config: BucketFSConfig) -> PurePosixPath path = PurePosixPath("/buckets/", bucketfs_config.bucketfs_name) return path + @typechecked(always=True) def generate_bucket_udf_path(bucket_config: BucketConfig, path_in_bucket: Union[None, str, PurePosixPath]) -> PurePosixPath: @@ -64,6 +64,7 @@ def generate_bucket_udf_path(bucket_config: BucketConfig, path = PurePosixPath(path, path_in_bucket) return path + @typechecked(always=True) def generate_bucketfs_http_url(bucketfs_config: BucketFSConfig, with_credentials: bool = False) -> urllib.parse.ParseResult: @@ -92,6 +93,7 @@ def generate_bucketfs_http_url(bucketfs_config: BucketFSConfig, urlparse = urllib.parse.urlparse(url) return urlparse + @typechecked(always=True) def generate_bucket_http_url(bucket_config: BucketConfig, path_in_bucket: Union[None, str, PurePosixPath], with_credentials: bool = False) -> urllib.parse.ParseResult: @@ -118,6 +120,7 @@ def generate_bucket_http_url(bucket_config: BucketConfig, path_in_bucket: Union[ return urlparse +@typechecked(always=True) def create_auth_object(bucket_config: BucketConfig) -> HTTPBasicAuth: if bucket_config.bucketfs_config.connection_config is None: raise TypeError("bucket_config.bucketfs_config.connection_config can't be None for this operation") From 3721e9370220c7f17a838fbdc6867e4771f840f5 Mon Sep 17 00:00:00 2001 From: Torsten Kilias Date: Tue, 9 Mar 2021 16:19:54 +0100 Subject: [PATCH 21/24] Correct typos in bucketfs_utils.py --- exasol_bucketfs_utils_python/bucketfs_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/exasol_bucketfs_utils_python/bucketfs_utils.py b/exasol_bucketfs_utils_python/bucketfs_utils.py index d79fe5e9..d5d4c6d8 100644 --- a/exasol_bucketfs_utils_python/bucketfs_utils.py +++ b/exasol_bucketfs_utils_python/bucketfs_utils.py @@ -35,9 +35,9 @@ def _make_path_relative(path_in_bucket: Union[None, str, PurePosixPath]) -> Pure @typechecked(always=True) def generate_bucketfs_udf_path(bucketfs_config: BucketFSConfig) -> PurePosixPath: """ - This function generates the path where UDFs can access the content of a BucketFS in there file system + This function generates the path where UDFs can access the content of a BucketFS in their file system :param bucketfs_config: Config of the BucketFS, the BucketFSConnectionConfig in the BucketFSConfig can be None - :return: Path of the given BucketFS in the file system of UDFs + :return: Path of the given BucketFS in the file system of the UDFs """ path = PurePosixPath("/buckets/", bucketfs_config.bucketfs_name) return path @@ -48,7 +48,7 @@ def generate_bucket_udf_path(bucket_config: BucketConfig, path_in_bucket: Union[None, str, PurePosixPath]) -> PurePosixPath: """ This function generates the path where UDFs can access the content of a bucket or - the given Path in a bucket in there file system + the given Path in a bucket in their file system :param bucket_config: Config of the Bucket, the BucketFSConnectionConfig in the BucketFSConfig can be None :param path_in_bucket: If not None, path_in_bucket gets concatenated to the path of the bucket :return: Path of the bucket or the file in the Bucket in the file system of UDFs From 7e4f75415ae243cc7d0d6a2fd308e912043ba03e Mon Sep 17 00:00:00 2001 From: Torsten Kilias Date: Fri, 19 Mar 2021 10:33:19 +0100 Subject: [PATCH 22/24] Add docstrings and type annotations to return types to upload.py. upload_object_to_bucketfs_via_joblib forwards keyword arguments to joblib.dump. --- exasol_bucketfs_utils_python/upload.py | 55 ++++++++++++++++++++++---- 1 file changed, 47 insertions(+), 8 deletions(-) diff --git a/exasol_bucketfs_utils_python/upload.py b/exasol_bucketfs_utils_python/upload.py index be80cbbf..ca2fef81 100644 --- a/exasol_bucketfs_utils_python/upload.py +++ b/exasol_bucketfs_utils_python/upload.py @@ -1,6 +1,7 @@ -import typing -from pathlib import Path +from pathlib import Path, PurePosixPath from tempfile import NamedTemporaryFile +from typing import Tuple, IO, Any +from urllib.parse import ParseResult import joblib import requests @@ -10,12 +11,29 @@ from exasol_bucketfs_utils_python.bucketfs_utils import generate_bucket_http_url, generate_bucket_udf_path -def upload_file_to_bucketfs(bucket_config: BucketConfig, bucket_file_path: str, local_file_path: Path): +def upload_file_to_bucketfs(bucket_config: BucketConfig, bucket_file_path: str, local_file_path: Path) \ + -> Tuple[ParseResult, PurePosixPath]: + """ + This function uploads a file to the specified path in bucket of the BucketFS. + :param bucket_config: BucketConfig for the destination bucket + :param bucket_file_path: Path in the bucket to upload the file to + :param local_file_path: File path to the local file + :return: The URL and path in the UDF Filesystem to the uploaded file + """ with local_file_path.open("rb") as f: return upload_fileobj_to_bucketfs(bucket_config, bucket_file_path, f) -def upload_fileobj_to_bucketfs(bucket_config: BucketConfig, bucket_file_path: str, fileobj: typing.IO): +def upload_fileobj_to_bucketfs(bucket_config: BucketConfig, bucket_file_path: str, fileobj: IO) \ + -> Tuple[ParseResult, PurePosixPath]: + """ + This function uploads a file object `file object `_ + to the specified path in bucket of the BucketFS. + :param bucket_config: BucketConfig for the destination bucket + :param bucket_file_path: Path in the bucket to upload the file to + :param fileobj: File object which should be uploaded + :return: The URL and path in the UDF Filesystem to the uploaded file + """ if bucket_file_path is None: raise ValueError("bucket_file_path can't be None") url = generate_bucket_http_url(bucket_config, bucket_file_path) @@ -26,7 +44,15 @@ def upload_fileobj_to_bucketfs(bucket_config: BucketConfig, bucket_file_path: st return url, path -def upload_string_to_bucketfs(bucket_config: BucketConfig, bucket_file_path: str, string: str): +def upload_string_to_bucketfs(bucket_config: BucketConfig, bucket_file_path: str, string: str) \ + -> Tuple[ParseResult, PurePosixPath]: + """ + This function uploads a string to the specified path in bucket of the BucketFS. + :param bucket_config: BucketConfig for the destination bucket + :param bucket_file_path: Path in the bucket to upload the file to + :param string: String which should be uploaded + :return: The URL and path in the UDF Filesystem to the uploaded file + """ if bucket_file_path is None: raise ValueError("bucket_file_path can't be None") url = generate_bucket_http_url(bucket_config, bucket_file_path) @@ -37,9 +63,22 @@ def upload_string_to_bucketfs(bucket_config: BucketConfig, bucket_file_path: str return url, path -def upload_object_to_bucketfs_via_joblib(object, bucket_config: BucketConfig, bucket_file_path: str, compress=True): +def upload_object_to_bucketfs_via_joblib(object: Any, + bucket_config: BucketConfig, bucket_file_path: str, + **kwargs) \ + -> Tuple[ParseResult, PurePosixPath]: + """ + This function serializes a python object with + `joblib.dump `_ + and uploads it to the specified path in bucket of the BucketFS. + :param object: Object which gets serialized and uploaed via joblib.dump + :param bucket_config: BucketConfig for the destination bucket + :param bucket_file_path: Path in the bucket to upload the file to + :param kwargs: Keyword arguments which get forwared to joblib.dump + :return: The URL and path in the UDF Filesystem to the uploaded file + """ with NamedTemporaryFile() as temp_file: - joblib.dump(object, temp_file.name, compress=compress) + joblib.dump(object, temp_file.name, **kwargs) temp_file.flush() temp_file.seek(0) - upload_fileobj_to_bucketfs(bucket_config, bucket_file_path, temp_file) + return upload_fileobj_to_bucketfs(bucket_config, bucket_file_path, temp_file) From 55572614dd2dc3234e35cf68efc0ec3aeb3423ba Mon Sep 17 00:00:00 2001 From: Torsten Kilias Date: Fri, 19 Mar 2021 10:54:17 +0100 Subject: [PATCH 23/24] Add docstrings and type annotations to return types to download.py. Fix small typo in upload.py --- exasol_bucketfs_utils_python/download.py | 30 +++++++++++++++++++++++- exasol_bucketfs_utils_python/upload.py | 2 +- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/exasol_bucketfs_utils_python/download.py b/exasol_bucketfs_utils_python/download.py index daa5866a..4da6592a 100644 --- a/exasol_bucketfs_utils_python/download.py +++ b/exasol_bucketfs_utils_python/download.py @@ -11,11 +11,26 @@ def download_from_bucketfs_to_file(bucket_config: BucketConfig, bucket_file_path: str, local_file_path: Path): + """ + Download a file from the specified path in the bucket in the BucketFs into a local file + :param bucket_config: BucketConfig for the bucket to download from + :param bucket_file_path: Path in the bucket to download the file from + :param local_file_path: File path to the local file to store the downloaded data + :return: None + """ with local_file_path.open("wb") as f: download_from_bucketfs_to_fileobj(bucket_config, bucket_file_path, f) def download_from_bucketfs_to_fileobj(bucket_config: BucketConfig, bucket_file_path: str, fileobj: typing.IO): + """ + Download a file from the specified path in the bucket in the BucketFs into a given + `file object `_ + :param bucket_config: BucketConfig for the bucket to download from + :param bucket_file_path: Path in the bucket to download the file from + :param fileobj: File object where the data of the file in the BucketFS is downloaded to + :return: None + """ if bucket_file_path is None: raise ValueError("bucket_file_path can't be None") url = generate_bucket_http_url(bucket_config, bucket_file_path) @@ -27,6 +42,12 @@ def download_from_bucketfs_to_fileobj(bucket_config: BucketConfig, bucket_file_p def download_from_bucketfs_to_string(bucket_config: BucketConfig, bucket_file_path: str) -> str: + """ + Download a file from the specified path in the bucket in the BucketFs into a string + :param bucket_config: BucketConfig for the bucket to download from + :param bucket_file_path: Path in the bucket to download the file from + :return: The content of the file in the BucketFS as string + """ if bucket_file_path is None: raise ValueError("bucket_file_path can't be None") url = generate_bucket_http_url(bucket_config, bucket_file_path) @@ -36,7 +57,14 @@ def download_from_bucketfs_to_string(bucket_config: BucketConfig, bucket_file_pa return response.text -def download_object_from_bucketfs_via_joblib(bucket_config: BucketConfig, bucket_file_path: str): +def download_object_from_bucketfs_via_joblib(bucket_config: BucketConfig, bucket_file_path: str)-> typing.Any: + """ + Download a file from the specified path in the bucket in the BucketFs and deserialize it via + `joblib.load `_ + :param bucket_config: BucketConfig for the bucket to download from + :param bucket_file_path: Path in the bucket to download the file from + :return: The deserialized object which was downloaded from the BucketFS + """ with NamedTemporaryFile() as temp_file: download_from_bucketfs_to_fileobj(bucket_config, bucket_file_path, temp_file) temp_file.flush() diff --git a/exasol_bucketfs_utils_python/upload.py b/exasol_bucketfs_utils_python/upload.py index ca2fef81..76107ce9 100644 --- a/exasol_bucketfs_utils_python/upload.py +++ b/exasol_bucketfs_utils_python/upload.py @@ -27,7 +27,7 @@ def upload_file_to_bucketfs(bucket_config: BucketConfig, bucket_file_path: str, def upload_fileobj_to_bucketfs(bucket_config: BucketConfig, bucket_file_path: str, fileobj: IO) \ -> Tuple[ParseResult, PurePosixPath]: """ - This function uploads a file object `file object `_ + This function uploads a `file object `_ to the specified path in bucket of the BucketFS. :param bucket_config: BucketConfig for the destination bucket :param bucket_file_path: Path in the bucket to upload the file to From afac65dffd2cc40b393b8bb890207a8b431e5713 Mon Sep 17 00:00:00 2001 From: Torsten Kilias Date: Fri, 19 Mar 2021 12:15:06 +0100 Subject: [PATCH 24/24] Apply suggestions from code review Mainly typos and some improvements of the docstrings Co-authored-by: Anastasiia Sergienko <46891819+AnastasiiaSergienko@users.noreply.github.com> --- exasol_bucketfs_utils_python/bucketfs_utils.py | 10 +++++----- exasol_bucketfs_utils_python/download.py | 2 +- exasol_bucketfs_utils_python/upload.py | 12 ++++++------ 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/exasol_bucketfs_utils_python/bucketfs_utils.py b/exasol_bucketfs_utils_python/bucketfs_utils.py index d5d4c6d8..81e73a1b 100644 --- a/exasol_bucketfs_utils_python/bucketfs_utils.py +++ b/exasol_bucketfs_utils_python/bucketfs_utils.py @@ -48,7 +48,7 @@ def generate_bucket_udf_path(bucket_config: BucketConfig, path_in_bucket: Union[None, str, PurePosixPath]) -> PurePosixPath: """ This function generates the path where UDFs can access the content of a bucket or - the given Path in a bucket in their file system + the given path in a bucket in their file system :param bucket_config: Config of the Bucket, the BucketFSConnectionConfig in the BucketFSConfig can be None :param path_in_bucket: If not None, path_in_bucket gets concatenated to the path of the bucket :return: Path of the bucket or the file in the Bucket in the file system of UDFs @@ -69,8 +69,8 @@ def generate_bucket_udf_path(bucket_config: BucketConfig, def generate_bucketfs_http_url(bucketfs_config: BucketFSConfig, with_credentials: bool = False) -> urllib.parse.ParseResult: """ - This function generates the HTTP[s] url for the given BucketFSConfig - with or without basic authentication (http[s]://user:password@host:port) + This function generates an HTTP[s] url for the given BucketFSConfig + with or without basic authentication (a template: http[s]://user:password@host:port) :param bucketfs_config: A BucketFSConfig with a non None BucketFSConnectionConfig :param with_credentials: If True, this function generates a url with basic authentication, default False :return: HTTP[S] URL of the BucketFS @@ -98,8 +98,8 @@ def generate_bucketfs_http_url(bucketfs_config: BucketFSConfig, def generate_bucket_http_url(bucket_config: BucketConfig, path_in_bucket: Union[None, str, PurePosixPath], with_credentials: bool = False) -> urllib.parse.ParseResult: """ - This function generates the HTTP[s] url for the given bucket ot the path in the bucket - with or without basic authentication (http[s]://user:password@host:port) + This function generates an HTTP[s] url for the given bucket or the path in the bucket + with or without basic authentication (a template: http[s]://user:password@host:port) :param bucket_config: Config of the Bucket, the BucketFSConnectionConfig in the BucketFSConfig must be not None :param path_in_bucket: If not None, path_in_bucket gets concatenated to the path of the bucket :param with_credentials: If True, this function generates a url with basic authentication, default False diff --git a/exasol_bucketfs_utils_python/download.py b/exasol_bucketfs_utils_python/download.py index 4da6592a..98de4b2f 100644 --- a/exasol_bucketfs_utils_python/download.py +++ b/exasol_bucketfs_utils_python/download.py @@ -12,7 +12,7 @@ def download_from_bucketfs_to_file(bucket_config: BucketConfig, bucket_file_path: str, local_file_path: Path): """ - Download a file from the specified path in the bucket in the BucketFs into a local file + Download a file from the specified path in the bucket in the BucketFs and save as a local file :param bucket_config: BucketConfig for the bucket to download from :param bucket_file_path: Path in the bucket to download the file from :param local_file_path: File path to the local file to store the downloaded data diff --git a/exasol_bucketfs_utils_python/upload.py b/exasol_bucketfs_utils_python/upload.py index 76107ce9..fcf7a504 100644 --- a/exasol_bucketfs_utils_python/upload.py +++ b/exasol_bucketfs_utils_python/upload.py @@ -14,7 +14,7 @@ def upload_file_to_bucketfs(bucket_config: BucketConfig, bucket_file_path: str, local_file_path: Path) \ -> Tuple[ParseResult, PurePosixPath]: """ - This function uploads a file to the specified path in bucket of the BucketFS. + This function uploads a file to the specified path in a bucket of the BucketFS. :param bucket_config: BucketConfig for the destination bucket :param bucket_file_path: Path in the bucket to upload the file to :param local_file_path: File path to the local file @@ -28,7 +28,7 @@ def upload_fileobj_to_bucketfs(bucket_config: BucketConfig, bucket_file_path: st -> Tuple[ParseResult, PurePosixPath]: """ This function uploads a `file object `_ - to the specified path in bucket of the BucketFS. + to the specified path in a bucket of the BucketFS. :param bucket_config: BucketConfig for the destination bucket :param bucket_file_path: Path in the bucket to upload the file to :param fileobj: File object which should be uploaded @@ -47,7 +47,7 @@ def upload_fileobj_to_bucketfs(bucket_config: BucketConfig, bucket_file_path: st def upload_string_to_bucketfs(bucket_config: BucketConfig, bucket_file_path: str, string: str) \ -> Tuple[ParseResult, PurePosixPath]: """ - This function uploads a string to the specified path in bucket of the BucketFS. + This function uploads a string to the specified path in a bucket of the BucketFS. :param bucket_config: BucketConfig for the destination bucket :param bucket_file_path: Path in the bucket to upload the file to :param string: String which should be uploaded @@ -70,11 +70,11 @@ def upload_object_to_bucketfs_via_joblib(object: Any, """ This function serializes a python object with `joblib.dump `_ - and uploads it to the specified path in bucket of the BucketFS. - :param object: Object which gets serialized and uploaed via joblib.dump + and uploads it to the specified path in a bucket of the BucketFS. + :param object: Object which gets serialized and uploaded via joblib.dump :param bucket_config: BucketConfig for the destination bucket :param bucket_file_path: Path in the bucket to upload the file to - :param kwargs: Keyword arguments which get forwared to joblib.dump + :param kwargs: Keyword arguments which get forwarded to joblib.dump :return: The URL and path in the UDF Filesystem to the uploaded file """ with NamedTemporaryFile() as temp_file: