diff --git a/.github/workflows/check_setup_py.yaml b/.github/workflows/check_setup_py.yaml index b512a1b5..eec2d710 100644 --- a/.github/workflows/check_setup_py.yaml +++ b/.github/workflows/check_setup_py.yaml @@ -21,7 +21,7 @@ jobs: - name: Show changes on working copy run: git status --porcelain=v1 -uno - name: Show diff on working copy - run: git diff + run: git diff --cached; cat setup.py - name: Check if setup.py changed run: | [ -z "$(git status --porcelain=v1 -uno 2>/dev/null)" ] diff --git a/exasol_bucketfs_utils_python/bucket_config.py b/exasol_bucketfs_utils_python/bucket_config.py new file mode 100644 index 00000000..df0e62bd --- /dev/null +++ b/exasol_bucketfs_utils_python/bucket_config.py @@ -0,0 +1,25 @@ +from typeguard import typechecked + +from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConfig + + +class BucketConfig: + """ + The BucketConfig contains all required information about a BucketFS + to access it either via HTTP[S] or in the file system inside of UDFs. 
+ """ + + @typechecked(always=True) + def __init__(self, bucket_name: str, bucketfs_config: BucketFSConfig): + if bucket_name == "": + raise ValueError("Bucket name can't be an empty string") + self._bucket_name = bucket_name + self._bucketfs_config = bucketfs_config + + @property + def bucket_name(self) -> str: + return self._bucket_name + + @property + def bucketfs_config(self) -> BucketFSConfig: + return self._bucketfs_config \ No newline at end of file diff --git a/exasol_bucketfs_utils_python/bucketfs_config.py b/exasol_bucketfs_utils_python/bucketfs_config.py new file mode 100644 index 00000000..95876cea --- /dev/null +++ b/exasol_bucketfs_utils_python/bucketfs_config.py @@ -0,0 +1,31 @@ +from typing import Union + +from typeguard import typechecked + +from exasol_bucketfs_utils_python.bucketfs_connection_config import BucketFSConnectionConfig + + +class BucketFSConfig: + """ + The BucketFSConfig contains all required information + to access it either via HTTP[S] or in the file system inside of UDFs. + The BucketFSConnectionConfig is optional here, + because in UDFs we sometimes don't want to use HTTP[S].
+ """ + + @typechecked(always=True) + def __init__(self, bucketfs_name: str, connection_config: Union[BucketFSConnectionConfig, None] = None): + self._connection_config = connection_config + if bucketfs_name == "": + raise ValueError("BucketFS name can't be an empty string") + self._bucketfs_name = bucketfs_name + + @property + def bucketfs_name(self) -> str: + return self._bucketfs_name + + @property + def connection_config(self) -> Union[BucketFSConnectionConfig, None]: + return self._connection_config + + diff --git a/exasol_bucketfs_utils_python/bucketfs_connection_config.py b/exasol_bucketfs_utils_python/bucketfs_connection_config.py new file mode 100644 index 00000000..1b679f3f --- /dev/null +++ b/exasol_bucketfs_utils_python/bucketfs_connection_config.py @@ -0,0 +1,42 @@ +from typeguard import typechecked + + +class BucketFSConnectionConfig: + """ + The BucketFSConnectionConfig contains all necessary information + to connect to the BucketFS Server via HTTP[s] + """ + + @typechecked(always=True) + def __init__(self, host: str, port: int, user: str, pwd: str, is_https=False): + self._is_https = is_https + if host == "": + raise ValueError("Host can't be an empty string") + self._host = host + self._port = port + if user not in ["w", "r"]: # The BucketFs currently supports only these two users + raise ValueError(f"User can only be, 'w' (read-write access) or 'r' (read-only access), but got {user}") + self._user = user + if pwd == "": + raise ValueError("Password can't be an empty string") + self._pwd = pwd + + @property + def is_https(self) -> bool: + return self._is_https + + @property + def host(self) -> str: + return self._host + + @property + def port(self) -> int: + return self._port + + @property + def user(self) -> str: + return self._user + + @property + def pwd(self) -> str: + return self._pwd \ No newline at end of file diff --git a/exasol_bucketfs_utils_python/bucketfs_utils.py b/exasol_bucketfs_utils_python/bucketfs_utils.py new file mode 100644 
index 00000000..81e73a1b --- /dev/null +++ b/exasol_bucketfs_utils_python/bucketfs_utils.py @@ -0,0 +1,130 @@ +import urllib.parse +from pathlib import PurePosixPath +from typing import Union + +from requests.auth import HTTPBasicAuth +from typeguard import typechecked + +from exasol_bucketfs_utils_python.bucket_config import BucketConfig +from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConfig + +ARCHIVE_EXTENSIONS = [".tar.gz", ".tgz", ".zip", ".tar"] + + +def _encode_url_part(part: str) -> str: + urlencoded = urllib.parse.quote(part) + return urlencoded + + +def _correct_path_in_bucket_for_archives(path_in_bucket: PurePosixPath) -> PurePosixPath: + for extension in ARCHIVE_EXTENSIONS: + if path_in_bucket.name.endswith(extension): + path_in_bucket = PurePosixPath(path_in_bucket.parent, + path_in_bucket.name[:-len(extension)]) + break + return path_in_bucket + + +def _make_path_relative(path_in_bucket: Union[None, str, PurePosixPath]) -> PurePosixPath: + path_in_bucket = PurePosixPath(path_in_bucket) + if path_in_bucket.is_absolute(): + path_in_bucket = path_in_bucket.relative_to(PurePosixPath("/")) + return path_in_bucket + + +@typechecked(always=True) +def generate_bucketfs_udf_path(bucketfs_config: BucketFSConfig) -> PurePosixPath: + """ + This function generates the path where UDFs can access the content of a BucketFS in their file system + :param bucketfs_config: Config of the BucketFS, the BucketFSConnectionConfig in the BucketFSConfig can be None + :return: Path of the given BucketFS in the file system of the UDFs + """ + path = PurePosixPath("/buckets/", bucketfs_config.bucketfs_name) + return path + + +@typechecked(always=True) +def generate_bucket_udf_path(bucket_config: BucketConfig, + path_in_bucket: Union[None, str, PurePosixPath]) -> PurePosixPath: + """ + This function generates the path where UDFs can access the content of a bucket or + the given path in a bucket in their file system + :param bucket_config: Config of the Bucket, the 
BucketFSConnectionConfig in the BucketFSConfig can be None + :param path_in_bucket: If not None, path_in_bucket gets concatenated to the path of the bucket + :return: Path of the bucket or the file in the Bucket in the file system of UDFs + """ + bucketfs_path = generate_bucketfs_udf_path(bucket_config.bucketfs_config) + path = PurePosixPath(bucketfs_path, bucket_config.bucket_name) + + if path_in_bucket is not None: + path_in_bucket = _make_path_relative(path_in_bucket) + path_in_bucket = _correct_path_in_bucket_for_archives(path_in_bucket) + else: + path_in_bucket = "" + path = PurePosixPath(path, path_in_bucket) + return path + + +@typechecked(always=True) +def generate_bucketfs_http_url(bucketfs_config: BucketFSConfig, + with_credentials: bool = False) -> urllib.parse.ParseResult: + """ + This function generates an HTTP[s] url for the given BucketFSConfig + with or without basic authentication (a template: http[s]://user:password@host:port) + :param bucketfs_config: A BucketFSConfig with a non None BucketFSConnectionConfig + :param with_credentials: If True, this function generates a url with basic authentication, default False + :return: HTTP[S] URL of the BucketFS + """ + if bucketfs_config.connection_config is None: + raise ValueError("bucket_config.bucketfs_config.connection_config can't be None for this operation") + if with_credentials: + encoded_password = _encode_url_part(bucketfs_config.connection_config.pwd) + encoded_user = _encode_url_part(bucketfs_config.connection_config.user) + credentials = f"{encoded_user}:{encoded_password}@" + else: + credentials = "" + if bucketfs_config.connection_config.is_https: + protocol = "https" + else: + protocol = "http" + encoded_host = _encode_url_part(bucketfs_config.connection_config.host) + url = f"{protocol}://{credentials}" \ + f"{encoded_host}:{bucketfs_config.connection_config.port}" + urlparse = urllib.parse.urlparse(url) + return urlparse + + +@typechecked(always=True) +def 
generate_bucket_http_url(bucket_config: BucketConfig, path_in_bucket: Union[None, str, PurePosixPath], + with_credentials: bool = False) -> urllib.parse.ParseResult: + """ + This function generates an HTTP[s] url for the given bucket or the path in the bucket + with or without basic authentication (a template: http[s]://user:password@host:port) + :param bucket_config: Config of the Bucket, the BucketFSConnectionConfig in the BucketFSConfig must not be None + :param path_in_bucket: If not None, path_in_bucket gets concatenated to the path of the bucket + :param with_credentials: If True, this function generates a url with basic authentication, default False + :return: HTTP[S] URL of the bucket or the path in the bucket + """ + url = generate_bucketfs_http_url(bucket_config.bucketfs_config, with_credentials) + if path_in_bucket is not None: + path_in_bucket = _make_path_relative(path_in_bucket) + else: + path_in_bucket = "" + encoded_bucket_and_path_in_bucket = \ + "/".join( + _encode_url_part(part) + for part in + PurePosixPath(bucket_config.bucket_name, path_in_bucket).parts) + url = urllib.parse.urljoin(url.geturl(), encoded_bucket_and_path_in_bucket) + urlparse = urllib.parse.urlparse(url) + return urlparse + + +@typechecked(always=True) +def create_auth_object(bucket_config: BucketConfig) -> HTTPBasicAuth: + if bucket_config.bucketfs_config.connection_config is None: + raise TypeError("bucket_config.bucketfs_config.connection_config can't be None for this operation") + auth = HTTPBasicAuth( + bucket_config.bucketfs_config.connection_config.user, + bucket_config.bucketfs_config.connection_config.pwd) + return auth diff --git a/exasol_bucketfs_utils_python/download.py b/exasol_bucketfs_utils_python/download.py new file mode 100644 index 00000000..98de4b2f --- /dev/null +++ b/exasol_bucketfs_utils_python/download.py @@ -0,0 +1,73 @@ +import typing +from pathlib import Path +from tempfile import NamedTemporaryFile + +import joblib +import requests + +from
exasol_bucketfs_utils_python import bucketfs_utils +from exasol_bucketfs_utils_python.bucket_config import BucketConfig +from exasol_bucketfs_utils_python.bucketfs_utils import generate_bucket_http_url + + +def download_from_bucketfs_to_file(bucket_config: BucketConfig, bucket_file_path: str, local_file_path: Path): + """ + Download a file from the specified path in the bucket in the BucketFs and save as a local file + :param bucket_config: BucketConfig for the bucket to download from + :param bucket_file_path: Path in the bucket to download the file from + :param local_file_path: File path to the local file to store the downloaded data + :return: None + """ + with local_file_path.open("wb") as f: + download_from_bucketfs_to_fileobj(bucket_config, bucket_file_path, f) + + +def download_from_bucketfs_to_fileobj(bucket_config: BucketConfig, bucket_file_path: str, fileobj: typing.IO): + """ + Download a file from the specified path in the bucket in the BucketFs into a given + `file object `_ + :param bucket_config: BucketConfig for the bucket to download from + :param bucket_file_path: Path in the bucket to download the file from + :param fileobj: File object where the data of the file in the BucketFS is downloaded to + :return: None + """ + if bucket_file_path is None: + raise ValueError("bucket_file_path can't be None") + url = generate_bucket_http_url(bucket_config, bucket_file_path) + auth = bucketfs_utils.create_auth_object(bucket_config) + with requests.get(url.geturl(), stream=True, auth=auth) as response: + response.raise_for_status() + for chunk in response.iter_content(chunk_size=8192): + fileobj.write(chunk) + + +def download_from_bucketfs_to_string(bucket_config: BucketConfig, bucket_file_path: str) -> str: + """ + Download a file from the specified path in the bucket in the BucketFs into a string + :param bucket_config: BucketConfig for the bucket to download from + :param bucket_file_path: Path in the bucket to download the file from + :return: The 
content of the file in the BucketFS as string + """ + if bucket_file_path is None: + raise ValueError("bucket_file_path can't be None") + url = generate_bucket_http_url(bucket_config, bucket_file_path) + auth = bucketfs_utils.create_auth_object(bucket_config) + response = requests.get(url.geturl(), auth=auth) + response.raise_for_status() + return response.text + + +def download_object_from_bucketfs_via_joblib(bucket_config: BucketConfig, bucket_file_path: str)-> typing.Any: + """ + Download a file from the specified path in the bucket in the BucketFs and deserialize it via + `joblib.load `_ + :param bucket_config: BucketConfig for the bucket to download from + :param bucket_file_path: Path in the bucket to download the file from + :return: The deserialized object which was downloaded from the BucketFS + """ + with NamedTemporaryFile() as temp_file: + download_from_bucketfs_to_fileobj(bucket_config, bucket_file_path, temp_file) + temp_file.flush() + temp_file.seek(0) + obj = joblib.load(temp_file) + return obj diff --git a/exasol_bucketfs_utils_python/upload.py b/exasol_bucketfs_utils_python/upload.py new file mode 100644 index 00000000..fcf7a504 --- /dev/null +++ b/exasol_bucketfs_utils_python/upload.py @@ -0,0 +1,84 @@ +from pathlib import Path, PurePosixPath +from tempfile import NamedTemporaryFile +from typing import Tuple, IO, Any +from urllib.parse import ParseResult + +import joblib +import requests + +from exasol_bucketfs_utils_python import bucketfs_utils +from exasol_bucketfs_utils_python.bucket_config import BucketConfig +from exasol_bucketfs_utils_python.bucketfs_utils import generate_bucket_http_url, generate_bucket_udf_path + + +def upload_file_to_bucketfs(bucket_config: BucketConfig, bucket_file_path: str, local_file_path: Path) \ + -> Tuple[ParseResult, PurePosixPath]: + """ + This function uploads a file to the specified path in a bucket of the BucketFS. 
+ :param bucket_config: BucketConfig for the destination bucket + :param bucket_file_path: Path in the bucket to upload the file to + :param local_file_path: File path to the local file + :return: The URL and path in the UDF Filesystem to the uploaded file + """ + with local_file_path.open("rb") as f: + return upload_fileobj_to_bucketfs(bucket_config, bucket_file_path, f) + + +def upload_fileobj_to_bucketfs(bucket_config: BucketConfig, bucket_file_path: str, fileobj: IO) \ + -> Tuple[ParseResult, PurePosixPath]: + """ + This function uploads a `file object `_ + to the specified path in a bucket of the BucketFS. + :param bucket_config: BucketConfig for the destination bucket + :param bucket_file_path: Path in the bucket to upload the file to + :param fileobj: File object which should be uploaded + :return: The URL and path in the UDF Filesystem to the uploaded file + """ + if bucket_file_path is None: + raise ValueError("bucket_file_path can't be None") + url = generate_bucket_http_url(bucket_config, bucket_file_path) + auth = bucketfs_utils.create_auth_object(bucket_config) + response = requests.put(url.geturl(), data=fileobj, auth=auth) + response.raise_for_status() + path = generate_bucket_udf_path(bucket_config, bucket_file_path) + return url, path + + +def upload_string_to_bucketfs(bucket_config: BucketConfig, bucket_file_path: str, string: str) \ + -> Tuple[ParseResult, PurePosixPath]: + """ + This function uploads a string to the specified path in a bucket of the BucketFS. 
+ :param bucket_config: BucketConfig for the destination bucket + :param bucket_file_path: Path in the bucket to upload the file to + :param string: String which should be uploaded + :return: The URL and path in the UDF Filesystem to the uploaded file + """ + if bucket_file_path is None: + raise ValueError("bucket_file_path can't be None") + url = generate_bucket_http_url(bucket_config, bucket_file_path) + auth = bucketfs_utils.create_auth_object(bucket_config) + response = requests.put(url.geturl(), data=string.encode("UTF-8"), auth=auth) + response.raise_for_status() + path = generate_bucket_udf_path(bucket_config, bucket_file_path) + return url, path + + +def upload_object_to_bucketfs_via_joblib(object: Any, + bucket_config: BucketConfig, bucket_file_path: str, + **kwargs) \ + -> Tuple[ParseResult, PurePosixPath]: + """ + This function serializes a python object with + `joblib.dump `_ + and uploads it to the specified path in a bucket of the BucketFS. + :param object: Object which gets serialized and uploaded via joblib.dump + :param bucket_config: BucketConfig for the destination bucket + :param bucket_file_path: Path in the bucket to upload the file to + :param kwargs: Keyword arguments which get forwarded to joblib.dump + :return: The URL and path in the UDF Filesystem to the uploaded file + """ + with NamedTemporaryFile() as temp_file: + joblib.dump(object, temp_file.name, **kwargs) + temp_file.flush() + temp_file.seek(0) + return upload_fileobj_to_bucketfs(bucket_config, bucket_file_path, temp_file) diff --git a/pyproject.toml b/pyproject.toml index 7a40e031..382e9766 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,8 @@ keywords = ['exasol', 'bucketfs'] [tool.poetry.dependencies] python = ">=3.6.1" requests = "^2.24.0" +joblib="^1.0.1" +typeguard = "^2.11.1" [tool.poetry.dev-dependencies] pytest = "^6.1.1" diff --git a/setup.py b/setup.py index 6a9bc01d..cbd1a2c9 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ {'': ['*']} install_requires 
= \ -['requests>=2.24.0,<3.0.0'] +['joblib>=1.0.1,<2.0.0', 'requests>=2.24.0,<3.0.0', 'typeguard>=2.11.1,<3.0.0'] setup_kwargs = { 'name': 'exasol-bucketfs-utils-python', diff --git a/tests/test_bucket_config.py b/tests/test_bucket_config.py new file mode 100644 index 00000000..28ab476e --- /dev/null +++ b/tests/test_bucket_config.py @@ -0,0 +1,59 @@ +import pytest + +from exasol_bucketfs_utils_python.bucket_config import BucketConfig +from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConfig +from exasol_bucketfs_utils_python.bucketfs_connection_config import BucketFSConnectionConfig + + +def create_test_bucketfs_config(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", + is_https=False) + bucketfs_config = BucketFSConfig(bucketfs_name="bfsdefault", connection_config=connection_config) + return bucketfs_config + + +def test_bucket_config_with_bucketfs_config(): + bucketfs_config = create_test_bucketfs_config() + + bucket_name = "default" + bucket_config = BucketConfig(bucket_name=bucket_name, bucketfs_config=bucketfs_config) + + assert bucket_config.bucket_name == bucket_name and \ + bucket_config.bucketfs_config == bucketfs_config + + +def test_bucket_config_with_empty_bucket_name(): + bucketfs_config = create_test_bucketfs_config() + + with pytest.raises(ValueError): + bucket_config = BucketConfig(bucket_name="", bucketfs_config=bucketfs_config) + + +def test_bucket_config_set_bucket_name(): + bucketfs_config = create_test_bucketfs_config() + + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) + + with pytest.raises(AttributeError): + bucket_config.bucket_name = "test" + + +def test_bucket_config_set_bucketfs_config(): + bucketfs_config = create_test_bucketfs_config() + + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) + + with pytest.raises(AttributeError): + bucket_config.bucketfs_config = bucketfs_config + + +def 
test_bucket_config_with_empty_bucketfs_config(): + with pytest.raises(TypeError): + bucket_config = BucketConfig(bucket_name="", bucketfs_config=None) + + +def test_bucket_config_with_None_as_bucket_name(): + bucketfs_config = create_test_bucketfs_config() + + with pytest.raises(TypeError): + bucket_config = BucketConfig(bucket_name=None, bucketfs_config=bucketfs_config) diff --git a/tests/test_bucketfs_config.py b/tests/test_bucketfs_config.py new file mode 100644 index 00000000..d5c51481 --- /dev/null +++ b/tests/test_bucketfs_config.py @@ -0,0 +1,59 @@ +import pytest + +from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConfig +from exasol_bucketfs_utils_python.bucketfs_connection_config import BucketFSConnectionConfig + + +def create_test_connection_config(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", + is_https=False) + return connection_config + + +def test_bucketfs_config_with_empty_bucketfs_name(): + connection_config = create_test_connection_config() + with pytest.raises(ValueError): + bucketfs_config = BucketFSConfig(bucketfs_name="", connection_config=connection_config) + + +def test_bucketfs_config_with_bucketfs_connection_config(): + connection_config = create_test_connection_config() + + bucketfs_name = "bfsdefault" + bucketfs_config = BucketFSConfig(bucketfs_name=bucketfs_name, connection_config=connection_config) + + assert bucketfs_config.bucketfs_name == bucketfs_name and \ + bucketfs_config.connection_config == connection_config + + +def test_bucketfs_config_without_bucketfs_connection_config(): + bucketfs_name = "bfsdefault" + bucketfs_config = BucketFSConfig(bucketfs_name=bucketfs_name) + assert bucketfs_config.bucketfs_name == bucketfs_name and \ + bucketfs_config.connection_config == None + + +def test_bucketfs_config_with_none_as_bucketfs_name(): + bucketfs_name = None + with pytest.raises(TypeError): + bucketfs_config = BucketFSConfig(bucketfs_name=bucketfs_name) + + +def 
test_bucketfs_config_set_bucketfs_name(): + connection_config = create_test_connection_config() + + bucketfs_name = "bfsdefault" + bucketfs_config = BucketFSConfig(bucketfs_name=bucketfs_name, connection_config=connection_config) + + with pytest.raises(AttributeError): + bucketfs_config.bucketfs_name = "test" + + +def test_bucketfs_config_set_bucketfs_connection_config(): + connection_config = create_test_connection_config() + + bucketfs_name = "bfsdefault" + bucketfs_config = BucketFSConfig(bucketfs_name=bucketfs_name, connection_config=connection_config) + + with pytest.raises(AttributeError): + bucketfs_config.connection_config = None diff --git a/tests/test_bucketfs_connection_config.py b/tests/test_bucketfs_connection_config.py new file mode 100644 index 00000000..b1eca01b --- /dev/null +++ b/tests/test_bucketfs_connection_config.py @@ -0,0 +1,156 @@ +import pytest + +from exasol_bucketfs_utils_python.bucketfs_connection_config import BucketFSConnectionConfig + + +def test_bucketfs_connection_config_with_read_user(): + host = "localhost" + port = 6666 + user = "r" + pwd = "read" + is_https = False + connection_config = BucketFSConnectionConfig(host=host, port=port, user=user, pwd=pwd, + is_https=is_https) + assert connection_config.host == host and \ + connection_config.port == port and \ + connection_config.user == user and \ + connection_config.pwd == pwd and \ + connection_config.is_https == is_https + + +def test_bucketfs_connection_config_with_write_user(): + host = "localhost" + port = 6666 + user = "w" + pwd = "write" + is_https = False + connection_config = BucketFSConnectionConfig(host=host, port=port, user=user, pwd=pwd, + is_https=is_https) + assert connection_config.host == host and \ + connection_config.port == port and \ + connection_config.user == user and \ + connection_config.pwd == pwd and \ + connection_config.is_https == is_https + + +def test_bucketfs_connection_config_with_https(): + host = "localhost" + port = 6666 + user = "w" + pwd = 
"write" + is_https = True + connection_config = BucketFSConnectionConfig(host=host, port=port, user=user, pwd=pwd, + is_https=is_https) + assert connection_config.host == host and \ + connection_config.port == port and \ + connection_config.user == user and \ + connection_config.pwd == pwd and \ + connection_config.is_https == is_https + + +def test_bucketfs_connection_config_set_https(): + host = "localhost" + port = 6666 + user = "w" + pwd = "write" + is_https = True + connection_config = BucketFSConnectionConfig(host=host, port=port, user=user, pwd=pwd, + is_https=is_https) + with pytest.raises(AttributeError): + connection_config.is_https = False + + +def test_bucketfs_connection_config_set_host(): + host = "localhost" + port = 6666 + user = "w" + pwd = "write" + is_https = True + connection_config = BucketFSConnectionConfig(host=host, port=port, user=user, pwd=pwd, + is_https=is_https) + with pytest.raises(AttributeError): + connection_config.host = "testhost" + + +def test_bucketfs_connection_config_set_port(): + host = "localhost" + port = 6666 + user = "w" + pwd = "write" + is_https = True + connection_config = BucketFSConnectionConfig(host=host, port=port, user=user, pwd=pwd, + is_https=is_https) + with pytest.raises(AttributeError): + connection_config.port = 7777 + + +def test_bucketfs_connection_config_set_user(): + host = "localhost" + port = 6666 + user = "w" + pwd = "write" + is_https = True + connection_config = BucketFSConnectionConfig(host=host, port=port, user=user, pwd=pwd, + is_https=is_https) + with pytest.raises(AttributeError): + connection_config.user = "r" + + +def test_bucketfs_connection_config_set_pwd(): + host = "localhost" + port = 6666 + user = "w" + pwd = "write" + is_https = True + connection_config = BucketFSConnectionConfig(host=host, port=port, user=user, pwd=pwd, + is_https=is_https) + with pytest.raises(AttributeError): + connection_config.pwd = "abc" + + +def test_bucketfs_connection_config_with_not_allowed_user(): + with 
pytest.raises(ValueError): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="abc", pwd="write", + is_https=False) + + +def test_bucketfs_connection_config_with_empty_host(): + with pytest.raises(ValueError): + connection_config = BucketFSConnectionConfig(host="", port=6666, user="w", pwd="write", + is_https=False) + + +def test_bucketfs_connection_config_with_empty_user(): + with pytest.raises(ValueError): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="", pwd="write", + is_https=False) + + +def test_bucketfs_connection_config_with_empty_password(): + with pytest.raises(ValueError): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="", + is_https=False) + + +def test_bucketfs_connection_config_with_none_as_host(): + with pytest.raises(TypeError): + connection_config = BucketFSConnectionConfig(host=None, port=6666, user="w", pwd="write", + is_https=False) + + +def test_bucketfs_connection_config_with_none_as_port(): + with pytest.raises(TypeError): + connection_config = BucketFSConnectionConfig(host="localhost", port=None, user="w", pwd="write", + is_https=False) + + +def test_bucketfs_connection_config_with_none_as_user(): + with pytest.raises(TypeError): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user=None, pwd="write", + is_https=False) + + +def test_bucketfs_connection_config_with_none_as_password(): + with pytest.raises(TypeError): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd=None, + is_https=False) diff --git a/tests/test_bucketfs_utils.py b/tests/test_bucketfs_utils.py new file mode 100644 index 00000000..0fba077d --- /dev/null +++ b/tests/test_bucketfs_utils.py @@ -0,0 +1,173 @@ +import pytest + +from exasol_bucketfs_utils_python import bucketfs_utils +from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConfig +from 
exasol_bucketfs_utils_python.bucketfs_connection_config import BucketFSConnectionConfig +from exasol_bucketfs_utils_python.bucket_config import BucketConfig + + +def test_generate_bucket_udf_path_non_archive_file(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False) + bucketfs_config = BucketFSConfig(connection_config=connection_config, + bucketfs_name="bfsdefault") + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) + udf_path = bucketfs_utils.generate_bucket_udf_path( + bucket_config=bucket_config, + path_in_bucket="path/in/bucket/test_file.txt" + ) + assert str(udf_path) == "/buckets/bfsdefault/default/path/in/bucket/test_file.txt" + + +def test_generate_bucket_udf_path_trailing_slash(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False) + bucketfs_config = BucketFSConfig(connection_config=connection_config, + bucketfs_name="bfsdefault") + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) + udf_path = bucketfs_utils.generate_bucket_udf_path( + bucket_config=bucket_config, + path_in_bucket="/path/in/bucket/test_file.txt" + ) + assert str(udf_path) == "/buckets/bfsdefault/default/path/in/bucket/test_file.txt" + + +@pytest.mark.parametrize("extension", ["tar.gz", "zip", "tgz", "tar"]) +def test_generate_bucket_udf_path_archive(extension): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False) + bucketfs_config = BucketFSConfig(connection_config=connection_config, + bucketfs_name="bfsdefault") + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) + udf_path = bucketfs_utils.generate_bucket_udf_path( + bucket_config=bucket_config, + path_in_bucket=f"path/in/bucket/test_file.{extension}" + ) + assert str(udf_path) == "/buckets/bfsdefault/default/path/in/bucket/test_file" + + +def 
test_generate_bucket_url_file_write_access(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False) + bucketfs_config = BucketFSConfig(connection_config=connection_config, + bucketfs_name="bfsdefault") + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) + udf_path = bucketfs_utils.generate_bucket_http_url( + bucket_config=bucket_config, + path_in_bucket="path/in/bucket/test_file.txt" + ) + assert udf_path.geturl() == "http://localhost:6666/default/path/in/bucket/test_file.txt" + + +def test_generate_bucket_url_file_trailing_slash(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False) + bucketfs_config = BucketFSConfig(connection_config=connection_config, + bucketfs_name="bfsdefault") + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) + udf_path = bucketfs_utils.generate_bucket_http_url( + bucket_config=bucket_config, + path_in_bucket="/path/in/bucket/test_file.txt" + ) + assert udf_path.geturl() == "http://localhost:6666/default/path/in/bucket/test_file.txt" + + +def test_generate_bucket_url_file_with_credentials(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False) + bucketfs_config = BucketFSConfig(connection_config=connection_config, + bucketfs_name="bfsdefault") + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) + udf_path = bucketfs_utils.generate_bucket_http_url( + bucket_config=bucket_config, + path_in_bucket="path/in/bucket/test_file.txt", + with_credentials=True + ) + assert udf_path.geturl() == "http://w:write@localhost:6666/default/path/in/bucket/test_file.txt" + + +def test_generate_bucket_url_file_with_ip(): + connection_config = BucketFSConnectionConfig(host="127.0.0.1", port=6666, user="w", pwd="write", is_https=False) + bucketfs_config = 
BucketFSConfig(connection_config=connection_config, + bucketfs_name="bfsdefault") + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) + udf_path = bucketfs_utils.generate_bucket_http_url( + bucket_config=bucket_config, + path_in_bucket="path/in/bucket/test_file.txt", + with_credentials=True + ) + assert udf_path.geturl() == "http://w:write@127.0.0.1:6666/default/path/in/bucket/test_file.txt" + + +def test_generate_bucket_url_file_with_whitespace_in_host(): + connection_config = BucketFSConnectionConfig(host="local host", port=6666, user="w", pwd="write", is_https=False) + bucketfs_config = BucketFSConfig(connection_config=connection_config, + bucketfs_name="bfsdefault") + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) + udf_path = bucketfs_utils.generate_bucket_http_url( + bucket_config=bucket_config, + path_in_bucket="path/in/bucket/test_file.txt", + with_credentials=True + ) + assert udf_path.geturl() == "http://w:write@local%20host:6666/default/path/in/bucket/test_file.txt" + + +def test_generate_bucket_url_file_with_whitespace_in_password(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write write", + is_https=False) + bucketfs_config = BucketFSConfig(connection_config=connection_config, + bucketfs_name="bfsdefault") + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) + udf_path = bucketfs_utils.generate_bucket_http_url( + bucket_config=bucket_config, + path_in_bucket="path/in/bucket/test_file.txt", + with_credentials=True + ) + assert udf_path.geturl() == "http://w:write%20write@localhost:6666/default/path/in/bucket/test_file.txt" + + +def test_generate_bucket_url_file_with_whitespace_in_bucket_name(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", + is_https=False) + bucketfs_config = BucketFSConfig(connection_config=connection_config, + 
bucketfs_name="bfsdefault") + bucket_config = BucketConfig(bucket_name="default default", bucketfs_config=bucketfs_config) + udf_path = bucketfs_utils.generate_bucket_http_url( + bucket_config=bucket_config, + path_in_bucket="path/in/bucket/test_file.txt", + with_credentials=True + ) + assert udf_path.geturl() == "http://w:write@localhost:6666/default%20default/path/in/bucket/test_file.txt" + + +def test_generate_bucket_url_file_with_whitespace_in_path_in_bucket(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", + is_https=False) + bucketfs_config = BucketFSConfig(connection_config=connection_config, + bucketfs_name="bfsdefault") + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) + udf_path = bucketfs_utils.generate_bucket_http_url( + bucket_config=bucket_config, + path_in_bucket="path/in/bucket/test file.txt", + with_credentials=True + ) + assert udf_path.geturl() == "http://w:write@localhost:6666/default/path/in/bucket/test%20file.txt" + + +def test_generate_bucket_url_file_read_only_access(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="r", pwd="read", is_https=False) + bucketfs_config = BucketFSConfig(connection_config=connection_config, + bucketfs_name="bfsdefault") + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) + udf_path = bucketfs_utils.generate_bucket_http_url( + bucket_config=bucket_config, + path_in_bucket="path/in/bucket/test_file.txt", + with_credentials=True + ) + assert udf_path.geturl() == "http://r:read@localhost:6666/default/path/in/bucket/test_file.txt" + +def test_generate_bucket_url_file_https(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="r", pwd="read", is_https=True) + bucketfs_config = BucketFSConfig(connection_config=connection_config, + bucketfs_name="bfsdefault") + bucket_config = BucketConfig(bucket_name="default", 
bucketfs_config=bucketfs_config) + udf_path = bucketfs_utils.generate_bucket_http_url( + bucket_config=bucket_config, + path_in_bucket="path/in/bucket/test_file.txt", + with_credentials=True + ) + assert udf_path.geturl() == "https://r:read@localhost:6666/default/path/in/bucket/test_file.txt" diff --git a/tests/test_upload_download.py b/tests/test_upload_download.py new file mode 100644 index 00000000..de3b5111 --- /dev/null +++ b/tests/test_upload_download.py @@ -0,0 +1,79 @@ +from pathlib import Path +from tempfile import NamedTemporaryFile + +from exasol_bucketfs_utils_python import upload, download +from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConfig +from exasol_bucketfs_utils_python.bucketfs_connection_config import BucketFSConnectionConfig +from exasol_bucketfs_utils_python.bucket_config import BucketConfig + + +def test_file_upload_download(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False) + bucketfs_config = BucketFSConfig(connection_config=connection_config, + bucketfs_name="bfsdefault") + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) + with NamedTemporaryFile() as input_temp_file: + test_byte_string = b"test_byte_string" + input_temp_file.write(test_byte_string) + input_temp_file.flush() + + path_in_bucket = "path/in/bucket/file.txt" + upload.upload_file_to_bucketfs( + bucket_config=bucket_config, + bucket_file_path=path_in_bucket, + local_file_path=Path(input_temp_file.name)) + + with NamedTemporaryFile() as output_temp_file: + download.download_from_bucketfs_to_file( + bucket_config=bucket_config, + bucket_file_path=path_in_bucket, + local_file_path=Path(output_temp_file.name)) + output_test_byte_string = output_temp_file.read() + assert test_byte_string == output_test_byte_string + + +def test_fileobj_upload_download(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", 
is_https=False) + bucketfs_config = BucketFSConfig(connection_config=connection_config, + bucketfs_name="bfsdefault") + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) + with NamedTemporaryFile() as input_temp_file: + test_byte_string = b"test_byte_string" + input_temp_file.write(test_byte_string) + input_temp_file.flush() + input_temp_file.seek(0) + path_in_bucket = "path/in/bucket/file.txt" + upload.upload_fileobj_to_bucketfs( + bucket_config=bucket_config, + bucket_file_path=path_in_bucket, + fileobj=input_temp_file) + + with NamedTemporaryFile() as output_temp_file: + download.download_from_bucketfs_to_fileobj( + bucket_config=bucket_config, + bucket_file_path=path_in_bucket, + fileobj=output_temp_file) + output_temp_file.flush() + output_temp_file.seek(0) + output_test_byte_string = output_temp_file.read() + assert test_byte_string == output_test_byte_string + + +def test_string_upload_download(): + connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False) + bucketfs_config = BucketFSConfig(connection_config=connection_config, + bucketfs_name="bfsdefault") + bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) + test_string = "test_string" + path_in_bucket = "path/in/bucket/file.txt" + upload.upload_string_to_bucketfs( + bucket_config=bucket_config, + bucket_file_path=path_in_bucket, + string=test_string) + + output_test_string = \ + download.download_from_bucketfs_to_string( + bucket_config=bucket_config, + bucket_file_path=path_in_bucket) + + assert test_string == output_test_string