From 63f95b1275fe7d6dcd38683cb1248741e92b614c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cumitbuyuksahin=E2=80=9D?= Date: Thu, 31 Mar 2022 14:35:28 +0200 Subject: [PATCH 1/5] Added listing method --- .../abstract_bucketfs_location.py | 5 +++ .../bucketfs_location.py | 10 ++++- exasol_bucketfs_utils_python/list_files.py | 41 +++++++++++++++++++ 3 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 exasol_bucketfs_utils_python/list_files.py diff --git a/exasol_bucketfs_utils_python/abstract_bucketfs_location.py b/exasol_bucketfs_utils_python/abstract_bucketfs_location.py index 9b31f6f2..aefbd808 100644 --- a/exasol_bucketfs_utils_python/abstract_bucketfs_location.py +++ b/exasol_bucketfs_utils_python/abstract_bucketfs_location.py @@ -65,3 +65,8 @@ def read_file_from_bucketfs_to_fileobj(self, def read_file_from_bucketfs_via_joblib(self, bucket_file_path: str) -> Any: pass + + @abstractmethod + def list_files_in_bucketfs(self, + bucket_file_path: str) -> str: + pass \ No newline at end of file diff --git a/exasol_bucketfs_utils_python/bucketfs_location.py b/exasol_bucketfs_utils_python/bucketfs_location.py index 305bba17..e316d5e8 100644 --- a/exasol_bucketfs_utils_python/bucketfs_location.py +++ b/exasol_bucketfs_utils_python/bucketfs_location.py @@ -1,7 +1,7 @@ from typing import Any, Tuple, IO from pathlib import PurePosixPath, Path from urllib.parse import ParseResult -from exasol_bucketfs_utils_python import download, upload +from exasol_bucketfs_utils_python import download, upload, list_files from exasol_bucketfs_utils_python import load_file_from_local_fs as from_BFS from exasol_bucketfs_utils_python.bucket_config import BucketConfig @@ -111,3 +111,11 @@ def read_file_from_bucketfs_via_joblib(self, self.bucket_config ) return result + + def list_files_in_bucketfs(self, + bucket_file_path: str) -> str: + result = list_files.list_files_in_bucketfs( + self.bucket_config, + self.get_complete_file_path_in_bucket(bucket_file_path) + ) + return result diff --git a/exasol_bucketfs_utils_python/list_files.py b/exasol_bucketfs_utils_python/list_files.py new file mode 100644 index 00000000..16e9714f --- /dev/null +++ b/exasol_bucketfs_utils_python/list_files.py @@ -0,0 +1,41 @@ +import requests +from pathlib import Path +from exasol_bucketfs_utils_python.bucket_config import BucketConfig +from exasol_bucketfs_utils_python import bucketfs_utils +from exasol_bucketfs_utils_python.bucketfs_utils import generate_bucket_http_url + + +def list_files_in_bucketfs(bucket_config: BucketConfig, + bucket_file_path: str = "") -> str: + """ + List files at the specified path in the bucket in BucketFs, line by line. + + :param bucket_config: BucketConfig for the bucket to download from + :param bucket_file_path: Path in the bucket to download the file from + :return: The list of the files in the BucketFS as string. + """ + if bucket_file_path is None: + raise ValueError("bucket_file_path can't be None") + url = generate_bucket_http_url(bucket_config, "") + auth = bucketfs_utils.create_auth_object(bucket_config) + response = requests.get(url.geturl(), auth=auth) + response.raise_for_status() + + bucket_file_path_parts = Path(bucket_file_path).parts + files = [] + for path in response.text.split(): + path_parts = Path(path).parts + if path_parts[:len(bucket_file_path_parts)] == bucket_file_path_parts: + files.append(str(Path(*path_parts[len(bucket_file_path_parts):]))) + + return "\n".join(files) + + +from exasol_bucketfs_utils_python.bucket_config import BucketConfig +from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConfig +from exasol_bucketfs_utils_python.bucketfs_connection_config import BucketFSConnectionConfig +connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False) +bucketfs_config = BucketFSConfig(connection_config=connection_config, bucketfs_name="bfsdefault") +bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) + +print(not list_files_in_bucketfs(bucket_config, "path/in/bucket/file.txt")) \ No newline at end of file From 5d9fb126f7cb3305d68b28e9e94f6df25a13ad5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cumitbuyuksahin=E2=80=9D?= Date: Thu, 31 Mar 2022 14:36:24 +0200 Subject: [PATCH 2/5] Added test for listing method --- tests/test_upload_list.py | 41 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 tests/test_upload_list.py diff --git a/tests/test_upload_list.py b/tests/test_upload_list.py new file mode 100644 index 00000000..3a73abe4 --- /dev/null +++ b/tests/test_upload_list.py @@ -0,0 +1,41 @@ +from exasol_bucketfs_utils_python import upload, list_files +from exasol_bucketfs_utils_python.bucket_config import BucketConfig +from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConfig +from exasol_bucketfs_utils_python.bucketfs_connection_config import BucketFSConnectionConfig +from tests.test_load_fs_file_from_udf import delete_testfile_from_bucketfs + + +def test_list_files(): + connection_config = BucketFSConnectionConfig( + host="localhost", port=6666, user="w", pwd="write", is_https=False) + bucketfs_config = BucketFSConfig( + connection_config=connection_config, bucketfs_name="bfsdefault") + bucket_config = BucketConfig( + bucket_name="default", bucketfs_config=bucketfs_config) + test_string = "test_string" + + path_list = ["path/in/bucket/file.txt", "path/file2.txt"] + try: + for path_in_bucket in path_list: + upload.upload_string_to_bucketfs( + bucket_config=bucket_config, + bucket_file_path=path_in_bucket, + string=test_string) + + bucket_file_path_map = { + "path": "\n".join(("in/bucket/file.txt", "file2.txt")), + "path/": "\n".join(("in/bucket/file.txt", "file2.txt")), + "path/in": "bucket/file.txt", + "path/in/": "bucket/file.txt", + "path/in/bucket": "file.txt", + "path/in/bucket/": "file.txt", + "path/in/bucket/file.txt": "." + } + for bucket_path, expected in bucket_file_path_map.items(): + assert list_files.list_files_in_bucketfs( + bucket_config, bucket_path) == expected + finally: + for path_in_bucket in path_list: + delete_testfile_from_bucketfs( + file_path=path_in_bucket, + bucket_config=bucket_config) From 836e1f5e37275dc94754cd5d939627f8ecd6cdbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cumitbuyuksahin=E2=80=9D?= Date: Thu, 31 Mar 2022 15:46:31 +0200 Subject: [PATCH 3/5] Returned list instead of string --- .../abstract_bucketfs_location.py | 4 +- .../bucketfs_location.py | 92 +++++++++---------- exasol_bucketfs_utils_python/list_files.py | 18 +--- .../localfs_mock_bucketfs_location.py | 6 ++ tests/test_upload_list.py | 18 ++-- 5 files changed, 68 insertions(+), 70 deletions(-) diff --git a/exasol_bucketfs_utils_python/abstract_bucketfs_location.py b/exasol_bucketfs_utils_python/abstract_bucketfs_location.py index aefbd808..4c0c9497 100644 --- a/exasol_bucketfs_utils_python/abstract_bucketfs_location.py +++ b/exasol_bucketfs_utils_python/abstract_bucketfs_location.py @@ -68,5 +68,5 @@ def read_file_from_bucketfs_via_joblib(self, @abstractmethod def list_files_in_bucketfs(self, - bucket_file_path: str) -> str: - pass \ No newline at end of file + bucket_file_path: str) -> list: + pass diff --git a/exasol_bucketfs_utils_python/bucketfs_location.py b/exasol_bucketfs_utils_python/bucketfs_location.py index e316d5e8..cca5bc35 100644 --- a/exasol_bucketfs_utils_python/bucketfs_location.py +++ b/exasol_bucketfs_utils_python/bucketfs_location.py @@ -24,98 +24,98 @@ def __init__(self, bucket_config: BucketConfig, base_path: PurePosixPath): self.base_path = base_path self.bucket_config = bucket_config - def get_complete_file_path_in_bucket(self, - bucket_file_path: str) -> str: + def get_complete_file_path_in_bucket( + self, + bucket_file_path: str) -> str: return str(PurePosixPath(self.base_path, bucket_file_path)) - def download_from_bucketfs_to_string(self, - bucket_file_path: str) -> str: - result = download.download_from_bucketfs_to_string( + def download_from_bucketfs_to_string( + self, + bucket_file_path: str) -> str: + return download.download_from_bucketfs_to_string( self.bucket_config, self.get_complete_file_path_in_bucket(bucket_file_path) ) - return result - def download_object_from_bucketfs_via_joblib(self, - bucket_file_path: str) -> Any: - result = download.download_object_from_bucketfs_via_joblib( + def download_object_from_bucketfs_via_joblib( + self, + bucket_file_path: str) -> Any: + return download.download_object_from_bucketfs_via_joblib( self.bucket_config, self.get_complete_file_path_in_bucket(bucket_file_path) ) - return result - def upload_string_to_bucketfs(self, - bucket_file_path: str, - string: str) -> \ - Tuple[ParseResult, PurePosixPath]: - result = upload.upload_string_to_bucketfs( + def upload_string_to_bucketfs( + self, + bucket_file_path: str, + string: str) -> Tuple[ParseResult, PurePosixPath]: + return upload.upload_string_to_bucketfs( self.bucket_config, self.get_complete_file_path_in_bucket(bucket_file_path), string ) - return result - def upload_object_to_bucketfs_via_joblib(self, object: Any, - bucket_file_path: str, - **kwargs) -> \ - Tuple[ParseResult, PurePosixPath]: - result = upload.upload_object_to_bucketfs_via_joblib( + def upload_object_to_bucketfs_via_joblib( + self, object: Any, + bucket_file_path: str, + **kwargs) -> Tuple[ParseResult, PurePosixPath]: + return upload.upload_object_to_bucketfs_via_joblib( object, self.bucket_config, self.get_complete_file_path_in_bucket(bucket_file_path), **kwargs ) - return result - def upload_fileobj_to_bucketfs(self, - fileobj: IO, - bucket_file_path: str) -> \ - Tuple[ParseResult, PurePosixPath]: - result = upload.upload_fileobj_to_bucketfs( + def upload_fileobj_to_bucketfs( + self, + fileobj: IO, + bucket_file_path: str) -> Tuple[ParseResult, PurePosixPath]: + return upload.upload_fileobj_to_bucketfs( self.bucket_config, self.get_complete_file_path_in_bucket(bucket_file_path), fileobj ) - return result - def read_file_from_bucketfs_to_string(self, - bucket_file_path: str) -> str: - result = from_BFS.read_file_from_bucketfs_to_string( + def read_file_from_bucketfs_to_string( + self, + bucket_file_path: str) -> str: + return from_BFS.read_file_from_bucketfs_to_string( self.get_complete_file_path_in_bucket(bucket_file_path), self.bucket_config ) - return result - def read_file_from_bucketfs_to_file(self, - bucket_file_path: str, - local_file_path: Path) -> None: + def read_file_from_bucketfs_to_file( + self, + bucket_file_path: str, + local_file_path: Path) -> None: from_BFS.read_file_from_bucketfs_to_file( self.get_complete_file_path_in_bucket(bucket_file_path), self.bucket_config, local_file_path ) - def read_file_from_bucketfs_to_fileobj(self, - bucket_file_path: str, - fileobj: IO) -> None: + def read_file_from_bucketfs_to_fileobj( + self, + bucket_file_path: str, + fileobj: IO) -> None: from_BFS.read_file_from_bucketfs_to_fileobj( self.get_complete_file_path_in_bucket(bucket_file_path), self.bucket_config, fileobj ) - def read_file_from_bucketfs_via_joblib(self, - bucket_file_path: str) -> Any: - result = from_BFS.read_file_from_bucketfs_via_joblib( + def read_file_from_bucketfs_via_joblib( + self, + bucket_file_path: str) -> Any: + return from_BFS.read_file_from_bucketfs_via_joblib( self.get_complete_file_path_in_bucket(bucket_file_path), self.bucket_config ) - return result - def list_files_in_bucketfs(self, - bucket_file_path: str) -> str: - result = list_files.list_files_in_bucketfs( + def list_files_in_bucketfs( + self, + bucket_file_path: str) -> list: + return list_files.list_files_in_bucketfs( self.bucket_config, self.get_complete_file_path_in_bucket(bucket_file_path) ) - return result diff --git a/exasol_bucketfs_utils_python/list_files.py b/exasol_bucketfs_utils_python/list_files.py index 16e9714f..d7e68e0f 100644 --- a/exasol_bucketfs_utils_python/list_files.py +++ b/exasol_bucketfs_utils_python/list_files.py @@ -6,7 +6,7 @@ def list_files_in_bucketfs(bucket_config: BucketConfig, - bucket_file_path: str = "") -> str: + bucket_file_path: str = "") -> list: """ List files at the specified path in the bucket in BucketFs, line by line. @@ -26,16 +26,8 @@ def list_files_in_bucketfs(bucket_config: BucketConfig, for path in response.text.split(): path_parts = Path(path).parts if path_parts[:len(bucket_file_path_parts)] == bucket_file_path_parts: - files.append(str(Path(*path_parts[len(bucket_file_path_parts):]))) + relevant_parts = path_parts[len(bucket_file_path_parts):] + relevant_path = str(Path(*relevant_parts)) + files.append(relevant_path) - return "\n".join(files) - - -from exasol_bucketfs_utils_python.bucket_config import BucketConfig -from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConfig -from exasol_bucketfs_utils_python.bucketfs_connection_config import BucketFSConnectionConfig -connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False) -bucketfs_config = BucketFSConfig(connection_config=connection_config, bucketfs_name="bfsdefault") -bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config) - -print(not list_files_in_bucketfs(bucket_config, "path/in/bucket/file.txt")) \ No newline at end of file + return files diff --git a/exasol_bucketfs_utils_python/localfs_mock_bucketfs_location.py b/exasol_bucketfs_utils_python/localfs_mock_bucketfs_location.py index 4d0e2a6b..38f8fa39 100644 --- a/exasol_bucketfs_utils_python/localfs_mock_bucketfs_location.py +++ b/exasol_bucketfs_utils_python/localfs_mock_bucketfs_location.py @@ -81,3 +81,9 @@ def read_file_from_bucketfs_via_joblib(self, result = joblib.load( self.get_complete_file_path_in_bucket(bucket_file_path)) return result + + def list_files_in_bucketfs(self, + bucket_file_path: str) -> list: + path = self.get_complete_file_path_in_bucket(bucket_file_path) + Path(path).parent.mkdir(parents=True, exist_ok=True) + return ["."] diff --git a/tests/test_upload_list.py b/tests/test_upload_list.py index 3a73abe4..caf76887 100644 --- a/tests/test_upload_list.py +++ b/tests/test_upload_list.py @@ -23,17 +23,17 @@ def test_list_files(): string=test_string) bucket_file_path_map = { - "path": "\n".join(("in/bucket/file.txt", "file2.txt")), - "path/": "\n".join(("in/bucket/file.txt", "file2.txt")), - "path/in": "bucket/file.txt", - "path/in/": "bucket/file.txt", - "path/in/bucket": "file.txt", - "path/in/bucket/": "file.txt", - "path/in/bucket/file.txt": "." + "path": ["in/bucket/file.txt", "file2.txt"], + "path/": ["in/bucket/file.txt", "file2.txt"], + "path/in": ["bucket/file.txt"], + "path/in/": ["bucket/file.txt"], + "path/in/bucket": ["file.txt"], + "path/in/bucket/": ["file.txt"], + "path/in/bucket/file.txt": ["."] } for bucket_path, expected in bucket_file_path_map.items(): - assert list_files.list_files_in_bucketfs( - bucket_config, bucket_path) == expected + assert expected == list_files.list_files_in_bucketfs( + bucket_config, bucket_path) finally: for path_in_bucket in path_list: delete_testfile_from_bucketfs( From 49f747b02036ae6ae5d5a61bfc41e9f310114d9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cumitbuyuksahin=E2=80=9D?= Date: Thu, 31 Mar 2022 16:52:52 +0200 Subject: [PATCH 4/5] Added documentation --- doc/changes/changes_0.2.0.md | 2 ++ doc/user_guide/list_files_in_bucket.py | 28 +++++++++++++++++++++++++ doc/user_guide/list_files_in_bucket.rst | 14 +++++++++++++ doc/user_guide/user_guide.rst | 1 + 4 files changed, 45 insertions(+) create mode 100644 doc/user_guide/list_files_in_bucket.py create mode 100644 doc/user_guide/list_files_in_bucket.rst diff --git a/doc/changes/changes_0.2.0.md b/doc/changes/changes_0.2.0.md index c22c8db7..8c2c9da7 100644 --- a/doc/changes/changes_0.2.0.md +++ b/doc/changes/changes_0.2.0.md @@ -6,6 +6,8 @@ Code name: t.b.d ## Features / Enhancements + - #55: Added method to list files in bucket + ## Bug Fixes - #54: Removed PosixPath conversion from alter session string diff --git a/doc/user_guide/list_files_in_bucket.py b/doc/user_guide/list_files_in_bucket.py new file mode 100644 index 00000000..7606ccd4 --- /dev/null +++ b/doc/user_guide/list_files_in_bucket.py @@ -0,0 +1,28 @@ +from pathlib import Path +from exasol_bucketfs_utils_python import upload, list_files +from exasol_bucketfs_utils_python.bucket_config import BucketConfig +from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConfig +from exasol_bucketfs_utils_python.bucketfs_connection_config import BucketFSConnectionConfig + +connection_config = BucketFSConnectionConfig( + host="localhost", port=6666, + user="w", pwd="write", + is_https=False) +bucketfs_config = BucketFSConfig( + connection_config=connection_config, + bucketfs_name="bfsdefault") +bucket_config = BucketConfig( + bucket_name="default", + bucketfs_config=bucketfs_config) + +local_input_file_path = Path("local_input_file.txt") +path_in_bucket = "path/in/bucket/file.txt" +upload.upload_file_to_bucketfs( + bucket_config=bucket_config, + bucket_file_path=path_in_bucket, + local_file_path=local_input_file_path) + +bucket_file_path = Path("path/in/bucket") +files = list_files.list_files_in_bucketfs( + bucket_config=bucket_config, + bucket_file_path=path_in_bucket) diff --git a/doc/user_guide/list_files_in_bucket.rst b/doc/user_guide/list_files_in_bucket.rst new file mode 100644 index 00000000..c0799bb9 --- /dev/null +++ b/doc/user_guide/list_files_in_bucket.rst @@ -0,0 +1,14 @@ + +##################################### +Listing files in bucket +##################################### + +This library provides a function to list the files in the bucket under a given +path. As in the example below, the list of files in the specified bucket +directory is obtained by the provided listing method. + + +Example: + +.. literalinclude:: list_files_in_bucket.py + :language: python3 \ No newline at end of file diff --git a/doc/user_guide/user_guide.rst b/doc/user_guide/user_guide.rst index 5f38541f..903f2153 100644 --- a/doc/user_guide/user_guide.rst +++ b/doc/user_guide/user_guide.rst @@ -10,4 +10,5 @@ For a detailed explanation of the API, please refer to our :doc:`API Documentati upload_download_functions upload_github_release_to_bucket + list_files_in_bucket From 80a7a552dd4f9280996162ba7a257c2cae3855e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cumitbuyuksahin=E2=80=9D?= Date: Fri, 1 Apr 2022 09:09:52 +0200 Subject: [PATCH 5/5] Updated type hints --- exasol_bucketfs_utils_python/abstract_bucketfs_location.py | 4 ++-- exasol_bucketfs_utils_python/list_files.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/exasol_bucketfs_utils_python/abstract_bucketfs_location.py b/exasol_bucketfs_utils_python/abstract_bucketfs_location.py index 4c0c9497..dee05390 100644 --- a/exasol_bucketfs_utils_python/abstract_bucketfs_location.py +++ b/exasol_bucketfs_utils_python/abstract_bucketfs_location.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import Any, Tuple, IO +from typing import Any, Tuple, IO, Iterable from pathlib import PurePosixPath, Path from urllib.parse import ParseResult @@ -68,5 +68,5 @@ def read_file_from_bucketfs_via_joblib(self, @abstractmethod def list_files_in_bucketfs(self, - bucket_file_path: str) -> list: + bucket_file_path: str) -> Iterable[str]: pass diff --git a/exasol_bucketfs_utils_python/list_files.py b/exasol_bucketfs_utils_python/list_files.py index d7e68e0f..2119edc0 100644 --- a/exasol_bucketfs_utils_python/list_files.py +++ b/exasol_bucketfs_utils_python/list_files.py @@ -1,3 +1,4 @@ +from typing import Iterable import requests from pathlib import Path from exasol_bucketfs_utils_python.bucket_config import BucketConfig @@ -6,7 +7,7 @@ def list_files_in_bucketfs(bucket_config: BucketConfig, - bucket_file_path: str = "") -> list: + bucket_file_path: str = "") -> Iterable[str]: """ List files at the specified path in the bucket in BucketFs, line by line.