Skip to content
This repository was archived by the owner on Sep 26, 2022. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/changes/changes_0.2.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ Code name: t.b.d

## Features / Enhancements

- #55: Added method to list files in bucket

## Bug Fixes

- #54: Removed PosixPath conversion from alter session string
Expand Down
28 changes: 28 additions & 0 deletions doc/user_guide/list_files_in_bucket.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from pathlib import Path
from exasol_bucketfs_utils_python import upload, list_files
from exasol_bucketfs_utils_python.bucket_config import BucketConfig
from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConfig
from exasol_bucketfs_utils_python.bucketfs_connection_config import BucketFSConnectionConfig

# Describe how to reach the BucketFS service and which bucket to use.
connection_config = BucketFSConnectionConfig(
    host="localhost", port=6666,
    user="w", pwd="write",
    is_https=False)
bucketfs_config = BucketFSConfig(
    connection_config=connection_config,
    bucketfs_name="bfsdefault")
bucket_config = BucketConfig(
    bucket_name="default",
    bucketfs_config=bucketfs_config)

# Upload a file first so that there is something to list.
local_input_file_path = Path("local_input_file.txt")
path_in_bucket = "path/in/bucket/file.txt"
upload.upload_file_to_bucketfs(
    bucket_config=bucket_config,
    bucket_file_path=path_in_bucket,
    local_file_path=local_input_file_path)

# List the directory that contains the uploaded file (not the file itself).
# The returned paths are relative to the listed directory.
bucket_directory_path = "path/in/bucket"
files = list_files.list_files_in_bucketfs(
    bucket_config=bucket_config,
    bucket_file_path=bucket_directory_path)
14 changes: 14 additions & 0 deletions doc/user_guide/list_files_in_bucket.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@

#####################################
Listing files in bucket
#####################################

This library provides a function to list the files stored in a bucket under a
given path. The returned file paths are relative to that path, as shown in the
example below.


Example:

.. literalinclude:: list_files_in_bucket.py
   :language: python3
1 change: 1 addition & 0 deletions doc/user_guide/user_guide.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ For a detailed explanation of the API, please refer to our :doc:`API Documentati

upload_download_functions
upload_github_release_to_bucket
list_files_in_bucket

7 changes: 6 additions & 1 deletion exasol_bucketfs_utils_python/abstract_bucketfs_location.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from abc import ABC, abstractmethod
from typing import Any, Tuple, IO
from typing import Any, Tuple, IO, Iterable
from pathlib import PurePosixPath, Path
from urllib.parse import ParseResult

Expand Down Expand Up @@ -65,3 +65,8 @@ def read_file_from_bucketfs_to_fileobj(self,
def read_file_from_bucketfs_via_joblib(self,
                                       bucket_file_path: str) -> Any:
    """
    Interface method without behaviour of its own; concrete locations
    provide the implementation.

    :param bucket_file_path: Path of the file, as understood by the
        concrete location
    :return: Whatever the concrete implementation loads from the file
    """

@abstractmethod
def list_files_in_bucketfs(self,
                           bucket_file_path: str) -> Iterable[str]:
    """
    Interface method without behaviour of its own; concrete locations
    provide the implementation that lists the files for the given path.

    :param bucket_file_path: Path to list, as understood by the concrete
        location
    :return: The listed file paths as strings
    """
94 changes: 51 additions & 43 deletions exasol_bucketfs_utils_python/bucketfs_location.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import Any, Tuple, IO
from pathlib import PurePosixPath, Path
from urllib.parse import ParseResult
from exasol_bucketfs_utils_python import download, upload
from exasol_bucketfs_utils_python import download, upload, list_files
from exasol_bucketfs_utils_python import load_file_from_local_fs as from_BFS
from exasol_bucketfs_utils_python.bucket_config import BucketConfig

Expand All @@ -24,90 +24,98 @@ def __init__(self, bucket_config: BucketConfig, base_path: PurePosixPath):
self.base_path = base_path
self.bucket_config = bucket_config

def get_complete_file_path_in_bucket(
        self,
        bucket_file_path: str) -> str:
    """
    Resolve a path against this location's base path.

    :param bucket_file_path: Path relative to ``self.base_path``
    :return: ``self.base_path`` joined with ``bucket_file_path`` as a
        POSIX path string
    """
    return str(PurePosixPath(self.base_path, bucket_file_path))

def download_from_bucketfs_to_string(
        self,
        bucket_file_path: str) -> str:
    """
    Delegate to ``download.download_from_bucketfs_to_string`` for a file
    of this location.

    :param bucket_file_path: Path relative to this location's base path
    :return: The value returned by the delegated download function
    """
    return download.download_from_bucketfs_to_string(
        self.bucket_config,
        self.get_complete_file_path_in_bucket(bucket_file_path)
    )

def download_object_from_bucketfs_via_joblib(
        self,
        bucket_file_path: str) -> Any:
    """
    Delegate to ``download.download_object_from_bucketfs_via_joblib`` for
    a file of this location.

    :param bucket_file_path: Path relative to this location's base path
    :return: The value returned by the delegated download function
    """
    return download.download_object_from_bucketfs_via_joblib(
        self.bucket_config,
        self.get_complete_file_path_in_bucket(bucket_file_path)
    )

def upload_string_to_bucketfs(
        self,
        bucket_file_path: str,
        string: str) -> Tuple[ParseResult, PurePosixPath]:
    """
    Delegate to ``upload.upload_string_to_bucketfs`` for a file of this
    location.

    :param bucket_file_path: Path relative to this location's base path
    :param string: The string handed to the delegated upload function
    :return: The value returned by the delegated upload function
    """
    return upload.upload_string_to_bucketfs(
        self.bucket_config,
        self.get_complete_file_path_in_bucket(bucket_file_path),
        string
    )

def upload_object_to_bucketfs_via_joblib(
        self, object: Any,
        bucket_file_path: str,
        **kwargs) -> Tuple[ParseResult, PurePosixPath]:
    """
    Delegate to ``upload.upload_object_to_bucketfs_via_joblib`` for a file
    of this location.

    :param object: The object handed to the delegated upload function
        (the name shadows the builtin but is kept for keyword callers)
    :param bucket_file_path: Path relative to this location's base path
    :param kwargs: Passed through unchanged to the delegated function
    :return: The value returned by the delegated upload function
    """
    return upload.upload_object_to_bucketfs_via_joblib(
        object,
        self.bucket_config,
        self.get_complete_file_path_in_bucket(bucket_file_path),
        **kwargs
    )

def upload_fileobj_to_bucketfs(
        self,
        fileobj: IO,
        bucket_file_path: str) -> Tuple[ParseResult, PurePosixPath]:
    """
    Delegate to ``upload.upload_fileobj_to_bucketfs`` for a file of this
    location.

    :param fileobj: The file object handed to the delegated upload function
    :param bucket_file_path: Path relative to this location's base path
    :return: The value returned by the delegated upload function
    """
    # The delegated function takes the path before the file object.
    return upload.upload_fileobj_to_bucketfs(
        self.bucket_config,
        self.get_complete_file_path_in_bucket(bucket_file_path),
        fileobj
    )

def read_file_from_bucketfs_to_string(
        self,
        bucket_file_path: str) -> str:
    """
    Delegate to ``from_BFS.read_file_from_bucketfs_to_string`` for a file
    of this location.

    :param bucket_file_path: Path relative to this location's base path
    :return: The value returned by the delegated read function
    """
    # Unlike the upload/download helpers, the path comes first here.
    return from_BFS.read_file_from_bucketfs_to_string(
        self.get_complete_file_path_in_bucket(bucket_file_path),
        self.bucket_config
    )

def read_file_from_bucketfs_to_file(
        self,
        bucket_file_path: str,
        local_file_path: Path) -> None:
    """
    Delegate to ``from_BFS.read_file_from_bucketfs_to_file`` for a file of
    this location.

    :param bucket_file_path: Path relative to this location's base path
    :param local_file_path: Local path handed to the delegated function
    """
    from_BFS.read_file_from_bucketfs_to_file(
        self.get_complete_file_path_in_bucket(bucket_file_path),
        self.bucket_config,
        local_file_path
    )

def read_file_from_bucketfs_to_fileobj(
        self,
        bucket_file_path: str,
        fileobj: IO) -> None:
    """
    Delegate to ``from_BFS.read_file_from_bucketfs_to_fileobj`` for a file
    of this location.

    :param bucket_file_path: Path relative to this location's base path
    :param fileobj: File object handed to the delegated function
    """
    from_BFS.read_file_from_bucketfs_to_fileobj(
        self.get_complete_file_path_in_bucket(bucket_file_path),
        self.bucket_config,
        fileobj
    )

def read_file_from_bucketfs_via_joblib(
        self,
        bucket_file_path: str) -> Any:
    """
    Delegate to ``from_BFS.read_file_from_bucketfs_via_joblib`` for a file
    of this location.

    :param bucket_file_path: Path relative to this location's base path
    :return: The value returned by the delegated read function
    """
    return from_BFS.read_file_from_bucketfs_via_joblib(
        self.get_complete_file_path_in_bucket(bucket_file_path),
        self.bucket_config
    )

def list_files_in_bucketfs(
        self,
        bucket_file_path: str) -> list:
    """
    Delegate to ``list_files.list_files_in_bucketfs`` for a path of this
    location.

    :param bucket_file_path: Path relative to this location's base path
    :return: The value returned by the delegated listing function
    """
    complete_path = self.get_complete_file_path_in_bucket(bucket_file_path)
    return list_files.list_files_in_bucketfs(self.bucket_config, complete_path)
34 changes: 34 additions & 0 deletions exasol_bucketfs_utils_python/list_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from typing import Iterable
import requests
from pathlib import Path
from exasol_bucketfs_utils_python.bucket_config import BucketConfig
from exasol_bucketfs_utils_python import bucketfs_utils
from exasol_bucketfs_utils_python.bucketfs_utils import generate_bucket_http_url


def list_files_in_bucketfs(bucket_config: BucketConfig,
                           bucket_file_path: str = "") -> Iterable[str]:
    """
    List the files stored below the given path in a bucket in BucketFS.

    The listing of the whole bucket is requested from BucketFS and then
    reduced to the entries whose leading path components equal those of
    ``bucket_file_path``.

    :param bucket_config: BucketConfig of the bucket whose files are listed
    :param bucket_file_path: Path in the bucket below which the files are
        listed; the empty string (default) selects the whole bucket
    :return: The paths of the matching files as strings, relative to
        ``bucket_file_path``. An entry that equals ``bucket_file_path``
        itself is reported as ``"."``.
    :raises ValueError: If ``bucket_file_path`` is None
    :raises requests.HTTPError: If BucketFS answers with an error status
    """
    # Imported locally so this function does not depend on a change to the
    # module's import block. BucketFS paths are always POSIX-style, so they
    # must not be handled with the platform-dependent ``Path``.
    from pathlib import PurePosixPath

    if bucket_file_path is None:
        raise ValueError("bucket_file_path can't be None")
    # An empty path addresses the bucket itself, which yields its listing.
    url = generate_bucket_http_url(bucket_config, "")
    auth = bucketfs_utils.create_auth_object(bucket_config)
    response = requests.get(url.geturl(), auth=auth)
    response.raise_for_status()

    prefix_parts = PurePosixPath(bucket_file_path).parts
    prefix_length = len(prefix_parts)
    files = []
    # Split on line breaks only, so that file names containing blanks stay
    # in one piece (the listing is processed line by line).
    for line in response.text.splitlines():
        entry = line.strip()
        if not entry:
            continue
        entry_parts = PurePosixPath(entry).parts
        if entry_parts[:prefix_length] == prefix_parts:
            files.append(str(PurePosixPath(*entry_parts[prefix_length:])))

    return files
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,9 @@ def read_file_from_bucketfs_via_joblib(self,
result = joblib.load(
self.get_complete_file_path_in_bucket(bucket_file_path))
return result

def list_files_in_bucketfs(self,
                           bucket_file_path: str) -> list:
    """
    Placeholder listing: make sure the parent directory of the resolved
    path exists and report a single ``"."`` entry.

    :param bucket_file_path: Path that is resolved with
        ``get_complete_file_path_in_bucket``
    :return: Always ``["."]``
    """
    complete_path = Path(
        self.get_complete_file_path_in_bucket(bucket_file_path))
    # Only the parent directory is created; the path itself is not touched.
    complete_path.parent.mkdir(parents=True, exist_ok=True)
    return ["."]
41 changes: 41 additions & 0 deletions tests/test_upload_list.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from exasol_bucketfs_utils_python import upload, list_files
from exasol_bucketfs_utils_python.bucket_config import BucketConfig
from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConfig
from exasol_bucketfs_utils_python.bucketfs_connection_config import BucketFSConnectionConfig
from tests.test_load_fs_file_from_udf import delete_testfile_from_bucketfs


def test_list_files():
    """
    Upload two files and check the listing for several paths in the bucket.

    Every path is checked with and without a trailing slash. The comparison
    ignores the order of the entries, because the test cannot control the
    order in which BucketFS reports them.
    """
    connection_config = BucketFSConnectionConfig(
        host="localhost", port=6666, user="w", pwd="write", is_https=False)
    bucketfs_config = BucketFSConfig(
        connection_config=connection_config, bucketfs_name="bfsdefault")
    bucket_config = BucketConfig(
        bucket_name="default", bucketfs_config=bucketfs_config)
    test_string = "test_string"

    path_list = ["path/in/bucket/file.txt", "path/file2.txt"]
    try:
        for path_in_bucket in path_list:
            upload.upload_string_to_bucketfs(
                bucket_config=bucket_config,
                bucket_file_path=path_in_bucket,
                string=test_string)

        bucket_file_path_map = {
            "path": ["in/bucket/file.txt", "file2.txt"],
            "path/": ["in/bucket/file.txt", "file2.txt"],
            "path/in": ["bucket/file.txt"],
            "path/in/": ["bucket/file.txt"],
            "path/in/bucket": ["file.txt"],
            "path/in/bucket/": ["file.txt"],
            "path/in/bucket/file.txt": ["."]
        }
        for bucket_path, expected in bucket_file_path_map.items():
            listed = list_files.list_files_in_bucketfs(
                bucket_config, bucket_path)
            assert sorted(expected) == sorted(listed), bucket_path
    finally:
        # Remove the uploaded files even when an assertion failed.
        for path_in_bucket in path_list:
            delete_testfile_from_bucketfs(
                file_path=path_in_bucket,
                bucket_config=bucket_config)