diff --git a/.gitignore b/.gitignore index 39e9db25..73102171 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +*.so *.whl # Generated by Cargo diff --git a/Cargo.toml b/Cargo.toml index a3d49dd9..356539a3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,14 +3,12 @@ members = ["object-store-rs", "pyo3-object_store"] resolver = "2" [workspace.package] -# Package version for arro3-*, not for pyo3-arrow -version = "0.4.2" -authors = ["Kyle Barron "] +authors = ["Kyle Barron "] edition = "2021" -homepage = "https://kylebarron.dev/arro3" -repository = "https://github.com/kylebarron/arro3" +homepage = "https://developmentseed.org/object-store-rs" +repository = "https://github.com/developmentseed/object-store-rs" license = "MIT OR Apache-2.0" -keywords = ["python", "arrow"] +keywords = ["python"] categories = [] rust-version = "1.75" diff --git a/README.md b/README.md index 6ec5e246..965c2b83 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,100 @@ # object-store-rs -A Python interface and pyo3 integration to the object-store crate + +A Python interface and pyo3 integration to the Rust [`object_store`](https://docs.rs/object_store/latest/object_store/) crate. This crate provides a uniform API for interacting with object storage services and local files. Using this library, the same code can run in multiple clouds and local test environments, via a simple runtime configuration change. + + + +- Easy to install with no Python dependencies. +- Full static type hinting +- Full sync and async API +- Helpers for constructing from environment variables and `boto3.Session` objects + +Among the included backends are: + +- Amazon S3 and S3-compliant APIs like Cloudflare R2 +- Google Cloud Storage +- Azure Blob Gen1 and Gen2 accounts (including ADLS Gen2) +- Local filesystem +- In-memory storage + + + +## Installation + +```sh +pip install object-store-rs +``` + +## Comparison to object-store-python + +- More maintainable API than object-store-python. +- Fewer classes.
Use native Python (typed) dicts and objects where possible. + +## Usage + +### Constructing a store + +For ease of use and accurate validation, there are separate classes for each backend. + +TODO: finish doc here + +#### Configuration + +- Each store has its own configuration. This is covered in the docs, and the accepted string literals are listed in the type hints. + +### Interacting with a store + +All methods for interacting with a store are exported as top-level functions, +such as `get`, `put`, `list`, and `delete`. + +```py +import object_store_rs as obs + +store = obs.store.MemoryStore() + +obs.put_file(store, "file.txt", b"hello world!") +response = obs.get(store, "file.txt") +response.meta +# {'size': 12, +# 'last_modified': datetime.datetime(2024, 10, 18, 4, 8, 12, 57046, tzinfo=datetime.timezone.utc), +# 'version': None, +# 'e_tag': '0', +# 'location': 'file.txt'} + +assert response.bytes() == b"hello world!" + +byte_range = obs.get_range(store, "file.txt", offset=0, length=5) +assert byte_range == b"hello" + +obs.copy(store, "file.txt", "other.txt") +assert obs.get(store, "other.txt").bytes() == b"hello world!" +``` + +All of these methods also have `async` counterparts, suffixed with `_async`. + +```py +import object_store_rs as obs + +store = obs.store.MemoryStore() + +await obs.put_file_async(store, "file.txt", b"hello world!") +response = await obs.get_async(store, "file.txt") +response.meta +# { +# "last_modified": datetime.datetime( +# 2024, 10, 18, 4, 14, 39, 630310, tzinfo=datetime.timezone.utc +# ), +# "size": 12, +# "location": "file.txt", +# "version": None, +# "e_tag": "0", +# } +assert await response.bytes_async() == b"hello world!" + +byte_range = await obs.get_range_async(store, "file.txt", offset=0, length=5) +assert byte_range == b"hello" + +await obs.copy_async(store, "file.txt", "other.txt") +resp = await obs.get_async(store, "other.txt") +assert await resp.bytes_async() == b"hello world!"
+``` diff --git a/object-store-rs/Cargo.toml b/object-store-rs/Cargo.toml index eae4d4b6..0480ad6a 100644 --- a/object-store-rs/Cargo.toml +++ b/object-store-rs/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "object-store-rs" -version = { workspace = true } +version = "0.1.0-beta.1" authors = { workspace = true } edition = { workspace = true } description = "Core library for representing Arrow data in Python." diff --git a/object-store-rs/python/object_store_rs/__pycache__/__init__.cpython-311.pyc b/object-store-rs/python/object_store_rs/__pycache__/__init__.cpython-311.pyc deleted file mode 100644 index 66bac1c7..00000000 Binary files a/object-store-rs/python/object_store_rs/__pycache__/__init__.cpython-311.pyc and /dev/null differ diff --git a/object-store-rs/python/object_store_rs/_object_store_rs.cpython-311-darwin.so b/object-store-rs/python/object_store_rs/_object_store_rs.cpython-311-darwin.so deleted file mode 100755 index 8bf4a2a1..00000000 Binary files a/object-store-rs/python/object_store_rs/_object_store_rs.cpython-311-darwin.so and /dev/null differ