Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 35 additions & 1 deletion obstore/python/obstore/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,45 @@
from typing import TYPE_CHECKING

from . import _obstore, store # pyright:ignore[reportMissingModuleSource]
from ._attributes import Attribute, Attributes
from ._buffered_types import (
AsyncReadableFile,
AsyncWritableFile,
ReadableFile,
WritableFile,
)
from ._get_types import BytesStream, GetOptions, GetResult, OffsetRange, SuffixRange
from ._list_types import ListChunkType, ListResult, ListStream, ObjectMeta
from ._obstore import * # noqa: F403 # pyright:ignore[reportMissingModuleSource]
from ._put_types import PutMode, PutResult, UpdateVersion
from ._sign_types import HTTP_METHOD, SignCapableStore

if TYPE_CHECKING:
from . import exceptions # noqa: TC004


# Names re-exported as the public API of the `obstore` package.
# NOTE(review): `exceptions` is imported above only under `TYPE_CHECKING`; presumably
# the `from ._obstore import *` star import supplies it at runtime — confirm.
__all__ = [
    "HTTP_METHOD",
    "AsyncReadableFile",
    "AsyncWritableFile",
    "Attribute",
    "Attributes",
    "BytesStream",
    "GetOptions",
    "GetResult",
    "ListChunkType",
    "ListResult",
    "ListStream",
    "ObjectMeta",
    "OffsetRange",
    "PutMode",
    "PutResult",
    "ReadableFile",
    "SignCapableStore",
    "SuffixRange",
    "UpdateVersion",
    "WritableFile",
    "exceptions",
    "store",
]
# Extend the list with every name the `_obstore` module itself declares public.
__all__ += _obstore.__all__
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,6 @@
See [Cache-Control](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control).

Any other string key specifies a user-defined metadata field for the object.

!!! warning "Not importable at runtime"

To use this type hint in your code, import it within a `TYPE_CHECKING` block:

```py
from __future__ import annotations
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from obstore import Attribute
```
"""

Attributes: TypeAlias = dict[Attribute, str]
Expand All @@ -61,15 +50,4 @@
retrieved from [`get`][obstore.get]/[`get_async`][obstore.get_async].

Unlike ObjectMeta, Attributes are not returned by listing APIs.

!!! warning "Not importable at runtime"

To use this type hint in your code, import it within a `TYPE_CHECKING` block:

```py
from __future__ import annotations
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from obstore import Attributes
```
"""
252 changes: 6 additions & 246 deletions obstore/python/obstore/_buffered.pyi
Original file line number Diff line number Diff line change
@@ -1,21 +1,12 @@
import sys
from contextlib import AbstractAsyncContextManager, AbstractContextManager

from ._attributes import Attributes
from ._bytes import Bytes
from ._list import ObjectMeta
from ._buffered_types import (
AsyncReadableFile,
AsyncWritableFile,
ReadableFile,
WritableFile,
)
from ._store import ObjectStore

if sys.version_info >= (3, 11):
from typing import Self
else:
from typing_extensions import Self

if sys.version_info >= (3, 12):
from collections.abc import Buffer
else:
from typing_extensions import Buffer

def open_reader(
store: ObjectStore,
path: str,
Expand Down Expand Up @@ -47,170 +38,6 @@ async def open_reader_async(
Refer to the documentation for [open_reader][obstore.open_reader].
"""

class ReadableFile:
    """A synchronous buffered reader modelled on [`BufferedReader`][io.BufferedReader].

    Internally this maintains a buffer of the requested size, and uses
    [`get_range`][obstore.get_range] to populate its internal buffer once depleted. This
    buffer is cleared on seek.

    Whilst simple, this interface will typically be outperformed by the native `obstore`
    methods that better map to the network APIs. This is because most object stores have
    very [high first-byte latencies], on the order of 100-200ms, and so avoiding
    unnecessary round-trips is critical to throughput.

    Systems looking to sequentially scan a file should instead consider using
    [`get`][obstore.get], or [`get_range`][obstore.get_range] to read a particular
    range.

    Systems looking to read multiple ranges of a file should instead consider using
    [`get_ranges`][obstore.get_ranges], which will optimise the vectored IO.

    [high first-byte latencies]: https://docs.aws.amazon.com/AmazonS3/latest/userguide/optimizing-performance.html
    """

    def close(self) -> None:
        """Close the current file.

        This is currently a no-op.
        """

    @property
    def meta(self) -> ObjectMeta:
        """Access the metadata of the underlying file."""

    def read(self, size: int | None = None, /) -> Bytes:
        """Read up to `size` bytes from the object and return them.

        As a convenience, if `size` is unspecified or `None`, all bytes until EOF are
        returned.
        """

    def readall(self) -> Bytes:
        """Read and return all the bytes from the stream until EOF."""

    def readline(self) -> Bytes:
        """Read a single line of the file, up until the next newline character."""

    def readlines(self, hint: int = -1, /) -> list[Bytes]:
        """Read all remaining lines into a list of buffers."""

    def seek(self, offset: int, whence: int = ..., /) -> int:
        """Change the stream position.

        Change the stream position to the given byte `offset`, interpreted relative to
        the position indicated by `whence`, and return the new absolute position. Values
        for `whence` are:

        - [`os.SEEK_SET`][] or 0: start of the stream (the default); `offset` should be zero or positive
        - [`os.SEEK_CUR`][] or 1: current stream position; `offset` may be negative
        - [`os.SEEK_END`][] or 2: end of the stream; `offset` is usually negative
        """

    def seekable(self) -> bool:
        """Return True if the stream supports random access."""

    @property
    def size(self) -> int:
        """The size in bytes of the object."""

    def tell(self) -> int:
        """Return the current stream position."""

class AsyncReadableFile:
    """An async buffered reader modelled on [`BufferedReader`][io.BufferedReader].

    Internally this maintains a buffer of the requested size, and uses
    [`get_range`][obstore.get_range] to populate its internal buffer once depleted. This
    buffer is cleared on seek.

    Whilst simple, this interface will typically be outperformed by the native `obstore`
    methods that better map to the network APIs. This is because most object stores have
    very [high first-byte latencies], on the order of 100-200ms, and so avoiding
    unnecessary round-trips is critical to throughput.

    Systems looking to sequentially scan a file should instead consider using
    [`get`][obstore.get], or [`get_range`][obstore.get_range] to read a particular
    range.

    Systems looking to read multiple ranges of a file should instead consider using
    [`get_ranges`][obstore.get_ranges], which will optimise the vectored IO.

    Note that `read`, `readall`, `readline`, `readlines`, `seek` and `tell` are
    coroutines and must be awaited, whereas `close`, `seekable`, `meta` and `size` are
    synchronous.

    [high first-byte latencies]: https://docs.aws.amazon.com/AmazonS3/latest/userguide/optimizing-performance.html
    """

    def close(self) -> None:
        """Close the current file.

        This is currently a no-op.
        """

    @property
    def meta(self) -> ObjectMeta:
        """Access the metadata of the underlying file."""

    async def read(self, size: int | None = None, /) -> Bytes:
        """Read up to `size` bytes from the object and return them.

        As a convenience, if `size` is unspecified or `None`, all bytes until EOF are
        returned.
        """

    async def readall(self) -> Bytes:
        """Read and return all the bytes from the stream until EOF."""

    async def readline(self) -> Bytes:
        """Read a single line of the file, up until the next newline character."""

    async def readlines(self, hint: int = -1, /) -> list[Bytes]:
        """Read all remaining lines into a list of buffers."""

    async def seek(self, offset: int, whence: int = ..., /) -> int:
        """Change the stream position.

        Change the stream position to the given byte `offset`, interpreted relative to
        the position indicated by `whence`, and return the new absolute position. Values
        for `whence` are:

        - [`os.SEEK_SET`][] or 0: start of the stream (the default); `offset` should be zero or positive
        - [`os.SEEK_CUR`][] or 1: current stream position; `offset` may be negative
        - [`os.SEEK_END`][] or 2: end of the stream; `offset` is usually negative
        """

    def seekable(self) -> bool:
        """Return True if the stream supports random access."""

    @property
    def size(self) -> int:
        """The size in bytes of the object."""

    async def tell(self) -> int:
        """Return the current stream position."""

def open_writer(
store: ObjectStore,
path: str,
Expand Down Expand Up @@ -250,70 +77,3 @@ def open_writer_async(

Refer to the documentation for [open_writer][obstore.open_writer].
"""

class WritableFile(AbstractContextManager):
    """A buffered writable file object with synchronous operations.

    This implements a similar interface as a Python
    [`BufferedWriter`][io.BufferedWriter], and can be used as a context manager in a
    `with` statement.
    """

    def __enter__(self) -> Self: ...
    def __exit__(self, exc_type, exc_value, traceback) -> None: ...  # noqa: ANN001
    def close(self) -> None:
        """Close the current file."""

    def closed(self) -> bool:
        """Check whether this file has been closed.

        Note that this is a method, not an attribute.
        """

    def flush(self) -> None:
        """Flushes this output stream, ensuring that all intermediately buffered contents reach their destination."""

    def write(self, buffer: bytes | Buffer, /) -> int:
        """Write the [bytes-like object](https://docs.python.org/3/glossary.html#term-bytes-like-object), `buffer`, and return the number of bytes written."""

class AsyncWritableFile(AbstractAsyncContextManager):
    """A buffered writable file object with **asynchronous** operations.

    Every method, including `close` and `closed`, is a coroutine and must be awaited.
    Instances can be used as an async context manager in an `async with` statement.
    """

    async def __aenter__(self) -> Self: ...
    async def __aexit__(self, exc_type, exc_value, traceback) -> None: ...  # noqa: ANN001
    async def close(self) -> None:
        """Close the current file."""

    async def closed(self) -> bool:
        """Check whether this file has been closed.

        Note that this is an async method, not an attribute.
        """

    async def flush(self) -> None:
        """Flushes this output stream, ensuring that all intermediately buffered contents reach their destination."""

    async def write(self, buffer: bytes | Buffer, /) -> int:
        """Write the [bytes-like object](https://docs.python.org/3/glossary.html#term-bytes-like-object), `buffer`, and return the number of bytes written."""
Loading
Loading