From 4cc1f284f68398204b6f9a11639aef01ff281979 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 10 Mar 2026 01:32:33 +0100 Subject: [PATCH 1/7] check stubs are included at wheel build time diff --git c/python/CMakeLists.txt i/python/CMakeLists.txt index 6395b3e1e7..f71a495e22 100644 --- c/python/CMakeLists.txt +++ i/python/CMakeLists.txt @@ -1042,9 +1042,9 @@ if(EXISTS "${PYARROW_STUBS_SOURCE_DIR}") install(CODE " execute_process( COMMAND \"${Python3_EXECUTABLE}\" - \"${CMAKE_CURRENT_SOURCE_DIR}/scripts/update_stub_docstrings.py\" + \"${CMAKE_SOURCE_DIR}/scripts/update_stub_docstrings.py\" \"${CMAKE_INSTALL_PREFIX}\" - \"${CMAKE_CURRENT_SOURCE_DIR}\" + \"${CMAKE_SOURCE_DIR}\" RESULT_VARIABLE _pyarrow_stub_docstrings_result ) if(NOT _pyarrow_stub_docstrings_result EQUAL 0) --- python/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 6395b3e1e7a..f71a495e224 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -1042,9 +1042,9 @@ if(EXISTS "${PYARROW_STUBS_SOURCE_DIR}") install(CODE " execute_process( COMMAND \"${Python3_EXECUTABLE}\" - \"${CMAKE_CURRENT_SOURCE_DIR}/scripts/update_stub_docstrings.py\" + \"${CMAKE_SOURCE_DIR}/scripts/update_stub_docstrings.py\" \"${CMAKE_INSTALL_PREFIX}\" - \"${CMAKE_CURRENT_SOURCE_DIR}\" + \"${CMAKE_SOURCE_DIR}\" RESULT_VARIABLE _pyarrow_stub_docstrings_result ) if(NOT _pyarrow_stub_docstrings_result EQUAL 0) From 798716c90e34a6bed07a6afac0f8bbe952ce24c7 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Mon, 22 Dec 2025 00:44:32 +0100 Subject: [PATCH 2/7] Add internal types and helpers --- .../pyarrow-stubs/pyarrow/_stubs_typing.pyi | 133 +++ python/pyarrow-stubs/pyarrow/_types.pyi | 966 ++++++++++++++++++ python/pyarrow-stubs/pyarrow/error.pyi | 104 ++ python/pyarrow-stubs/pyarrow/io.pyi | 22 + python/pyarrow-stubs/pyarrow/lib.pyi | 25 + python/pyarrow-stubs/pyarrow/scalar.pyi | 22 + python/pyarrow/fs.py | 2 +- 7 files changed, 1273 insertions(+), 1 deletion(-) create mode 100644 python/pyarrow-stubs/pyarrow/_stubs_typing.pyi create mode 100644 python/pyarrow-stubs/pyarrow/_types.pyi create mode 100644 python/pyarrow-stubs/pyarrow/error.pyi create mode 100644 python/pyarrow-stubs/pyarrow/io.pyi create mode 100644 python/pyarrow-stubs/pyarrow/lib.pyi create mode 100644 python/pyarrow-stubs/pyarrow/scalar.pyi diff --git a/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi b/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi new file mode 100644 index 00000000000..0715012fddc --- /dev/null +++ b/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi @@ -0,0 +1,133 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import datetime as dt + +from collections.abc import Collection, Iterator, Sequence +from decimal import Decimal +from typing import Any, Literal, Protocol, TypeAlias, TypeVar + +import numpy as np + +from numpy.typing import NDArray + +from pyarrow.lib import BooleanArray, IntegerArray, ChunkedArray + +ArrayLike: TypeAlias = Any +ScalarLike: TypeAlias = Any +Order: TypeAlias = Literal["ascending", "descending"] +JoinType: TypeAlias = Literal[ + "left semi", + "right semi", + "left anti", + "right anti", + "inner", + "left outer", + "right outer", + "full outer", +] +Compression: TypeAlias = Literal[ + "gzip", "bz2", "brotli", "lz4", "lz4_frame", "lz4_raw", "zstd", "snappy" +] +NullEncoding: TypeAlias = Literal["mask", "encode"] +NullSelectionBehavior: TypeAlias = Literal["drop", "emit_null"] +TimeUnit: TypeAlias = Literal["s", "ms", "us", "ns"] +Mask: TypeAlias = ( + Sequence[bool | None] + | NDArray[np.bool_] + | BooleanArray + | ChunkedArray[Any] +) +Indices: TypeAlias = ( + Sequence[int | None] + | NDArray[np.integer[Any]] + | IntegerArray + | ChunkedArray[Any] +) + +PyScalar: TypeAlias = (bool | int | float | Decimal | str | bytes | + dt.date | dt.datetime | dt.time | dt.timedelta) + +_T = TypeVar("_T") +_V = TypeVar("_V", covariant=True) + +SingleOrList: TypeAlias = list[_T] | _T + + +class SupportEq(Protocol): + def __eq__(self, other) -> bool: ... + + +class SupportLt(Protocol): + def __lt__(self, other) -> bool: ... + + +class SupportGt(Protocol): + def __gt__(self, other) -> bool: ... + + +class SupportLe(Protocol): + def __le__(self, other) -> bool: ... + + +class SupportGe(Protocol): + def __ge__(self, other) -> bool: ... + + +FilterTuple: TypeAlias = ( + tuple[str, Literal["=", "==", "!="], SupportEq] + | tuple[str, Literal["<"], SupportLt] + | tuple[str, Literal[">"], SupportGt] + | tuple[str, Literal["<="], SupportLe] + | tuple[str, Literal[">="], SupportGe] + | tuple[str, Literal["in", "not in"], Collection] + | tuple[str, str, Any] # Allow general str for operator to avoid type errors +) + + +class Buffer(Protocol): + ... + + +class SupportPyBuffer(Protocol): + ... + + +class SupportArrowStream(Protocol): + def __arrow_c_stream__(self, requested_schema=None) -> Any: ... + + +class SupportPyArrowArray(Protocol): + def __arrow_array__(self, type=None) -> Any: ... + + +class SupportArrowArray(Protocol): + def __arrow_c_array__(self, requested_schema=None) -> Any: ... + + +class SupportArrowDeviceArray(Protocol): + def __arrow_c_device_array__(self, requested_schema=None, **kwargs) -> Any: ... + + +class SupportArrowSchema(Protocol): + def __arrow_c_schema__(self) -> Any: ... + + +class NullableCollection(Protocol[_V]): # type: ignore[reportInvalidTypeVarUse] + def __iter__(self) -> Iterator[_V] | Iterator[_V | None]: ... + def __len__(self) -> int: ... + def __contains__(self, item: Any, /) -> bool: ... diff --git a/python/pyarrow-stubs/pyarrow/_types.pyi b/python/pyarrow-stubs/pyarrow/_types.pyi new file mode 100644 index 00000000000..3d802382ba1 --- /dev/null +++ b/python/pyarrow-stubs/pyarrow/_types.pyi @@ -0,0 +1,966 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import datetime as dt # noqa: F401 +import sys + +from collections.abc import Mapping, Sequence, Iterable, Iterator +from decimal import Decimal # noqa: F401 + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self + +from typing import Any, Generic, Literal + +import numpy as np +import pandas as pd + +from pyarrow._stubs_typing import SupportArrowSchema +from pyarrow.lib import ( # noqa: F401 + Array, + ChunkedArray, + ExtensionArray, + MemoryPool, + MonthDayNano, + Table, +) +from typing_extensions import TypeVar, deprecated + +from .io import Buffer +from .scalar import ExtensionScalar +from ._stubs_typing import TimeUnit + +class _Weakrefable: + ... + + +class _Metadata(_Weakrefable): + ... + + +class DataType(_Weakrefable): + def field(self, i: int) -> Field: ... + + @property + def id(self) -> int: ... + @property + def bit_width(self) -> int: ... + + @property + def byte_width(self) -> int: ... + + @property + def num_fields(self) -> int: ... + + @property + def num_buffers(self) -> int: ... + + @property + def has_variadic_buffers(self) -> bool: ... + + # Properties that exist on specific subtypes but accessed generically + @property + def list_size(self) -> int: ... + + def __hash__(self) -> int: ... + + def equals(self, other: DataType | str, *, + check_metadata: bool = False) -> bool: ... + + def to_pandas_dtype(self) -> np.generic: ... + + def _export_to_c(self, out_ptr: int) -> None: ... + + @classmethod + def _import_from_c(cls, in_ptr: int) -> Self: ... + + def __arrow_c_schema__(self) -> Any: ... + + @classmethod + def _import_from_c_capsule(cls, schema) -> Self: ... + + +_AsPyType = TypeVar("_AsPyType") +_DataTypeT = TypeVar("_DataTypeT", bound=DataType) + + +class _BasicDataType(DataType, Generic[_AsPyType]): + ... + + +class NullType(_BasicDataType[None]): + ... + + +class BoolType(_BasicDataType[bool]): + ... + + +class UInt8Type(_BasicDataType[int]): + ... + + +class Int8Type(_BasicDataType[int]): + ... + + +class UInt16Type(_BasicDataType[int]): + ... + + +class Int16Type(_BasicDataType[int]): + ... + + +class UInt32Type(_BasicDataType[int]): + ... + + +class Int32Type(_BasicDataType[int]): + ... + + +class UInt64Type(_BasicDataType[int]): + ... + + +class Int64Type(_BasicDataType[int]): + ... + + +class Float16Type(_BasicDataType[float]): + ... + + +class Float32Type(_BasicDataType[float]): + ... + + +class Float64Type(_BasicDataType[float]): + ... + + +class Date32Type(_BasicDataType[dt.date]): + ... + + +class Date64Type(_BasicDataType[dt.date]): + ... + + +class MonthDayNanoIntervalType(_BasicDataType[MonthDayNano]): + ... + + +class StringType(_BasicDataType[str]): + ... + + +class LargeStringType(_BasicDataType[str]): + ... + + +class StringViewType(_BasicDataType[str]): + ... + + +class BinaryType(_BasicDataType[bytes]): + ... + + +class LargeBinaryType(_BasicDataType[bytes]): + ... + + +class BinaryViewType(_BasicDataType[bytes]): + ... + + +_Unit = TypeVar("_Unit", bound=TimeUnit, default=Literal["us"]) +_Tz = TypeVar("_Tz", str, None, default=None) + + +class TimestampType(_BasicDataType[int], Generic[_Unit, _Tz]): + + @property + def unit(self) -> _Unit: ... + + @property + def tz(self) -> _Tz: ... + + +_Time32Unit = TypeVar("_Time32Unit", bound=Literal["s", "ms"]) + + +class Time32Type(_BasicDataType[dt.time], Generic[_Time32Unit]): + @property + def unit(self) -> _Time32Unit: ... + + +_Time64Unit = TypeVar("_Time64Unit", bound=Literal["us", "ns"]) + + +class Time64Type(_BasicDataType[dt.time], Generic[_Time64Unit]): + @property + def unit(self) -> _Time64Unit: ... + + +class DurationType(_BasicDataType[dt.timedelta], Generic[_Unit]): + @property + def unit(self) -> _Unit: ... + + +class FixedSizeBinaryType(_BasicDataType[Decimal]): + ... + + +_Precision = TypeVar("_Precision", default=Any) +_Scale = TypeVar("_Scale", default=Any) + + +class Decimal32Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): + @property + def precision(self) -> _Precision: ... + + @property + def scale(self) -> _Scale: ... + + +class Decimal64Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): + @property + def precision(self) -> _Precision: ... + + @property + def scale(self) -> _Scale: ... + + +class Decimal128Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): + @property + def precision(self) -> _Precision: ... + + @property + def scale(self) -> _Scale: ... + + +class Decimal256Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): + @property + def precision(self) -> _Precision: ... + + @property + def scale(self) -> _Scale: ... + + +class ListType(DataType, Generic[_DataTypeT]): + @property + def value_field(self) -> Field[_DataTypeT]: ... + + @property + def value_type(self) -> _DataTypeT: ... + + +class LargeListType(DataType, Generic[_DataTypeT]): + @property + def value_field(self) -> Field[_DataTypeT]: ... + @property + def value_type(self) -> _DataTypeT: ... + + +class ListViewType(DataType, Generic[_DataTypeT]): + @property + def value_field(self) -> Field[_DataTypeT]: ... + + @property + def value_type(self) -> _DataTypeT: ... + + +class LargeListViewType(DataType, Generic[_DataTypeT]): + @property + def value_field(self) -> Field[_DataTypeT]: ... + + @property + def value_type(self) -> _DataTypeT: ... + + +class FixedSizeListType(DataType, Generic[_DataTypeT, _Size]): + @property + def value_field(self) -> Field[_DataTypeT]: ... + + @property + def value_type(self) -> _DataTypeT: ... + + @property + def list_size(self) -> int: ... + + +class DictionaryMemo(_Weakrefable): + ... + + +_IndexT = TypeVar( + "_IndexT", + UInt8Type, + Int8Type, + UInt16Type, + Int16Type, + UInt32Type, + Int32Type, + UInt64Type, + Int64Type, +) +_BasicValueT = TypeVar("_BasicValueT", bound=_BasicDataType) +_ValueT = TypeVar("_ValueT", bound=DataType) +_Ordered = TypeVar("_Ordered", Literal[True], Literal[False], default=Literal[False]) + + +class DictionaryType(DataType, Generic[_IndexT, _BasicValueT, _Ordered]): + @property + def ordered(self) -> _Ordered: ... + + @property + def index_type(self) -> _IndexT: ... + + @property + def value_type(self) -> _BasicValueT: ... + + +_K = TypeVar("_K", bound=DataType) + + +class MapType(DataType, Generic[_K, _ValueT, _Ordered]): + @property + def key_field(self) -> Field[_K]: ... + + @property + def key_type(self) -> _K: ... + + @property + def item_field(self) -> Field[_ValueT]: ... + + @property + def item_type(self) -> _ValueT: ... + + @property + def keys_sorted(self) -> _Ordered: ... + + +_Size = TypeVar("_Size", default=int) + + +class StructType(DataType): + def get_field_index(self, name: str) -> int: ... + + def field(self, i: int | str) -> Field: ... + + def get_all_field_indices(self, name: str) -> list[int]: ... + + def __len__(self) -> int: ... + + def __iter__(self) -> Iterator[Field]: ... + + __getitem__ = field + @property + def names(self) -> list[str]: ... + + @property + def fields(self) -> list[Field]: ... + + +class UnionType(DataType): + @property + def mode(self) -> Literal["sparse", "dense"]: ... + + @property + def type_codes(self) -> list[int]: ... + + def __len__(self) -> int: ... + + def __iter__(self) -> Iterator[Field]: ... + + def field(self, i: int) -> Field: ... + + __getitem__ = field + + +class SparseUnionType(UnionType): + @property + def mode(self) -> Literal["sparse"]: ... + + +class DenseUnionType(UnionType): + @property + def mode(self) -> Literal["dense"]: ... + + +_RunEndType = TypeVar("_RunEndType", Int16Type, Int32Type, Int64Type) + + +class RunEndEncodedType(DataType, Generic[_RunEndType, _BasicValueT]): + @property + def run_end_type(self) -> _RunEndType: ... + @property + def value_type(self) -> _BasicValueT: ... + + +_StorageT = TypeVar("_StorageT", bound=Array | ChunkedArray) + + +class BaseExtensionType(DataType): + def __arrow_ext_class__(self) -> type[ExtensionArray]: ... + + def __arrow_ext_scalar_class__(self) -> type[ExtensionScalar]: ... + + @property + def extension_name(self) -> str: ... + + @property + def storage_type(self) -> DataType: ... + + def wrap_array(self, storage: _StorageT) -> _StorageT: ... + + +class ExtensionType(BaseExtensionType): + def __init__(self, storage_type: DataType, extension_name: str) -> None: ... + + def __arrow_ext_serialize__(self) -> bytes: ... + + @classmethod + def __arrow_ext_deserialize__( + cls, storage_type: DataType, serialized: bytes) -> Self: ... + + +class FixedShapeTensorType(BaseExtensionType, Generic[_ValueT]): + @property + def value_type(self) -> _ValueT: ... + + @property + def shape(self) -> list[int]: ... + + @property + def dim_names(self) -> list[str] | None: ... + + @property + def permutation(self) -> list[int] | None: ... + + +class Bool8Type(BaseExtensionType): + ... + + +class UuidType(BaseExtensionType): + ... + + +class JsonType(BaseExtensionType): + ... + + +class OpaqueType(BaseExtensionType): + @property + def type_name(self) -> str: ... + + @property + def vendor_name(self) -> str: ... + + +class UnknownExtensionType(ExtensionType): + def __init__(self, storage_type: DataType, serialized: bytes) -> None: ... + + +def register_extension_type(ext_type: ExtensionType) -> None: ... + + +def unregister_extension_type(type_name: str) -> None: ... + + +class KeyValueMetadata(_Metadata, Mapping[bytes, bytes]): + def __init__( + self, __arg0__: Mapping[str | bytes, str | bytes] + | Iterable[tuple[str, str]] + | KeyValueMetadata + | None = None, **kwargs: str + ) -> None: ... + + def equals(self, other: KeyValueMetadata) -> bool: ... + + def __len__(self) -> int: ... + + def __contains__(self, /, __key: object) -> bool: ... # type: ignore[override] + + def __getitem__(self, /, __key: Any) -> Any: ... # type: ignore[override] + + def __iter__(self) -> Iterator[bytes]: ... + + def get_all(self, key: str) -> list[bytes]: ... + + def to_dict(self) -> dict[bytes, bytes]: ... + + +class Field(_Weakrefable, Generic[_DataTypeT]): + def equals(self, other: Field, check_metadata: bool = False) -> bool: ... + + def __hash__(self) -> int: ... + + @property + def nullable(self) -> bool: ... + + @property + def name(self) -> str: ... + + @property + def metadata(self) -> dict[bytes, bytes] | None: ... + + @property + def type(self) -> _DataTypeT: ... + def with_metadata(self, metadata: dict[bytes | str, bytes | str] | + Mapping[bytes | str, bytes | str] | Any) -> Self: ... + + def remove_metadata(self) -> Self: ... + + def with_type(self, new_type: DataType) -> Field: ... + + def with_name(self, name: str) -> Self: ... + + def with_nullable(self, nullable: bool) -> Field[_DataTypeT]: ... + + def flatten(self) -> list[Field]: ... + + def _export_to_c(self, out_ptr: int) -> None: ... + + @classmethod + def _import_from_c(cls, in_ptr: int) -> Self: ... + + def __arrow_c_schema__(self) -> Any: ... + + @classmethod + def _import_from_c_capsule(cls, schema) -> Self: ... + + +class Schema(_Weakrefable): + def __len__(self) -> int: ... + + def __getitem__(self, key: str | int) -> Field: ... + + _field = __getitem__ + def __iter__(self) -> Iterator[Field]: ... + + def __hash__(self) -> int: ... + + def __sizeof__(self) -> int: ... + @property + def pandas_metadata(self) -> dict: ... + + @property + def names(self) -> list[str]: ... + + @property + def types(self) -> list[DataType]: ... + + @property + def metadata(self) -> dict[bytes, bytes]: ... + + def empty_table(self) -> Table: ... + + def equals(self, other: Schema, check_metadata: bool = False) -> bool: ... + + @classmethod + def from_pandas(cls, df: pd.DataFrame, preserve_index: bool | + None = None) -> Schema: ... + + def field(self, i: int | str | bytes) -> Field: ... + + @deprecated("Use 'field' instead") + def field_by_name(self, name: str) -> Field: ... + + def get_field_index(self, name: str) -> int: ... + + def get_all_field_indices(self, name: str) -> list[int]: ... + + def append(self, field: Field) -> Schema: ... + + def insert(self, i: int, field: Field) -> Schema: ... + + def remove(self, i: int) -> Schema: ... + + def set(self, i: int, field: Field) -> Schema: ... + + @deprecated("Use 'with_metadata' instead") + def add_metadata(self, metadata: dict) -> Schema: ... + + def with_metadata(self, metadata: dict) -> Schema: ... + + def serialize(self, memory_pool: MemoryPool | None = None) -> Buffer: ... + + def remove_metadata(self) -> Schema: ... + + def to_string( + self, + truncate_metadata: bool = True, + show_field_metadata: bool = True, + show_schema_metadata: bool = True, + element_size_limit: int | None = None, + ) -> str: ... + + def _export_to_c(self, out_ptr: int) -> None: ... + + @classmethod + def _import_from_c(cls, in_ptr: int) -> Schema: ... + + def __arrow_c_schema__(self) -> Any: ... + + @staticmethod + def _import_from_c_capsule(schema: Any) -> Schema: ... + + +def unify_schemas( + schemas: Sequence[Schema], + *, + promote_options: Literal["default", "permissive"] = "default" +) -> Schema: ... + + +def field( + name: SupportArrowSchema | str | Any, type: _DataTypeT | str | None = None, + nullable: bool = ..., + metadata: dict[Any, Any] | None = None +) -> Field[_DataTypeT] | Field[Any]: ... + + +def null() -> NullType: ... + + +def bool_() -> BoolType: ... + + +def uint8() -> UInt8Type: ... + + +def int8() -> Int8Type: ... + + +def uint16() -> UInt16Type: ... + + +def int16() -> Int16Type: ... + + +def uint32() -> UInt32Type: ... + + +def int32() -> Int32Type: ... + + +def int64() -> Int64Type: ... + + +def uint64() -> UInt64Type: ... + + +def timestamp( + unit: _Unit | str, tz: _Tz | None = None) -> TimestampType[_Unit, _Tz]: ... + + +def time32(unit: _Time32Unit | str) -> Time32Type[_Time32Unit]: ... + + +def time64(unit: _Time64Unit | str) -> Time64Type[_Time64Unit]: ... + + +def duration(unit: _Unit | str) -> DurationType[_Unit]: ... + + +def month_day_nano_interval() -> MonthDayNanoIntervalType: ... + + +def date32() -> Date32Type: ... + + +def date64() -> Date64Type: ... + + +def float16() -> Float16Type: ... + + +def float32() -> Float32Type: ... + + +def float64() -> Float64Type: ... + + +def decimal32(precision: _Precision, scale: _Scale | + None = None) -> Decimal32Type[_Precision, _Scale | Literal[0]]: ... + + +def decimal64(precision: _Precision, scale: _Scale | + None = None) -> Decimal64Type[_Precision, _Scale | Literal[0]]: ... + + +def decimal128(precision: _Precision, scale: _Scale | + None = None) -> Decimal128Type[_Precision, _Scale | Literal[0]]: ... + + +def decimal256(precision: _Precision, scale: _Scale | + None = None) -> Decimal256Type[_Precision, _Scale | Literal[0]]: ... + + +def string() -> StringType: ... + + +utf8 = string + + +def binary(length: Literal[-1] | int = ...) -> BinaryType | FixedSizeBinaryType: ... + + +def large_binary() -> LargeBinaryType: ... + + +def large_string() -> LargeStringType: ... + + +large_utf8 = large_string + + +def binary_view() -> BinaryViewType: ... + + +def string_view() -> StringViewType: ... + + +def list_( + value_type: _DataTypeT | Field[_DataTypeT] | None = None, + list_size: Literal[-1] | _Size | None = None +) -> ListType[_DataTypeT] | FixedSizeListType[_DataTypeT, _Size]: ... + + +def large_list(value_type: _DataTypeT | + Field[_DataTypeT] | None = None) -> LargeListType[_DataTypeT]: ... + + +def list_view(value_type: _DataTypeT | + Field[_DataTypeT] | None = None) -> ListViewType[_DataTypeT]: ... + + +def large_list_view( + value_type: _DataTypeT | Field[_DataTypeT] | None = None +) -> LargeListViewType[_DataTypeT]: ... + + +def map_( + key_type: _K | Field | str | None = None, + item_type: _ValueT | Field | str | None = None, + keys_sorted: bool | None = None +) -> MapType[_K, _ValueT, Literal[False]]: ... + + +def dictionary( + index_type: _IndexT | str, + value_type: _BasicValueT | str, + ordered: _Ordered | None = None +) -> DictionaryType[_IndexT, _BasicValueT, _Ordered]: ... + + +def struct( + fields: Iterable[ + Field[Any] + | tuple[str, Field[Any] | None] + | tuple[str, DataType | None] + ] | Mapping[str, Field[Any] | DataType | None], +) -> StructType: ... + + +def sparse_union( + child_fields: list[Field[Any]], type_codes: list[int] | None = None +) -> SparseUnionType: ... + + +def dense_union( + child_fields: list[Field[Any]], type_codes: list[int] | None = None +) -> DenseUnionType: ... + + +def union( + child_fields: list[Field[Any]], mode: Literal["sparse", "dense"] | int | str, + type_codes: list[int] | None = None) -> SparseUnionType | DenseUnionType: ... + + +def run_end_encoded( + run_end_type: _RunEndType | str | None, value_type: _BasicValueT | str | None +) -> RunEndEncodedType[_RunEndType, _BasicValueT]: ... + + +def json_(storage_type: DataType = ...) -> JsonType: ... + + +def uuid() -> UuidType: ... + + +def fixed_shape_tensor( + value_type: _ValueT, + shape: Sequence[int], + dim_names: Sequence[str] | None = None, + permutation: Sequence[int] | None = None, +) -> FixedShapeTensorType[_ValueT]: ... + + +def bool8() -> Bool8Type: ... + + +def opaque(storage_type: DataType, type_name: str, vendor_name: str) -> OpaqueType: ... + + +def type_for_alias(name: Any) -> DataType: ... + + +def schema( + fields: ( + Iterable[Field[Any]] + | Iterable[tuple[str, DataType | str | None]] + | Mapping[Any, DataType | str | None] + ), + metadata: Mapping[bytes, bytes] + | Mapping[str, str] + | Mapping[bytes, str] + | Mapping[str, bytes] | None = None, +) -> Schema: ... + + +def from_numpy_dtype(dtype: np.dtype[Any] | type | str) -> DataType: ... + + +__all__ = [ + "_Weakrefable", + "_Metadata", + "DataType", + "_BasicDataType", + "NullType", + "BoolType", + "UInt8Type", + "Int8Type", + "UInt16Type", + "Int16Type", + "UInt32Type", + "Int32Type", + "UInt64Type", + "Int64Type", + "Float16Type", + "Float32Type", + "Float64Type", + "Date32Type", + "Date64Type", + "MonthDayNanoIntervalType", + "StringType", + "LargeStringType", + "StringViewType", + "BinaryType", + "LargeBinaryType", + "BinaryViewType", + "TimestampType", + "Time32Type", + "Time64Type", + "DurationType", + "FixedSizeBinaryType", + "Decimal32Type", + "Decimal64Type", + "Decimal128Type", + "Decimal256Type", + "ListType", + "LargeListType", + "ListViewType", + "LargeListViewType", + "FixedSizeListType", + "DictionaryMemo", + "DictionaryType", + "MapType", + "StructType", + "UnionType", + "SparseUnionType", + "DenseUnionType", + "RunEndEncodedType", + "BaseExtensionType", + "ExtensionType", + "FixedShapeTensorType", + "Bool8Type", + "UuidType", + "JsonType", + "OpaqueType", + "UnknownExtensionType", + "register_extension_type", + "unregister_extension_type", + "KeyValueMetadata", + "Field", + "Schema", + "unify_schemas", + "field", + "null", + "bool_", + "uint8", + "int8", + "uint16", + "int16", + "uint32", + "int32", + "int64", + "uint64", + "timestamp", + "time32", + "time64", + "duration", + "month_day_nano_interval", + "date32", + "date64", + "float16", + "float32", + "float64", + "decimal32", + "decimal64", + "decimal128", + "decimal256", + "string", + "utf8", + "binary", + "large_binary", + "large_string", + "large_utf8", + "binary_view", + "string_view", + "list_", + "large_list", + "list_view", + "large_list_view", + "map_", + "dictionary", + "struct", + "sparse_union", + "dense_union", + "union", + "run_end_encoded", + "json_", + "uuid", + "fixed_shape_tensor", + "bool8", + "opaque", + "type_for_alias", + "schema", + "from_numpy_dtype", + "_Unit", + "_Tz", + "_Time32Unit", + "_Time64Unit", + "_DataTypeT", +] diff --git a/python/pyarrow-stubs/pyarrow/error.pyi b/python/pyarrow-stubs/pyarrow/error.pyi new file mode 100644 index 00000000000..eac936afcb5 --- /dev/null +++ b/python/pyarrow-stubs/pyarrow/error.pyi @@ -0,0 +1,104 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self + + +class ArrowException(Exception): + ... + + +class ArrowInvalid(ValueError, ArrowException): + ... + + +class ArrowMemoryError(MemoryError, ArrowException): + ... + + +class ArrowKeyError(KeyError, ArrowException): + ... + + +class ArrowTypeError(TypeError, ArrowException): + ... + + +class ArrowNotImplementedError(NotImplementedError, ArrowException): + ... + + +class ArrowCapacityError(ArrowException): + ... + + +class ArrowIndexError(IndexError, ArrowException): + ... + + +class ArrowSerializationError(ArrowException): + ... + + +class ArrowCancelled(ArrowException): + signum: int | None + def __init__(self, message: str, signum: int | None = None) -> None: ... + + +ArrowIOError = IOError + + +class StopToken: + ... + + +def enable_signal_handlers(enable: bool) -> None: ... + + +have_signal_refcycle: bool + + +class SignalStopHandler: + def __enter__(self) -> Self: ... + def __exit__(self, exc_type, exc_value, exc_tb) -> None: ... + def __dealloc__(self) -> None: ... + @property + def stop_token(self) -> StopToken: ... + + +__all__ = [ + "ArrowException", + "ArrowInvalid", + "ArrowMemoryError", + "ArrowKeyError", + "ArrowTypeError", + "ArrowNotImplementedError", + "ArrowCapacityError", + "ArrowIndexError", + "ArrowSerializationError", + "ArrowCancelled", + "ArrowIOError", + "StopToken", + "enable_signal_handlers", + "have_signal_refcycle", + "SignalStopHandler", +] diff --git a/python/pyarrow-stubs/pyarrow/io.pyi b/python/pyarrow-stubs/pyarrow/io.pyi new file mode 100644 index 00000000000..467ec48cc76 --- /dev/null +++ b/python/pyarrow-stubs/pyarrow/io.pyi @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Placeholder stub - complete annotations in future PR.""" + +from typing import Any + +def __getattr__(name: str) -> Any: ... diff --git a/python/pyarrow-stubs/pyarrow/lib.pyi b/python/pyarrow-stubs/pyarrow/lib.pyi new file mode 100644 index 00000000000..775434be2ea --- /dev/null +++ b/python/pyarrow-stubs/pyarrow/lib.pyi @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Placeholder stub for pyarrow.lib C extension module. + +Complete type annotations will be added in subsequent PRs. +""" + +from typing import Any + +def __getattr__(name: str) -> Any: ... diff --git a/python/pyarrow-stubs/pyarrow/scalar.pyi b/python/pyarrow-stubs/pyarrow/scalar.pyi new file mode 100644 index 00000000000..467ec48cc76 --- /dev/null +++ b/python/pyarrow-stubs/pyarrow/scalar.pyi @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Placeholder stub - complete annotations in future PR.""" + +from typing import Any + +def __getattr__(name: str) -> Any: ... diff --git a/python/pyarrow/fs.py b/python/pyarrow/fs.py index 670ccaaf245..f055c508182 100644 --- a/python/pyarrow/fs.py +++ b/python/pyarrow/fs.py @@ -111,7 +111,7 @@ def _ensure_filesystem(filesystem, *, use_mmap=False): else: # handle fsspec-compatible filesystems try: - import fsspec + import fsspec # type: ignore[import-untyped] except ImportError: pass else: From 4237b9748de41ca3fe21bfe5ac1e6d71708cb08b Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Mon, 9 Feb 2026 21:08:58 +0100 Subject: [PATCH 3/7] Update python/pyarrow-stubs/pyarrow/_stubs_typing.pyi Co-authored-by: Dan Redding <125183946+dangotbanned@users.noreply.github.com> --- python/pyarrow-stubs/pyarrow/_stubs_typing.pyi | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi b/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi index 0715012fddc..0e4013c2b9e 100644 --- a/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi +++ b/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi @@ -46,16 +46,29 @@ Compression: TypeAlias = Literal[ NullEncoding: TypeAlias = Literal["mask", "encode"] NullSelectionBehavior: TypeAlias = Literal["drop", "emit_null"] TimeUnit: TypeAlias = Literal["s", "ms", "us", "ns"] +from pyarrow import lib + +IntegerType: TypeAlias = ( + lib.Int8Type + | lib.Int16Type + | lib.Int32Type + | lib.Int64Type + | lib.UInt8Type + | lib.UInt16Type + | lib.UInt32Type + | lib.UInt64Type +) + Mask: TypeAlias = ( Sequence[bool | None] | NDArray[np.bool_] - | BooleanArray + | lib.Array[lib.Scalar[lib.BoolType]] | ChunkedArray[Any] ) Indices: TypeAlias = ( Sequence[int | None] | NDArray[np.integer[Any]] - | IntegerArray + | lib.Array[lib.Scalar[IntegerType]] | ChunkedArray[Any] ) From abf96ae1c2ef2512554d1d4ba0efd676cc30a4c1 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Mon, 9 Mar 2026 22:16:26 +0100 Subject: [PATCH 4/7] post rebase changes --- python/CMakeLists.txt | 4 +- .../pyarrow-stubs/pyarrow/_stubs_typing.pyi | 14 ++--- python/pyarrow-stubs/pyarrow/_types.pyi | 63 +++++++++---------- python/pyarrow-stubs/pyarrow/error.pyi | 8 +-- python/pyarrow-stubs/pyarrow/io.pyi | 10 ++- python/pyarrow-stubs/pyarrow/scalar.pyi | 10 ++- 6 files changed, 58 insertions(+), 51 deletions(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index f71a495e224..6395b3e1e7a 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -1042,9 +1042,9 @@ if(EXISTS "${PYARROW_STUBS_SOURCE_DIR}") install(CODE " execute_process( COMMAND \"${Python3_EXECUTABLE}\" - \"${CMAKE_SOURCE_DIR}/scripts/update_stub_docstrings.py\" + \"${CMAKE_CURRENT_SOURCE_DIR}/scripts/update_stub_docstrings.py\" \"${CMAKE_INSTALL_PREFIX}\" - \"${CMAKE_SOURCE_DIR}\" + \"${CMAKE_CURRENT_SOURCE_DIR}\" RESULT_VARIABLE _pyarrow_stub_docstrings_result ) if(NOT _pyarrow_stub_docstrings_result EQUAL 0) diff --git a/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi b/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi index 0e4013c2b9e..2b823075895 100644 --- a/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi +++ b/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi @@ -25,6 +25,7 @@ import numpy as np from numpy.typing import NDArray +from pyarrow import lib from pyarrow.lib import BooleanArray, IntegerArray, ChunkedArray ArrayLike: TypeAlias = Any @@ -46,7 +47,6 @@ Compression: TypeAlias = Literal[ NullEncoding: TypeAlias = Literal["mask", "encode"] NullSelectionBehavior: TypeAlias = Literal["drop", "emit_null"] TimeUnit: TypeAlias = Literal["s", "ms", "us", "ns"] -from pyarrow import lib IntegerType: TypeAlias = ( lib.Int8Type @@ -82,23 +82,23 @@ SingleOrList: TypeAlias = list[_T] | _T class SupportEq(Protocol): - def __eq__(self, other) -> bool: ... + def __eq__(self, other: object) -> bool: ... class SupportLt(Protocol): - def __lt__(self, other) -> bool: ... + def __lt__(self, other: object) -> bool: ... class SupportGt(Protocol): - def __gt__(self, other) -> bool: ... + def __gt__(self, other: object) -> bool: ... class SupportLe(Protocol): - def __le__(self, other) -> bool: ... + def __le__(self, other: object) -> bool: ... class SupportGe(Protocol): - def __ge__(self, other) -> bool: ... + def __ge__(self, other: object) -> bool: ... FilterTuple: TypeAlias = ( @@ -140,7 +140,7 @@ class SupportArrowSchema(Protocol): def __arrow_c_schema__(self) -> Any: ... -class NullableCollection(Protocol[_V]): # type: ignore[reportInvalidTypeVarUse] +class NullableCollection(Protocol[_V]): def __iter__(self) -> Iterator[_V] | Iterator[_V | None]: ... def __len__(self) -> int: ... def __contains__(self, item: Any, /) -> bool: ... diff --git a/python/pyarrow-stubs/pyarrow/_types.pyi b/python/pyarrow-stubs/pyarrow/_types.pyi index 3d802382ba1..d646551eecb 100644 --- a/python/pyarrow-stubs/pyarrow/_types.pyi +++ b/python/pyarrow-stubs/pyarrow/_types.pyi @@ -16,22 +16,18 @@ # under the License. import datetime as dt # noqa: F401 -import sys from collections.abc import Mapping, Sequence, Iterable, Iterator from decimal import Decimal # noqa: F401 - -if sys.version_info >= (3, 11): - from typing import Self -else: - from typing_extensions import Self - from typing import Any, Generic, Literal import numpy as np import pandas as pd -from pyarrow._stubs_typing import SupportArrowSchema +from typing_extensions import Self, TypeVar, deprecated + +from pyarrow._stubs_typing import SupportArrowSchema, TimeUnit +from pyarrow.io import Buffer from pyarrow.lib import ( # noqa: F401 Array, ChunkedArray, @@ -40,11 +36,7 @@ from pyarrow.lib import ( # noqa: F401 MonthDayNano, Table, ) -from typing_extensions import TypeVar, deprecated - -from .io import Buffer -from .scalar import ExtensionScalar -from ._stubs_typing import TimeUnit +from pyarrow.scalar import ExtensionScalar class _Weakrefable: ... @@ -226,7 +218,12 @@ class DurationType(_BasicDataType[dt.timedelta], Generic[_Unit]): def unit(self) -> _Unit: ... -class FixedSizeBinaryType(_BasicDataType[Decimal]): +_FixedSizeBinaryAsPyType = TypeVar("_FixedSizeBinaryAsPyType", default=bytes) + + +class FixedSizeBinaryType( + _BasicDataType[_FixedSizeBinaryAsPyType], Generic[_FixedSizeBinaryAsPyType] +): ... @@ -234,7 +231,7 @@ _Precision = TypeVar("_Precision", default=Any) _Scale = TypeVar("_Scale", default=Any) -class Decimal32Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): +class Decimal32Type(FixedSizeBinaryType[Decimal], Generic[_Precision, _Scale]): @property def precision(self) -> _Precision: ... @@ -242,7 +239,7 @@ class Decimal32Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): def scale(self) -> _Scale: ... -class Decimal64Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): +class Decimal64Type(FixedSizeBinaryType[Decimal], Generic[_Precision, _Scale]): @property def precision(self) -> _Precision: ... @@ -250,7 +247,7 @@ class Decimal64Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): def scale(self) -> _Scale: ... -class Decimal128Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): +class Decimal128Type(FixedSizeBinaryType[Decimal], Generic[_Precision, _Scale]): @property def precision(self) -> _Precision: ... @@ -258,7 +255,7 @@ class Decimal128Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): def scale(self) -> _Scale: ... -class Decimal256Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): +class Decimal256Type(FixedSizeBinaryType[Decimal], Generic[_Precision, _Scale]): @property def precision(self) -> _Precision: ... @@ -491,7 +488,7 @@ def unregister_extension_type(type_name: str) -> None: ... class KeyValueMetadata(_Metadata, Mapping[bytes, bytes]): def __init__( self, __arg0__: Mapping[str | bytes, str | bytes] - | Iterable[tuple[str, str]] + | Iterable[tuple[str | bytes, str | bytes]] | KeyValueMetadata | None = None, **kwargs: str ) -> None: ... @@ -500,9 +497,9 @@ class KeyValueMetadata(_Metadata, Mapping[bytes, bytes]): def __len__(self) -> int: ... - def __contains__(self, /, __key: object) -> bool: ... # type: ignore[override] + def __contains__(self, /, __key: object) -> bool: ... - def __getitem__(self, /, __key: Any) -> Any: ... # type: ignore[override] + def __getitem__(self, /, __key: Any) -> Any: ... def __iter__(self) -> Iterator[bytes]: ... @@ -636,7 +633,7 @@ def unify_schemas( def field( name: SupportArrowSchema | str | Any, type: _DataTypeT | str | None = None, - nullable: bool = ..., + nullable: bool = True, metadata: dict[Any, Any] | None = None ) -> Field[_DataTypeT] | Field[Any]: ... @@ -702,20 +699,20 @@ def float32() -> Float32Type: ... def float64() -> Float64Type: ... -def decimal32(precision: _Precision, scale: _Scale | - None = None) -> Decimal32Type[_Precision, _Scale | Literal[0]]: ... +def decimal32(precision: _Precision, + scale: _Scale | Literal[0] = 0) -> Decimal32Type[_Precision, _Scale | Literal[0]]: ... -def decimal64(precision: _Precision, scale: _Scale | - None = None) -> Decimal64Type[_Precision, _Scale | Literal[0]]: ... +def decimal64(precision: _Precision, + scale: _Scale | Literal[0] = 0) -> Decimal64Type[_Precision, _Scale | Literal[0]]: ... -def decimal128(precision: _Precision, scale: _Scale | - None = None) -> Decimal128Type[_Precision, _Scale | Literal[0]]: ... +def decimal128(precision: _Precision, + scale: _Scale | Literal[0] = 0) -> Decimal128Type[_Precision, _Scale | Literal[0]]: ... -def decimal256(precision: _Precision, scale: _Scale | - None = None) -> Decimal256Type[_Precision, _Scale | Literal[0]]: ... +def decimal256(precision: _Precision, + scale: _Scale | Literal[0] = 0) -> Decimal256Type[_Precision, _Scale | Literal[0]]: ... def string() -> StringType: ... @@ -724,7 +721,7 @@ def string() -> StringType: ... utf8 = string -def binary(length: Literal[-1] | int = ...) -> BinaryType | FixedSizeBinaryType: ... +def binary(length: Literal[-1] | int = ...) -> BinaryType | FixedSizeBinaryType[bytes]: ... def large_binary() -> LargeBinaryType: ... @@ -764,8 +761,8 @@ def large_list_view( def map_( key_type: _K | Field | str | None = None, item_type: _ValueT | Field | str | None = None, - keys_sorted: bool | None = None -) -> MapType[_K, _ValueT, Literal[False]]: ... + keys_sorted: _Ordered | None = None +) -> MapType[_K, _ValueT, _Ordered]: ... def dictionary( diff --git a/python/pyarrow-stubs/pyarrow/error.pyi b/python/pyarrow-stubs/pyarrow/error.pyi index eac936afcb5..6a010071de0 100644 --- a/python/pyarrow-stubs/pyarrow/error.pyi +++ b/python/pyarrow-stubs/pyarrow/error.pyi @@ -15,12 +15,7 @@ # specific language governing permissions and limitations # under the License. -import sys - -if sys.version_info >= (3, 11): - from typing import Self -else: - from typing_extensions import Self +from typing_extensions import Self class ArrowException(Exception): @@ -80,7 +75,6 @@ have_signal_refcycle: bool class SignalStopHandler: def __enter__(self) -> Self: ... def __exit__(self, exc_type, exc_value, exc_tb) -> None: ... - def __dealloc__(self) -> None: ... @property def stop_token(self) -> StopToken: ... diff --git a/python/pyarrow-stubs/pyarrow/io.pyi b/python/pyarrow-stubs/pyarrow/io.pyi index 467ec48cc76..856fb093a6c 100644 --- a/python/pyarrow-stubs/pyarrow/io.pyi +++ b/python/pyarrow-stubs/pyarrow/io.pyi @@ -15,8 +15,16 @@ # specific language governing permissions and limitations # under the License. -"""Placeholder stub - complete annotations in future PR.""" +"""Placeholder stub - complete annotations in future PR. + +At runtime, these symbols are provided by the pyarrow.lib C extension. +""" from typing import Any + +class Buffer: + ... + + def __getattr__(name: str) -> Any: ... diff --git a/python/pyarrow-stubs/pyarrow/scalar.pyi b/python/pyarrow-stubs/pyarrow/scalar.pyi index 467ec48cc76..384507d2a2b 100644 --- a/python/pyarrow-stubs/pyarrow/scalar.pyi +++ b/python/pyarrow-stubs/pyarrow/scalar.pyi @@ -15,8 +15,16 @@ # specific language governing permissions and limitations # under the License. -"""Placeholder stub - complete annotations in future PR.""" +"""Placeholder stub - complete annotations in future PR. + +At runtime, these symbols are provided by the pyarrow.lib C extension. +""" from typing import Any + +class ExtensionScalar: + ... + + def __getattr__(name: str) -> Any: ... From b9d850079d3cf7242d2e13b637ca83a0678e4c95 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Fri, 13 Mar 2026 16:27:18 +0100 Subject: [PATCH 5/7] linting and formatting --- python/pyarrow-stubs/pyarrow/__init__.pyi | 8 +- .../pyarrow-stubs/pyarrow/_stubs_typing.pyi | 55 +- python/pyarrow-stubs/pyarrow/_types.pyi | 481 ++++-------------- python/pyarrow-stubs/pyarrow/error.pyi | 54 +- python/pyarrow-stubs/pyarrow/io.pyi | 11 +- python/pyarrow-stubs/pyarrow/lib.pyi | 6 +- python/pyarrow-stubs/pyarrow/scalar.pyi | 11 +- 7 files changed, 134 insertions(+), 492 deletions(-) diff --git a/python/pyarrow-stubs/pyarrow/__init__.pyi b/python/pyarrow-stubs/pyarrow/__init__.pyi index ccec8d5abc0..9c7120c7f80 100644 --- a/python/pyarrow-stubs/pyarrow/__init__.pyi +++ b/python/pyarrow-stubs/pyarrow/__init__.pyi @@ -15,11 +15,9 @@ # specific language governing permissions and limitations # under the License. -"""Type stubs for PyArrow. - -This is a placeholder stub file. -Complete type annotations will be added in subsequent PRs. -""" +# Type stubs for PyArrow. +# This is a placeholder stub file. +# Complete type annotations will be added in subsequent PRs. from typing import Any diff --git a/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi b/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi index 2b823075895..1d5f7600c4b 100644 --- a/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi +++ b/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi @@ -26,7 +26,7 @@ import numpy as np from numpy.typing import NDArray from pyarrow import lib -from pyarrow.lib import BooleanArray, IntegerArray, ChunkedArray +from pyarrow.lib import ChunkedArray ArrayLike: TypeAlias = Any ScalarLike: TypeAlias = Any @@ -72,34 +72,38 @@ Indices: TypeAlias = ( | ChunkedArray[Any] ) -PyScalar: TypeAlias = (bool | int | float | Decimal | str | bytes | - dt.date | dt.datetime | dt.time | dt.timedelta) +PyScalar: TypeAlias = ( + bool + | int + | float + | Decimal + | str + | bytes + | dt.date + | dt.datetime + | dt.time + | dt.timedelta +) _T = TypeVar("_T") _V = TypeVar("_V", covariant=True) SingleOrList: TypeAlias = list[_T] | _T - class SupportEq(Protocol): - def __eq__(self, other: object) -> bool: ... - + def __eq__(self, other: object, /) -> bool: ... class SupportLt(Protocol): - def __lt__(self, other: object) -> bool: ... - + def __lt__(self, other: object, /) -> bool: ... class SupportGt(Protocol): - def __gt__(self, other: object) -> bool: ... - + def __gt__(self, other: object, /) -> bool: ... class SupportLe(Protocol): - def __le__(self, other: object) -> bool: ... - + def __le__(self, other: object, /) -> bool: ... class SupportGe(Protocol): - def __ge__(self, other: object) -> bool: ... - + def __ge__(self, other: object, /) -> bool: ... FilterTuple: TypeAlias = ( tuple[str, Literal["=", "==", "!="], SupportEq] @@ -111,35 +115,24 @@ FilterTuple: TypeAlias = ( | tuple[str, str, Any] # Allow general str for operator to avoid type errors ) - -class Buffer(Protocol): - ... - - -class SupportPyBuffer(Protocol): - ... - +class Buffer(Protocol): ... +class SupportPyBuffer(Protocol): ... class SupportArrowStream(Protocol): - def __arrow_c_stream__(self, requested_schema=None) -> Any: ... - + def __arrow_c_stream__(self, requested_schema=None, /) -> Any: ... class SupportPyArrowArray(Protocol): - def __arrow_array__(self, type=None) -> Any: ... - + def __arrow_array__(self, type=None, /) -> Any: ... class SupportArrowArray(Protocol): - def __arrow_c_array__(self, requested_schema=None) -> Any: ... - + def __arrow_c_array__(self, requested_schema=None, /) -> Any: ... class SupportArrowDeviceArray(Protocol): - def __arrow_c_device_array__(self, requested_schema=None, **kwargs) -> Any: ... - + def __arrow_c_device_array__(self, requested_schema=None, /, **kwargs) -> Any: ... class SupportArrowSchema(Protocol): def __arrow_c_schema__(self) -> Any: ... - class NullableCollection(Protocol[_V]): def __iter__(self) -> Iterator[_V] | Iterator[_V | None]: ... def __len__(self) -> int: ... diff --git a/python/pyarrow-stubs/pyarrow/_types.pyi b/python/pyarrow-stubs/pyarrow/_types.pyi index d646551eecb..1fd5c5002d1 100644 --- a/python/pyarrow-stubs/pyarrow/_types.pyi +++ b/python/pyarrow-stubs/pyarrow/_types.pyi @@ -38,276 +38,157 @@ from pyarrow.lib import ( # noqa: F401 ) from pyarrow.scalar import ExtensionScalar -class _Weakrefable: - ... - - -class _Metadata(_Weakrefable): - ... - +class _Weakrefable: ... +class _Metadata(_Weakrefable): ... class DataType(_Weakrefable): def field(self, i: int) -> Field: ... - @property def id(self) -> int: ... @property def bit_width(self) -> int: ... - @property def byte_width(self) -> int: ... - @property def num_fields(self) -> int: ... - @property def num_buffers(self) -> int: ... - @property def has_variadic_buffers(self) -> bool: ... # Properties that exist on specific subtypes but accessed generically @property def list_size(self) -> int: ... - def __hash__(self) -> int: ... - - def equals(self, other: DataType | str, *, - check_metadata: bool = False) -> bool: ... - + def equals( + self, other: DataType | str, *, check_metadata: bool = False + ) -> bool: ... def to_pandas_dtype(self) -> np.generic: ... - def _export_to_c(self, out_ptr: int) -> None: ... - @classmethod def _import_from_c(cls, in_ptr: int) -> Self: ... - def __arrow_c_schema__(self) -> Any: ... - @classmethod def _import_from_c_capsule(cls, schema) -> Self: ... - _AsPyType = TypeVar("_AsPyType") _DataTypeT = TypeVar("_DataTypeT", bound=DataType) - -class _BasicDataType(DataType, Generic[_AsPyType]): - ... - - -class NullType(_BasicDataType[None]): - ... - - -class BoolType(_BasicDataType[bool]): - ... - - -class UInt8Type(_BasicDataType[int]): - ... - - -class Int8Type(_BasicDataType[int]): - ... - - -class UInt16Type(_BasicDataType[int]): - ... - - -class Int16Type(_BasicDataType[int]): - ... - - -class UInt32Type(_BasicDataType[int]): - ... - - -class Int32Type(_BasicDataType[int]): - ... - - -class UInt64Type(_BasicDataType[int]): - ... - - -class Int64Type(_BasicDataType[int]): - ... - - -class Float16Type(_BasicDataType[float]): - ... - - -class Float32Type(_BasicDataType[float]): - ... - - -class Float64Type(_BasicDataType[float]): - ... - - -class Date32Type(_BasicDataType[dt.date]): - ... - - -class Date64Type(_BasicDataType[dt.date]): - ... - - -class MonthDayNanoIntervalType(_BasicDataType[MonthDayNano]): - ... - - -class StringType(_BasicDataType[str]): - ... - - -class LargeStringType(_BasicDataType[str]): - ... - - -class StringViewType(_BasicDataType[str]): - ... - - -class BinaryType(_BasicDataType[bytes]): - ... - - -class LargeBinaryType(_BasicDataType[bytes]): - ... - - -class BinaryViewType(_BasicDataType[bytes]): - ... - +class _BasicDataType(DataType, Generic[_AsPyType]): ... +class NullType(_BasicDataType[None]): ... +class BoolType(_BasicDataType[bool]): ... +class UInt8Type(_BasicDataType[int]): ... +class Int8Type(_BasicDataType[int]): ... +class UInt16Type(_BasicDataType[int]): ... +class Int16Type(_BasicDataType[int]): ... +class UInt32Type(_BasicDataType[int]): ... +class Int32Type(_BasicDataType[int]): ... +class UInt64Type(_BasicDataType[int]): ... +class Int64Type(_BasicDataType[int]): ... +class Float16Type(_BasicDataType[float]): ... +class Float32Type(_BasicDataType[float]): ... +class Float64Type(_BasicDataType[float]): ... +class Date32Type(_BasicDataType[dt.date]): ... +class Date64Type(_BasicDataType[dt.date]): ... +class MonthDayNanoIntervalType(_BasicDataType[MonthDayNano]): ... +class StringType(_BasicDataType[str]): ... +class LargeStringType(_BasicDataType[str]): ... +class StringViewType(_BasicDataType[str]): ... +class BinaryType(_BasicDataType[bytes]): ... +class LargeBinaryType(_BasicDataType[bytes]): ... +class BinaryViewType(_BasicDataType[bytes]): ... _Unit = TypeVar("_Unit", bound=TimeUnit, default=Literal["us"]) _Tz = TypeVar("_Tz", str, None, default=None) - class TimestampType(_BasicDataType[int], Generic[_Unit, _Tz]): - @property def unit(self) -> _Unit: ... - @property def tz(self) -> _Tz: ... - _Time32Unit = TypeVar("_Time32Unit", bound=Literal["s", "ms"]) - class Time32Type(_BasicDataType[dt.time], Generic[_Time32Unit]): @property def unit(self) -> _Time32Unit: ... - _Time64Unit = TypeVar("_Time64Unit", bound=Literal["us", "ns"]) - class Time64Type(_BasicDataType[dt.time], Generic[_Time64Unit]): @property def unit(self) -> _Time64Unit: ... - class DurationType(_BasicDataType[dt.timedelta], Generic[_Unit]): @property def unit(self) -> _Unit: ... - _FixedSizeBinaryAsPyType = TypeVar("_FixedSizeBinaryAsPyType", default=bytes) - class FixedSizeBinaryType( - _BasicDataType[_FixedSizeBinaryAsPyType], Generic[_FixedSizeBinaryAsPyType] -): - ... - + _BasicDataType[_FixedSizeBinaryAsPyType] +): ... _Precision = TypeVar("_Precision", default=Any) _Scale = TypeVar("_Scale", default=Any) - class Decimal32Type(FixedSizeBinaryType[Decimal], Generic[_Precision, _Scale]): @property def precision(self) -> _Precision: ... - @property def scale(self) -> _Scale: ... - class Decimal64Type(FixedSizeBinaryType[Decimal], Generic[_Precision, _Scale]): @property def precision(self) -> _Precision: ... - @property def scale(self) -> _Scale: ... - class Decimal128Type(FixedSizeBinaryType[Decimal], Generic[_Precision, _Scale]): @property def precision(self) -> _Precision: ... - @property def scale(self) -> _Scale: ... - class Decimal256Type(FixedSizeBinaryType[Decimal], Generic[_Precision, _Scale]): @property def precision(self) -> _Precision: ... - @property def scale(self) -> _Scale: ... - class ListType(DataType, Generic[_DataTypeT]): @property def value_field(self) -> Field[_DataTypeT]: ... - @property def value_type(self) -> _DataTypeT: ... - class LargeListType(DataType, Generic[_DataTypeT]): @property def value_field(self) -> Field[_DataTypeT]: ... @property def value_type(self) -> _DataTypeT: ... - class ListViewType(DataType, Generic[_DataTypeT]): @property def value_field(self) -> Field[_DataTypeT]: ... - @property def value_type(self) -> _DataTypeT: ... - class LargeListViewType(DataType, Generic[_DataTypeT]): @property def value_field(self) -> Field[_DataTypeT]: ... - @property def value_type(self) -> _DataTypeT: ... - class FixedSizeListType(DataType, Generic[_DataTypeT, _Size]): @property def value_field(self) -> Field[_DataTypeT]: ... - @property def value_type(self) -> _DataTypeT: ... - @property def list_size(self) -> int: ... - -class DictionaryMemo(_Weakrefable): - ... - +class DictionaryMemo(_Weakrefable): ... _IndexT = TypeVar( "_IndexT", @@ -324,287 +205,197 @@ _BasicValueT = TypeVar("_BasicValueT", bound=_BasicDataType) _ValueT = TypeVar("_ValueT", bound=DataType) _Ordered = TypeVar("_Ordered", Literal[True], Literal[False], default=Literal[False]) - class DictionaryType(DataType, Generic[_IndexT, _BasicValueT, _Ordered]): @property def ordered(self) -> _Ordered: ... - @property def index_type(self) -> _IndexT: ... - @property def value_type(self) -> _BasicValueT: ... - _K = TypeVar("_K", bound=DataType) - class MapType(DataType, Generic[_K, _ValueT, _Ordered]): @property def key_field(self) -> Field[_K]: ... - @property def key_type(self) -> _K: ... - @property def item_field(self) -> Field[_ValueT]: ... - @property def item_type(self) -> _ValueT: ... - @property def keys_sorted(self) -> _Ordered: ... - _Size = TypeVar("_Size", default=int) - class StructType(DataType): def get_field_index(self, name: str) -> int: ... - def field(self, i: int | str) -> Field: ... - def get_all_field_indices(self, name: str) -> list[int]: ... - def __len__(self) -> int: ... - def __iter__(self) -> Iterator[Field]: ... __getitem__ = field @property def names(self) -> list[str]: ... - @property def fields(self) -> list[Field]: ... - class UnionType(DataType): @property def mode(self) -> Literal["sparse", "dense"]: ... - @property def type_codes(self) -> list[int]: ... - def __len__(self) -> int: ... - def __iter__(self) -> Iterator[Field]: ... - def field(self, i: int) -> Field: ... __getitem__ = field - class SparseUnionType(UnionType): @property def mode(self) -> Literal["sparse"]: ... - class DenseUnionType(UnionType): @property def mode(self) -> Literal["dense"]: ... - _RunEndType = TypeVar("_RunEndType", Int16Type, Int32Type, Int64Type) - class RunEndEncodedType(DataType, Generic[_RunEndType, _BasicValueT]): @property def run_end_type(self) -> _RunEndType: ... @property def value_type(self) -> _BasicValueT: ... - _StorageT = TypeVar("_StorageT", bound=Array | ChunkedArray) - class BaseExtensionType(DataType): def __arrow_ext_class__(self) -> type[ExtensionArray]: ... - def __arrow_ext_scalar_class__(self) -> type[ExtensionScalar]: ... - @property def extension_name(self) -> str: ... - @property def storage_type(self) -> DataType: ... - def wrap_array(self, storage: _StorageT) -> _StorageT: ... - class ExtensionType(BaseExtensionType): def __init__(self, storage_type: DataType, extension_name: str) -> None: ... - def __arrow_ext_serialize__(self) -> bytes: ... - @classmethod def __arrow_ext_deserialize__( - cls, storage_type: DataType, serialized: bytes) -> Self: ... - + cls, storage_type: DataType, serialized: bytes + ) -> Self: ... class FixedShapeTensorType(BaseExtensionType, Generic[_ValueT]): @property def value_type(self) -> _ValueT: ... - @property def shape(self) -> list[int]: ... - @property def dim_names(self) -> list[str] | None: ... - @property def permutation(self) -> list[int] | None: ... - -class Bool8Type(BaseExtensionType): - ... - - -class UuidType(BaseExtensionType): - ... - - -class JsonType(BaseExtensionType): - ... - +class Bool8Type(BaseExtensionType): ... +class UuidType(BaseExtensionType): ... +class JsonType(BaseExtensionType): ... class OpaqueType(BaseExtensionType): @property def type_name(self) -> str: ... - @property def vendor_name(self) -> str: ... - class UnknownExtensionType(ExtensionType): def __init__(self, storage_type: DataType, serialized: bytes) -> None: ... - def register_extension_type(ext_type: ExtensionType) -> None: ... - - def unregister_extension_type(type_name: str) -> None: ... - class KeyValueMetadata(_Metadata, Mapping[bytes, bytes]): def __init__( - self, __arg0__: Mapping[str | bytes, str | bytes] + self, + __arg0__: Mapping[str | bytes, str | bytes] | Iterable[tuple[str | bytes, str | bytes]] | KeyValueMetadata - | None = None, **kwargs: str + | None = None, + **kwargs: str, ) -> None: ... - def equals(self, other: KeyValueMetadata) -> bool: ... - def __len__(self) -> int: ... - def __contains__(self, /, __key: object) -> bool: ... - def __getitem__(self, /, __key: Any) -> Any: ... - def __iter__(self) -> Iterator[bytes]: ... - def get_all(self, key: str) -> list[bytes]: ... - def to_dict(self) -> dict[bytes, bytes]: ... - class Field(_Weakrefable, Generic[_DataTypeT]): def equals(self, other: Field, check_metadata: bool = False) -> bool: ... - def __hash__(self) -> int: ... - @property def nullable(self) -> bool: ... - @property def name(self) -> str: ... - @property def metadata(self) -> dict[bytes, bytes] | None: ... - @property def type(self) -> _DataTypeT: ... - def with_metadata(self, metadata: dict[bytes | str, bytes | str] | - Mapping[bytes | str, bytes | str] | Any) -> Self: ... - + def with_metadata( + self, + metadata: dict[bytes | str, bytes | str] + | Mapping[bytes | str, bytes | str] + | Any, + ) -> Self: ... def remove_metadata(self) -> Self: ... - def with_type(self, new_type: DataType) -> Field: ... - def with_name(self, name: str) -> Self: ... - def with_nullable(self, nullable: bool) -> Field[_DataTypeT]: ... - def flatten(self) -> list[Field]: ... - def _export_to_c(self, out_ptr: int) -> None: ... - @classmethod def _import_from_c(cls, in_ptr: int) -> Self: ... - def __arrow_c_schema__(self) -> Any: ... - @classmethod def _import_from_c_capsule(cls, schema) -> Self: ... - class Schema(_Weakrefable): def __len__(self) -> int: ... - def __getitem__(self, key: str | int) -> Field: ... _field = __getitem__ def __iter__(self) -> Iterator[Field]: ... - def __hash__(self) -> int: ... - def __sizeof__(self) -> int: ... @property def pandas_metadata(self) -> dict: ... - @property def names(self) -> list[str]: ... - @property def types(self) -> list[DataType]: ... - @property def metadata(self) -> dict[bytes, bytes]: ... - def empty_table(self) -> Table: ... - def equals(self, other: Schema, check_metadata: bool = False) -> bool: ... - @classmethod - def from_pandas(cls, df: pd.DataFrame, preserve_index: bool | - None = None) -> Schema: ... - + def from_pandas( + cls, df: pd.DataFrame, preserve_index: bool | None = None + ) -> Schema: ... def field(self, i: int | str | bytes) -> Field: ... - @deprecated("Use 'field' instead") def field_by_name(self, name: str) -> Field: ... - def get_field_index(self, name: str) -> int: ... - def get_all_field_indices(self, name: str) -> list[int]: ... - def append(self, field: Field) -> Schema: ... - def insert(self, i: int, field: Field) -> Schema: ... - def remove(self, i: int) -> Schema: ... - def set(self, i: int, field: Field) -> Schema: ... - @deprecated("Use 'with_metadata' instead") def add_metadata(self, metadata: dict) -> Schema: ... - def with_metadata(self, metadata: dict) -> Schema: ... - def serialize(self, memory_pool: MemoryPool | None = None) -> Buffer: ... - def remove_metadata(self) -> Schema: ... - def to_string( self, truncate_metadata: bool = True, @@ -612,218 +403,126 @@ class Schema(_Weakrefable): show_schema_metadata: bool = True, element_size_limit: int | None = None, ) -> str: ... - def _export_to_c(self, out_ptr: int) -> None: ... - @classmethod def _import_from_c(cls, in_ptr: int) -> Schema: ... - def __arrow_c_schema__(self) -> Any: ... - @staticmethod def _import_from_c_capsule(schema: Any) -> Schema: ... - def unify_schemas( schemas: Sequence[Schema], *, - promote_options: Literal["default", "permissive"] = "default" + promote_options: Literal["default", "permissive"] = "default", ) -> Schema: ... - - def field( - name: SupportArrowSchema | str | Any, type: _DataTypeT | str | None = None, + name: SupportArrowSchema | str | Any, + type: _DataTypeT | str | None = None, nullable: bool = True, - metadata: dict[Any, Any] | None = None + metadata: dict[Any, Any] | None = None, ) -> Field[_DataTypeT] | Field[Any]: ... - - def null() -> NullType: ... - - def bool_() -> BoolType: ... - - def uint8() -> UInt8Type: ... - - def int8() -> Int8Type: ... - - def uint16() -> UInt16Type: ... - - def int16() -> Int16Type: ... - - def uint32() -> UInt32Type: ... - - def int32() -> Int32Type: ... - - def int64() -> Int64Type: ... - - def uint64() -> UInt64Type: ... - - def timestamp( - unit: _Unit | str, tz: _Tz | None = None) -> TimestampType[_Unit, _Tz]: ... - - + unit: _Unit | str, tz: _Tz | None = None +) -> TimestampType[_Unit, _Tz]: ... def time32(unit: _Time32Unit | str) -> Time32Type[_Time32Unit]: ... - - def time64(unit: _Time64Unit | str) -> Time64Type[_Time64Unit]: ... - - def duration(unit: _Unit | str) -> DurationType[_Unit]: ... - - def month_day_nano_interval() -> MonthDayNanoIntervalType: ... - - def date32() -> Date32Type: ... - - def date64() -> Date64Type: ... - - def float16() -> Float16Type: ... - - def float32() -> Float32Type: ... - - def float64() -> Float64Type: ... - - -def decimal32(precision: _Precision, - scale: _Scale | Literal[0] = 0) -> Decimal32Type[_Precision, _Scale | Literal[0]]: ... - - -def decimal64(precision: _Precision, - scale: _Scale | Literal[0] = 0) -> Decimal64Type[_Precision, _Scale | Literal[0]]: ... - - -def decimal128(precision: _Precision, - scale: _Scale | Literal[0] = 0) -> Decimal128Type[_Precision, _Scale | Literal[0]]: ... - - -def decimal256(precision: _Precision, - scale: _Scale | Literal[0] = 0) -> Decimal256Type[_Precision, _Scale | Literal[0]]: ... - - +def decimal32( + precision: _Precision, scale: _Scale | Literal[0] = 0 +) -> Decimal32Type[_Precision, _Scale | Literal[0]]: ... +def decimal64( + precision: _Precision, scale: _Scale | Literal[0] = 0 +) -> Decimal64Type[_Precision, _Scale | Literal[0]]: ... +def decimal128( + precision: _Precision, scale: _Scale | Literal[0] = 0 +) -> Decimal128Type[_Precision, _Scale | Literal[0]]: ... +def decimal256( + precision: _Precision, scale: _Scale | Literal[0] = 0 +) -> Decimal256Type[_Precision, _Scale | Literal[0]]: ... def string() -> StringType: ... - utf8 = string - -def binary(length: Literal[-1] | int = ...) -> BinaryType | FixedSizeBinaryType[bytes]: ... - - +def binary( + length: Literal[-1] | int = ..., +) -> BinaryType | FixedSizeBinaryType[bytes]: ... def large_binary() -> LargeBinaryType: ... - - def large_string() -> LargeStringType: ... - large_utf8 = large_string - def binary_view() -> BinaryViewType: ... - - def string_view() -> StringViewType: ... - - def list_( value_type: _DataTypeT | Field[_DataTypeT] | None = None, - list_size: Literal[-1] | _Size | None = None + list_size: Literal[-1] | _Size | None = None, ) -> ListType[_DataTypeT] | FixedSizeListType[_DataTypeT, _Size]: ... - - -def large_list(value_type: _DataTypeT | - Field[_DataTypeT] | None = None) -> LargeListType[_DataTypeT]: ... - - -def list_view(value_type: _DataTypeT | - Field[_DataTypeT] | None = None) -> ListViewType[_DataTypeT]: ... - - +def large_list( + value_type: _DataTypeT | Field[_DataTypeT] | None = None, +) -> LargeListType[_DataTypeT]: ... +def list_view( + value_type: _DataTypeT | Field[_DataTypeT] | None = None, +) -> ListViewType[_DataTypeT]: ... def large_list_view( - value_type: _DataTypeT | Field[_DataTypeT] | None = None + value_type: _DataTypeT | Field[_DataTypeT] | None = None, ) -> LargeListViewType[_DataTypeT]: ... - - def map_( key_type: _K | Field | str | None = None, item_type: _ValueT | Field | str | None = None, - keys_sorted: _Ordered | None = None + keys_sorted: _Ordered | None = None, ) -> MapType[_K, _ValueT, _Ordered]: ... - - def dictionary( index_type: _IndexT | str, value_type: _BasicValueT | str, - ordered: _Ordered | None = None + ordered: _Ordered | None = None, ) -> DictionaryType[_IndexT, _BasicValueT, _Ordered]: ... - - def struct( fields: Iterable[ - Field[Any] - | tuple[str, Field[Any] | None] - | tuple[str, DataType | None] - ] | Mapping[str, Field[Any] | DataType | None], + Field[Any] | tuple[str, Field[Any] | None] | tuple[str, DataType | None] + ] + | Mapping[str, Field[Any] | DataType | None], ) -> StructType: ... - - def sparse_union( child_fields: list[Field[Any]], type_codes: list[int] | None = None ) -> SparseUnionType: ... - - def dense_union( child_fields: list[Field[Any]], type_codes: list[int] | None = None ) -> DenseUnionType: ... - - def union( - child_fields: list[Field[Any]], mode: Literal["sparse", "dense"] | int | str, - type_codes: list[int] | None = None) -> SparseUnionType | DenseUnionType: ... - - + child_fields: list[Field[Any]], + mode: Literal["sparse", "dense"] | int, + type_codes: list[int] | None = None, +) -> SparseUnionType | DenseUnionType: ... def run_end_encoded( run_end_type: _RunEndType | str | None, value_type: _BasicValueT | str | None ) -> RunEndEncodedType[_RunEndType, _BasicValueT]: ... - - def json_(storage_type: DataType = ...) -> JsonType: ... - - def uuid() -> UuidType: ... - - def fixed_shape_tensor( value_type: _ValueT, shape: Sequence[int], dim_names: Sequence[str] | None = None, permutation: Sequence[int] | None = None, ) -> FixedShapeTensorType[_ValueT]: ... - - def bool8() -> Bool8Type: ... - - def opaque(storage_type: DataType, type_name: str, vendor_name: str) -> OpaqueType: ... - - def type_for_alias(name: Any) -> DataType: ... - - def schema( fields: ( Iterable[Field[Any]] @@ -833,13 +532,11 @@ def schema( metadata: Mapping[bytes, bytes] | Mapping[str, str] | Mapping[bytes, str] - | Mapping[str, bytes] | None = None, + | Mapping[str, bytes] + | None = None, ) -> Schema: ... - - def from_numpy_dtype(dtype: np.dtype[Any] | type | str) -> DataType: ... - __all__ = [ "_Weakrefable", "_Metadata", diff --git a/python/pyarrow-stubs/pyarrow/error.pyi b/python/pyarrow-stubs/pyarrow/error.pyi index 6a010071de0..8ee75c3ec41 100644 --- a/python/pyarrow-stubs/pyarrow/error.pyi +++ b/python/pyarrow-stubs/pyarrow/error.pyi @@ -17,68 +17,34 @@ from typing_extensions import Self - -class ArrowException(Exception): - ... - - -class ArrowInvalid(ValueError, ArrowException): - ... - - -class ArrowMemoryError(MemoryError, ArrowException): - ... - - -class ArrowKeyError(KeyError, ArrowException): - ... - - -class ArrowTypeError(TypeError, ArrowException): - ... - - -class ArrowNotImplementedError(NotImplementedError, ArrowException): - ... - - -class ArrowCapacityError(ArrowException): - ... - - -class ArrowIndexError(IndexError, ArrowException): - ... - - -class ArrowSerializationError(ArrowException): - ... - +class ArrowException(Exception): ... +class ArrowInvalid(ValueError, ArrowException): ... +class ArrowMemoryError(MemoryError, ArrowException): ... +class ArrowKeyError(KeyError, ArrowException): ... +class ArrowTypeError(TypeError, ArrowException): ... +class ArrowNotImplementedError(NotImplementedError, ArrowException): ... +class ArrowCapacityError(ArrowException): ... +class ArrowIndexError(IndexError, ArrowException): ... +class ArrowSerializationError(ArrowException): ... class ArrowCancelled(ArrowException): signum: int | None def __init__(self, message: str, signum: int | None = None) -> None: ... - ArrowIOError = IOError - -class StopToken: - ... - +class StopToken: ... def enable_signal_handlers(enable: bool) -> None: ... - have_signal_refcycle: bool - class SignalStopHandler: def __enter__(self) -> Self: ... def __exit__(self, exc_type, exc_value, exc_tb) -> None: ... @property def stop_token(self) -> StopToken: ... - __all__ = [ "ArrowException", "ArrowInvalid", diff --git a/python/pyarrow-stubs/pyarrow/io.pyi b/python/pyarrow-stubs/pyarrow/io.pyi index 856fb093a6c..8a1da39a1cf 100644 --- a/python/pyarrow-stubs/pyarrow/io.pyi +++ b/python/pyarrow-stubs/pyarrow/io.pyi @@ -15,16 +15,11 @@ # specific language governing permissions and limitations # under the License. -"""Placeholder stub - complete annotations in future PR. - -At runtime, these symbols are provided by the pyarrow.lib C extension. -""" +# Placeholder stub - complete annotations in future PR. +# At runtime, these symbols are provided by the pyarrow.lib C extension. from typing import Any - -class Buffer: - ... - +class Buffer: ... def __getattr__(name: str) -> Any: ... diff --git a/python/pyarrow-stubs/pyarrow/lib.pyi b/python/pyarrow-stubs/pyarrow/lib.pyi index 775434be2ea..0c539af067e 100644 --- a/python/pyarrow-stubs/pyarrow/lib.pyi +++ b/python/pyarrow-stubs/pyarrow/lib.pyi @@ -15,10 +15,8 @@ # specific language governing permissions and limitations # under the License. -"""Placeholder stub for pyarrow.lib C extension module. - -Complete type annotations will be added in subsequent PRs. -""" +# Placeholder stub for pyarrow.lib C extension module. +# Complete type annotations will be added in subsequent PRs. from typing import Any diff --git a/python/pyarrow-stubs/pyarrow/scalar.pyi b/python/pyarrow-stubs/pyarrow/scalar.pyi index 384507d2a2b..307b3e689a2 100644 --- a/python/pyarrow-stubs/pyarrow/scalar.pyi +++ b/python/pyarrow-stubs/pyarrow/scalar.pyi @@ -15,16 +15,11 @@ # specific language governing permissions and limitations # under the License. -"""Placeholder stub - complete annotations in future PR. - -At runtime, these symbols are provided by the pyarrow.lib C extension. -""" +# Placeholder stub - complete annotations in future PR. +# At runtime, these symbols are provided by the pyarrow.lib C extension. from typing import Any - -class ExtensionScalar: - ... - +class ExtensionScalar: ... def __getattr__(name: str) -> Any: ... From ff521e993d6db2affd7281b5efc42eb41f8bec27 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Fri, 13 Mar 2026 16:39:04 +0100 Subject: [PATCH 6/7] add pyi check to pre-commit --- .pre-commit-config.yaml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a33aa3acb47..27e55a35aa0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -136,11 +136,22 @@ repos: - id: flake8 alias: python name: Python Lint + additional_dependencies: + - flake8-pyi args: - "--config" - "python/setup.cfg" + - "--extend-select" + - "Y" + - "--per-file-ignores" + - "python/pyarrow-stubs/pyarrow/*.pyi:E301,E302,E305,E701" files: >- ^(c_glib|dev|python)/ + types: + - file + types_or: + - python + - pyi exclude: >- ( ?^python/pyarrow/vendored/| From 9996c753d85e1ad0e941df2e37c2b33c5479f6fc Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Fri, 13 Mar 2026 16:39:47 +0100 Subject: [PATCH 7/7] ruff format --- python/pyarrow-stubs/pyarrow/_types.pyi | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/pyarrow-stubs/pyarrow/_types.pyi b/python/pyarrow-stubs/pyarrow/_types.pyi index 1fd5c5002d1..c4d88ffce6b 100644 --- a/python/pyarrow-stubs/pyarrow/_types.pyi +++ b/python/pyarrow-stubs/pyarrow/_types.pyi @@ -125,9 +125,7 @@ class DurationType(_BasicDataType[dt.timedelta], Generic[_Unit]): _FixedSizeBinaryAsPyType = TypeVar("_FixedSizeBinaryAsPyType", default=bytes) -class FixedSizeBinaryType( - _BasicDataType[_FixedSizeBinaryAsPyType] -): ... +class FixedSizeBinaryType(_BasicDataType[_FixedSizeBinaryAsPyType]): ... _Precision = TypeVar("_Precision", default=Any) _Scale = TypeVar("_Scale", default=Any)