diff --git a/_duckdb-stubs/__init__.pyi b/_duckdb-stubs/__init__.pyi new file mode 100644 index 00000000..6c36d7be --- /dev/null +++ b/_duckdb-stubs/__init__.pyi @@ -0,0 +1,1443 @@ +import os +import pathlib +import typing as pytyping +from typing_extensions import Self + +if pytyping.TYPE_CHECKING: + import fsspec + import numpy as np + import polars + import pandas + import pyarrow.lib + import torch as pytorch + import tensorflow + from collections.abc import Callable, Sequence, Mapping + from duckdb import sqltypes, func + + # the field_ids argument to to_parquet and write_parquet has a recursive structure + ParquetFieldIdsType = Mapping[str, pytyping.Union[int, "ParquetFieldIdsType"]] + +__all__: list[str] = [ + "BinderException", + "CSVLineTerminator", + "CaseExpression", + "CatalogException", + "CoalesceOperator", + "ColumnExpression", + "ConnectionException", + "ConstantExpression", + "ConstraintException", + "ConversionException", + "DataError", + "DatabaseError", + "DefaultExpression", + "DependencyException", + "DuckDBPyConnection", + "DuckDBPyRelation", + "Error", + "ExpectedResultType", + "ExplainType", + "Expression", + "FatalException", + "FunctionExpression", + "HTTPException", + "IOException", + "IntegrityError", + "InternalError", + "InternalException", + "InterruptException", + "InvalidInputException", + "InvalidTypeException", + "LambdaExpression", + "NotImplementedException", + "NotSupportedError", + "OperationalError", + "OutOfMemoryException", + "OutOfRangeException", + "ParserException", + "PermissionException", + "ProgrammingError", + "PythonExceptionHandling", + "RenderMode", + "SQLExpression", + "SequenceException", + "SerializationException", + "StarExpression", + "Statement", + "StatementType", + "SyntaxException", + "TransactionException", + "TypeMismatchException", + "Warning", + "aggregate", + "alias", + "apilevel", + "append", + "array_type", + "arrow", + "begin", + "checkpoint", + "close", + "commit", + "connect", + "create_function", + "cursor", + "decimal_type", + "default_connection", + "description", + "df", + "distinct", + "dtype", + "duplicate", + "enum_type", + "execute", + "executemany", + "extract_statements", + "fetch_arrow_table", + "fetch_df", + "fetch_df_chunk", + "fetch_record_batch", + "fetchall", + "fetchdf", + "fetchmany", + "fetchnumpy", + "fetchone", + "filesystem_is_registered", + "filter", + "from_arrow", + "from_csv_auto", + "from_df", + "from_parquet", + "from_query", + "get_table_names", + "install_extension", + "interrupt", + "limit", + "list_filesystems", + "list_type", + "load_extension", + "map_type", + "order", + "paramstyle", + "pl", + "project", + "query", + "query_df", + "query_progress", + "read_csv", + "read_json", + "read_parquet", + "register", + "register_filesystem", + "remove_function", + "rollback", + "row_type", + "rowcount", + "set_default_connection", + "sql", + "sqltype", + "string_type", + "struct_type", + "table", + "table_function", + "tf", + "threadsafety", + "token_type", + "tokenize", + "torch", + "type", + "union_type", + "unregister", + "unregister_filesystem", + "values", + "view", + "write_csv", +] + +class BinderException(ProgrammingError): ... + +class CSVLineTerminator: + CARRIAGE_RETURN_LINE_FEED: pytyping.ClassVar[ + CSVLineTerminator + ] # value = + LINE_FEED: pytyping.ClassVar[CSVLineTerminator] # value = + __members__: pytyping.ClassVar[ + dict[str, CSVLineTerminator] + ] # value = {'LINE_FEED': , 'CARRIAGE_RETURN_LINE_FEED': } # noqa: E501 + def __eq__(self, other: object) -> bool: ... 
+ def __getstate__(self) -> int: ... + def __hash__(self) -> int: ... + def __index__(self) -> int: ... + def __init__(self, value: pytyping.SupportsInt) -> None: ... + def __int__(self) -> int: ... + def __ne__(self, other: object) -> bool: ... + def __setstate__(self, state: pytyping.SupportsInt) -> None: ... + @property + def name(self) -> str: ... + @property + def value(self) -> int: ... + +class CatalogException(ProgrammingError): ... +class ConnectionException(OperationalError): ... +class ConstraintException(IntegrityError): ... +class ConversionException(DataError): ... +class DataError(DatabaseError): ... +class DatabaseError(Error): ... +class DependencyException(DatabaseError): ... + +class DuckDBPyConnection: + def __del__(self) -> None: ... + def __enter__(self) -> Self: ... + def __exit__(self, exc_type: object, exc: object, traceback: object) -> None: ... + def append(self, table_name: str, df: pandas.DataFrame, *, by_name: bool = False) -> DuckDBPyConnection: ... + def array_type(self, type: sqltypes.DuckDBPyType, size: pytyping.SupportsInt) -> sqltypes.DuckDBPyType: ... + def arrow(self, rows_per_batch: pytyping.SupportsInt = 1000000) -> pyarrow.lib.RecordBatchReader: ... + def begin(self) -> DuckDBPyConnection: ... + def checkpoint(self) -> DuckDBPyConnection: ... + def close(self) -> None: ... + def commit(self) -> DuckDBPyConnection: ... + def create_function( + self, + name: str, + function: Callable[..., pytyping.Any], + parameters: list[sqltypes.DuckDBPyType] | None = None, + return_type: sqltypes.DuckDBPyType | None = None, + *, + type: func.PythonUDFType = ..., + null_handling: func.FunctionNullHandling = ..., + exception_handling: PythonExceptionHandling = ..., + side_effects: bool = False, + ) -> DuckDBPyConnection: ... + def cursor(self) -> DuckDBPyConnection: ... + def decimal_type(self, width: pytyping.SupportsInt, scale: pytyping.SupportsInt) -> sqltypes.DuckDBPyType: ... + def df(self, *, date_as_object: bool = False) -> pandas.DataFrame: ... + def dtype(self, type_str: str) -> sqltypes.DuckDBPyType: ... + def duplicate(self) -> DuckDBPyConnection: ... + def enum_type( + self, name: str, type: sqltypes.DuckDBPyType, values: list[pytyping.Any] + ) -> sqltypes.DuckDBPyType: ... + def execute(self, query: Statement | str, parameters: object = None) -> DuckDBPyConnection: ... + def executemany(self, query: Statement | str, parameters: object = None) -> DuckDBPyConnection: ... + def extract_statements(self, query: str) -> list[Statement]: ... + def fetch_arrow_table(self, rows_per_batch: pytyping.SupportsInt = 1000000) -> pyarrow.lib.Table: ... + def fetch_df(self, *, date_as_object: bool = False) -> pandas.DataFrame: ... + def fetch_df_chunk( + self, vectors_per_chunk: pytyping.SupportsInt = 1, *, date_as_object: bool = False + ) -> pandas.DataFrame: ... + def fetch_record_batch(self, rows_per_batch: pytyping.SupportsInt = 1000000) -> pyarrow.lib.RecordBatchReader: ... + def fetchall(self) -> list[tuple[pytyping.Any, ...]]: ... + def fetchdf(self, *, date_as_object: bool = False) -> pandas.DataFrame: ... + def fetchmany(self, size: pytyping.SupportsInt = 1) -> list[tuple[pytyping.Any, ...]]: ... + def fetchnumpy(self) -> dict[str, np.typing.NDArray[pytyping.Any] | pandas.Categorical]: ... + def fetchone(self) -> tuple[pytyping.Any, ...] | None: ... + def filesystem_is_registered(self, name: str) -> bool: ... + def from_arrow(self, arrow_object: object) -> DuckDBPyRelation: ... 
+ def from_csv_auto( + self, + path_or_buffer: str | bytes | os.PathLike[str] | os.PathLike[bytes], + header: bool | int | None = None, + compression: str | None = None, + sep: str | None = None, + delimiter: str | None = None, + files_to_sniff: int | None = None, + comment: str | None = None, + thousands: str | None = None, + dtype: dict[str, str] | list[str] | None = None, + na_values: str | list[str] | None = None, + skiprows: int | None = None, + quotechar: str | None = None, + escapechar: str | None = None, + encoding: str | None = None, + parallel: bool | None = None, + date_format: str | None = None, + timestamp_format: str | None = None, + sample_size: int | None = None, + auto_detect: bool | int | None = None, + all_varchar: bool | None = None, + normalize_names: bool | None = None, + null_padding: bool | None = None, + names: list[str] | None = None, + lineterminator: str | None = None, + columns: dict[str, str] | None = None, + auto_type_candidates: list[str] | None = None, + max_line_size: int | None = None, + ignore_errors: bool | None = None, + store_rejects: bool | None = None, + rejects_table: str | None = None, + rejects_scan: str | None = None, + rejects_limit: int | None = None, + force_not_null: list[str] | None = None, + buffer_size: int | None = None, + decimal: str | None = None, + allow_quoted_nulls: bool | None = None, + filename: bool | str | None = None, + hive_partitioning: bool | None = None, + union_by_name: bool | None = None, + hive_types: dict[str, str] | None = None, + hive_types_autocast: bool | None = None, + strict_mode: bool | None = None, + ) -> DuckDBPyRelation: ... + def from_df(self, df: pandas.DataFrame) -> DuckDBPyRelation: ... + @pytyping.overload + def from_parquet( + self, + file_glob: str, + binary_as_string: bool = False, + *, + file_row_number: bool = False, + filename: bool = False, + hive_partitioning: bool = False, + union_by_name: bool = False, + compression: str | None = None, + ) -> DuckDBPyRelation: ... + @pytyping.overload + def from_parquet( + self, + file_globs: Sequence[str], + binary_as_string: bool = False, + *, + file_row_number: bool = False, + filename: bool = False, + hive_partitioning: bool = False, + union_by_name: bool = False, + compression: str | None = None, + ) -> DuckDBPyRelation: ... + def from_query(self, query: str, *, alias: str = "", params: object = None) -> DuckDBPyRelation: ... + def get_table_names(self, query: str, *, qualified: bool = False) -> set[str]: ... + def install_extension( + self, + extension: str, + *, + force_install: bool = False, + repository: str | None = None, + repository_url: str | None = None, + version: str | None = None, + ) -> None: ... + def interrupt(self) -> None: ... + def list_filesystems(self) -> list[str]: ... + def list_type(self, type: sqltypes.DuckDBPyType) -> sqltypes.DuckDBPyType: ... + def load_extension(self, extension: str) -> None: ... + def map_type(self, key: sqltypes.DuckDBPyType, value: sqltypes.DuckDBPyType) -> sqltypes.DuckDBPyType: ... + def pl(self, rows_per_batch: pytyping.SupportsInt = 1000000, *, lazy: bool = False) -> polars.DataFrame: ... + def query(self, query: str, *, alias: str = "", params: object = None) -> DuckDBPyRelation: ... + def query_progress(self) -> float: ... 
+ def read_csv( + self, + path_or_buffer: str | bytes | os.PathLike[str], + header: bool | int | None = None, + compression: str | None = None, + sep: str | None = None, + delimiter: str | None = None, + files_to_sniff: int | None = None, + comment: str | None = None, + thousands: str | None = None, + dtype: dict[str, str] | list[str] | None = None, + na_values: str | list[str] | None = None, + skiprows: int | None = None, + quotechar: str | None = None, + escapechar: str | None = None, + encoding: str | None = None, + parallel: bool | None = None, + date_format: str | None = None, + timestamp_format: str | None = None, + sample_size: int | None = None, + auto_detect: bool | int | None = None, + all_varchar: bool | None = None, + normalize_names: bool | None = None, + null_padding: bool | None = None, + names: list[str] | None = None, + lineterminator: str | None = None, + columns: dict[str, str] | None = None, + auto_type_candidates: list[str] | None = None, + max_line_size: int | None = None, + ignore_errors: bool | None = None, + store_rejects: bool | None = None, + rejects_table: str | None = None, + rejects_scan: str | None = None, + rejects_limit: int | None = None, + force_not_null: list[str] | None = None, + buffer_size: int | None = None, + decimal: str | None = None, + allow_quoted_nulls: bool | None = None, + filename: bool | str | None = None, + hive_partitioning: bool | None = None, + union_by_name: bool | None = None, + hive_types: dict[str, str] | None = None, + hive_types_autocast: bool | None = None, + strict_mode: bool | None = None, + ) -> DuckDBPyRelation: ... + def read_json( + self, + path_or_buffer: str | bytes | os.PathLike[str], + *, + columns: dict[str, str] | None = None, + sample_size: int | None = None, + maximum_depth: int | None = None, + records: str | None = None, + format: str | None = None, + date_format: str | None = None, + timestamp_format: str | None = None, + compression: str | None = None, + maximum_object_size: int | None = None, + ignore_errors: bool | None = None, + convert_strings_to_integers: bool | None = None, + field_appearance_threshold: float | None = None, + map_inference_threshold: int | None = None, + maximum_sample_files: int | None = None, + filename: bool | str | None = None, + hive_partitioning: bool | None = None, + union_by_name: bool | None = None, + hive_types: dict[str, str] | None = None, + hive_types_autocast: bool | None = None, + ) -> DuckDBPyRelation: ... + @pytyping.overload + def read_parquet( + self, + file_glob: str, + binary_as_string: bool = False, + *, + file_row_number: bool = False, + filename: bool = False, + hive_partitioning: bool = False, + union_by_name: bool = False, + compression: str | None = None, + ) -> DuckDBPyRelation: ... + @pytyping.overload + def read_parquet( + self, + file_globs: Sequence[str], + binary_as_string: bool = False, + *, + file_row_number: bool = False, + filename: bool = False, + hive_partitioning: bool = False, + union_by_name: bool = False, + compression: pytyping.Any = None, + ) -> DuckDBPyRelation: ... + def register(self, view_name: str, python_object: object) -> DuckDBPyConnection: ... + def register_filesystem(self, filesystem: fsspec.AbstractFileSystem) -> None: ... + def remove_function(self, name: str) -> DuckDBPyConnection: ... + def rollback(self) -> DuckDBPyConnection: ... + def row_type( + self, fields: dict[str, sqltypes.DuckDBPyType] | list[sqltypes.DuckDBPyType] + ) -> sqltypes.DuckDBPyType: ... 
+ def sql(self, query: Statement | str, *, alias: str = "", params: object = None) -> DuckDBPyRelation: ... + def sqltype(self, type_str: str) -> sqltypes.DuckDBPyType: ... + def string_type(self, collation: str = "") -> sqltypes.DuckDBPyType: ... + def struct_type( + self, fields: dict[str, sqltypes.DuckDBPyType] | list[sqltypes.DuckDBPyType] + ) -> sqltypes.DuckDBPyType: ... + def table(self, table_name: str) -> DuckDBPyRelation: ... + def table_function(self, name: str, parameters: object = None) -> DuckDBPyRelation: ... + def tf(self) -> dict[str, tensorflow.Tensor]: ... + def torch(self) -> dict[str, pytorch.Tensor]: ... + def type(self, type_str: str) -> sqltypes.DuckDBPyType: ... + def union_type( + self, members: list[sqltypes.DuckDBPyType] | dict[str, sqltypes.DuckDBPyType] + ) -> sqltypes.DuckDBPyType: ... + def unregister(self, view_name: str) -> DuckDBPyConnection: ... + def unregister_filesystem(self, name: str) -> None: ... + def values(self, *args: list[pytyping.Any] | tuple[Expression, ...] | Expression) -> DuckDBPyRelation: ... + def view(self, view_name: str) -> DuckDBPyRelation: ... + @property + def description(self) -> list[tuple[str, sqltypes.DuckDBPyType, None, None, None, None, None]]: ... + @property + def rowcount(self) -> int: ... + +class DuckDBPyRelation: + def __arrow_c_stream__(self, requested_schema: object | None = None) -> pytyping.Any: ... + def __contains__(self, name: str) -> bool: ... + def __getattr__(self, name: str) -> DuckDBPyRelation: ... + def __getitem__(self, name: str) -> DuckDBPyRelation: ... + def __len__(self) -> int: ... + def aggregate(self, aggr_expr: Expression | str, group_expr: Expression | str = "") -> DuckDBPyRelation: ... + def any_value( + self, column: str, groups: str = "", window_spec: str = "", projected_columns: str = "" + ) -> DuckDBPyRelation: ... + def apply( + self, + function_name: str, + function_aggr: str, + group_expr: str = "", + function_parameter: str = "", + projected_columns: str = "", + ) -> DuckDBPyRelation: ... + def arg_max( + self, arg_column: str, value_column: str, groups: str = "", window_spec: str = "", projected_columns: str = "" + ) -> DuckDBPyRelation: ... + def arg_min( + self, arg_column: str, value_column: str, groups: str = "", window_spec: str = "", projected_columns: str = "" + ) -> DuckDBPyRelation: ... + def arrow(self, batch_size: pytyping.SupportsInt = 1000000) -> pyarrow.lib.RecordBatchReader: ... + def avg( + self, column: str, groups: str = "", window_spec: str = "", projected_columns: str = "" + ) -> DuckDBPyRelation: ... + def bit_and( + self, column: str, groups: str = "", window_spec: str = "", projected_columns: str = "" + ) -> DuckDBPyRelation: ... + def bit_or( + self, column: str, groups: str = "", window_spec: str = "", projected_columns: str = "" + ) -> DuckDBPyRelation: ... + def bit_xor( + self, column: str, groups: str = "", window_spec: str = "", projected_columns: str = "" + ) -> DuckDBPyRelation: ... + def bitstring_agg( + self, + column: str, + min: int | None = None, + max: int | None = None, + groups: str = "", + window_spec: str = "", + projected_columns: str = "", + ) -> DuckDBPyRelation: ... + def bool_and( + self, column: str, groups: str = "", window_spec: str = "", projected_columns: str = "" + ) -> DuckDBPyRelation: ... + def bool_or( + self, column: str, groups: str = "", window_spec: str = "", projected_columns: str = "" + ) -> DuckDBPyRelation: ... + def close(self) -> None: ... 
+ def count( + self, column: str, groups: str = "", window_spec: str = "", projected_columns: str = "" + ) -> DuckDBPyRelation: ... + def create(self, table_name: str) -> None: ... + def create_view(self, view_name: str, replace: bool = True) -> DuckDBPyRelation: ... + def cross(self, other_rel: DuckDBPyRelation) -> DuckDBPyRelation: ... + def cume_dist(self, window_spec: str, projected_columns: str = "") -> DuckDBPyRelation: ... + def dense_rank(self, window_spec: str, projected_columns: str = "") -> DuckDBPyRelation: ... + def describe(self) -> DuckDBPyRelation: ... + def df(self, *, date_as_object: bool = False) -> pandas.DataFrame: ... + def distinct(self) -> DuckDBPyRelation: ... + def except_(self, other_rel: DuckDBPyRelation) -> DuckDBPyRelation: ... + def execute(self) -> DuckDBPyRelation: ... + def explain(self, type: ExplainType = ExplainType.STANDARD) -> str: ... + def favg( + self, column: str, groups: str = "", window_spec: str = "", projected_columns: str = "" + ) -> DuckDBPyRelation: ... + def fetch_arrow_reader(self, batch_size: pytyping.SupportsInt = 1000000) -> pyarrow.lib.RecordBatchReader: ... + def fetch_arrow_table(self, batch_size: pytyping.SupportsInt = 1000000) -> pyarrow.lib.Table: ... + def fetch_df_chunk( + self, vectors_per_chunk: pytyping.SupportsInt = 1, *, date_as_object: bool = False + ) -> pandas.DataFrame: ... + def fetch_record_batch(self, rows_per_batch: pytyping.SupportsInt = 1000000) -> pyarrow.lib.RecordBatchReader: ... + def fetchall(self) -> list[tuple[pytyping.Any, ...]]: ... + def fetchdf(self, *, date_as_object: bool = False) -> pandas.DataFrame: ... + def fetchmany(self, size: pytyping.SupportsInt = 1) -> list[tuple[pytyping.Any, ...]]: ... + def fetchnumpy(self) -> dict[str, np.typing.NDArray[pytyping.Any] | pandas.Categorical]: ... + def fetchone(self) -> tuple[pytyping.Any, ...] | None: ... + def filter(self, filter_expr: Expression | str) -> DuckDBPyRelation: ... + def first(self, column: str, groups: str = "", projected_columns: str = "") -> DuckDBPyRelation: ... + def first_value(self, column: str, window_spec: str = "", projected_columns: str = "") -> DuckDBPyRelation: ... + def fsum( + self, column: str, groups: str = "", window_spec: str = "", projected_columns: str = "" + ) -> DuckDBPyRelation: ... + def geomean(self, column: str, groups: str = "", projected_columns: str = "") -> DuckDBPyRelation: ... + def histogram( + self, column: str, groups: str = "", window_spec: str = "", projected_columns: str = "" + ) -> DuckDBPyRelation: ... + def insert(self, values: pytyping.List[object]) -> None: ... + def insert_into(self, table_name: str) -> None: ... + def intersect(self, other_rel: DuckDBPyRelation) -> DuckDBPyRelation: ... + def join( + self, other_rel: DuckDBPyRelation, condition: Expression | str, how: str = "inner" + ) -> DuckDBPyRelation: ... + def lag( + self, + column: str, + window_spec: str, + offset: pytyping.SupportsInt = 1, + default_value: str = "NULL", + ignore_nulls: bool = False, + projected_columns: str = "", + ) -> DuckDBPyRelation: ... + def last(self, column: str, groups: str = "", projected_columns: str = "") -> DuckDBPyRelation: ... + def last_value(self, column: str, window_spec: str = "", projected_columns: str = "") -> DuckDBPyRelation: ... + def lead( + self, + column: str, + window_spec: str, + offset: pytyping.SupportsInt = 1, + default_value: str = "NULL", + ignore_nulls: bool = False, + projected_columns: str = "", + ) -> DuckDBPyRelation: ... 
+ def limit(self, n: pytyping.SupportsInt, offset: pytyping.SupportsInt = 0) -> DuckDBPyRelation: ... + def list( + self, column: str, groups: str = "", window_spec: str = "", projected_columns: str = "" + ) -> DuckDBPyRelation: ... + def map( + self, map_function: Callable[..., pytyping.Any], *, schema: dict[str, sqltypes.DuckDBPyType] | None = None + ) -> DuckDBPyRelation: ... + def max( + self, column: str, groups: str = "", window_spec: str = "", projected_columns: str = "" + ) -> DuckDBPyRelation: ... + def mean( + self, column: str, groups: str = "", window_spec: str = "", projected_columns: str = "" + ) -> DuckDBPyRelation: ... + def median( + self, column: str, groups: str = "", window_spec: str = "", projected_columns: str = "" + ) -> DuckDBPyRelation: ... + def min( + self, column: str, groups: str = "", window_spec: str = "", projected_columns: str = "" + ) -> DuckDBPyRelation: ... + def mode( + self, column: str, groups: str = "", window_spec: str = "", projected_columns: str = "" + ) -> DuckDBPyRelation: ... + def n_tile( + self, window_spec: str, num_buckets: pytyping.SupportsInt, projected_columns: str = "" + ) -> DuckDBPyRelation: ... + def nth_value( + self, + column: str, + window_spec: str, + offset: pytyping.SupportsInt, + ignore_nulls: bool = False, + projected_columns: str = "", + ) -> DuckDBPyRelation: ... + def order(self, order_expr: str) -> DuckDBPyRelation: ... + def percent_rank(self, window_spec: str, projected_columns: str = "") -> DuckDBPyRelation: ... + def pl(self, batch_size: pytyping.SupportsInt = 1000000, *, lazy: bool = False) -> polars.DataFrame: ... + def product( + self, column: str, groups: str = "", window_spec: str = "", projected_columns: str = "" + ) -> DuckDBPyRelation: ... + def project(self, *args: str | Expression, groups: str = "") -> DuckDBPyRelation: ... + def quantile( + self, + column: str, + q: float | pytyping.List[float] = 0.5, + groups: str = "", + window_spec: str = "", + projected_columns: str = "", + ) -> DuckDBPyRelation: ... + def quantile_cont( + self, + column: str, + q: float | pytyping.List[float] = 0.5, + groups: str = "", + window_spec: str = "", + projected_columns: str = "", + ) -> DuckDBPyRelation: ... + def quantile_disc( + self, + column: str, + q: float | pytyping.List[float] = 0.5, + groups: str = "", + window_spec: str = "", + projected_columns: str = "", + ) -> DuckDBPyRelation: ... + def query(self, virtual_table_name: str, sql_query: str) -> DuckDBPyRelation: ... + def rank(self, window_spec: str, projected_columns: str = "") -> DuckDBPyRelation: ... + def rank_dense(self, window_spec: str, projected_columns: str = "") -> DuckDBPyRelation: ... + def record_batch(self, batch_size: pytyping.SupportsInt = 1000000) -> pyarrow.RecordBatchReader: ... + def row_number(self, window_spec: str, projected_columns: str = "") -> DuckDBPyRelation: ... + def select(self, *args: str | Expression, groups: str = "") -> DuckDBPyRelation: ... + def select_dtypes(self, types: pytyping.List[sqltypes.DuckDBPyType | str]) -> DuckDBPyRelation: ... + def select_types(self, types: pytyping.List[sqltypes.DuckDBPyType | str]) -> DuckDBPyRelation: ... + def set_alias(self, alias: str) -> DuckDBPyRelation: ... + def show( + self, + *, + max_width: pytyping.SupportsInt | None = None, + max_rows: pytyping.SupportsInt | None = None, + max_col_width: pytyping.SupportsInt | None = None, + null_value: str | None = None, + render_mode: RenderMode | None = None, + ) -> None: ... + def sort(self, *args: Expression) -> DuckDBPyRelation: ... 
+ def sql_query(self) -> str: ... + def std( + self, column: str, groups: str = "", window_spec: str = "", projected_columns: str = "" + ) -> DuckDBPyRelation: ... + def stddev( + self, column: str, groups: str = "", window_spec: str = "", projected_columns: str = "" + ) -> DuckDBPyRelation: ... + def stddev_pop( + self, column: str, groups: str = "", window_spec: str = "", projected_columns: str = "" + ) -> DuckDBPyRelation: ... + def stddev_samp( + self, column: str, groups: str = "", window_spec: str = "", projected_columns: str = "" + ) -> DuckDBPyRelation: ... + def string_agg( + self, column: str, sep: str = ",", groups: str = "", window_spec: str = "", projected_columns: str = "" + ) -> DuckDBPyRelation: ... + def sum( + self, column: str, groups: str = "", window_spec: str = "", projected_columns: str = "" + ) -> DuckDBPyRelation: ... + def tf(self) -> dict[str, tensorflow.Tensor]: ... + def to_arrow_table(self, batch_size: pytyping.SupportsInt = 1000000) -> pyarrow.lib.Table: ... + def to_csv( + self, + file_name: str, + *, + sep: str | None = None, + na_rep: str | None = None, + header: bool | None = None, + quotechar: str | None = None, + escapechar: str | None = None, + date_format: str | None = None, + timestamp_format: str | None = None, + quoting: str | int | None = None, + encoding: str | None = None, + compression: str | None = None, + overwrite: bool | None = None, + per_thread_output: bool | None = None, + use_tmp_file: bool | None = None, + partition_by: pytyping.List[str] | None = None, + write_partition_columns: bool | None = None, + ) -> None: ... + def to_df(self, *, date_as_object: bool = False) -> pandas.DataFrame: ... + def to_parquet( + self, + file_name: str, + *, + compression: str | None = None, + field_ids: ParquetFieldIdsType | pytyping.Literal["auto"] | None = None, + row_group_size_bytes: int | str | None = None, + row_group_size: int | None = None, + overwrite: bool | None = None, + per_thread_output: bool | None = None, + use_tmp_file: bool | None = None, + partition_by: pytyping.List[str] | None = None, + write_partition_columns: bool | None = None, + append: bool | None = None, + ) -> None: ... + def to_table(self, table_name: str) -> None: ... + def to_view(self, view_name: str, replace: bool = True) -> DuckDBPyRelation: ... + def torch(self) -> dict[str, pytorch.Tensor]: ... + def union(self, union_rel: DuckDBPyRelation) -> DuckDBPyRelation: ... + def unique(self, unique_aggr: str) -> DuckDBPyRelation: ... + def update(self, set: Expression | str, *, condition: Expression | str | None = None) -> None: ... + def value_counts(self, column: str, groups: str = "") -> DuckDBPyRelation: ... + def var( + self, column: str, groups: str = "", window_spec: str = "", projected_columns: str = "" + ) -> DuckDBPyRelation: ... + def var_pop( + self, column: str, groups: str = "", window_spec: str = "", projected_columns: str = "" + ) -> DuckDBPyRelation: ... + def var_samp( + self, column: str, groups: str = "", window_spec: str = "", projected_columns: str = "" + ) -> DuckDBPyRelation: ... + def variance( + self, column: str, groups: str = "", window_spec: str = "", projected_columns: str = "" + ) -> DuckDBPyRelation: ... 
+ def write_csv( + self, + file_name: str, + sep: str | None = None, + na_rep: str | None = None, + header: bool | None = None, + quotechar: str | None = None, + escapechar: str | None = None, + date_format: str | None = None, + timestamp_format: str | None = None, + quoting: str | int | None = None, + encoding: str | None = None, + compression: str | None = None, + overwrite: bool | None = None, + per_thread_output: bool | None = None, + use_tmp_file: bool | None = None, + partition_by: pytyping.List[str] | None = None, + write_partition_columns: bool | None = None, + ) -> None: ... + def write_parquet( + self, + file_name: str, + compression: str | None = None, + field_ids: ParquetFieldIdsType | pytyping.Literal["auto"] | None = None, + row_group_size_bytes: str | int | None = None, + row_group_size: int | None = None, + overwrite: bool | None = None, + per_thread_output: bool | None = None, + use_tmp_file: bool | None = None, + partition_by: pytyping.List[str] | None = None, + write_partition_columns: bool | None = None, + append: bool | None = None, + ) -> None: ... + @property + def alias(self) -> str: ... + @property + def columns(self) -> pytyping.List[str]: ... + @property + def description(self) -> pytyping.List[tuple[str, sqltypes.DuckDBPyType, None, None, None, None, None]]: ... + @property + def dtypes(self) -> pytyping.List[str]: ... + @property + def shape(self) -> tuple[int, int]: ... + @property + def type(self) -> str: ... + @property + def types(self) -> pytyping.List[sqltypes.DuckDBPyType]: ... + +class Error(Exception): ... + +class ExpectedResultType: + CHANGED_ROWS: pytyping.ClassVar[ExpectedResultType] # value = + NOTHING: pytyping.ClassVar[ExpectedResultType] # value = + QUERY_RESULT: pytyping.ClassVar[ExpectedResultType] # value = + __members__: pytyping.ClassVar[ + dict[str, ExpectedResultType] + ] # value = {'QUERY_RESULT': , 'CHANGED_ROWS': , 'NOTHING': } # noqa: E501 + def __eq__(self, other: object) -> bool: ... + def __getstate__(self) -> int: ... + def __hash__(self) -> int: ... + def __index__(self) -> int: ... + def __init__(self, value: pytyping.SupportsInt) -> None: ... + def __int__(self) -> int: ... + def __ne__(self, other: object) -> bool: ... + def __setstate__(self, state: pytyping.SupportsInt) -> None: ... + @property + def name(self) -> str: ... + @property + def value(self) -> int: ... + +class ExplainType: + ANALYZE: pytyping.ClassVar[ExplainType] # value = + STANDARD: pytyping.ClassVar[ExplainType] # value = + __members__: pytyping.ClassVar[ + dict[str, ExplainType] + ] # value = {'STANDARD': , 'ANALYZE': } + def __eq__(self, other: object) -> bool: ... + def __getstate__(self) -> int: ... + def __hash__(self) -> int: ... + def __index__(self) -> int: ... + def __init__(self, value: pytyping.SupportsInt) -> None: ... + def __int__(self) -> int: ... + def __ne__(self, other: object) -> bool: ... + def __setstate__(self, state: pytyping.SupportsInt) -> None: ... + @property + def name(self) -> str: ... + @property + def value(self) -> int: ... + +class Expression: + def __add__(self, other: Expression) -> Expression: ... + def __and__(self, other: Expression) -> Expression: ... + def __div__(self, other: Expression) -> Expression: ... + def __eq__(self, other: Expression) -> Expression: ... # type: ignore[override] + def __floordiv__(self, other: Expression) -> Expression: ... + def __ge__(self, other: Expression) -> Expression: ... + def __gt__(self, other: Expression) -> Expression: ... 
+ @pytyping.overload + def __init__(self, arg0: str) -> None: ... + @pytyping.overload + def __init__(self, arg0: pytyping.Any) -> None: ... + def __invert__(self) -> Expression: ... + def __le__(self, other: Expression) -> Expression: ... + def __lt__(self, other: Expression) -> Expression: ... + def __mod__(self, other: Expression) -> Expression: ... + def __mul__(self, other: Expression) -> Expression: ... + def __ne__(self, other: Expression) -> Expression: ... # type: ignore[override] + def __neg__(self) -> Expression: ... + def __or__(self, other: Expression) -> Expression: ... + def __pow__(self, other: Expression) -> Expression: ... + def __radd__(self, other: Expression) -> Expression: ... + def __rand__(self, other: Expression) -> Expression: ... + def __rdiv__(self, other: Expression) -> Expression: ... + def __rfloordiv__(self, other: Expression) -> Expression: ... + def __rmod__(self, other: Expression) -> Expression: ... + def __rmul__(self, other: Expression) -> Expression: ... + def __ror__(self, other: Expression) -> Expression: ... + def __rpow__(self, other: Expression) -> Expression: ... + def __rsub__(self, other: Expression) -> Expression: ... + def __rtruediv__(self, other: Expression) -> Expression: ... + def __sub__(self, other: Expression) -> Expression: ... + def __truediv__(self, other: Expression) -> Expression: ... + def alias(self, name: str) -> Expression: ... + def asc(self) -> Expression: ... + def between(self, lower: Expression, upper: Expression) -> Expression: ... + def cast(self, type: sqltypes.DuckDBPyType) -> Expression: ... + def collate(self, collation: str) -> Expression: ... + def desc(self) -> Expression: ... + def get_name(self) -> str: ... + def isin(self, *args: Expression) -> Expression: ... + def isnotin(self, *args: Expression) -> Expression: ... + def isnotnull(self) -> Expression: ... + def isnull(self) -> Expression: ... + def nulls_first(self) -> Expression: ... + def nulls_last(self) -> Expression: ... + def otherwise(self, value: Expression) -> Expression: ... + def show(self) -> None: ... + def when(self, condition: Expression, value: Expression) -> Expression: ... + +class FatalException(DatabaseError): ... + +class HTTPException(IOException): + status_code: int + body: str + reason: str + headers: dict[str, str] + +class IOException(OperationalError): ... +class IntegrityError(DatabaseError): ... +class InternalError(DatabaseError): ... +class InternalException(InternalError): ... +class InterruptException(DatabaseError): ... +class InvalidInputException(ProgrammingError): ... +class InvalidTypeException(ProgrammingError): ... +class NotImplementedException(NotSupportedError): ... +class NotSupportedError(DatabaseError): ... +class OperationalError(DatabaseError): ... +class OutOfMemoryException(OperationalError): ... +class OutOfRangeException(DataError): ... +class ParserException(ProgrammingError): ... +class PermissionException(DatabaseError): ... +class ProgrammingError(DatabaseError): ... + +class PythonExceptionHandling: + DEFAULT: pytyping.ClassVar[PythonExceptionHandling] # value = + RETURN_NULL: pytyping.ClassVar[PythonExceptionHandling] # value = + __members__: pytyping.ClassVar[ + dict[str, PythonExceptionHandling] + ] # value = {'DEFAULT': , 'RETURN_NULL': } # noqa: E501 + def __eq__(self, other: object) -> bool: ... + def __getstate__(self) -> int: ... + def __hash__(self) -> int: ... + def __index__(self) -> int: ... + def __init__(self, value: pytyping.SupportsInt) -> None: ... + def __int__(self) -> int: ... 
+ def __ne__(self, other: object) -> bool: ... + def __setstate__(self, state: pytyping.SupportsInt) -> None: ... + @property + def name(self) -> str: ... + @property + def value(self) -> int: ... + +class RenderMode: + COLUMNS: pytyping.ClassVar[RenderMode] # value = + ROWS: pytyping.ClassVar[RenderMode] # value = + __members__: pytyping.ClassVar[ + dict[str, RenderMode] + ] # value = {'ROWS': , 'COLUMNS': } + def __eq__(self, other: object) -> bool: ... + def __getstate__(self) -> int: ... + def __hash__(self) -> int: ... + def __index__(self) -> int: ... + def __init__(self, value: pytyping.SupportsInt) -> None: ... + def __int__(self) -> int: ... + def __ne__(self, other: object) -> bool: ... + def __setstate__(self, state: pytyping.SupportsInt) -> None: ... + @property + def name(self) -> str: ... + @property + def value(self) -> int: ... + +class SequenceException(DatabaseError): ... +class SerializationException(OperationalError): ... + +class Statement: + @property + def expected_result_type(self) -> list[StatementType]: ... + @property + def named_parameters(self) -> set[str]: ... + @property + def query(self) -> str: ... + @property + def type(self) -> StatementType: ... + +class StatementType: + ALTER_STATEMENT: pytyping.ClassVar[StatementType] # value = + ANALYZE_STATEMENT: pytyping.ClassVar[StatementType] # value = + ATTACH_STATEMENT: pytyping.ClassVar[StatementType] # value = + CALL_STATEMENT: pytyping.ClassVar[StatementType] # value = + COPY_DATABASE_STATEMENT: pytyping.ClassVar[StatementType] # value = + COPY_STATEMENT: pytyping.ClassVar[StatementType] # value = + CREATE_FUNC_STATEMENT: pytyping.ClassVar[StatementType] # value = + CREATE_STATEMENT: pytyping.ClassVar[StatementType] # value = + DELETE_STATEMENT: pytyping.ClassVar[StatementType] # value = + DETACH_STATEMENT: pytyping.ClassVar[StatementType] # value = + DROP_STATEMENT: pytyping.ClassVar[StatementType] # value = + EXECUTE_STATEMENT: pytyping.ClassVar[StatementType] # value = + EXPLAIN_STATEMENT: pytyping.ClassVar[StatementType] # value = + EXPORT_STATEMENT: pytyping.ClassVar[StatementType] # value = + EXTENSION_STATEMENT: pytyping.ClassVar[StatementType] # value = + INSERT_STATEMENT: pytyping.ClassVar[StatementType] # value = + INVALID_STATEMENT: pytyping.ClassVar[StatementType] # value = + LOAD_STATEMENT: pytyping.ClassVar[StatementType] # value = + LOGICAL_PLAN_STATEMENT: pytyping.ClassVar[StatementType] # value = + MERGE_INTO_STATEMENT: pytyping.ClassVar[StatementType] # value = + MULTI_STATEMENT: pytyping.ClassVar[StatementType] # value = + PRAGMA_STATEMENT: pytyping.ClassVar[StatementType] # value = + PREPARE_STATEMENT: pytyping.ClassVar[StatementType] # value = + RELATION_STATEMENT: pytyping.ClassVar[StatementType] # value = + SELECT_STATEMENT: pytyping.ClassVar[StatementType] # value = + SET_STATEMENT: pytyping.ClassVar[StatementType] # value = + TRANSACTION_STATEMENT: pytyping.ClassVar[StatementType] # value = + UPDATE_STATEMENT: pytyping.ClassVar[StatementType] # value = + VACUUM_STATEMENT: pytyping.ClassVar[StatementType] # value = + VARIABLE_SET_STATEMENT: pytyping.ClassVar[StatementType] # value = + __members__: pytyping.ClassVar[ + dict[str, StatementType] + ] # value = {'INVALID_STATEMENT': , 'SELECT_STATEMENT': , 'INSERT_STATEMENT': , 'UPDATE_STATEMENT': , 'CREATE_STATEMENT': , 'DELETE_STATEMENT': , 'PREPARE_STATEMENT': , 'EXECUTE_STATEMENT': , 'ALTER_STATEMENT': , 'TRANSACTION_STATEMENT': , 'COPY_STATEMENT': , 'ANALYZE_STATEMENT': , 'VARIABLE_SET_STATEMENT': , 'CREATE_FUNC_STATEMENT': , 
'EXPLAIN_STATEMENT': , 'DROP_STATEMENT': , 'EXPORT_STATEMENT': , 'PRAGMA_STATEMENT': , 'VACUUM_STATEMENT': , 'CALL_STATEMENT': , 'SET_STATEMENT': , 'LOAD_STATEMENT': , 'RELATION_STATEMENT': , 'EXTENSION_STATEMENT': , 'LOGICAL_PLAN_STATEMENT': , 'ATTACH_STATEMENT': , 'DETACH_STATEMENT': , 'MULTI_STATEMENT': , 'COPY_DATABASE_STATEMENT': , 'MERGE_INTO_STATEMENT': } # noqa: E501 + def __eq__(self, other: object) -> bool: ... + def __getstate__(self) -> int: ... + def __hash__(self) -> int: ... + def __index__(self) -> int: ... + def __init__(self, value: pytyping.SupportsInt) -> None: ... + def __int__(self) -> int: ... + def __ne__(self, other: object) -> bool: ... + def __setstate__(self, state: pytyping.SupportsInt) -> None: ... + @property + def name(self) -> str: ... + @property + def value(self) -> int: ... + +class SyntaxException(ProgrammingError): ... +class TransactionException(OperationalError): ... +class TypeMismatchException(DataError): ... +class Warning(Exception): ... + +class token_type: + __members__: pytyping.ClassVar[ + dict[str, token_type] + ] # value = {'identifier': , 'numeric_const': , 'string_const': , 'operator': , 'keyword': , 'comment': } # noqa: E501 + comment: pytyping.ClassVar[token_type] # value = + identifier: pytyping.ClassVar[token_type] # value = + keyword: pytyping.ClassVar[token_type] # value = + numeric_const: pytyping.ClassVar[token_type] # value = + operator: pytyping.ClassVar[token_type] # value = + string_const: pytyping.ClassVar[token_type] # value = + def __eq__(self, other: object) -> bool: ... + def __getstate__(self) -> int: ... + def __hash__(self) -> int: ... + def __index__(self) -> int: ... + def __init__(self, value: pytyping.SupportsInt) -> None: ... + def __int__(self) -> int: ... + def __ne__(self, other: object) -> bool: ... + def __setstate__(self, state: pytyping.SupportsInt) -> None: ... + @property + def name(self) -> str: ... + @property + def value(self) -> int: ... + +def CaseExpression(condition: Expression, value: Expression) -> Expression: ... +def CoalesceOperator(*args: Expression) -> Expression: ... +def ColumnExpression(*args: str) -> Expression: ... +def ConstantExpression(value: Expression | str) -> Expression: ... +def DefaultExpression() -> Expression: ... +def FunctionExpression(function_name: str, *args: Expression) -> Expression: ... +def LambdaExpression(lhs: Expression | str | tuple[str], rhs: Expression) -> Expression: ... +def SQLExpression(expression: str) -> Expression: ... +@pytyping.overload +def StarExpression(*, exclude: Expression | str | tuple[str]) -> Expression: ... +@pytyping.overload +def StarExpression() -> Expression: ... +def aggregate( + df: pandas.DataFrame, + aggr_expr: Expression | list[Expression] | str | list[str], + group_expr: str = "", + *, + connection: DuckDBPyConnection | None = None, +) -> DuckDBPyRelation: ... +def alias(df: pandas.DataFrame, alias: str, *, connection: DuckDBPyConnection | None = None) -> DuckDBPyRelation: ... +def append( + table_name: str, df: pandas.DataFrame, *, by_name: bool = False, connection: DuckDBPyConnection | None = None +) -> DuckDBPyConnection: ... +def array_type( + type: sqltypes.DuckDBPyType, size: pytyping.SupportsInt, *, connection: DuckDBPyConnection | None = None +) -> sqltypes.DuckDBPyType: ... +@pytyping.overload +def arrow( + rows_per_batch: pytyping.SupportsInt = 1000000, *, connection: DuckDBPyConnection | None = None +) -> pyarrow.lib.RecordBatchReader: ... 
+@pytyping.overload +def arrow(arrow_object: pytyping.Any, *, connection: DuckDBPyConnection | None = None) -> DuckDBPyRelation: ... +def begin(*, connection: DuckDBPyConnection | None = None) -> DuckDBPyConnection: ... +def checkpoint(*, connection: DuckDBPyConnection | None = None) -> DuckDBPyConnection: ... +def close(*, connection: DuckDBPyConnection | None = None) -> None: ... +def commit(*, connection: DuckDBPyConnection | None = None) -> DuckDBPyConnection: ... +def connect( + database: str | pathlib.Path = ":memory:", + read_only: bool = False, + config: dict[str, str] | None = None, +) -> DuckDBPyConnection: ... +def create_function( + name: str, + function: Callable[..., pytyping.Any], + parameters: list[sqltypes.DuckDBPyType] | None = None, + return_type: sqltypes.DuckDBPyType | None = None, + *, + type: func.PythonUDFType = ..., + null_handling: func.FunctionNullHandling = ..., + exception_handling: PythonExceptionHandling = ..., + side_effects: bool = False, + connection: DuckDBPyConnection | None = None, +) -> DuckDBPyConnection: ... +def cursor(*, connection: DuckDBPyConnection | None = None) -> DuckDBPyConnection: ... +def decimal_type( + width: pytyping.SupportsInt, scale: pytyping.SupportsInt, *, connection: DuckDBPyConnection | None = None +) -> sqltypes.DuckDBPyType: ... +def default_connection() -> DuckDBPyConnection: ... +def description( + *, connection: DuckDBPyConnection | None = None +) -> list[tuple[str, sqltypes.DuckDBPyType, None, None, None, None, None]] | None: ... +@pytyping.overload +def df(*, date_as_object: bool = False, connection: DuckDBPyConnection | None = None) -> pandas.DataFrame: ... +@pytyping.overload +def df(df: pandas.DataFrame, *, connection: DuckDBPyConnection | None = None) -> DuckDBPyRelation: ... +def distinct(df: pandas.DataFrame, *, connection: DuckDBPyConnection | None = None) -> DuckDBPyRelation: ... +def dtype(type_str: str, *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... +def duplicate(*, connection: DuckDBPyConnection | None = None) -> DuckDBPyConnection: ... +def enum_type( + name: str, + type: sqltypes.DuckDBPyType, + values: list[pytyping.Any], + *, + connection: DuckDBPyConnection | None = None, +) -> sqltypes.DuckDBPyType: ... +def execute( + query: Statement | str, + parameters: object = None, + *, + connection: DuckDBPyConnection | None = None, +) -> DuckDBPyConnection: ... +def executemany( + query: Statement | str, + parameters: object = None, + *, + connection: DuckDBPyConnection | None = None, +) -> DuckDBPyConnection: ... +def extract_statements(query: str, *, connection: DuckDBPyConnection | None = None) -> list[Statement]: ... +def fetch_arrow_table( + rows_per_batch: pytyping.SupportsInt = 1000000, *, connection: DuckDBPyConnection | None = None +) -> pyarrow.lib.Table: ... +def fetch_df(*, date_as_object: bool = False, connection: DuckDBPyConnection | None = None) -> pandas.DataFrame: ... +def fetch_df_chunk( + vectors_per_chunk: pytyping.SupportsInt = 1, + *, + date_as_object: bool = False, + connection: DuckDBPyConnection | None = None, +) -> pandas.DataFrame: ... +def fetch_record_batch( + rows_per_batch: pytyping.SupportsInt = 1000000, *, connection: DuckDBPyConnection | None = None +) -> pyarrow.lib.RecordBatchReader: ... +def fetchall(*, connection: DuckDBPyConnection | None = None) -> list[tuple[pytyping.Any, ...]]: ... +def fetchdf(*, date_as_object: bool = False, connection: DuckDBPyConnection | None = None) -> pandas.DataFrame: ... 
+def fetchmany( + size: pytyping.SupportsInt = 1, *, connection: DuckDBPyConnection | None = None +) -> list[tuple[pytyping.Any, ...]]: ... +def fetchnumpy( + *, connection: DuckDBPyConnection | None = None +) -> dict[str, np.typing.NDArray[pytyping.Any] | pandas.Categorical]: ... +def fetchone(*, connection: DuckDBPyConnection | None = None) -> tuple[pytyping.Any, ...] | None: ... +def filesystem_is_registered(name: str, *, connection: DuckDBPyConnection | None = None) -> bool: ... +def filter( + df: pandas.DataFrame, + filter_expr: Expression | str, + *, + connection: DuckDBPyConnection | None = None, +) -> DuckDBPyRelation: ... +def from_arrow( + arrow_object: object, + *, + connection: DuckDBPyConnection | None = None, +) -> DuckDBPyRelation: ... +def from_csv_auto( + path_or_buffer: str | bytes | os.PathLike[str], + header: bool | int | None = None, + compression: str | None = None, + sep: str | None = None, + delimiter: str | None = None, + files_to_sniff: int | None = None, + comment: str | None = None, + thousands: str | None = None, + dtype: dict[str, str] | list[str] | None = None, + na_values: str | list[str] | None = None, + skiprows: int | None = None, + quotechar: str | None = None, + escapechar: str | None = None, + encoding: str | None = None, + parallel: bool | None = None, + date_format: str | None = None, + timestamp_format: str | None = None, + sample_size: int | None = None, + auto_detect: bool | int | None = None, + all_varchar: bool | None = None, + normalize_names: bool | None = None, + null_padding: bool | None = None, + names: list[str] | None = None, + lineterminator: str | None = None, + columns: dict[str, str] | None = None, + auto_type_candidates: list[str] | None = None, + max_line_size: int | None = None, + ignore_errors: bool | None = None, + store_rejects: bool | None = None, + rejects_table: str | None = None, + rejects_scan: str | None = None, + rejects_limit: int | None = None, + force_not_null: list[str] | None = None, + buffer_size: int | None = None, + decimal: str | None = None, + allow_quoted_nulls: bool | None = None, + filename: bool | str | None = None, + hive_partitioning: bool | None = None, + union_by_name: bool | None = None, + hive_types: dict[str, str] | None = None, + hive_types_autocast: bool | None = None, + strict_mode: bool | None = None, +) -> DuckDBPyRelation: ... +def from_df(df: pandas.DataFrame, *, connection: DuckDBPyConnection | None = None) -> DuckDBPyRelation: ... +@pytyping.overload +def from_parquet( + file_glob: str, + binary_as_string: bool = False, + *, + file_row_number: bool = False, + filename: bool = False, + hive_partitioning: bool = False, + union_by_name: bool = False, + compression: str | None = None, + connection: DuckDBPyConnection | None = None, +) -> DuckDBPyRelation: ... +@pytyping.overload +def from_parquet( + file_globs: Sequence[str], + binary_as_string: bool = False, + *, + file_row_number: bool = False, + filename: bool = False, + hive_partitioning: bool = False, + union_by_name: bool = False, + compression: pytyping.Any = None, + connection: DuckDBPyConnection | None = None, +) -> DuckDBPyRelation: ... +def from_query( + query: Statement | str, + *, + alias: str = "", + params: object = None, + connection: DuckDBPyConnection | None = None, +) -> DuckDBPyRelation: ... +def get_table_names( + query: str, *, qualified: bool = False, connection: DuckDBPyConnection | None = None +) -> set[str]: ... 
+def install_extension( + extension: str, + *, + force_install: bool = False, + repository: str | None = None, + repository_url: str | None = None, + version: str | None = None, + connection: DuckDBPyConnection | None = None, +) -> None: ... +def interrupt(*, connection: DuckDBPyConnection | None = None) -> None: ... +def limit( + df: pandas.DataFrame, + n: pytyping.SupportsInt, + offset: pytyping.SupportsInt = 0, + *, + connection: DuckDBPyConnection | None = None, +) -> DuckDBPyRelation: ... +def list_filesystems(*, connection: DuckDBPyConnection | None = None) -> list[str]: ... +def list_type( + type: sqltypes.DuckDBPyType, *, connection: DuckDBPyConnection | None = None +) -> sqltypes.DuckDBPyType: ... +def load_extension(extension: str, *, connection: DuckDBPyConnection | None = None) -> None: ... +def map_type( + key: sqltypes.DuckDBPyType, + value: sqltypes.DuckDBPyType, + *, + connection: DuckDBPyConnection | None = None, +) -> sqltypes.DuckDBPyType: ... +def order( + df: pandas.DataFrame, order_expr: str, *, connection: DuckDBPyConnection | None = None +) -> DuckDBPyRelation: ... +def pl( + rows_per_batch: pytyping.SupportsInt = 1000000, + *, + lazy: bool = False, + connection: DuckDBPyConnection | None = None, +) -> polars.DataFrame: ... +def project( + df: pandas.DataFrame, *args: str | Expression, groups: str = "", connection: DuckDBPyConnection | None = None +) -> DuckDBPyRelation: ... +def query( + query: Statement | str, + *, + alias: str = "", + params: object = None, + connection: DuckDBPyConnection | None = None, +) -> DuckDBPyRelation: ... +def query_df( + df: pandas.DataFrame, + virtual_table_name: str, + sql_query: str, + *, + connection: DuckDBPyConnection | None = None, +) -> DuckDBPyRelation: ... +def query_progress(*, connection: DuckDBPyConnection | None = None) -> float: ... +def read_csv( + path_or_buffer: str | bytes | os.PathLike[str], + header: bool | int | None = None, + compression: str | None = None, + sep: str | None = None, + delimiter: str | None = None, + files_to_sniff: int | None = None, + comment: str | None = None, + thousands: str | None = None, + dtype: dict[str, str] | list[str] | None = None, + na_values: str | list[str] | None = None, + skiprows: int | None = None, + quotechar: str | None = None, + escapechar: str | None = None, + encoding: str | None = None, + parallel: bool | None = None, + date_format: str | None = None, + timestamp_format: str | None = None, + sample_size: int | None = None, + auto_detect: bool | int | None = None, + all_varchar: bool | None = None, + normalize_names: bool | None = None, + null_padding: bool | None = None, + names: list[str] | None = None, + lineterminator: str | None = None, + columns: dict[str, str] | None = None, + auto_type_candidates: list[str] | None = None, + max_line_size: int | None = None, + ignore_errors: bool | None = None, + store_rejects: bool | None = None, + rejects_table: str | None = None, + rejects_scan: str | None = None, + rejects_limit: int | None = None, + force_not_null: list[str] | None = None, + buffer_size: int | None = None, + decimal: str | None = None, + allow_quoted_nulls: bool | None = None, + filename: bool | str | None = None, + hive_partitioning: bool | None = None, + union_by_name: bool | None = None, + hive_types: dict[str, str] | None = None, + hive_types_autocast: bool | None = None, + strict_mode: bool | None = None, +) -> DuckDBPyRelation: ... 
+def read_json( + path_or_buffer: str | bytes | os.PathLike[str], + *, + columns: dict[str, str] | None = None, + sample_size: int | None = None, + maximum_depth: int | None = None, + records: str | None = None, + format: str | None = None, + date_format: str | None = None, + timestamp_format: str | None = None, + compression: str | None = None, + maximum_object_size: int | None = None, + ignore_errors: bool | None = None, + convert_strings_to_integers: bool | None = None, + field_appearance_threshold: float | None = None, + map_inference_threshold: int | None = None, + maximum_sample_files: int | None = None, + filename: bool | str | None = None, + hive_partitioning: bool | None = None, + union_by_name: bool | None = None, + hive_types: dict[str, str] | None = None, + hive_types_autocast: bool | None = None, +) -> DuckDBPyRelation: ... +@pytyping.overload +def read_parquet( + file_glob: str, + binary_as_string: bool = False, + *, + file_row_number: bool = False, + filename: bool = False, + hive_partitioning: bool = False, + union_by_name: bool = False, + compression: str | None = None, + connection: DuckDBPyConnection | None = None, +) -> DuckDBPyRelation: ... +@pytyping.overload +def read_parquet( + file_globs: Sequence[str], + binary_as_string: bool = False, + *, + file_row_number: bool = False, + filename: bool = False, + hive_partitioning: bool = False, + union_by_name: bool = False, + compression: pytyping.Any = None, + connection: DuckDBPyConnection | None = None, +) -> DuckDBPyRelation: ... +def register( + view_name: str, + python_object: object, + *, + connection: DuckDBPyConnection | None = None, +) -> DuckDBPyConnection: ... +def register_filesystem( + filesystem: fsspec.AbstractFileSystem, *, connection: DuckDBPyConnection | None = None +) -> None: ... +def remove_function(name: str, *, connection: DuckDBPyConnection | None = None) -> DuckDBPyConnection: ... +def rollback(*, connection: DuckDBPyConnection | None = None) -> DuckDBPyConnection: ... +def row_type( + fields: dict[str, sqltypes.DuckDBPyType] | list[sqltypes.DuckDBPyType], + *, + connection: DuckDBPyConnection | None = None, +) -> sqltypes.DuckDBPyType: ... +def rowcount(*, connection: DuckDBPyConnection | None = None) -> int: ... +def set_default_connection(connection: DuckDBPyConnection) -> None: ... +def sql( + query: Statement | str, + *, + alias: str = "", + params: object = None, + connection: DuckDBPyConnection | None = None, +) -> DuckDBPyRelation: ... +def sqltype(type_str: str, *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... +def string_type(collation: str = "", *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... +def struct_type( + fields: dict[str, sqltypes.DuckDBPyType] | list[sqltypes.DuckDBPyType], + *, + connection: DuckDBPyConnection | None = None, +) -> sqltypes.DuckDBPyType: ... +def table(table_name: str, *, connection: DuckDBPyConnection | None = None) -> DuckDBPyRelation: ... +def table_function( + name: str, + parameters: object = None, + *, + connection: DuckDBPyConnection | None = None, +) -> DuckDBPyRelation: ... +def tf(*, connection: DuckDBPyConnection | None = None) -> dict[str, tensorflow.Tensor]: ... +def tokenize(query: str) -> list[tuple[int, token_type]]: ... +def torch(*, connection: DuckDBPyConnection | None = None) -> dict[str, pytorch.Tensor]: ... +def type(type_str: str, *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... 
+def union_type( + members: dict[str, sqltypes.DuckDBPyType] | list[sqltypes.DuckDBPyType], + *, + connection: DuckDBPyConnection | None = None, +) -> sqltypes.DuckDBPyType: ... +def unregister(view_name: str, *, connection: DuckDBPyConnection | None = None) -> DuckDBPyConnection: ... +def unregister_filesystem(name: str, *, connection: DuckDBPyConnection | None = None) -> None: ... +def values( + *args: list[pytyping.Any] | tuple[Expression, ...] | Expression, connection: DuckDBPyConnection | None = None +) -> DuckDBPyRelation: ... +def view(view_name: str, *, connection: DuckDBPyConnection | None = None) -> DuckDBPyRelation: ... +def write_csv( + df: pandas.DataFrame, + filename: str, + *, + sep: str | None = None, + na_rep: str | None = None, + header: bool | None = None, + quotechar: str | None = None, + escapechar: str | None = None, + date_format: str | None = None, + timestamp_format: str | None = None, + quoting: str | int | None = None, + encoding: str | None = None, + compression: str | None = None, + overwrite: bool | None = None, + per_thread_output: bool | None = None, + use_tmp_file: bool | None = None, + partition_by: list[str] | None = None, + write_partition_columns: bool | None = None, +) -> None: ... + +__formatted_python_version__: str +__git_revision__: str +__interactive__: bool +__jupyter__: bool +__standard_vector_size__: int +__version__: str +_clean_default_connection: pytyping.Any # value = +apilevel: str +paramstyle: str +threadsafety: int diff --git a/_duckdb-stubs/_func.pyi b/_duckdb-stubs/_func.pyi new file mode 100644 index 00000000..68484499 --- /dev/null +++ b/_duckdb-stubs/_func.pyi @@ -0,0 +1,46 @@ +import typing as pytyping + +__all__: list[str] = ["ARROW", "DEFAULT", "NATIVE", "SPECIAL", "FunctionNullHandling", "PythonUDFType"] + +class FunctionNullHandling: + DEFAULT: pytyping.ClassVar[FunctionNullHandling] # value = + SPECIAL: pytyping.ClassVar[FunctionNullHandling] # value = + __members__: pytyping.ClassVar[ + dict[str, FunctionNullHandling] + ] # value = {'DEFAULT': , 'SPECIAL': } + def __eq__(self, other: object) -> bool: ... + def __getstate__(self) -> int: ... + def __hash__(self) -> int: ... + def __index__(self) -> int: ... + def __init__(self, value: pytyping.SupportsInt) -> None: ... + def __int__(self) -> int: ... + def __ne__(self, other: object) -> bool: ... + def __setstate__(self, state: pytyping.SupportsInt) -> None: ... + @property + def name(self) -> str: ... + @property + def value(self) -> int: ... + +class PythonUDFType: + ARROW: pytyping.ClassVar[PythonUDFType] # value = + NATIVE: pytyping.ClassVar[PythonUDFType] # value = + __members__: pytyping.ClassVar[ + dict[str, PythonUDFType] + ] # value = {'NATIVE': , 'ARROW': } + def __eq__(self, other: object) -> bool: ... + def __getstate__(self) -> int: ... + def __hash__(self) -> int: ... + def __index__(self) -> int: ... + def __init__(self, value: pytyping.SupportsInt) -> None: ... + def __int__(self) -> int: ... + def __ne__(self, other: object) -> bool: ... + def __setstate__(self, state: pytyping.SupportsInt) -> None: ... + @property + def name(self) -> str: ... + @property + def value(self) -> int: ... 
+ +ARROW: PythonUDFType # value = +DEFAULT: FunctionNullHandling # value = +NATIVE: PythonUDFType # value = +SPECIAL: FunctionNullHandling # value = diff --git a/_duckdb-stubs/_sqltypes.pyi b/_duckdb-stubs/_sqltypes.pyi new file mode 100644 index 00000000..88abb977 --- /dev/null +++ b/_duckdb-stubs/_sqltypes.pyi @@ -0,0 +1,75 @@ +import duckdb +import typing as pytyping + +__all__: list[str] = [ + "BIGINT", + "BIT", + "BLOB", + "BOOLEAN", + "DATE", + "DOUBLE", + "FLOAT", + "HUGEINT", + "INTEGER", + "INTERVAL", + "SMALLINT", + "SQLNULL", + "TIME", + "TIMESTAMP", + "TIMESTAMP_MS", + "TIMESTAMP_NS", + "TIMESTAMP_S", + "TIMESTAMP_TZ", + "TIME_TZ", + "TINYINT", + "UBIGINT", + "UHUGEINT", + "UINTEGER", + "USMALLINT", + "UTINYINT", + "UUID", + "VARCHAR", + "DuckDBPyType", +] + +class DuckDBPyType: + def __eq__(self, other: object) -> bool: ... + def __getattr__(self, name: str) -> DuckDBPyType: ... + def __getitem__(self, name: str) -> DuckDBPyType: ... + def __hash__(self) -> int: ... + @pytyping.overload + def __init__(self, type_str: str, connection: duckdb.DuckDBPyConnection) -> None: ... + @pytyping.overload + def __init__(self, obj: object) -> None: ... + @property + def children(self) -> list[tuple[str, object]]: ... + @property + def id(self) -> str: ... + +BIGINT: DuckDBPyType # value = BIGINT +BIT: DuckDBPyType # value = BIT +BLOB: DuckDBPyType # value = BLOB +BOOLEAN: DuckDBPyType # value = BOOLEAN +DATE: DuckDBPyType # value = DATE +DOUBLE: DuckDBPyType # value = DOUBLE +FLOAT: DuckDBPyType # value = FLOAT +HUGEINT: DuckDBPyType # value = HUGEINT +INTEGER: DuckDBPyType # value = INTEGER +INTERVAL: DuckDBPyType # value = INTERVAL +SMALLINT: DuckDBPyType # value = SMALLINT +SQLNULL: DuckDBPyType # value = "NULL" +TIME: DuckDBPyType # value = TIME +TIMESTAMP: DuckDBPyType # value = TIMESTAMP +TIMESTAMP_MS: DuckDBPyType # value = TIMESTAMP_MS +TIMESTAMP_NS: DuckDBPyType # value = TIMESTAMP_NS +TIMESTAMP_S: DuckDBPyType # value = TIMESTAMP_S +TIMESTAMP_TZ: DuckDBPyType # value = TIMESTAMP WITH TIME ZONE +TIME_TZ: DuckDBPyType # value = TIME WITH TIME ZONE +TINYINT: DuckDBPyType # value = TINYINT +UBIGINT: DuckDBPyType # value = UBIGINT +UHUGEINT: DuckDBPyType # value = UHUGEINT +UINTEGER: DuckDBPyType # value = UINTEGER +USMALLINT: DuckDBPyType # value = USMALLINT +UTINYINT: DuckDBPyType # value = UTINYINT +UUID: DuckDBPyType # value = UUID +VARCHAR: DuckDBPyType # value = VARCHAR diff --git a/duckdb/__init__.py b/duckdb/__init__.py index d6d13faa..e1a4aa9a 100644 --- a/duckdb/__init__.py +++ b/duckdb/__init__.py @@ -1,123 +1,75 @@ -# ruff: noqa: D104, F401, E402 -from importlib.metadata import version +# ruff: noqa: F401 +"""The DuckDB Python Package. -from _duckdb import __version__ as duckdb_version +This module re-exports the DuckDB C++ extension (`_duckdb`) and provides DuckDB's public API. -import duckdb.functional as functional -import duckdb.typing as typing +Note: +- Some symbols exposed here are implementation details of DuckDB's C++ engine. +- They are kept for backwards compatibility but are not considered stable API. +- Future versions may move them into submodules with deprecation warnings. +""" -# duckdb.__version__ returns the version of the distribution package, i.e. 
the pypi version -__version__ = version("duckdb") - - -# version() is a more human friendly formatted version string of both the distribution package and the bundled duckdb -def version() -> str: - return f"{__version__} (with duckdb {duckdb_version})" - - -_exported_symbols = ["__version__", "version"] - -_exported_symbols.extend(["typing", "functional"]) - - -class DBAPITypeObject: - def __init__(self, types: list[typing.DuckDBPyType]) -> None: - self.types = types - - def __eq__(self, other: object) -> bool: - if isinstance(other, typing.DuckDBPyType): - return other in self.types - return False - - def __repr__(self) -> str: - return f"" - - -# Define the standard DBAPI sentinels -STRING = DBAPITypeObject([typing.VARCHAR]) -NUMBER = DBAPITypeObject( - [ - typing.TINYINT, - typing.UTINYINT, - typing.SMALLINT, - typing.USMALLINT, - typing.INTEGER, - typing.UINTEGER, - typing.BIGINT, - typing.UBIGINT, - typing.HUGEINT, - typing.UHUGEINT, - typing.DuckDBPyType("BIGNUM"), - typing.DuckDBPyType("DECIMAL"), - typing.FLOAT, - typing.DOUBLE, - ] -) -DATETIME = DBAPITypeObject( - [ - typing.DATE, - typing.TIME, - typing.TIME_TZ, - typing.TIMESTAMP, - typing.TIMESTAMP_TZ, - typing.TIMESTAMP_NS, - typing.TIMESTAMP_MS, - typing.TIMESTAMP_S, - ] -) -BINARY = DBAPITypeObject([typing.BLOB]) -ROWID = None - -# Classes from _duckdb import ( + BinderException, CaseExpression, + CatalogException, CoalesceOperator, ColumnExpression, + ConnectionException, ConstantExpression, + ConstraintException, + ConversionException, CSVLineTerminator, + DatabaseError, + DataError, DefaultExpression, + DependencyException, DuckDBPyConnection, DuckDBPyRelation, + Error, ExpectedResultType, ExplainType, Expression, + FatalException, FunctionExpression, + HTTPException, + IntegrityError, + InternalError, + InternalException, + InterruptException, + InvalidInputException, + InvalidTypeException, + IOException, LambdaExpression, + NotImplementedException, + NotSupportedError, + OperationalError, + OutOfMemoryException, + OutOfRangeException, + ParserException, + PermissionException, + ProgrammingError, PythonExceptionHandling, RenderMode, + SequenceException, + SerializationException, SQLExpression, StarExpression, Statement, StatementType, -) - -_exported_symbols.extend( - [ - "DuckDBPyRelation", - "DuckDBPyConnection", - "ExplainType", - "PythonExceptionHandling", - "Expression", - "ConstantExpression", - "ColumnExpression", - "DefaultExpression", - "CoalesceOperator", - "LambdaExpression", - "StarExpression", - "FunctionExpression", - "CaseExpression", - "SQLExpression", - ] -) - -# These are overloaded twice, we define them inside of C++ so pybind can deal with it -_exported_symbols.extend(["df", "arrow"]) -# NOTE: this section is generated by tools/pythonpkg/scripts/generate_connection_wrapper_methods.py. -# Do not edit this section manually, your changes will be overwritten! 
-# START OF CONNECTION WRAPPER -from _duckdb import ( + SyntaxException, + TransactionException, + TypeMismatchException, + Warning, + __formatted_python_version__, + __git_revision__, + __interactive__, + __jupyter__, + __standard_vector_size__, + _clean_default_connection, aggregate, alias, + apilevel, append, array_type, arrow, @@ -125,9 +77,11 @@ def __repr__(self) -> str: checkpoint, close, commit, + connect, create_function, cursor, decimal_type, + default_connection, description, df, distinct, @@ -162,6 +116,7 @@ def __repr__(self) -> str: load_extension, map_type, order, + paramstyle, pl, project, query, @@ -176,6 +131,7 @@ def __repr__(self) -> str: rollback, row_type, rowcount, + set_default_connection, sql, sqltype, string_type, @@ -183,6 +139,9 @@ def __repr__(self) -> str: table, table_function, tf, + threadsafety, + token_type, + tokenize, torch, type, union_type, @@ -193,220 +152,19 @@ def __repr__(self) -> str: write_csv, ) -_exported_symbols.extend( - [ - "cursor", - "register_filesystem", - "unregister_filesystem", - "list_filesystems", - "filesystem_is_registered", - "create_function", - "remove_function", - "sqltype", - "dtype", - "type", - "array_type", - "list_type", - "union_type", - "string_type", - "enum_type", - "decimal_type", - "struct_type", - "row_type", - "map_type", - "duplicate", - "execute", - "executemany", - "close", - "interrupt", - "query_progress", - "fetchone", - "fetchmany", - "fetchall", - "fetchnumpy", - "fetchdf", - "fetch_df", - "df", - "fetch_df_chunk", - "pl", - "fetch_arrow_table", - "arrow", - "fetch_record_batch", - "torch", - "tf", - "begin", - "commit", - "rollback", - "checkpoint", - "append", - "register", - "unregister", - "table", - "view", - "values", - "table_function", - "read_json", - "extract_statements", - "sql", - "query", - "from_query", - "read_csv", - "from_csv_auto", - "from_df", - "from_arrow", - "from_parquet", - "read_parquet", - "from_parquet", - "read_parquet", - "get_table_names", - "install_extension", - "load_extension", - "project", - "distinct", - "write_csv", - "aggregate", - "alias", - "filter", - "limit", - "order", - "query_df", - "description", - "rowcount", - ] +from duckdb._dbapi_type_object import ( + BINARY, + DATETIME, + NUMBER, + ROWID, + STRING, + DBAPITypeObject, ) - -# END OF CONNECTION WRAPPER - -# Enums -from _duckdb import ANALYZE, COLUMNS, DEFAULT, RETURN_NULL, ROWS, STANDARD - -_exported_symbols.extend(["ANALYZE", "DEFAULT", "RETURN_NULL", "STANDARD"]) - - -# read-only properties -from _duckdb import ( - __formatted_python_version__, - __interactive__, - __jupyter__, - __standard_vector_size__, - apilevel, - comment, - identifier, - keyword, - numeric_const, - operator, - paramstyle, - string_const, - threadsafety, - token_type, - tokenize, -) - -_exported_symbols.extend( - [ - "__standard_vector_size__", - "__interactive__", - "__jupyter__", - "__formatted_python_version__", - "apilevel", - "comment", - "identifier", - "keyword", - "numeric_const", - "operator", - "paramstyle", - "string_const", - "threadsafety", - "token_type", - "tokenize", - ] -) - - -from _duckdb import ( - connect, - default_connection, - set_default_connection, -) - -_exported_symbols.extend( - [ - "connect", - "default_connection", - "set_default_connection", - ] -) - -# Exceptions -from _duckdb import ( - BinderException, - CatalogException, - ConnectionException, - ConstraintException, - ConversionException, - DataError, - Error, - FatalException, - HTTPException, - IntegrityError, - InternalError, - InternalException, 
- InterruptException, - InvalidInputException, - InvalidTypeException, - IOException, - NotImplementedException, - NotSupportedError, - OperationalError, - OutOfMemoryException, - OutOfRangeException, - ParserException, - PermissionException, - ProgrammingError, - SequenceException, - SerializationException, - SyntaxException, - TransactionException, - TypeMismatchException, - Warning, -) - -_exported_symbols.extend( - [ - "Error", - "DataError", - "ConversionException", - "OutOfRangeException", - "TypeMismatchException", - "FatalException", - "IntegrityError", - "ConstraintException", - "InternalError", - "InternalException", - "InterruptException", - "NotSupportedError", - "NotImplementedException", - "OperationalError", - "ConnectionException", - "IOException", - "HTTPException", - "OutOfMemoryException", - "SerializationException", - "TransactionException", - "PermissionException", - "ProgrammingError", - "BinderException", - "CatalogException", - "InvalidInputException", - "InvalidTypeException", - "ParserException", - "SyntaxException", - "SequenceException", - "Warning", - ] +from duckdb._version import ( + __duckdb_version__, + __version__, + version, ) - -# Value from duckdb.value.constant import ( BinaryValue, BitValue, @@ -419,10 +177,13 @@ def __repr__(self) -> str: HugeIntegerValue, IntegerValue, IntervalValue, + ListValue, LongValue, + MapValue, NullValue, ShortValue, StringValue, + StructValue, TimestampMilisecondValue, TimestampNanosecondValue, TimestampSecondValue, @@ -430,7 +191,9 @@ def __repr__(self) -> str: TimestampValue, TimeTimeZoneValue, TimeValue, + UnionType, UnsignedBinaryValue, + UnsignedHugeIntegerValue, UnsignedIntegerValue, UnsignedLongValue, UnsignedShortValue, @@ -438,37 +201,181 @@ def __repr__(self) -> str: Value, ) -_exported_symbols.extend( - [ - "Value", - "NullValue", - "BooleanValue", - "UnsignedBinaryValue", - "UnsignedShortValue", - "UnsignedIntegerValue", - "UnsignedLongValue", - "BinaryValue", - "ShortValue", - "IntegerValue", - "LongValue", - "HugeIntegerValue", - "FloatValue", - "DoubleValue", - "DecimalValue", - "StringValue", - "UUIDValue", - "BitValue", - "BlobValue", - "DateValue", - "IntervalValue", - "TimestampValue", - "TimestampSecondValue", - "TimestampMilisecondValue", - "TimestampNanosecondValue", - "TimestampTimeZoneValue", - "TimeValue", - "TimeTimeZoneValue", - ] -) - -__all__ = _exported_symbols +__all__: list[str] = [ + "BinaryValue", + "BinderException", + "BitValue", + "BlobValue", + "BooleanValue", + "CSVLineTerminator", + "CaseExpression", + "CatalogException", + "CoalesceOperator", + "ColumnExpression", + "ConnectionException", + "ConstantExpression", + "ConstraintException", + "ConversionException", + "DataError", + "DatabaseError", + "DateValue", + "DecimalValue", + "DefaultExpression", + "DependencyException", + "DoubleValue", + "DuckDBPyConnection", + "DuckDBPyRelation", + "Error", + "ExpectedResultType", + "ExplainType", + "Expression", + "FatalException", + "FloatValue", + "FunctionExpression", + "HTTPException", + "HugeIntegerValue", + "IOException", + "IntegerValue", + "IntegrityError", + "InternalError", + "InternalException", + "InterruptException", + "IntervalValue", + "InvalidInputException", + "InvalidTypeException", + "LambdaExpression", + "ListValue", + "LongValue", + "MapValue", + "NotImplementedException", + "NotSupportedError", + "NullValue", + "OperationalError", + "OutOfMemoryException", + "OutOfRangeException", + "ParserException", + "PermissionException", + "ProgrammingError", + 
"PythonExceptionHandling", + "RenderMode", + "SQLExpression", + "SequenceException", + "SerializationException", + "ShortValue", + "StarExpression", + "Statement", + "StatementType", + "StringValue", + "StructValue", + "SyntaxException", + "TimeTimeZoneValue", + "TimeValue", + "TimestampMilisecondValue", + "TimestampNanosecondValue", + "TimestampSecondValue", + "TimestampTimeZoneValue", + "TimestampValue", + "TransactionException", + "TypeMismatchException", + "UUIDValue", + "UnionType", + "UnsignedBinaryValue", + "UnsignedHugeIntegerValue", + "UnsignedIntegerValue", + "UnsignedLongValue", + "UnsignedShortValue", + "Value", + "Warning", + "__formatted_python_version__", + "__git_revision__", + "__interactive__", + "__jupyter__", + "__standard_vector_size__", + "__version__", + "_clean_default_connection", + "aggregate", + "alias", + "apilevel", + "append", + "array_type", + "arrow", + "begin", + "checkpoint", + "close", + "commit", + "connect", + "create_function", + "cursor", + "decimal_type", + "default_connection", + "description", + "df", + "distinct", + "dtype", + "duplicate", + "enum_type", + "execute", + "executemany", + "extract_statements", + "fetch_arrow_table", + "fetch_df", + "fetch_df_chunk", + "fetch_record_batch", + "fetchall", + "fetchdf", + "fetchmany", + "fetchnumpy", + "fetchone", + "filesystem_is_registered", + "filter", + "from_arrow", + "from_csv_auto", + "from_df", + "from_parquet", + "from_query", + "get_table_names", + "install_extension", + "interrupt", + "limit", + "list_filesystems", + "list_type", + "load_extension", + "map_type", + "order", + "paramstyle", + "paramstyle", + "pl", + "project", + "query", + "query_df", + "query_progress", + "read_csv", + "read_json", + "read_parquet", + "register", + "register_filesystem", + "remove_function", + "rollback", + "row_type", + "rowcount", + "set_default_connection", + "sql", + "sqltype", + "string_type", + "struct_type", + "table", + "table_function", + "tf", + "threadsafety", + "threadsafety", + "token_type", + "tokenize", + "torch", + "type", + "union_type", + "unregister", + "unregister_filesystem", + "values", + "view", + "write_csv", +] diff --git a/duckdb/__init__.pyi b/duckdb/__init__.pyi deleted file mode 100644 index 79066bb2..00000000 --- a/duckdb/__init__.pyi +++ /dev/null @@ -1,1137 +0,0 @@ -# to regenerate this from scratch, run scripts/regenerate_python_stubs.sh . -# be warned - currently there are still tweaks needed after this file is -# generated. These should be annotated with a comment like -# # stubgen override -# to help the sanity of maintainers. 
- -import duckdb.typing as typing -import duckdb.functional as functional -from duckdb.typing import DuckDBPyType -from duckdb.functional import FunctionNullHandling, PythonUDFType -from duckdb.value.constant import ( - Value, - NullValue, - BooleanValue, - UnsignedBinaryValue, - UnsignedShortValue, - UnsignedIntegerValue, - UnsignedLongValue, - BinaryValue, - ShortValue, - IntegerValue, - LongValue, - HugeIntegerValue, - FloatValue, - DoubleValue, - DecimalValue, - StringValue, - UUIDValue, - BitValue, - BlobValue, - DateValue, - IntervalValue, - TimestampValue, - TimestampSecondValue, - TimestampMilisecondValue, - TimestampNanosecondValue, - TimestampTimeZoneValue, - TimeValue, - TimeTimeZoneValue, -) - -# We also run this in python3.7, where this is needed -from typing_extensions import Literal - -# stubgen override - missing import of Set -from typing import Any, ClassVar, Set, Optional, Callable -from io import StringIO, TextIOBase -from pathlib import Path - -from typing import overload, Dict, List, Union, Tuple -import pandas - -# stubgen override - unfortunately we need this for version checks -import sys -import fsspec -import pyarrow.lib -import polars - -# stubgen override - This should probably not be exposed -apilevel: str -comment: token_type -identifier: token_type -keyword: token_type -numeric_const: token_type -operator: token_type -paramstyle: str -string_const: token_type -threadsafety: int -__standard_vector_size__: int -STANDARD: ExplainType -ANALYZE: ExplainType -DEFAULT: PythonExceptionHandling -RETURN_NULL: PythonExceptionHandling -ROWS: RenderMode -COLUMNS: RenderMode - -__version__: str - -__interactive__: bool -__jupyter__: bool -__formatted_python_version__: str - -class BinderException(ProgrammingError): ... -class CatalogException(ProgrammingError): ... -class ConnectionException(OperationalError): ... -class ConstraintException(IntegrityError): ... -class ConversionException(DataError): ... -class DataError(Error): ... - -class ExplainType: - STANDARD: ExplainType - ANALYZE: ExplainType - def __int__(self) -> int: ... - def __index__(self) -> int: ... - @property - def __members__(self) -> Dict[str, ExplainType]: ... - @property - def name(self) -> str: ... - @property - def value(self) -> int: ... - -class RenderMode: - ROWS: RenderMode - COLUMNS: RenderMode - def __int__(self) -> int: ... - def __index__(self) -> int: ... - @property - def __members__(self) -> Dict[str, RenderMode]: ... - @property - def name(self) -> str: ... - @property - def value(self) -> int: ... - -class PythonExceptionHandling: - DEFAULT: PythonExceptionHandling - RETURN_NULL: PythonExceptionHandling - def __int__(self) -> int: ... - def __index__(self) -> int: ... - @property - def __members__(self) -> Dict[str, PythonExceptionHandling]: ... - @property - def name(self) -> str: ... - @property - def value(self) -> int: ... - -class CSVLineTerminator: - LINE_FEED: CSVLineTerminator - CARRIAGE_RETURN_LINE_FEED: CSVLineTerminator - def __int__(self) -> int: ... - def __index__(self) -> int: ... - @property - def __members__(self) -> Dict[str, CSVLineTerminator]: ... - @property - def name(self) -> str: ... - @property - def value(self) -> int: ... - -class ExpectedResultType: - QUERY_RESULT: ExpectedResultType - CHANGED_ROWS: ExpectedResultType - NOTHING: ExpectedResultType - def __int__(self) -> int: ... - def __index__(self) -> int: ... - @property - def __members__(self) -> Dict[str, ExpectedResultType]: ... - @property - def name(self) -> str: ... 
- @property - def value(self) -> int: ... - -class StatementType: - INVALID: StatementType - SELECT: StatementType - INSERT: StatementType - UPDATE: StatementType - CREATE: StatementType - DELETE: StatementType - PREPARE: StatementType - EXECUTE: StatementType - ALTER: StatementType - TRANSACTION: StatementType - COPY: StatementType - ANALYZE: StatementType - VARIABLE_SET: StatementType - CREATE_FUNC: StatementType - EXPLAIN: StatementType - DROP: StatementType - EXPORT: StatementType - PRAGMA: StatementType - VACUUM: StatementType - CALL: StatementType - SET: StatementType - LOAD: StatementType - RELATION: StatementType - EXTENSION: StatementType - LOGICAL_PLAN: StatementType - ATTACH: StatementType - DETACH: StatementType - MULTI: StatementType - COPY_DATABASE: StatementType - MERGE_INTO: StatementType - def __int__(self) -> int: ... - def __index__(self) -> int: ... - @property - def __members__(self) -> Dict[str, StatementType]: ... - @property - def name(self) -> str: ... - @property - def value(self) -> int: ... - -class Statement: - def __init__(self, *args, **kwargs) -> None: ... - @property - def query(self) -> str: ... - @property - def named_parameters(self) -> Set[str]: ... - @property - def expected_result_type(self) -> List[ExpectedResultType]: ... - @property - def type(self) -> StatementType: ... - -class Expression: - def __init__(self, *args, **kwargs) -> None: ... - def __neg__(self) -> "Expression": ... - def __add__(self, expr: "Expression") -> "Expression": ... - def __radd__(self, expr: "Expression") -> "Expression": ... - def __sub__(self, expr: "Expression") -> "Expression": ... - def __rsub__(self, expr: "Expression") -> "Expression": ... - def __mul__(self, expr: "Expression") -> "Expression": ... - def __rmul__(self, expr: "Expression") -> "Expression": ... - def __div__(self, expr: "Expression") -> "Expression": ... - def __rdiv__(self, expr: "Expression") -> "Expression": ... - def __truediv__(self, expr: "Expression") -> "Expression": ... - def __rtruediv__(self, expr: "Expression") -> "Expression": ... - def __floordiv__(self, expr: "Expression") -> "Expression": ... - def __rfloordiv__(self, expr: "Expression") -> "Expression": ... - def __mod__(self, expr: "Expression") -> "Expression": ... - def __rmod__(self, expr: "Expression") -> "Expression": ... - def __pow__(self, expr: "Expression") -> "Expression": ... - def __rpow__(self, expr: "Expression") -> "Expression": ... - def __and__(self, expr: "Expression") -> "Expression": ... - def __rand__(self, expr: "Expression") -> "Expression": ... - def __or__(self, expr: "Expression") -> "Expression": ... - def __ror__(self, expr: "Expression") -> "Expression": ... - def __invert__(self) -> "Expression": ... - def __eq__( # type: ignore[override] - self, expr: "Expression" - ) -> "Expression": ... - def __ne__( # type: ignore[override] - self, expr: "Expression" - ) -> "Expression": ... - def __gt__(self, expr: "Expression") -> "Expression": ... - def __ge__(self, expr: "Expression") -> "Expression": ... - def __lt__(self, expr: "Expression") -> "Expression": ... - def __le__(self, expr: "Expression") -> "Expression": ... - def show(self) -> None: ... - def __repr__(self) -> str: ... - def get_name(self) -> str: ... - def alias(self, alias: str) -> "Expression": ... - def when(self, condition: "Expression", value: "Expression") -> "Expression": ... - def otherwise(self, value: "Expression") -> "Expression": ... - def cast(self, type: DuckDBPyType) -> "Expression": ... 
- def between(self, lower: "Expression", upper: "Expression") -> "Expression": ... - def collate(self, collation: str) -> "Expression": ... - def asc(self) -> "Expression": ... - def desc(self) -> "Expression": ... - def nulls_first(self) -> "Expression": ... - def nulls_last(self) -> "Expression": ... - def isnull(self) -> "Expression": ... - def isnotnull(self) -> "Expression": ... - def isin(self, *cols: "Expression") -> "Expression": ... - def isnotin(self, *cols: "Expression") -> "Expression": ... - -def StarExpression(exclude: Optional[List[str]] = None) -> Expression: ... -def ColumnExpression(column: str) -> Expression: ... -def DefaultExpression() -> Expression: ... -def ConstantExpression(val: Any) -> Expression: ... -def CaseExpression(condition: Expression, value: Expression) -> Expression: ... -def FunctionExpression(function: str, *cols: Expression) -> Expression: ... -def CoalesceOperator(*cols: Expression) -> Expression: ... -def LambdaExpression(lhs: Union[Tuple["Expression", ...], str], rhs: Expression) -> Expression: ... -def SQLExpression(expr: str) -> Expression: ... - -class DuckDBPyConnection: - def __init__(self, *args, **kwargs) -> None: ... - def __enter__(self) -> DuckDBPyConnection: ... - def __exit__(self, exc_type: object, exc: object, traceback: object) -> None: ... - def __del__(self) -> None: ... - @property - def description(self) -> Optional[List[Any]]: ... - @property - def rowcount(self) -> int: ... - - # NOTE: this section is generated by tools/pythonpkg/scripts/generate_connection_stubs.py. - # Do not edit this section manually, your changes will be overwritten! - - # START OF CONNECTION METHODS - def cursor(self) -> DuckDBPyConnection: ... - def register_filesystem(self, filesystem: fsspec.AbstractFileSystem) -> None: ... - def unregister_filesystem(self, name: str) -> None: ... - def list_filesystems(self) -> list: ... - def filesystem_is_registered(self, name: str) -> bool: ... - def create_function( - self, - name: str, - function: function, - parameters: Optional[List[DuckDBPyType]] = None, - return_type: Optional[DuckDBPyType] = None, - *, - type: Optional[PythonUDFType] = PythonUDFType.NATIVE, - null_handling: Optional[FunctionNullHandling] = FunctionNullHandling.DEFAULT, - exception_handling: Optional[PythonExceptionHandling] = PythonExceptionHandling.DEFAULT, - side_effects: bool = False, - ) -> DuckDBPyConnection: ... - def remove_function(self, name: str) -> DuckDBPyConnection: ... - def sqltype(self, type_str: str) -> DuckDBPyType: ... - def dtype(self, type_str: str) -> DuckDBPyType: ... - def type(self, type_str: str) -> DuckDBPyType: ... - def array_type(self, type: DuckDBPyType, size: int) -> DuckDBPyType: ... - def list_type(self, type: DuckDBPyType) -> DuckDBPyType: ... - def union_type(self, members: DuckDBPyType) -> DuckDBPyType: ... - def string_type(self, collation: str = "") -> DuckDBPyType: ... - def enum_type(self, name: str, type: DuckDBPyType, values: List[Any]) -> DuckDBPyType: ... - def decimal_type(self, width: int, scale: int) -> DuckDBPyType: ... - def struct_type(self, fields: Union[Dict[str, DuckDBPyType], List[str]]) -> DuckDBPyType: ... - def row_type(self, fields: Union[Dict[str, DuckDBPyType], List[str]]) -> DuckDBPyType: ... - def map_type(self, key: DuckDBPyType, value: DuckDBPyType) -> DuckDBPyType: ... - def duplicate(self) -> DuckDBPyConnection: ... - def execute(self, query: object, parameters: object = None) -> DuckDBPyConnection: ... 
- def executemany(self, query: object, parameters: object = None) -> DuckDBPyConnection: ... - def close(self) -> None: ... - def interrupt(self) -> None: ... - def query_progress(self) -> float: ... - def fetchone(self) -> Optional[tuple]: ... - def fetchmany(self, size: int = 1) -> List[Any]: ... - def fetchall(self) -> List[Any]: ... - def fetchnumpy(self) -> dict: ... - def fetchdf(self, *, date_as_object: bool = False) -> pandas.DataFrame: ... - def fetch_df(self, *, date_as_object: bool = False) -> pandas.DataFrame: ... - def df(self, *, date_as_object: bool = False) -> pandas.DataFrame: ... - def fetch_df_chunk(self, vectors_per_chunk: int = 1, *, date_as_object: bool = False) -> pandas.DataFrame: ... - def pl(self, rows_per_batch: int = 1000000, *, lazy: bool = False) -> polars.DataFrame: ... - def fetch_arrow_table(self, rows_per_batch: int = 1000000) -> pyarrow.lib.Table: ... - def fetch_record_batch(self, rows_per_batch: int = 1000000) -> pyarrow.lib.RecordBatchReader: ... - def arrow(self, rows_per_batch: int = 1000000) -> pyarrow.lib.RecordBatchReader: ... - def torch(self) -> dict: ... - def tf(self) -> dict: ... - def begin(self) -> DuckDBPyConnection: ... - def commit(self) -> DuckDBPyConnection: ... - def rollback(self) -> DuckDBPyConnection: ... - def checkpoint(self) -> DuckDBPyConnection: ... - def append(self, table_name: str, df: pandas.DataFrame, *, by_name: bool = False) -> DuckDBPyConnection: ... - def register(self, view_name: str, python_object: object) -> DuckDBPyConnection: ... - def unregister(self, view_name: str) -> DuckDBPyConnection: ... - def table(self, table_name: str) -> DuckDBPyRelation: ... - def view(self, view_name: str) -> DuckDBPyRelation: ... - def values(self, *args: Union[List[Any], Expression, Tuple[Expression]]) -> DuckDBPyRelation: ... - def table_function(self, name: str, parameters: object = None) -> DuckDBPyRelation: ... - def read_json( - self, - path_or_buffer: Union[str, StringIO, TextIOBase], - *, - columns: Optional[Dict[str, str]] = None, - sample_size: Optional[int] = None, - maximum_depth: Optional[int] = None, - records: Optional[str] = None, - format: Optional[str] = None, - date_format: Optional[str] = None, - timestamp_format: Optional[str] = None, - compression: Optional[str] = None, - maximum_object_size: Optional[int] = None, - ignore_errors: Optional[bool] = None, - convert_strings_to_integers: Optional[bool] = None, - field_appearance_threshold: Optional[float] = None, - map_inference_threshold: Optional[int] = None, - maximum_sample_files: Optional[int] = None, - filename: Optional[Union[bool, str]] = None, - hive_partitioning: Optional[bool] = None, - union_by_name: Optional[bool] = None, - hive_types: Optional[Dict[str, str]] = None, - hive_types_autocast: Optional[bool] = None, - ) -> DuckDBPyRelation: ... - def extract_statements(self, query: str) -> List[Statement]: ... - def sql(self, query: str, *, alias: str = "", params: object = None) -> DuckDBPyRelation: ... - def query(self, query: str, *, alias: str = "", params: object = None) -> DuckDBPyRelation: ... - def from_query(self, query: str, *, alias: str = "", params: object = None) -> DuckDBPyRelation: ... 
- def read_csv( - self, - path_or_buffer: Union[str, StringIO, TextIOBase], - *, - header: Optional[Union[bool, int]] = None, - compression: Optional[str] = None, - sep: Optional[str] = None, - delimiter: Optional[str] = None, - dtype: Optional[Union[Dict[str, str], List[str]]] = None, - na_values: Optional[Union[str, List[str]]] = None, - skiprows: Optional[int] = None, - quotechar: Optional[str] = None, - escapechar: Optional[str] = None, - encoding: Optional[str] = None, - parallel: Optional[bool] = None, - date_format: Optional[str] = None, - timestamp_format: Optional[str] = None, - sample_size: Optional[int] = None, - all_varchar: Optional[bool] = None, - normalize_names: Optional[bool] = None, - null_padding: Optional[bool] = None, - names: Optional[List[str]] = None, - lineterminator: Optional[str] = None, - columns: Optional[Dict[str, str]] = None, - auto_type_candidates: Optional[List[str]] = None, - max_line_size: Optional[int] = None, - ignore_errors: Optional[bool] = None, - store_rejects: Optional[bool] = None, - rejects_table: Optional[str] = None, - rejects_scan: Optional[str] = None, - rejects_limit: Optional[int] = None, - force_not_null: Optional[List[str]] = None, - buffer_size: Optional[int] = None, - decimal: Optional[str] = None, - allow_quoted_nulls: Optional[bool] = None, - filename: Optional[Union[bool, str]] = None, - hive_partitioning: Optional[bool] = None, - union_by_name: Optional[bool] = None, - hive_types: Optional[Dict[str, str]] = None, - hive_types_autocast: Optional[bool] = None, - ) -> DuckDBPyRelation: ... - def from_csv_auto( - self, - path_or_buffer: Union[str, StringIO, TextIOBase], - *, - header: Optional[Union[bool, int]] = None, - compression: Optional[str] = None, - sep: Optional[str] = None, - delimiter: Optional[str] = None, - dtype: Optional[Union[Dict[str, str], List[str]]] = None, - na_values: Optional[Union[str, List[str]]] = None, - skiprows: Optional[int] = None, - quotechar: Optional[str] = None, - escapechar: Optional[str] = None, - encoding: Optional[str] = None, - parallel: Optional[bool] = None, - date_format: Optional[str] = None, - timestamp_format: Optional[str] = None, - sample_size: Optional[int] = None, - all_varchar: Optional[bool] = None, - normalize_names: Optional[bool] = None, - null_padding: Optional[bool] = None, - names: Optional[List[str]] = None, - lineterminator: Optional[str] = None, - columns: Optional[Dict[str, str]] = None, - auto_type_candidates: Optional[List[str]] = None, - max_line_size: Optional[int] = None, - ignore_errors: Optional[bool] = None, - store_rejects: Optional[bool] = None, - rejects_table: Optional[str] = None, - rejects_scan: Optional[str] = None, - rejects_limit: Optional[int] = None, - force_not_null: Optional[List[str]] = None, - buffer_size: Optional[int] = None, - decimal: Optional[str] = None, - allow_quoted_nulls: Optional[bool] = None, - filename: Optional[Union[bool, str]] = None, - hive_partitioning: Optional[bool] = None, - union_by_name: Optional[bool] = None, - hive_types: Optional[Dict[str, str]] = None, - hive_types_autocast: Optional[bool] = None, - ) -> DuckDBPyRelation: ... - def from_df(self, df: pandas.DataFrame) -> DuckDBPyRelation: ... - def from_arrow(self, arrow_object: object) -> DuckDBPyRelation: ... 
- def from_parquet( - self, - file_glob: str, - binary_as_string: bool = False, - *, - file_row_number: bool = False, - filename: bool = False, - hive_partitioning: bool = False, - union_by_name: bool = False, - compression: Optional[str] = None, - ) -> DuckDBPyRelation: ... - def read_parquet( - self, - file_glob: str, - binary_as_string: bool = False, - *, - file_row_number: bool = False, - filename: bool = False, - hive_partitioning: bool = False, - union_by_name: bool = False, - compression: Optional[str] = None, - ) -> DuckDBPyRelation: ... - def get_table_names(self, query: str, *, qualified: bool = False) -> Set[str]: ... - def install_extension( - self, - extension: str, - *, - force_install: bool = False, - repository: Optional[str] = None, - repository_url: Optional[str] = None, - version: Optional[str] = None, - ) -> None: ... - def load_extension(self, extension: str) -> None: ... - # END OF CONNECTION METHODS - -class DuckDBPyRelation: - def close(self) -> None: ... - def __getattr__(self, name: str) -> DuckDBPyRelation: ... - def __getitem__(self, name: str) -> DuckDBPyRelation: ... - def __init__(self, *args, **kwargs) -> None: ... - def __contains__(self, name: str) -> bool: ... - def aggregate(self, aggr_expr: str, group_expr: str = ...) -> DuckDBPyRelation: ... - def apply( - self, - function_name: str, - function_aggr: str, - group_expr: str = ..., - function_parameter: str = ..., - projected_columns: str = ..., - ) -> DuckDBPyRelation: ... - def cume_dist(self, window_spec: str, projected_columns: str = ...) -> DuckDBPyRelation: ... - def dense_rank(self, window_spec: str, projected_columns: str = ...) -> DuckDBPyRelation: ... - def percent_rank(self, window_spec: str, projected_columns: str = ...) -> DuckDBPyRelation: ... - def rank(self, window_spec: str, projected_columns: str = ...) -> DuckDBPyRelation: ... - def rank_dense(self, window_spec: str, projected_columns: str = ...) -> DuckDBPyRelation: ... - def row_number(self, window_spec: str, projected_columns: str = ...) -> DuckDBPyRelation: ... - def lag( - self, - column: str, - window_spec: str, - offset: int, - default_value: str, - ignore_nulls: bool, - projected_columns: str = ..., - ) -> DuckDBPyRelation: ... - def lead( - self, - column: str, - window_spec: str, - offset: int, - default_value: str, - ignore_nulls: bool, - projected_columns: str = ..., - ) -> DuckDBPyRelation: ... - def nth_value( - self, column: str, window_spec: str, offset: int, ignore_nulls: bool = ..., projected_columns: str = ... - ) -> DuckDBPyRelation: ... - def value_counts(self, column: str, groups: str = ...) -> DuckDBPyRelation: ... - def geomean(self, column: str, groups: str = ..., projected_columns: str = ...) -> DuckDBPyRelation: ... - def first(self, column: str, groups: str = ..., projected_columns: str = ...) -> DuckDBPyRelation: ... - def first_value(self, column: str, window_spec: str = ..., projected_columns: str = ...) -> DuckDBPyRelation: ... - def last(self, column: str, groups: str = ..., projected_columns: str = ...) -> DuckDBPyRelation: ... - def last_value(self, column: str, window_spec: str = ..., projected_columns: str = ...) -> DuckDBPyRelation: ... - def mode(self, aggregation_columns: str, group_columns: str = ...) -> DuckDBPyRelation: ... - def n_tile(self, window_spec: str, num_buckets: int, projected_columns: str = ...) -> DuckDBPyRelation: ... - def quantile_cont( - self, column: str, q: Union[float, List[float]] = ..., groups: str = ..., projected_columns: str = ... - ) -> DuckDBPyRelation: ... 
- def quantile_disc( - self, column: str, q: Union[float, List[float]] = ..., groups: str = ..., projected_columns: str = ... - ) -> DuckDBPyRelation: ... - def sum(self, sum_aggr: str, group_expr: str = ...) -> DuckDBPyRelation: ... - def any_value( - self, column: str, groups: str = ..., window_spec: str = ..., projected_columns: str = ... - ) -> DuckDBPyRelation: ... - def arg_max( - self, - arg_column: str, - value_column: str, - groups: str = ..., - window_spec: str = ..., - projected_columns: str = ..., - ) -> DuckDBPyRelation: ... - def arg_min( - self, - arg_column: str, - value_column: str, - groups: str = ..., - window_spec: str = ..., - projected_columns: str = ..., - ) -> DuckDBPyRelation: ... - def avg( - self, column: str, groups: str = ..., window_spec: str = ..., projected_columns: str = ... - ) -> DuckDBPyRelation: ... - def bit_and( - self, column: str, groups: str = ..., window_spec: str = ..., projected_columns: str = ... - ) -> DuckDBPyRelation: ... - def bit_or( - self, column: str, groups: str = ..., window_spec: str = ..., projected_columns: str = ... - ) -> DuckDBPyRelation: ... - def bit_xor( - self, column: str, groups: str = ..., window_spec: str = ..., projected_columns: str = ... - ) -> DuckDBPyRelation: ... - def bitstring_agg( - self, - column: str, - min: Optional[int], - max: Optional[int], - groups: str = ..., - window_spec: str = ..., - projected_columns: str = ..., - ) -> DuckDBPyRelation: ... - def bool_and( - self, column: str, groups: str = ..., window_spec: str = ..., projected_columns: str = ... - ) -> DuckDBPyRelation: ... - def bool_or( - self, column: str, groups: str = ..., window_spec: str = ..., projected_columns: str = ... - ) -> DuckDBPyRelation: ... - def count( - self, column: str, groups: str = ..., window_spec: str = ..., projected_columns: str = ... - ) -> DuckDBPyRelation: ... - def favg( - self, column: str, groups: str = ..., window_spec: str = ..., projected_columns: str = ... - ) -> DuckDBPyRelation: ... - def fsum( - self, column: str, groups: str = ..., window_spec: str = ..., projected_columns: str = ... - ) -> DuckDBPyRelation: ... - def histogram( - self, column: str, groups: str = ..., window_spec: str = ..., projected_columns: str = ... - ) -> DuckDBPyRelation: ... - def max( - self, max_aggr: str, groups: str = ..., window_spec: str = ..., projected_columns: str = ... - ) -> DuckDBPyRelation: ... - def min( - self, min_aggr: str, groups: str = ..., window_spec: str = ..., projected_columns: str = ... - ) -> DuckDBPyRelation: ... - def mean( - self, mean_aggr: str, groups: str = ..., window_spec: str = ..., projected_columns: str = ... - ) -> DuckDBPyRelation: ... - def median( - self, median_aggr: str, groups: str = ..., window_spec: str = ..., projected_columns: str = ... - ) -> DuckDBPyRelation: ... - def product( - self, column: str, groups: str = ..., window_spec: str = ..., projected_columns: str = ... - ) -> DuckDBPyRelation: ... - def quantile( - self, q: str, quantile_aggr: str, groups: str = ..., window_spec: str = ..., projected_columns: str = ... - ) -> DuckDBPyRelation: ... - def std( - self, column: str, groups: str = ..., window_spec: str = ..., projected_columns: str = ... - ) -> DuckDBPyRelation: ... - def stddev( - self, column: str, groups: str = ..., window_spec: str = ..., projected_columns: str = ... - ) -> DuckDBPyRelation: ... - def stddev_pop( - self, column: str, groups: str = ..., window_spec: str = ..., projected_columns: str = ... - ) -> DuckDBPyRelation: ... 
- def stddev_samp( - self, column: str, groups: str = ..., window_spec: str = ..., projected_columns: str = ... - ) -> DuckDBPyRelation: ... - def string_agg( - self, column: str, sep: str = ..., groups: str = ..., window_spec: str = ..., projected_columns: str = ... - ) -> DuckDBPyRelation: ... - def var( - self, column: str, groups: str = ..., window_spec: str = ..., projected_columns: str = ... - ) -> DuckDBPyRelation: ... - def var_pop( - self, column: str, groups: str = ..., window_spec: str = ..., projected_columns: str = ... - ) -> DuckDBPyRelation: ... - def var_samp( - self, column: str, groups: str = ..., window_spec: str = ..., projected_columns: str = ... - ) -> DuckDBPyRelation: ... - def variance( - self, column: str, groups: str = ..., window_spec: str = ..., projected_columns: str = ... - ) -> DuckDBPyRelation: ... - def list( - self, column: str, groups: str = ..., window_spec: str = ..., projected_columns: str = ... - ) -> DuckDBPyRelation: ... - def arrow(self, batch_size: int = ...) -> pyarrow.lib.RecordBatchReader: ... - def __arrow_c_stream__(self, requested_schema: Optional[object] = None) -> object: ... - def create(self, table_name: str) -> None: ... - def create_view(self, view_name: str, replace: bool = ...) -> DuckDBPyRelation: ... - def describe(self) -> DuckDBPyRelation: ... - def df(self, *args, **kwargs) -> pandas.DataFrame: ... - def distinct(self) -> DuckDBPyRelation: ... - def except_(self, other_rel: DuckDBPyRelation) -> DuckDBPyRelation: ... - def execute(self, *args, **kwargs) -> DuckDBPyRelation: ... - def explain(self, type: Optional[Union[Literal["standard", "analyze"], int]] = "standard") -> str: ... - def fetchall(self) -> List[Any]: ... - def fetchmany(self, size: int = ...) -> List[Any]: ... - def fetchnumpy(self) -> dict: ... - def fetchone(self) -> Optional[tuple]: ... - def fetchdf(self, *args, **kwargs) -> Any: ... - def fetch_arrow_reader(self, batch_size: int = ...) -> pyarrow.lib.RecordBatchReader: ... - def fetch_arrow_table(self, rows_per_batch: int = ...) -> pyarrow.lib.Table: ... - def filter(self, filter_expr: Union[Expression, str]) -> DuckDBPyRelation: ... - def insert(self, values: List[Any]) -> None: ... - def update(self, set: Dict[str, Expression], condition: Optional[Expression] = None) -> None: ... - def insert_into(self, table_name: str) -> None: ... - def intersect(self, other_rel: DuckDBPyRelation) -> DuckDBPyRelation: ... - def join( - self, other_rel: DuckDBPyRelation, condition: Union[str, Expression], how: str = ... - ) -> DuckDBPyRelation: ... - def cross(self, other_rel: DuckDBPyRelation) -> DuckDBPyRelation: ... - def limit(self, n: int, offset: int = ...) -> DuckDBPyRelation: ... - def map(self, map_function: function, schema: Optional[Dict[str, DuckDBPyType]] = None) -> DuckDBPyRelation: ... - def order(self, order_expr: str) -> DuckDBPyRelation: ... - def sort(self, *cols: Expression) -> DuckDBPyRelation: ... - def project(self, *cols: Union[str, Expression]) -> DuckDBPyRelation: ... - def select(self, *cols: Union[str, Expression]) -> DuckDBPyRelation: ... - def pl(self, rows_per_batch: int = ..., connection: DuckDBPyConnection = ...) -> polars.DataFrame: ... - def query(self, virtual_table_name: str, sql_query: str) -> DuckDBPyRelation: ... - def record_batch(self, batch_size: int = ...) -> pyarrow.lib.RecordBatchReader: ... - def fetch_record_batch( - self, rows_per_batch: int = 1000000, *, connection: DuckDBPyConnection = ... - ) -> pyarrow.lib.RecordBatchReader: ... 
- def select_types(self, types: List[Union[str, DuckDBPyType]]) -> DuckDBPyRelation: ... - def select_dtypes(self, types: List[Union[str, DuckDBPyType]]) -> DuckDBPyRelation: ... - def set_alias(self, alias: str) -> DuckDBPyRelation: ... - def show( - self, - max_width: Optional[int] = None, - max_rows: Optional[int] = None, - max_col_width: Optional[int] = None, - null_value: Optional[str] = None, - render_mode: Optional[RenderMode] = None, - ) -> None: ... - def sql_query(self) -> str: ... - def to_arrow_table(self, batch_size: int = ...) -> pyarrow.lib.Table: ... - def to_csv( - self, - file_name: str, - sep: Optional[str] = None, - na_rep: Optional[str] = None, - header: Optional[bool] = None, - quotechar: Optional[str] = None, - escapechar: Optional[str] = None, - date_format: Optional[str] = None, - timestamp_format: Optional[str] = None, - quoting: Optional[Union[str, int]] = None, - encoding: Optional[str] = None, - compression: Optional[str] = None, - write_partition_columns: Optional[bool] = None, - overwrite: Optional[bool] = None, - per_thread_output: Optional[bool] = None, - use_tmp_file: Optional[bool] = None, - partition_by: Optional[List[str]] = None, - ) -> None: ... - def to_df(self, *args, **kwargs) -> pandas.DataFrame: ... - def to_parquet( - self, - file_name: str, - compression: Optional[str] = None, - field_ids: Optional[Union[dict, str]] = None, - row_group_size_bytes: Optional[Union[int, str]] = None, - row_group_size: Optional[int] = None, - partition_by: Optional[List[str]] = None, - write_partition_columns: Optional[bool] = None, - overwrite: Optional[bool] = None, - per_thread_output: Optional[bool] = None, - use_tmp_file: Optional[bool] = None, - append: Optional[bool] = None, - ) -> None: ... - def fetch_df_chunk(self, vectors_per_chunk: int = 1, *, date_as_object: bool = False) -> pandas.DataFrame: ... - def to_table(self, table_name: str) -> None: ... - def to_view(self, view_name: str, replace: bool = ...) -> DuckDBPyRelation: ... - def torch(self, connection: DuckDBPyConnection = ...) -> dict: ... - def tf(self, connection: DuckDBPyConnection = ...) -> dict: ... - def union(self, union_rel: DuckDBPyRelation) -> DuckDBPyRelation: ... - def unique(self, unique_aggr: str) -> DuckDBPyRelation: ... - def write_csv( - self, - file_name: str, - sep: Optional[str] = None, - na_rep: Optional[str] = None, - header: Optional[bool] = None, - quotechar: Optional[str] = None, - escapechar: Optional[str] = None, - date_format: Optional[str] = None, - timestamp_format: Optional[str] = None, - quoting: Optional[Union[str, int]] = None, - encoding: Optional[str] = None, - compression: Optional[str] = None, - write_partition_columns: Optional[bool] = None, - overwrite: Optional[bool] = None, - per_thread_output: Optional[bool] = None, - use_tmp_file: Optional[bool] = None, - partition_by: Optional[List[str]] = None, - ) -> None: ... - def write_parquet( - self, - file_name: str, - compression: Optional[str] = None, - field_ids: Optional[Union[dict, str]] = None, - row_group_size_bytes: Optional[Union[int, str]] = None, - row_group_size: Optional[int] = None, - partition_by: Optional[List[str]] = None, - write_partition_columns: Optional[bool] = None, - overwrite: Optional[bool] = None, - per_thread_output: Optional[bool] = None, - use_tmp_file: Optional[bool] = None, - append: Optional[bool] = None, - ) -> None: ... - def __len__(self) -> int: ... - @property - def alias(self) -> str: ... - @property - def columns(self) -> List[str]: ... 
- @property - def dtypes(self) -> List[DuckDBPyType]: ... - @property - def description(self) -> List[Any]: ... - @property - def shape(self) -> tuple[int, int]: ... - @property - def type(self) -> str: ... - @property - def types(self) -> List[DuckDBPyType]: ... - -class Error(Exception): ... -class FatalException(Error): ... - -class HTTPException(IOException): - status_code: int - body: str - reason: str - headers: Dict[str, str] - -class IOException(OperationalError): ... -class IntegrityError(Error): ... -class InternalError(Error): ... -class InternalException(InternalError): ... -class InterruptException(Error): ... -class InvalidInputException(ProgrammingError): ... -class InvalidTypeException(ProgrammingError): ... -class NotImplementedException(NotSupportedError): ... -class NotSupportedError(Error): ... -class OperationalError(Error): ... -class OutOfMemoryException(OperationalError): ... -class OutOfRangeException(DataError): ... -class ParserException(ProgrammingError): ... -class PermissionException(Error): ... -class ProgrammingError(Error): ... -class SequenceException(Error): ... -class SerializationException(OperationalError): ... -class SyntaxException(ProgrammingError): ... -class TransactionException(OperationalError): ... -class TypeMismatchException(DataError): ... -class Warning(Exception): ... - -class token_type: - # stubgen override - these make mypy sad - # __doc__: ClassVar[str] = ... # read-only - # __members__: ClassVar[dict] = ... # read-only - __entries: ClassVar[dict] = ... - comment: ClassVar[token_type] = ... - identifier: ClassVar[token_type] = ... - keyword: ClassVar[token_type] = ... - numeric_const: ClassVar[token_type] = ... - operator: ClassVar[token_type] = ... - string_const: ClassVar[token_type] = ... - def __init__(self, value: int) -> None: ... - def __eq__(self, other: object) -> bool: ... - def __getstate__(self) -> int: ... - def __hash__(self) -> int: ... - # stubgen override - pybind only puts index in python >= 3.8: https://github.com/EricCousineau-TRI/pybind11/blob/54430436/include/pybind11/pybind11.h#L1789 - if sys.version_info >= (3, 7): - def __index__(self) -> int: ... - def __int__(self) -> int: ... - def __ne__(self, other: object) -> bool: ... - def __setstate__(self, state: int) -> None: ... - @property - def name(self) -> str: ... - @property - def value(self) -> int: ... - @property - # stubgen override - this gets removed by stubgen but it shouldn't - def __members__(self) -> object: ... - -def connect(database: Union[str, Path] = ..., read_only: bool = ..., config: dict = ...) -> DuckDBPyConnection: ... -def default_connection() -> DuckDBPyConnection: ... -def set_default_connection(connection: DuckDBPyConnection) -> None: ... -def tokenize(query: str) -> List[Any]: ... - -# NOTE: this section is generated by tools/pythonpkg/scripts/generate_connection_wrapper_stubs.py. -# Do not edit this section manually, your changes will be overwritten! - -# START OF CONNECTION WRAPPER -def cursor(*, connection: DuckDBPyConnection = ...) -> DuckDBPyConnection: ... -def register_filesystem(filesystem: fsspec.AbstractFileSystem, *, connection: DuckDBPyConnection = ...) -> None: ... -def unregister_filesystem(name: str, *, connection: DuckDBPyConnection = ...) -> None: ... -def list_filesystems(*, connection: DuckDBPyConnection = ...) -> list: ... -def filesystem_is_registered(name: str, *, connection: DuckDBPyConnection = ...) -> bool: ... 
-def create_function( - name: str, - function: function, - parameters: Optional[List[DuckDBPyType]] = None, - return_type: Optional[DuckDBPyType] = None, - *, - type: Optional[PythonUDFType] = PythonUDFType.NATIVE, - null_handling: Optional[FunctionNullHandling] = FunctionNullHandling.DEFAULT, - exception_handling: Optional[PythonExceptionHandling] = PythonExceptionHandling.DEFAULT, - side_effects: bool = False, - connection: DuckDBPyConnection = ..., -) -> DuckDBPyConnection: ... -def remove_function(name: str, *, connection: DuckDBPyConnection = ...) -> DuckDBPyConnection: ... -def sqltype(type_str: str, *, connection: DuckDBPyConnection = ...) -> DuckDBPyType: ... -def dtype(type_str: str, *, connection: DuckDBPyConnection = ...) -> DuckDBPyType: ... -def type(type_str: str, *, connection: DuckDBPyConnection = ...) -> DuckDBPyType: ... -def array_type(type: DuckDBPyType, size: int, *, connection: DuckDBPyConnection = ...) -> DuckDBPyType: ... -def list_type(type: DuckDBPyType, *, connection: DuckDBPyConnection = ...) -> DuckDBPyType: ... -def union_type(members: DuckDBPyType, *, connection: DuckDBPyConnection = ...) -> DuckDBPyType: ... -def string_type(collation: str = "", *, connection: DuckDBPyConnection = ...) -> DuckDBPyType: ... -def enum_type( - name: str, type: DuckDBPyType, values: List[Any], *, connection: DuckDBPyConnection = ... -) -> DuckDBPyType: ... -def decimal_type(width: int, scale: int, *, connection: DuckDBPyConnection = ...) -> DuckDBPyType: ... -def struct_type( - fields: Union[Dict[str, DuckDBPyType], List[str]], *, connection: DuckDBPyConnection = ... -) -> DuckDBPyType: ... -def row_type( - fields: Union[Dict[str, DuckDBPyType], List[str]], *, connection: DuckDBPyConnection = ... -) -> DuckDBPyType: ... -def map_type(key: DuckDBPyType, value: DuckDBPyType, *, connection: DuckDBPyConnection = ...) -> DuckDBPyType: ... -def duplicate(*, connection: DuckDBPyConnection = ...) -> DuckDBPyConnection: ... -def execute( - query: object, parameters: object = None, *, connection: DuckDBPyConnection = ... -) -> DuckDBPyConnection: ... -def executemany( - query: object, parameters: object = None, *, connection: DuckDBPyConnection = ... -) -> DuckDBPyConnection: ... -def close(*, connection: DuckDBPyConnection = ...) -> None: ... -def interrupt(*, connection: DuckDBPyConnection = ...) -> None: ... -def query_progress(*, connection: DuckDBPyConnection = ...) -> float: ... -def fetchone(*, connection: DuckDBPyConnection = ...) -> Optional[tuple]: ... -def fetchmany(size: int = 1, *, connection: DuckDBPyConnection = ...) -> List[Any]: ... -def fetchall(*, connection: DuckDBPyConnection = ...) -> List[Any]: ... -def fetchnumpy(*, connection: DuckDBPyConnection = ...) -> dict: ... -def fetchdf(*, date_as_object: bool = False, connection: DuckDBPyConnection = ...) -> pandas.DataFrame: ... -def fetch_df(*, date_as_object: bool = False, connection: DuckDBPyConnection = ...) -> pandas.DataFrame: ... -def df(*, date_as_object: bool = False, connection: DuckDBPyConnection = ...) -> pandas.DataFrame: ... -def fetch_df_chunk( - vectors_per_chunk: int = 1, *, date_as_object: bool = False, connection: DuckDBPyConnection = ... -) -> pandas.DataFrame: ... -def pl( - rows_per_batch: int = 1000000, *, lazy: bool = False, connection: DuckDBPyConnection = ... -) -> polars.DataFrame: ... -def fetch_arrow_table(rows_per_batch: int = 1000000, *, connection: DuckDBPyConnection = ...) -> pyarrow.lib.Table: ... 
-def fetch_record_batch( - rows_per_batch: int = 1000000, *, connection: DuckDBPyConnection = ... -) -> pyarrow.lib.RecordBatchReader: ... -def arrow(rows_per_batch: int = 1000000, *, connection: DuckDBPyConnection = ...) -> pyarrow.lib.RecordBatchReader: ... -def torch(*, connection: DuckDBPyConnection = ...) -> dict: ... -def tf(*, connection: DuckDBPyConnection = ...) -> dict: ... -def begin(*, connection: DuckDBPyConnection = ...) -> DuckDBPyConnection: ... -def commit(*, connection: DuckDBPyConnection = ...) -> DuckDBPyConnection: ... -def rollback(*, connection: DuckDBPyConnection = ...) -> DuckDBPyConnection: ... -def checkpoint(*, connection: DuckDBPyConnection = ...) -> DuckDBPyConnection: ... -def append( - table_name: str, df: pandas.DataFrame, *, by_name: bool = False, connection: DuckDBPyConnection = ... -) -> DuckDBPyConnection: ... -def register(view_name: str, python_object: object, *, connection: DuckDBPyConnection = ...) -> DuckDBPyConnection: ... -def unregister(view_name: str, *, connection: DuckDBPyConnection = ...) -> DuckDBPyConnection: ... -def table(table_name: str, *, connection: DuckDBPyConnection = ...) -> DuckDBPyRelation: ... -def view(view_name: str, *, connection: DuckDBPyConnection = ...) -> DuckDBPyRelation: ... -def values( - *args: Union[List[Any], Expression, Tuple[Expression]], connection: DuckDBPyConnection = ... -) -> DuckDBPyRelation: ... -def table_function( - name: str, parameters: object = None, *, connection: DuckDBPyConnection = ... -) -> DuckDBPyRelation: ... -def read_json( - path_or_buffer: Union[str, StringIO, TextIOBase], - *, - columns: Optional[Dict[str, str]] = None, - sample_size: Optional[int] = None, - maximum_depth: Optional[int] = None, - records: Optional[str] = None, - format: Optional[str] = None, - date_format: Optional[str] = None, - timestamp_format: Optional[str] = None, - compression: Optional[str] = None, - maximum_object_size: Optional[int] = None, - ignore_errors: Optional[bool] = None, - convert_strings_to_integers: Optional[bool] = None, - field_appearance_threshold: Optional[float] = None, - map_inference_threshold: Optional[int] = None, - maximum_sample_files: Optional[int] = None, - filename: Optional[Union[bool, str]] = None, - hive_partitioning: Optional[bool] = None, - union_by_name: Optional[bool] = None, - hive_types: Optional[Dict[str, str]] = None, - hive_types_autocast: Optional[bool] = None, - connection: DuckDBPyConnection = ..., -) -> DuckDBPyRelation: ... -def extract_statements(query: str, *, connection: DuckDBPyConnection = ...) -> List[Statement]: ... -def sql( - query: str, *, alias: str = "", params: object = None, connection: DuckDBPyConnection = ... -) -> DuckDBPyRelation: ... -def query( - query: str, *, alias: str = "", params: object = None, connection: DuckDBPyConnection = ... -) -> DuckDBPyRelation: ... -def from_query( - query: str, *, alias: str = "", params: object = None, connection: DuckDBPyConnection = ... -) -> DuckDBPyRelation: ... 
-def read_csv( - path_or_buffer: Union[str, StringIO, TextIOBase], - *, - header: Optional[Union[bool, int]] = None, - compression: Optional[str] = None, - sep: Optional[str] = None, - delimiter: Optional[str] = None, - dtype: Optional[Union[Dict[str, str], List[str]]] = None, - na_values: Optional[Union[str, List[str]]] = None, - skiprows: Optional[int] = None, - quotechar: Optional[str] = None, - escapechar: Optional[str] = None, - encoding: Optional[str] = None, - parallel: Optional[bool] = None, - date_format: Optional[str] = None, - timestamp_format: Optional[str] = None, - sample_size: Optional[int] = None, - all_varchar: Optional[bool] = None, - normalize_names: Optional[bool] = None, - null_padding: Optional[bool] = None, - names: Optional[List[str]] = None, - lineterminator: Optional[str] = None, - columns: Optional[Dict[str, str]] = None, - auto_type_candidates: Optional[List[str]] = None, - max_line_size: Optional[int] = None, - ignore_errors: Optional[bool] = None, - store_rejects: Optional[bool] = None, - rejects_table: Optional[str] = None, - rejects_scan: Optional[str] = None, - rejects_limit: Optional[int] = None, - force_not_null: Optional[List[str]] = None, - buffer_size: Optional[int] = None, - decimal: Optional[str] = None, - allow_quoted_nulls: Optional[bool] = None, - filename: Optional[Union[bool, str]] = None, - hive_partitioning: Optional[bool] = None, - union_by_name: Optional[bool] = None, - hive_types: Optional[Dict[str, str]] = None, - hive_types_autocast: Optional[bool] = None, - connection: DuckDBPyConnection = ..., -) -> DuckDBPyRelation: ... -def from_csv_auto( - path_or_buffer: Union[str, StringIO, TextIOBase], - *, - header: Optional[Union[bool, int]] = None, - compression: Optional[str] = None, - sep: Optional[str] = None, - delimiter: Optional[str] = None, - dtype: Optional[Union[Dict[str, str], List[str]]] = None, - na_values: Optional[Union[str, List[str]]] = None, - skiprows: Optional[int] = None, - quotechar: Optional[str] = None, - escapechar: Optional[str] = None, - encoding: Optional[str] = None, - parallel: Optional[bool] = None, - date_format: Optional[str] = None, - timestamp_format: Optional[str] = None, - sample_size: Optional[int] = None, - all_varchar: Optional[bool] = None, - normalize_names: Optional[bool] = None, - null_padding: Optional[bool] = None, - names: Optional[List[str]] = None, - lineterminator: Optional[str] = None, - columns: Optional[Dict[str, str]] = None, - auto_type_candidates: Optional[List[str]] = None, - max_line_size: Optional[int] = None, - ignore_errors: Optional[bool] = None, - store_rejects: Optional[bool] = None, - rejects_table: Optional[str] = None, - rejects_scan: Optional[str] = None, - rejects_limit: Optional[int] = None, - force_not_null: Optional[List[str]] = None, - buffer_size: Optional[int] = None, - decimal: Optional[str] = None, - allow_quoted_nulls: Optional[bool] = None, - filename: Optional[Union[bool, str]] = None, - hive_partitioning: Optional[bool] = None, - union_by_name: Optional[bool] = None, - hive_types: Optional[Dict[str, str]] = None, - hive_types_autocast: Optional[bool] = None, - connection: DuckDBPyConnection = ..., -) -> DuckDBPyRelation: ... -def from_df(df: pandas.DataFrame, *, connection: DuckDBPyConnection = ...) -> DuckDBPyRelation: ... -def from_arrow(arrow_object: object, *, connection: DuckDBPyConnection = ...) -> DuckDBPyRelation: ... 
-def from_parquet( - file_glob: str, - binary_as_string: bool = False, - *, - file_row_number: bool = False, - filename: bool = False, - hive_partitioning: bool = False, - union_by_name: bool = False, - compression: Optional[str] = None, - connection: DuckDBPyConnection = ..., -) -> DuckDBPyRelation: ... -def read_parquet( - file_glob: str, - binary_as_string: bool = False, - *, - file_row_number: bool = False, - filename: bool = False, - hive_partitioning: bool = False, - union_by_name: bool = False, - compression: Optional[str] = None, - connection: DuckDBPyConnection = ..., -) -> DuckDBPyRelation: ... -def get_table_names(query: str, *, qualified: bool = False, connection: DuckDBPyConnection = ...) -> Set[str]: ... -def install_extension( - extension: str, - *, - force_install: bool = False, - repository: Optional[str] = None, - repository_url: Optional[str] = None, - version: Optional[str] = None, - connection: DuckDBPyConnection = ..., -) -> None: ... -def load_extension(extension: str, *, connection: DuckDBPyConnection = ...) -> None: ... -def project( - df: pandas.DataFrame, *args: str, groups: str = "", connection: DuckDBPyConnection = ... -) -> DuckDBPyRelation: ... -def distinct(df: pandas.DataFrame, *, connection: DuckDBPyConnection = ...) -> DuckDBPyRelation: ... -def write_csv( - df: pandas.DataFrame, - filename: str, - *, - sep: Optional[str] = None, - na_rep: Optional[str] = None, - header: Optional[bool] = None, - quotechar: Optional[str] = None, - escapechar: Optional[str] = None, - date_format: Optional[str] = None, - timestamp_format: Optional[str] = None, - quoting: Optional[Union[str, int]] = None, - encoding: Optional[str] = None, - compression: Optional[str] = None, - overwrite: Optional[bool] = None, - per_thread_output: Optional[bool] = None, - use_tmp_file: Optional[bool] = None, - partition_by: Optional[List[str]] = None, - write_partition_columns: Optional[bool] = None, - connection: DuckDBPyConnection = ..., -) -> None: ... -def aggregate( - df: pandas.DataFrame, - aggr_expr: Union[str, List[Expression]], - group_expr: str = "", - *, - connection: DuckDBPyConnection = ..., -) -> DuckDBPyRelation: ... -def alias(df: pandas.DataFrame, alias: str, *, connection: DuckDBPyConnection = ...) -> DuckDBPyRelation: ... -def filter(df: pandas.DataFrame, filter_expr: str, *, connection: DuckDBPyConnection = ...) -> DuckDBPyRelation: ... -def limit( - df: pandas.DataFrame, n: int, offset: int = 0, *, connection: DuckDBPyConnection = ... -) -> DuckDBPyRelation: ... -def order(df: pandas.DataFrame, order_expr: str, *, connection: DuckDBPyConnection = ...) -> DuckDBPyRelation: ... -def query_df( - df: pandas.DataFrame, virtual_table_name: str, sql_query: str, *, connection: DuckDBPyConnection = ... -) -> DuckDBPyRelation: ... -def description(*, connection: DuckDBPyConnection = ...) -> Optional[List[Any]]: ... -def rowcount(*, connection: DuckDBPyConnection = ...) -> int: ... - -# END OF CONNECTION WRAPPER diff --git a/duckdb/_dbapi_type_object.py b/duckdb/_dbapi_type_object.py new file mode 100644 index 00000000..ed73760d --- /dev/null +++ b/duckdb/_dbapi_type_object.py @@ -0,0 +1,231 @@ +"""DuckDB DB API 2.0 Type Objects Module. + +This module provides DB API 2.0 compliant type objects for DuckDB, allowing applications +to check column types returned by queries against standard database API categories. 
+
+Example:
+    >>> import duckdb
+    >>>
+    >>> conn = duckdb.connect()
+    >>> cursor = conn.cursor()
+    >>> cursor.execute("SELECT 'hello' as text_col, 42 as num_col, CURRENT_DATE as date_col")
+    >>>
+    >>> # Check column types using DB API type objects
+    >>> for desc in cursor.description:
+    ...     col_name, col_type = desc[0], desc[1]
+    ...     if col_type == duckdb.STRING:
+    ...         print(f"{col_name} is a string type")
+    ...     elif col_type == duckdb.NUMBER:
+    ...         print(f"{col_name} is a numeric type")
+    ...     elif col_type == duckdb.DATETIME:
+    ...         print(f"{col_name} is a date/time type")
+
+See Also:
+    - PEP 249: https://peps.python.org/pep-0249/
+    - DuckDB Type System: https://duckdb.org/docs/sql/data_types/overview
+"""
+
+from duckdb import sqltypes
+
+
+class DBAPITypeObject:
+    """DB API 2.0 type object for categorizing database column types.
+
+    This class implements the type objects defined in PEP 249 (DB API 2.0).
+    It allows checking whether a specific DuckDB type belongs to a broader
+    category like STRING, NUMBER, DATETIME, etc.
+
+    The type object supports equality comparison with DuckDBPyType instances,
+    returning True if the type belongs to this category.
+
+    Args:
+        types: A list of DuckDBPyType instances that belong to this type category.
+
+    Example:
+        >>> string_types = DBAPITypeObject([sqltypes.VARCHAR])
+        >>> result = sqltypes.VARCHAR == string_types  # True
+        >>> result = sqltypes.INTEGER == string_types  # False
+
+    Note:
+        This follows the DB API 2.0 specification where type objects are compared
+        using equality operators rather than isinstance() checks.
+    """
+
+    def __init__(self, types: list[sqltypes.DuckDBPyType]) -> None:
+        """Initialize a DB API type object.
+
+        Args:
+            types: List of DuckDB types that belong to this category.
+        """
+        self.types = types
+
+    def __eq__(self, other: object) -> bool:
+        """Check if a DuckDB type belongs to this type category.
+
+        This method implements the DB API 2.0 type checking mechanism.
+        It returns True if the other object is a DuckDBPyType that
+        is contained in this type category.
+
+        Args:
+            other: The object to compare, typically a DuckDBPyType instance.
+
+        Returns:
+            True if other is a DuckDBPyType in this category, False otherwise.
+
+        Example:
+            >>> NUMBER == sqltypes.INTEGER  # True
+            >>> NUMBER == sqltypes.VARCHAR  # False
+        """
+        if isinstance(other, sqltypes.DuckDBPyType):
+            return other in self.types
+        return False
+
+    def __repr__(self) -> str:
+        """Return a string representation of this type object.
+
+        Returns:
+            A string showing the type object and its contained DuckDB types.
+
+        Example:
+            >>> repr(STRING)
+            '<DBAPITypeObject: [VARCHAR]>'
+        """
+        return f"<DBAPITypeObject: {self.types}>"
+
+
+# Define the standard DB API 2.0 type objects for DuckDB
+
+STRING = DBAPITypeObject([sqltypes.VARCHAR])
+"""
+STRING type object for text-based database columns.
+
+This type object represents all string/text types in DuckDB. Currently includes:
+- VARCHAR: Variable-length character strings
+
+Use this to check if a column contains textual data that should be handled
+as Python strings.
+ +DB API 2.0 Reference: + https://peps.python.org/pep-0249/#string + +Example: + >>> cursor.description[0][1] == STRING # Check if first column is text +""" + +NUMBER = DBAPITypeObject( + [ + sqltypes.TINYINT, + sqltypes.UTINYINT, + sqltypes.SMALLINT, + sqltypes.USMALLINT, + sqltypes.INTEGER, + sqltypes.UINTEGER, + sqltypes.BIGINT, + sqltypes.UBIGINT, + sqltypes.HUGEINT, + sqltypes.UHUGEINT, + sqltypes.DuckDBPyType("BIGNUM"), + sqltypes.DuckDBPyType("DECIMAL"), + sqltypes.FLOAT, + sqltypes.DOUBLE, + ] +) +""" +NUMBER type object for numeric database columns. + +This type object represents all numeric types in DuckDB, including: + +Integer Types: +- TINYINT, UTINYINT: 8-bit signed/unsigned integers +- SMALLINT, USMALLINT: 16-bit signed/unsigned integers +- INTEGER, UINTEGER: 32-bit signed/unsigned integers +- BIGINT, UBIGINT: 64-bit signed/unsigned integers +- HUGEINT, UHUGEINT: 128-bit signed/unsigned integers + +Decimal Types: +- BIGNUM: Arbitrary precision integers +- DECIMAL: Fixed-point decimal numbers + +Floating Point Types: +- FLOAT: 32-bit floating point +- DOUBLE: 64-bit floating point + +Use this to check if a column contains numeric data that should be handled +as Python int, float, or Decimal objects. + +DB API 2.0 Reference: + https://peps.python.org/pep-0249/#number + +Example: + >>> cursor.description[1][1] == NUMBER # Check if second column is numeric +""" + +DATETIME = DBAPITypeObject( + [ + sqltypes.DATE, + sqltypes.TIME, + sqltypes.TIME_TZ, + sqltypes.TIMESTAMP, + sqltypes.TIMESTAMP_TZ, + sqltypes.TIMESTAMP_NS, + sqltypes.TIMESTAMP_MS, + sqltypes.TIMESTAMP_S, + ] +) +""" +DATETIME type object for date and time database columns. + +This type object represents all date/time types in DuckDB, including: + +Date Types: +- DATE: Calendar dates (year, month, day) + +Time Types: +- TIME: Time of day without timezone +- TIME_TZ: Time of day with timezone + +Timestamp Types: +- TIMESTAMP: Date and time without timezone (microsecond precision) +- TIMESTAMP_TZ: Date and time with timezone +- TIMESTAMP_NS: Nanosecond precision timestamps +- TIMESTAMP_MS: Millisecond precision timestamps +- TIMESTAMP_S: Second precision timestamps + +Use this to check if a column contains temporal data that should be handled +as Python datetime, date, or time objects. + +DB API 2.0 Reference: + https://peps.python.org/pep-0249/#datetime + +Example: + >>> cursor.description[2][1] == DATETIME # Check if third column is date/time +""" + +BINARY = DBAPITypeObject([sqltypes.BLOB]) +""" +BINARY type object for binary data database columns. + +This type object represents binary data types in DuckDB: +- BLOB: Binary Large Objects for storing arbitrary binary data + +Use this to check if a column contains binary data that should be handled +as Python bytes objects. + +DB API 2.0 Reference: + https://peps.python.org/pep-0249/#binary + +Example: + >>> cursor.description[3][1] == BINARY # Check if fourth column is binary +""" + +ROWID = None +""" +ROWID type object for row identifier columns. + +DB API 2.0 Reference: + https://peps.python.org/pep-0249/#rowid + +Note: + This will always be None for DuckDB connections. Applications should not + rely on ROWID functionality when using DuckDB. 
+""" diff --git a/duckdb/_version.py b/duckdb/_version.py new file mode 100644 index 00000000..165bdef2 --- /dev/null +++ b/duckdb/_version.py @@ -0,0 +1,22 @@ +# ---------------------------------------------------------------------- +# Version API +# +# We provide three symbols: +# - duckdb.__version__: The version of this package +# - duckdb.__duckdb_version__: The version of duckdb that is bundled +# - duckdb.version(): A human-readable version string containing both of the above +# ---------------------------------------------------------------------- +from importlib.metadata import version as _dist_version + +import _duckdb + +__version__: str = _dist_version("duckdb") +"""Version of the DuckDB Python Package.""" + +__duckdb_version__: str = _duckdb.__version__ +"""Version of DuckDB that is bundled.""" + + +def version() -> str: + """Human-friendly formatted version string of both the distribution package and the bundled DuckDB engine.""" + return f"{__version__} (with duckdb {_duckdb.__version__})" diff --git a/duckdb/bytes_io_wrapper.py b/duckdb/bytes_io_wrapper.py index 4ced78e8..722c7cb4 100644 --- a/duckdb/bytes_io_wrapper.py +++ b/duckdb/bytes_io_wrapper.py @@ -1,7 +1,5 @@ -from io import StringIO, TextIOBase # noqa: D100 -from typing import Any, Union +"""StringIO buffer wrapper. -""" BSD 3-Clause License Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team @@ -35,10 +33,16 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ +from io import StringIO, TextIOBase +from typing import Any, Union + + +class BytesIOWrapper: + """Wrapper that wraps a StringIO buffer and reads bytes from it. + + Created for compat with pyarrow read_csv. + """ -class BytesIOWrapper: # noqa: D101 - # Wrapper that wraps a StringIO buffer and reads bytes from it - # Created for compat with pyarrow read_csv def __init__(self, buffer: Union[StringIO, TextIOBase], encoding: str = "utf-8") -> None: # noqa: D107 self.buffer = buffer self.encoding = encoding diff --git a/duckdb/experimental/spark/sql/column.py b/duckdb/experimental/spark/sql/column.py index 9aae0d67..661e4da7 100644 --- a/duckdb/experimental/spark/sql/column.py +++ b/duckdb/experimental/spark/sql/column.py @@ -8,7 +8,7 @@ from ._typing import DateTimeLiteral, DecimalLiteral, LiteralType from duckdb import ColumnExpression, ConstantExpression, Expression, FunctionExpression -from duckdb.typing import DuckDBPyType +from duckdb.sqltypes import DuckDBPyType __all__ = ["Column"] diff --git a/duckdb/experimental/spark/sql/type_utils.py b/duckdb/experimental/spark/sql/type_utils.py index 7aa73eec..90dac658 100644 --- a/duckdb/experimental/spark/sql/type_utils.py +++ b/duckdb/experimental/spark/sql/type_utils.py @@ -1,6 +1,6 @@ from typing import cast # noqa: D100 -from duckdb.typing import DuckDBPyType +from duckdb.sqltypes import DuckDBPyType from .types import ( ArrayType, diff --git a/duckdb/experimental/spark/sql/types.py b/duckdb/experimental/spark/sql/types.py index 61256366..856885e9 100644 --- a/duckdb/experimental/spark/sql/types.py +++ b/duckdb/experimental/spark/sql/types.py @@ -22,7 +22,7 @@ ) import duckdb -from duckdb.typing import DuckDBPyType +from duckdb.sqltypes import DuckDBPyType from ..exception import ContributionsAcceptedError diff --git a/duckdb/filesystem.py b/duckdb/filesystem.py index 1bfd4bb5..cc082efb 100644 --- a/duckdb/filesystem.py +++ b/duckdb/filesystem.py @@ -1,5 +1,12 @@ -from io import TextIOBase # noqa: D100 -from typing import IO 
+"""In-memory filesystem to store ephemeral dependencies. + +Warning: Not for external use. May change at any moment. Likely to be made internal. +""" + +from __future__ import annotations + +import io +import typing from fsspec import AbstractFileSystem from fsspec.implementations.memory import MemoryFile, MemoryFileSystem @@ -7,22 +14,20 @@ from .bytes_io_wrapper import BytesIOWrapper -def is_file_like(obj) -> bool: # noqa: D103, ANN001 - # We only care that we can read from the file - return hasattr(obj, "read") and hasattr(obj, "seek") +class ModifiedMemoryFileSystem(MemoryFileSystem): + """In-memory filesystem implementation that uses its own protocol.""" - -class ModifiedMemoryFileSystem(MemoryFileSystem): # noqa: D101 protocol = ("DUCKDB_INTERNAL_OBJECTSTORE",) # defer to the original implementation that doesn't hardcode the protocol - _strip_protocol = classmethod(AbstractFileSystem._strip_protocol.__func__) + _strip_protocol: typing.Callable[[str], str] = classmethod(AbstractFileSystem._strip_protocol.__func__) # type: ignore[assignment] - def add_file(self, object: IO, path: str) -> None: # noqa: D102 - if not is_file_like(object): + def add_file(self, obj: io.IOBase | BytesIOWrapper | object, path: str) -> None: + """Add a file to the filesystem.""" + if not (hasattr(obj, "read") and hasattr(obj, "seek")): msg = "Can not read from a non file-like object" - raise ValueError(msg) - path = self._strip_protocol(path) - if isinstance(object, TextIOBase): + raise TypeError(msg) + if isinstance(obj, io.TextIOBase): # Wrap this so that we can return a bytes object from 'read' - object = BytesIOWrapper(object) - self.store[path] = MemoryFile(self, path, object.read()) + obj = BytesIOWrapper(obj) + path = self._strip_protocol(path) + self.store[path] = MemoryFile(self, path, obj.read()) diff --git a/duckdb/func/__init__.py b/duckdb/func/__init__.py new file mode 100644 index 00000000..5d73f490 --- /dev/null +++ b/duckdb/func/__init__.py @@ -0,0 +1,3 @@ +from _duckdb._func import ARROW, DEFAULT, NATIVE, SPECIAL, FunctionNullHandling, PythonUDFType # noqa: D104 + +__all__ = ["ARROW", "DEFAULT", "NATIVE", "SPECIAL", "FunctionNullHandling", "PythonUDFType"] diff --git a/duckdb/functional/__init__.py b/duckdb/functional/__init__.py index a1d69d39..5114629b 100644 --- a/duckdb/functional/__init__.py +++ b/duckdb/functional/__init__.py @@ -1,3 +1,13 @@ -from _duckdb.functional import ARROW, DEFAULT, NATIVE, SPECIAL, FunctionNullHandling, PythonUDFType # noqa: D104 +"""DuckDB function constants and types. DEPRECATED: please use `duckdb.func` instead.""" + +import warnings + +from duckdb.func import ARROW, DEFAULT, NATIVE, SPECIAL, FunctionNullHandling, PythonUDFType __all__ = ["ARROW", "DEFAULT", "NATIVE", "SPECIAL", "FunctionNullHandling", "PythonUDFType"] + +warnings.warn( + "`duckdb.functional` is deprecated and will be removed in a future version. Please use `duckdb.func` instead.", + DeprecationWarning, + stacklevel=2, +) diff --git a/duckdb/functional/__init__.pyi b/duckdb/functional/__init__.pyi deleted file mode 100644 index 33ea33fa..00000000 --- a/duckdb/functional/__init__.pyi +++ /dev/null @@ -1,31 +0,0 @@ -from typing import Dict - -SPECIAL: FunctionNullHandling -DEFAULT: FunctionNullHandling - -NATIVE: PythonUDFType -ARROW: PythonUDFType - -class FunctionNullHandling: - DEFAULT: FunctionNullHandling - SPECIAL: FunctionNullHandling - def __int__(self) -> int: ... - def __index__(self) -> int: ... - @property - def __members__(self) -> Dict[str, FunctionNullHandling]: ... 
- @property - def name(self) -> str: ... - @property - def value(self) -> int: ... - -class PythonUDFType: - NATIVE: PythonUDFType - ARROW: PythonUDFType - def __int__(self) -> int: ... - def __index__(self) -> int: ... - @property - def __members__(self) -> Dict[str, PythonUDFType]: ... - @property - def name(self) -> str: ... - @property - def value(self) -> int: ... diff --git a/duckdb/polars_io.py b/duckdb/polars_io.py index 56bf743c..f43e0afd 100644 --- a/duckdb/polars_io.py +++ b/duckdb/polars_io.py @@ -1,17 +1,24 @@ -import datetime # noqa: D100 +from __future__ import annotations # noqa: D100 + +import datetime import json -from collections.abc import Iterator +import typing from decimal import Decimal -from typing import Optional import polars as pl from polars.io.plugins import register_io_source import duckdb -from duckdb import SQLExpression +if typing.TYPE_CHECKING: + from collections.abc import Iterator + + import typing_extensions + +_ExpressionTree: typing_extensions.TypeAlias = typing.Dict[str, typing.Union[str, int, "_ExpressionTree", typing.Any]] # noqa: UP006 -def _predicate_to_expression(predicate: pl.Expr) -> Optional[SQLExpression]: + +def _predicate_to_expression(predicate: pl.Expr) -> duckdb.Expression | None: """Convert a Polars predicate expression to a DuckDB-compatible SQL expression. Parameters: @@ -31,7 +38,7 @@ def _predicate_to_expression(predicate: pl.Expr) -> Optional[SQLExpression]: try: # Convert the tree to SQL sql_filter = _pl_tree_to_sql(tree) - return SQLExpression(sql_filter) + return duckdb.SQLExpression(sql_filter) except Exception: # If the conversion fails, we return None return None @@ -70,7 +77,7 @@ def _escape_sql_identifier(identifier: str) -> str: return f'"{escaped}"' -def _pl_tree_to_sql(tree: dict) -> str: +def _pl_tree_to_sql(tree: _ExpressionTree) -> str: """Recursively convert a Polars expression tree (as JSON) to a SQL string. 
Parameters: @@ -91,38 +98,51 @@ def _pl_tree_to_sql(tree: dict) -> str: Output: "(foo > 5)" """ [node_type] = tree.keys() - subtree = tree[node_type] if node_type == "BinaryExpr": # Binary expressions: left OP right - return ( - "(" - + " ".join( - ( - _pl_tree_to_sql(subtree["left"]), - _pl_operation_to_sql(subtree["op"]), - _pl_tree_to_sql(subtree["right"]), - ) - ) - + ")" - ) + bin_expr_tree = tree[node_type] + assert isinstance(bin_expr_tree, dict), f"A {node_type} should be a dict but got {type(bin_expr_tree)}" + lhs, op, rhs = bin_expr_tree["left"], bin_expr_tree["op"], bin_expr_tree["right"] + assert isinstance(lhs, dict), f"LHS of a {node_type} should be a dict but got {type(lhs)}" + assert isinstance(op, str), f"The op of a {node_type} should be a str but got {type(op)}" + assert isinstance(rhs, dict), f"RHS of a {node_type} should be a dict but got {type(rhs)}" + return f"({_pl_tree_to_sql(lhs)} {_pl_operation_to_sql(op)} {_pl_tree_to_sql(rhs)})" if node_type == "Column": # A reference to a column name # Wrap in quotes to handle special characters - return _escape_sql_identifier(subtree) + col_name = tree[node_type] + assert isinstance(col_name, str), f"The col name of a {node_type} should be a str but got {type(col_name)}" + return _escape_sql_identifier(col_name) if node_type in ("Literal", "Dyn"): # Recursively process dynamic or literal values - return _pl_tree_to_sql(subtree) + val_tree = tree[node_type] + assert isinstance(val_tree, dict), f"A {node_type} should be a dict but got {type(val_tree)}" + return _pl_tree_to_sql(val_tree) if node_type == "Int": # Direct integer literals - return str(subtree) + int_literal = tree[node_type] + assert isinstance(int_literal, (int, str)), ( + f"The value of an Int should be an int or str but got {type(int_literal)}" + ) + return str(int_literal) if node_type == "Function": # Handle boolean functions like IsNull, IsNotNull - inputs = subtree["input"] - func_dict = subtree["function"] + func_tree = tree[node_type] + assert isinstance(func_tree, dict), f"A {node_type} should be a dict but got {type(func_tree)}" + inputs = func_tree["input"] + assert isinstance(inputs, list), f"A {node_type} should have a list of dicts as input but got {type(inputs)}" + input_tree = inputs[0] + assert isinstance(input_tree, dict), ( + f"A {node_type} should have a list of dicts as input but got {type(input_tree)}" + ) + func_dict = func_tree["function"] + assert isinstance(func_dict, dict), ( + f"A {node_type} should have a function dict as input but got {type(func_dict)}" + ) if "Boolean" in func_dict: func = func_dict["Boolean"] @@ -140,24 +160,31 @@ def _pl_tree_to_sql(tree: dict) -> str: if node_type == "Scalar": # Detect format: old style (dtype/value) or new style (direct type key) - if "dtype" in subtree and "value" in subtree: - dtype = str(subtree["dtype"]) - value = subtree["value"] + scalar_tree = tree[node_type] + assert isinstance(scalar_tree, dict), f"A {node_type} should be a dict but got {type(scalar_tree)}" + if "dtype" in scalar_tree and "value" in scalar_tree: + dtype = str(scalar_tree["dtype"]) + value = scalar_tree["value"] else: # New style: dtype is the single key in the dict - dtype = next(iter(subtree.keys())) - value = subtree + dtype = next(iter(scalar_tree.keys())) + value = scalar_tree + assert isinstance(dtype, str), f"A {node_type} should have a str dtype but got {type(dtype)}" + assert isinstance(value, dict), f"A {node_type} should have a dict value but got {type(value)}" # Decimal support if 
dtype.startswith("{'Decimal'") or dtype == "Decimal": decimal_value = value["Decimal"] - decimal_value = Decimal(decimal_value[0]) / Decimal(10 ** decimal_value[1]) - return str(decimal_value) + assert isinstance(decimal_value, list), ( + f"A {dtype} should be a two member list but got {type(decimal_value)}" + ) + return str(Decimal(decimal_value[0]) / Decimal(10 ** decimal_value[1])) # Datetime with microseconds since epoch if dtype.startswith("{'Datetime'") or dtype == "Datetime": - micros = value["Datetime"][0] - dt_timestamp = datetime.datetime.fromtimestamp(micros / 1_000_000, tz=datetime.UTC) + micros = value["Datetime"] + assert isinstance(micros, list), f"A {dtype} should be a one member list but got {type(micros)}" + dt_timestamp = datetime.datetime.fromtimestamp(micros[0] / 1_000_000, tz=datetime.timezone.utc) return f"'{dt_timestamp!s}'::TIMESTAMP" # Match simple numeric/boolean types @@ -179,6 +206,7 @@ def _pl_tree_to_sql(tree: dict) -> str: # Time type if dtype == "Time": nanoseconds = value["Time"] + assert isinstance(nanoseconds, int), f"A {dtype} should be an int but got {type(nanoseconds)}" seconds = nanoseconds // 1_000_000_000 microseconds = (nanoseconds % 1_000_000_000) // 1_000 dt_time = (datetime.datetime.min + datetime.timedelta(seconds=seconds, microseconds=microseconds)).time() @@ -187,25 +215,30 @@ def _pl_tree_to_sql(tree: dict) -> str: # Date type if dtype == "Date": days_since_epoch = value["Date"] + assert isinstance(days_since_epoch, (float, int)), ( + f"A {dtype} should be a number but got {type(days_since_epoch)}" + ) date = datetime.date(1970, 1, 1) + datetime.timedelta(days=days_since_epoch) return f"'{date}'::DATE" # Binary type if dtype == "Binary": - binary_data = bytes(value["Binary"]) + bin_value = value["Binary"] + assert isinstance(bin_value, list), f"A {dtype} should be a list but got {type(bin_value)}" + binary_data = bytes(bin_value) escaped = "".join(f"\\x{b:02x}" for b in binary_data) return f"'{escaped}'::BLOB" # String type if dtype == "String" or dtype == "StringOwned": # Some new formats may store directly under StringOwned - string_val = value.get("StringOwned", value.get("String", None)) + string_val: object | None = value.get("StringOwned", value.get("String", None)) return f"'{string_val}'" msg = f"Unsupported scalar type {dtype!s}, with value {value}" raise NotImplementedError(msg) - msg = f"Node type: {node_type} is not implemented. {subtree}" + msg = f"Node type: {node_type} is not implemented. 
{tree[node_type]}" raise NotImplementedError(msg) @@ -213,10 +246,10 @@ def duckdb_source(relation: duckdb.DuckDBPyRelation, schema: pl.schema.Schema) - """A polars IO plugin for DuckDB.""" def source_generator( - with_columns: Optional[list[str]], - predicate: Optional[pl.Expr], - n_rows: Optional[int], - batch_size: Optional[int], + with_columns: list[str] | None, + predicate: pl.Expr | None, + n_rows: int | None, + batch_size: int | None, ) -> Iterator[pl.DataFrame]: duck_predicate = None relation_final = relation @@ -239,8 +272,8 @@ def source_generator( for record_batch in iter(results.read_next_batch, None): if predicate is not None and duck_predicate is None: # We have a predicate, but did not manage to push it down, we fallback here - yield pl.from_arrow(record_batch).filter(predicate) + yield pl.from_arrow(record_batch).filter(predicate) # type: ignore[arg-type,misc] else: - yield pl.from_arrow(record_batch) + yield pl.from_arrow(record_batch) # type: ignore[misc] return register_io_source(source_generator, schema=schema) diff --git a/duckdb/value/__init__.pyi b/duckdb/py.typed similarity index 100% rename from duckdb/value/__init__.pyi rename to duckdb/py.typed diff --git a/duckdb/sqltypes/__init__.py b/duckdb/sqltypes/__init__.py new file mode 100644 index 00000000..38917ce3 --- /dev/null +++ b/duckdb/sqltypes/__init__.py @@ -0,0 +1,63 @@ +"""DuckDB's SQL types.""" + +from _duckdb._sqltypes import ( + BIGINT, + BIT, + BLOB, + BOOLEAN, + DATE, + DOUBLE, + FLOAT, + HUGEINT, + INTEGER, + INTERVAL, + SMALLINT, + SQLNULL, + TIME, + TIME_TZ, + TIMESTAMP, + TIMESTAMP_MS, + TIMESTAMP_NS, + TIMESTAMP_S, + TIMESTAMP_TZ, + TINYINT, + UBIGINT, + UHUGEINT, + UINTEGER, + USMALLINT, + UTINYINT, + UUID, + VARCHAR, + DuckDBPyType, +) + +__all__ = [ + "BIGINT", + "BIT", + "BLOB", + "BOOLEAN", + "DATE", + "DOUBLE", + "FLOAT", + "HUGEINT", + "INTEGER", + "INTERVAL", + "SMALLINT", + "SQLNULL", + "TIME", + "TIMESTAMP", + "TIMESTAMP_MS", + "TIMESTAMP_NS", + "TIMESTAMP_S", + "TIMESTAMP_TZ", + "TIME_TZ", + "TINYINT", + "UBIGINT", + "UHUGEINT", + "UINTEGER", + "USMALLINT", + "UTINYINT", + "UUID", + "VARCHAR", + "DuckDBPyType", +] diff --git a/duckdb/typing/__init__.py b/duckdb/typing/__init__.py index 8e2e092d..4c29047b 100644 --- a/duckdb/typing/__init__.py +++ b/duckdb/typing/__init__.py @@ -1,4 +1,8 @@ -from _duckdb.typing import ( # noqa: D104 +"""DuckDB's SQL types. DEPRECATED. Please use `duckdb.sqltypes` instead.""" + +import warnings + +from duckdb.sqltypes import ( BIGINT, BIT, BLOB, @@ -59,3 +63,9 @@ "VARCHAR", "DuckDBPyType", ] + +warnings.warn( + "`duckdb.typing` is deprecated and will be removed in a future version. 
Please use `duckdb.sqltypes` instead.", + DeprecationWarning, + stacklevel=2, +) diff --git a/duckdb/typing/__init__.pyi b/duckdb/typing/__init__.pyi deleted file mode 100644 index 8a3cef79..00000000 --- a/duckdb/typing/__init__.pyi +++ /dev/null @@ -1,38 +0,0 @@ -from duckdb import DuckDBPyConnection - -SQLNULL: DuckDBPyType -BOOLEAN: DuckDBPyType -TINYINT: DuckDBPyType -UTINYINT: DuckDBPyType -SMALLINT: DuckDBPyType -USMALLINT: DuckDBPyType -INTEGER: DuckDBPyType -UINTEGER: DuckDBPyType -BIGINT: DuckDBPyType -UBIGINT: DuckDBPyType -HUGEINT: DuckDBPyType -UHUGEINT: DuckDBPyType -UUID: DuckDBPyType -FLOAT: DuckDBPyType -DOUBLE: DuckDBPyType -DATE: DuckDBPyType -TIMESTAMP: DuckDBPyType -TIMESTAMP_MS: DuckDBPyType -TIMESTAMP_NS: DuckDBPyType -TIMESTAMP_S: DuckDBPyType -TIME: DuckDBPyType -TIME_TZ: DuckDBPyType -TIMESTAMP_TZ: DuckDBPyType -VARCHAR: DuckDBPyType -BLOB: DuckDBPyType -BIT: DuckDBPyType -INTERVAL: DuckDBPyType - -class DuckDBPyType: - def __init__(self, type_str: str, connection: DuckDBPyConnection = ...) -> None: ... - def __repr__(self) -> str: ... - def __eq__(self, other) -> bool: ... - def __getattr__(self, name: str): - DuckDBPyType - def __getitem__(self, name: str): - DuckDBPyType diff --git a/duckdb/udf.py b/duckdb/udf.py index 13b32c41..b15ba709 100644 --- a/duckdb/udf.py +++ b/duckdb/udf.py @@ -1,8 +1,8 @@ # ruff: noqa: D100 -from typing import Callable +import typing -def vectorized(func: Callable) -> Callable: +def vectorized(func: typing.Callable[..., typing.Any]) -> typing.Callable[..., typing.Any]: """Decorate a function with annotated function parameters. This allows DuckDB to infer that the function should be provided with pyarrow arrays and should expect diff --git a/duckdb/value/constant/__init__.py b/duckdb/value/constant/__init__.py index 8857a268..530c6bdc 100644 --- a/duckdb/value/constant/__init__.py +++ b/duckdb/value/constant/__init__.py @@ -1,7 +1,7 @@ # ruff: noqa: D101, D104, D105, D107, ANN401 from typing import Any -from duckdb.typing import ( +from duckdb.sqltypes import ( BIGINT, BIT, BLOB, diff --git a/duckdb/value/constant/__init__.pyi b/duckdb/value/constant/__init__.pyi deleted file mode 100644 index f5190345..00000000 --- a/duckdb/value/constant/__init__.pyi +++ /dev/null @@ -1,114 +0,0 @@ -from duckdb.typing import DuckDBPyType -from typing import Any - -class NullValue(Value): - def __init__(self) -> None: ... - def __repr__(self) -> str: ... - -class BooleanValue(Value): - def __init__(self, object: Any) -> None: ... - def __repr__(self) -> str: ... - -class UnsignedBinaryValue(Value): - def __init__(self, object: Any) -> None: ... - def __repr__(self) -> str: ... - -class UnsignedShortValue(Value): - def __init__(self, object: Any) -> None: ... - def __repr__(self) -> str: ... - -class UnsignedIntegerValue(Value): - def __init__(self, object: Any) -> None: ... - def __repr__(self) -> str: ... - -class UnsignedLongValue(Value): - def __init__(self, object: Any) -> None: ... - def __repr__(self) -> str: ... - -class BinaryValue(Value): - def __init__(self, object: Any) -> None: ... - def __repr__(self) -> str: ... - -class ShortValue(Value): - def __init__(self, object: Any) -> None: ... - def __repr__(self) -> str: ... - -class IntegerValue(Value): - def __init__(self, object: Any) -> None: ... - def __repr__(self) -> str: ... - -class LongValue(Value): - def __init__(self, object: Any) -> None: ... - def __repr__(self) -> str: ... - -class HugeIntegerValue(Value): - def __init__(self, object: Any) -> None: ... 
- def __repr__(self) -> str: ... - -class FloatValue(Value): - def __init__(self, object: Any) -> None: ... - def __repr__(self) -> str: ... - -class DoubleValue(Value): - def __init__(self, object: Any) -> None: ... - def __repr__(self) -> str: ... - -class DecimalValue(Value): - def __init__(self, object: Any, width: int, scale: int) -> None: ... - def __repr__(self) -> str: ... - -class StringValue(Value): - def __init__(self, object: Any) -> None: ... - def __repr__(self) -> str: ... - -class UUIDValue(Value): - def __init__(self, object: Any) -> None: ... - def __repr__(self) -> str: ... - -class BitValue(Value): - def __init__(self, object: Any) -> None: ... - def __repr__(self) -> str: ... - -class BlobValue(Value): - def __init__(self, object: Any) -> None: ... - def __repr__(self) -> str: ... - -class DateValue(Value): - def __init__(self, object: Any) -> None: ... - def __repr__(self) -> str: ... - -class IntervalValue(Value): - def __init__(self, object: Any) -> None: ... - def __repr__(self) -> str: ... - -class TimestampValue(Value): - def __init__(self, object: Any) -> None: ... - def __repr__(self) -> str: ... - -class TimestampSecondValue(Value): - def __init__(self, object: Any) -> None: ... - def __repr__(self) -> str: ... - -class TimestampMilisecondValue(Value): - def __init__(self, object: Any) -> None: ... - def __repr__(self) -> str: ... - -class TimestampNanosecondValue(Value): - def __init__(self, object: Any) -> None: ... - def __repr__(self) -> str: ... - -class TimestampTimeZoneValue(Value): - def __init__(self, object: Any) -> None: ... - def __repr__(self) -> str: ... - -class TimeValue(Value): - def __init__(self, object: Any) -> None: ... - def __repr__(self) -> str: ... - -class TimeTimeZoneValue(Value): - def __init__(self, object: Any) -> None: ... - def __repr__(self) -> str: ... - -class Value: - def __init__(self, object: Any, type: DuckDBPyType) -> None: ... - def __repr__(self) -> str: ... 
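With the stub above deleted, the typed-constant wrappers continue to live in `duckdb/value/constant/__init__.py` (now importing from `duckdb.sqltypes`), and their type hints move into `_duckdb-stubs`. A hedged usage sketch of the `Value` API, with expected outputs shown as comments rather than guaranteed results:

    import duckdb
    from duckdb.sqltypes import TINYINT
    from duckdb.value.constant import IntegerValue, Value

    conn = duckdb.connect()

    # A concrete wrapper binds the Python int as a specific DuckDB type.
    print(conn.execute("SELECT typeof($1)", [IntegerValue(42)]).fetchone())  # ('INTEGER',)

    # The generic Value takes an explicit DuckDBPyType.
    print(conn.execute("SELECT typeof($1)", [Value(7, TINYINT)]).fetchone())  # ('TINYINT',)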
diff --git a/external/duckdb b/external/duckdb index b8a06e4a..9eccb88e 160000 --- a/external/duckdb +++ b/external/duckdb @@ -1 +1 @@ -Subproject commit b8a06e4a22672e254cd0baa68a3dbed2eb51c56e +Subproject commit 9eccb88e54dbed8f9d8059f84acb7ee3ae0ac5e9 diff --git a/pyproject.toml b/pyproject.toml index ece5da98..52b63edc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,6 +78,7 @@ metadata.version.provider = "scikit_build_core.metadata.setuptools_scm" cmake = true packages.duckdb = "duckdb" packages.adbc_driver_duckdb = "adbc_driver_duckdb" +packages._duckdb-stubs = "_duckdb-stubs" [tool.scikit-build.cmake.define] CORE_EXTENSIONS = "core_functions;json;parquet;icu;jemalloc" @@ -135,11 +136,12 @@ include = [ "CMakeLists.txt", "cmake/**", - # Source code + # Source code and stubs "src/**", "duckdb/**", "duckdb_packaging/**", "adbc_driver_duckdb/**", + "_duckdb-stubs/*.pyi", # Generated during sdist build, contains git describe string for duckdb "duckdb_packaging/duckdb_version.txt", @@ -219,6 +221,8 @@ torchvision = [ { index = "pytorch-cpu" } ] [dependency-groups] # used for development only, requires pip >=25.1.0 stubdeps = [ # dependencies used for typehints in the stubs + "pybind11-stubgen", + "mypy", "fsspec", "pandas", "polars", @@ -310,6 +314,32 @@ filterwarnings = [ "ignore:is_datetime64tz_dtype is deprecated:DeprecationWarning", ] +[tool.mypy] +packages = ["duckdb", "_duckdb"] +strict = true +warn_unreachable = true +pretty = true +python_version = "3.9" +exclude = [ + "duckdb/experimental/", # not checking the pyspark API + "duckdb/query_graph/", # old and unmaintained (should probably remove) +] + +[[tool.mypy.overrides]] +module = [ + "fsspec.*", + "pandas", + "polars", + "pyarrow.*", + "torch", + "tensorflow", +] +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "duckdb.filesystem" +disallow_subclassing_any = false + [tool.coverage.run] branch = true source = ["duckdb"] diff --git a/src/duckdb_py/functional/functional.cpp b/src/duckdb_py/functional/functional.cpp index 6761a264..252634b1 100644 --- a/src/duckdb_py/functional/functional.cpp +++ b/src/duckdb_py/functional/functional.cpp @@ -3,8 +3,7 @@ namespace duckdb { void DuckDBPyFunctional::Initialize(py::module_ &parent) { - auto m = - parent.def_submodule("functional", "This module contains classes and methods related to functions and udf"); + auto m = parent.def_submodule("_func", "This module contains classes and methods related to functions and udf"); py::enum_(m, "PythonUDFType") .value("NATIVE", duckdb::PythonUDFType::NATIVE) diff --git a/src/duckdb_py/typing/typing.cpp b/src/duckdb_py/typing/typing.cpp index c0e2675e..fe990de1 100644 --- a/src/duckdb_py/typing/typing.cpp +++ b/src/duckdb_py/typing/typing.cpp @@ -39,7 +39,7 @@ static void DefineBaseTypes(py::handle &m) { } void DuckDBPyTyping::Initialize(py::module_ &parent) { - auto m = parent.def_submodule("typing", "This module contains classes and methods related to typing"); + auto m = parent.def_submodule("_sqltypes", "This module contains classes and methods related to typing"); DuckDBPyType::Initialize(m); DefineBaseTypes(m); diff --git a/tests/fast/adbc/test_connection_get_info.py b/tests/fast/adbc/test_connection_get_info.py index 8bc4b97a..aa2b3d32 100644 --- a/tests/fast/adbc/test_connection_get_info.py +++ b/tests/fast/adbc/test_connection_get_info.py @@ -23,7 +23,7 @@ def test_connection_get_info_all(self): expected_result = pa.array( [ "duckdb", - "v" + duckdb.duckdb_version, # don't hardcode this, as it will change every 
version + "v" + duckdb.__duckdb_version__, # don't hardcode this, as it will change every version "ADBC DuckDB Driver", "(unknown)", "(unknown)", diff --git a/tests/fast/api/test_explain.py b/tests/fast/api/test_explain.py index 2b154321..61ea979c 100644 --- a/tests/fast/api/test_explain.py +++ b/tests/fast/api/test_explain.py @@ -15,9 +15,6 @@ def test_explain_standard(self, duckdb_cursor): res = duckdb_cursor.sql("select 42").explain("STANDARD") assert isinstance(res, str) - res = duckdb_cursor.sql("select 42").explain(duckdb.STANDARD) - assert isinstance(res, str) - res = duckdb_cursor.sql("select 42").explain(duckdb.ExplainType.STANDARD) assert isinstance(res, str) diff --git a/tests/fast/api/test_read_csv.py b/tests/fast/api/test_read_csv.py index 9d8e518b..e7862e9b 100644 --- a/tests/fast/api/test_read_csv.py +++ b/tests/fast/api/test_read_csv.py @@ -92,7 +92,6 @@ def test_header_true(self, duckdb_cursor): print(res) assert res == (1, "Action", datetime.datetime(2006, 2, 15, 4, 46, 27)) - @pytest.mark.skip(reason="Issue #6011 needs to be fixed first, header=False doesn't work correctly") def test_header_false(self, duckdb_cursor): duckdb_cursor.read_csv(TestFile("category.csv"), header=False) @@ -383,13 +382,13 @@ def read(self, amount=-1): def test_filelike_non_readable(self, duckdb_cursor): _ = pytest.importorskip("fsspec") obj = 5 - with pytest.raises(ValueError, match="Can not read from a non file-like object"): + with pytest.raises(TypeError, match="Can not read from a non file-like object"): duckdb_cursor.read_csv(obj).fetchall() def test_filelike_none(self, duckdb_cursor): _ = pytest.importorskip("fsspec") obj = None - with pytest.raises(ValueError, match="Can not read from a non file-like object"): + with pytest.raises(TypeError, match="Can not read from a non file-like object"): duckdb_cursor.read_csv(obj).fetchall() @pytest.mark.skip(reason="depends on garbage collector behaviour, and sporadically breaks in CI")
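The two `test_filelike_*` updates above track the `add_file` change earlier in this diff: a non-file-like argument now fails with `TypeError` rather than `ValueError`, matching Python conventions for wrong argument types. A minimal sketch of the new behavior, mirroring the tests above (requires fsspec):

    import duckdb
    import pytest

    with pytest.raises(TypeError, match="Can not read from a non file-like object"):
        duckdb.read_csv(5).fetchall()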