Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .github/workflows/packaging_wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,6 @@ jobs:
env:
CIBW_ARCHS: ${{ matrix.platform.arch == 'amd64' && 'AMD64' || matrix.platform.arch }}
CIBW_BUILD: ${{ matrix.python }}-${{ matrix.platform.cibw_system }}_${{ matrix.platform.arch }}

- name: Upload wheel
uses: actions/upload-artifact@v4
with:
Expand Down
1 change: 0 additions & 1 deletion _duckdb-stubs/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -1437,7 +1437,6 @@ __interactive__: bool
__jupyter__: bool
__standard_vector_size__: int
__version__: str
_clean_default_connection: pytyping.Any # value = <capsule object>
apilevel: str
paramstyle: str
threadsafety: int
2 changes: 0 additions & 2 deletions duckdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@
__interactive__,
__jupyter__,
__standard_vector_size__,
_clean_default_connection,
aggregate,
alias,
apilevel,
Expand Down Expand Up @@ -292,7 +291,6 @@
"__jupyter__",
"__standard_vector_size__",
"__version__",
"_clean_default_connection",
"aggregate",
"alias",
"apilevel",
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ test = [ # dependencies used for running tests
"pytest",
"pytest-reraise",
"pytest-timeout",
"pytest-run-parallel",
"mypy",
"coverage",
"gcovr; python_version < '3.14'",
Expand Down
1 change: 1 addition & 0 deletions src/duckdb_py/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ add_library(
duckdb_python.cpp
importer.cpp
map.cpp
module_state.cpp
path_like.cpp
pyconnection.cpp
pyexpression.cpp
Expand Down
44 changes: 34 additions & 10 deletions src/duckdb_py/duckdb_python.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "duckdb_python/pybind11/conversions/python_udf_type_enum.hpp"
#include "duckdb_python/pybind11/conversions/python_csv_line_terminator_enum.hpp"
#include "duckdb/common/enums/statement_type.hpp"
#include "duckdb_python/module_state.hpp"
#include "duckdb/common/adbc/adbc-init.hpp"

#include "duckdb.hpp"
Expand All @@ -32,6 +33,20 @@ namespace py = pybind11;

namespace duckdb {

// Private helper: creates the module state, ties its lifetime to the module and publishes it.
//
// Ownership of the state is handed to a capsule stored on the module as "__duckdb_state";
// when that capsule is destroyed, the global pointer is reset and the state is deleted.
// https://pybind11.readthedocs.io/en/stable/advanced/misc.html#module-destructors
//
// The state is held by a unique_ptr until the capsule exists, so it is not leaked (and the
// global pointer never refers to it) if creating the capsule throws.
void InitializeModuleState(py::module_ &m) {
	auto state = make_uniq<DuckDBPyModuleState>();

	auto capsule = py::capsule(state.get(), [](void *p) {
		auto owned_state = static_cast<DuckDBPyModuleState *>(p);
		// Reset the global pointer before deleting so it never dangles
		DuckDBPyModuleState::SetGlobalModuleState(nullptr);
		delete owned_state;
	});
	// From here on the capsule destructor is responsible for deleting the state
	auto state_ptr = state.release();

	// Publish only once the state has an owner
	SetModuleState(state_ptr);
	m.attr("__duckdb_state") = capsule;
}

enum PySQLTokenType : uint8_t {
PY_SQL_TOKEN_IDENTIFIER = 0,
PY_SQL_TOKEN_NUMERIC_CONSTANT,
Expand Down Expand Up @@ -1031,7 +1046,21 @@ PYBIND11_EXPORT void *_force_symbol_inclusion() {
}
};

PYBIND11_MODULE(DUCKDB_PYTHON_LIB_NAME, m) { // NOLINT
// Only declare py::mod_gil_not_used() on free-threaded CPython 3.14 or later;
// free-threaded 3.13 builds are deliberately not marked as supported.
// The Py_GIL_DISABLED check is not strictly necessary.
#if defined(Py_GIL_DISABLED) && PY_VERSION_HEX >= 0x030e0000
PYBIND11_MODULE(DUCKDB_PYTHON_LIB_NAME, m, py::mod_gil_not_used(),
py::multiple_interpreters::not_supported()) { // NOLINT
#else
PYBIND11_MODULE(DUCKDB_PYTHON_LIB_NAME, m,
py::multiple_interpreters::not_supported()) { // NOLINT
#endif
// Initialize module state completely during initialization
// PEP 489 wants calls for state to be module local, but currently
// static via g_module_state.
InitializeModuleState(m);

// DO NOT REMOVE: the below forces that we include all symbols we want to export
volatile auto *keep_alive = _force_symbol_inclusion();
(void)keep_alive;
Expand Down Expand Up @@ -1075,9 +1104,10 @@ PYBIND11_MODULE(DUCKDB_PYTHON_LIB_NAME, m) { // NOLINT
m.attr("__version__") = std::string(DuckDB::LibraryVersion()).substr(1);
m.attr("__standard_vector_size__") = DuckDB::StandardVectorSize();
m.attr("__git_revision__") = DuckDB::SourceID();
m.attr("__interactive__") = DuckDBPyConnection::DetectAndGetEnvironment();
m.attr("__jupyter__") = DuckDBPyConnection::IsJupyter();
m.attr("__formatted_python_version__") = DuckDBPyConnection::FormattedPythonVersion();
auto &module_state = GetModuleState();
m.attr("__interactive__") = module_state.environment != PythonEnvironmentType::NORMAL;
m.attr("__jupyter__") = module_state.environment == PythonEnvironmentType::JUPYTER;
m.attr("__formatted_python_version__") = module_state.formatted_python_version;
m.def("default_connection", &DuckDBPyConnection::DefaultConnection,
"Retrieve the connection currently registered as the default to be used by the module");
m.def("set_default_connection", &DuckDBPyConnection::SetDefaultConnection,
Expand Down Expand Up @@ -1107,12 +1137,6 @@ PYBIND11_MODULE(DUCKDB_PYTHON_LIB_NAME, m) { // NOLINT
.value("keyword", PySQLTokenType::PY_SQL_TOKEN_KEYWORD)
.value("comment", PySQLTokenType::PY_SQL_TOKEN_COMMENT)
.export_values();

// we need this because otherwise we try to remove registered_dfs on shutdown when python is already dead
auto clean_default_connection = []() {
DuckDBPyConnection::Cleanup();
};
m.add_object("_clean_default_connection", py::capsule(clean_default_connection));
}

} // namespace duckdb
63 changes: 63 additions & 0 deletions src/duckdb_py/include/duckdb_python/module_state.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb_python/module_state.hpp
//
//
//===----------------------------------------------------------------------===//

#pragma once

#include "duckdb_python/pybind11/pybind_wrapper.hpp"
#include "duckdb/common/shared_ptr.hpp"
#include "duckdb/main/db_instance_cache.hpp"
#include "duckdb/main/database.hpp"
#include "duckdb_python/import_cache/python_import_cache.hpp"
#include "duckdb_python/pyconnection/pyconnection.hpp"
#include <pybind11/critical_section.h>

namespace duckdb {

// Module state structure to hold per-interpreter state.
// Bundles the detected Python environment, the default connection, the import cache and the
// database instance cache. The live instance is currently reached through a single static
// pointer (g_module_state), see GetGlobalModuleState() / SetGlobalModuleState().
struct DuckDBPyModuleState {
// Python environment tracking
//! Kind of Python session detected by the constructor (defaults to NORMAL)
PythonEnvironmentType environment = PythonEnvironmentType::NORMAL;
//! Interpreter version formatted as "<major>.<minor>", filled in by the constructor
string formatted_python_version;

//! Detects the Python version and environment; imports Python modules while doing so
DuckDBPyModuleState();

//! Returns the default connection, creating a new in-memory one if none is set or it is closed
shared_ptr<DuckDBPyConnection> GetDefaultConnection();
//! Replaces the default connection with `connection`
void SetDefaultConnection(shared_ptr<DuckDBPyConnection> connection);
//! Drops this state's reference to the default connection
void ClearDefaultConnection();

//! Returns a pointer to the import cache owned by this state
PythonImportCache *GetImportCache();
//! Resets the import cache to a default-constructed one
void ClearImportCache();

//! Returns a pointer to the database instance cache owned by this state
DBInstanceCache *GetInstanceCache();

//! Returns the registered state; throws InternalException if none has been registered
static DuckDBPyModuleState &GetGlobalModuleState();
//! Registers `state` as the global state (nullptr unregisters); only the pointer is stored
static void SetGlobalModuleState(DuckDBPyModuleState *state);

private:
//! Backing storage for the default connection; may be null until first requested
shared_ptr<DuckDBPyConnection> default_connection_ptr;
//! Held by value, so it is constructed and destroyed together with this state
PythonImportCache import_cache;
//! Held by value, so it is constructed and destroyed together with this state
DBInstanceCache instance_cache;
#ifdef Py_GIL_DISABLED
//! Object passed to py::scoped_critical_section by the default-connection accessors
py::object default_con_lock;
#endif

// Implemented as static as a first step towards PEP 489 / multi-phase init
// Intent is to move to per-module object, but frequent calls to import_cache
// need to be considered carefully.
// TODO: Replace with non-static per-interpreter state for multi-interpreter support
static DuckDBPyModuleState *g_module_state;

// Non-copyable
DuckDBPyModuleState(const DuckDBPyModuleState &) = delete;
DuckDBPyModuleState &operator=(const DuckDBPyModuleState &) = delete;
};

//! Free-function shorthand for DuckDBPyModuleState::GetGlobalModuleState()
DuckDBPyModuleState &GetModuleState();
//! Free-function shorthand for DuckDBPyModuleState::SetGlobalModuleState()
void SetModuleState(DuckDBPyModuleState *state);

} // namespace duckdb
21 changes: 10 additions & 11 deletions src/duckdb_py/include/duckdb_python/pyconnection/pyconnection.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

namespace duckdb {
struct BoundParameterData;
struct DuckDBPyModuleState;

//! Kind of Python session the module is running in
enum class PythonEnvironmentType { NORMAL, INTERACTIVE, JUPYTER };

Expand Down Expand Up @@ -172,8 +173,7 @@ struct DuckDBPyConnection : public enable_shared_from_this<DuckDBPyConnection> {
case_insensitive_set_t registered_objects;

public:
explicit DuckDBPyConnection() {
}
DuckDBPyConnection();
~DuckDBPyConnection();

public:
Expand All @@ -190,9 +190,17 @@ struct DuckDBPyConnection : public enable_shared_from_this<DuckDBPyConnection> {
static std::string FormattedPythonVersion();
static shared_ptr<DuckDBPyConnection> DefaultConnection();
static void SetDefaultConnection(shared_ptr<DuckDBPyConnection> conn);
static shared_ptr<DuckDBPyConnection> GetDefaultConnection();
static void ClearDefaultConnection();
static void ClearImportCache();
static PythonImportCache *ImportCache();
static bool IsInteractive();

// Instance methods for optimized module state access
bool IsJupyterInstance() const;
bool IsInteractiveInstance() const;
std::string FormattedPythonVersionInstance() const;

unique_ptr<DuckDBPyRelation> ReadCSV(const py::object &name, py::kwargs &kwargs);

py::list ExtractStatements(const string &query);
Expand Down Expand Up @@ -337,11 +345,6 @@ struct DuckDBPyConnection : public enable_shared_from_this<DuckDBPyConnection> {
py::list ListFilesystems();
bool FileSystemIsRegistered(const string &name);

//! Default connection to an in-memory database
static DefaultConnectionHolder default_connection;
//! Caches and provides an interface to get frequently used modules+subtypes
static shared_ptr<PythonImportCache> import_cache;

static bool IsPandasDataframe(const py::object &object);
static PyArrowObjectType GetArrowType(const py::handle &obj);
static bool IsAcceptedArrowObject(const py::object &object);
Expand All @@ -357,10 +360,6 @@ struct DuckDBPyConnection : public enable_shared_from_this<DuckDBPyConnection> {
bool side_effects);
void RegisterArrowObject(const py::object &arrow_object, const string &name);
vector<unique_ptr<SQLStatement>> GetStatements(const py::object &query);

static PythonEnvironmentType environment;
static std::string formatted_python_version;
static void DetectEnvironment();
};

template <typename T>
Expand Down
128 changes: 128 additions & 0 deletions src/duckdb_py/module_state.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb_python/module_state.cpp
//
//
//===----------------------------------------------------------------------===//

#include "duckdb_python/module_state.hpp"
#include <stdexcept>
#include <chrono>
#include <thread>

#define DEBUG_MODULE_STATE 0

namespace duckdb {

// Forward declaration from pyconnection.cpp
void InstantiateNewInstance(DuckDB &db);

// Static member initialization - required for all static class members in C++
DuckDBPyModuleState *DuckDBPyModuleState::g_module_state = nullptr;

DuckDBPyModuleState::DuckDBPyModuleState() {
	// The caches are direct members of this struct - no heap allocation needed

#ifdef Py_GIL_DISABLED
	// Object handed to py::scoped_critical_section by the default-connection accessors
	// TODO: Consider moving to finer-grained locks
	default_con_lock = py::none();
#endif

	// Detect the Python version and environment during initialization
	// Moved from DuckDBPyConnection::DetectEnvironment()
	py::module_ sys_module = py::module_::import("sys");
	py::object py_version = sys_module.attr("version_info");
	const int major_version = py::cast<int>(py_version.attr("major"));
	const int minor_version = py::cast<int>(py_version.attr("minor"));
	formatted_python_version = std::to_string(major_version) + "." + std::to_string(minor_version);

	// If __main__ has a __file__ attribute we are not in interactive mode:
	// keep the default environment
	auto entry_module = py::module_::import("__main__");
	if (py::hasattr(entry_module, "__file__")) {
		return;
	}
	environment = PythonEnvironmentType::INTERACTIVE;

	// Everything below only decides whether INTERACTIVE should be upgraded to JUPYTER
	if (!ModuleIsLoaded<IpythonCacheItem>()) {
		return;
	}
	// Check to see if we are in a Jupyter Notebook
	// NOTE(review): this lookup runs inside the constructor, i.e. before the new state has been
	// registered through SetModuleState() - confirm it does not depend on GetModuleState()
	auto get_ipython = import_cache.IPython.get_ipython();
	if (get_ipython.ptr() == nullptr) {
		return;
	}
	auto shell = get_ipython();
	if (!py::hasattr(shell, "config")) {
		return;
	}
	py::dict shell_config = shell.attr("config");
	if (shell_config.contains("IPKernelApp")) {
		environment = PythonEnvironmentType::JUPYTER;
	}
}

// Returns the module state registered through SetGlobalModuleState().
// Throws InternalException when no state is currently registered.
DuckDBPyModuleState &DuckDBPyModuleState::GetGlobalModuleState() {
	// TODO: Externalize this static cache when adding multi-interpreter support
	// For now, single interpreter assumption allows simple static caching
	DuckDBPyModuleState *current_state = g_module_state;
	if (current_state != nullptr) {
		return *current_state;
	}
	throw InternalException("Module state not initialized - call SetGlobalModuleState() during module init");
}

// Records `state` as the global module state pointer; passing nullptr clears it.
// Only the pointer is stored here - nothing is allocated or freed.
void DuckDBPyModuleState::SetGlobalModuleState(DuckDBPyModuleState *state) {
#if DEBUG_MODULE_STATE
// Debug-only trace, compiled in when DEBUG_MODULE_STATE is non-zero
printf("DEBUG: SetGlobalModuleState() called - initializing static cache (built: %s %s)\n", __DATE__, __TIME__);
#endif
g_module_state = state;
}

// Free-function shorthand for DuckDBPyModuleState::GetGlobalModuleState().
// Propagates the InternalException thrown there when no state has been registered.
DuckDBPyModuleState &GetModuleState() {
#if DEBUG_MODULE_STATE
// Debug-only trace, compiled in when DEBUG_MODULE_STATE is non-zero
printf("DEBUG: GetModuleState() called\n");
#endif
return DuckDBPyModuleState::GetGlobalModuleState();
}

// Free-function shorthand for DuckDBPyModuleState::SetGlobalModuleState().
void SetModuleState(DuckDBPyModuleState *state) {
DuckDBPyModuleState::SetGlobalModuleState(state);
}

// Returns the default connection. When none is stored yet, or the stored one has been closed,
// a new ":memory:" connection is created, stored and returned instead.
shared_ptr<DuckDBPyConnection> DuckDBPyModuleState::GetDefaultConnection() {
#if defined(Py_GIL_DISABLED)
	// TODO: Consider whether a mutex vs a scoped_critical_section
	py::scoped_critical_section lock(default_con_lock);
#endif
	// Same logic as the original DefaultConnectionHolder::Get()
	const bool has_open_connection = default_connection_ptr && !default_connection_ptr->con.ConnectionIsClosed();
	if (has_open_connection) {
		return default_connection_ptr;
	}

	py::dict config_dict;
	default_connection_ptr = DuckDBPyConnection::Connect(py::str(":memory:"), false, config_dict);
	return default_connection_ptr;
}

// Replaces the stored default connection with `connection`.
void DuckDBPyModuleState::SetDefaultConnection(shared_ptr<DuckDBPyConnection> connection) {
#if defined(Py_GIL_DISABLED)
// Same critical section object as the other default-connection accessors
py::scoped_critical_section guard(default_con_lock);
#endif
default_connection_ptr = std::move(connection);
}

// Drops this state's reference to the default connection.
// A later GetDefaultConnection() call will create a new one.
void DuckDBPyModuleState::ClearDefaultConnection() {
#if defined(Py_GIL_DISABLED)
// Same critical section object as the other default-connection accessors
py::scoped_critical_section guard(default_con_lock);
#endif
default_connection_ptr = nullptr;
}

// Returns a pointer to the import cache member; the cache stays owned by this state.
PythonImportCache *DuckDBPyModuleState::GetImportCache() {
return &import_cache;
}

// Resets the import cache by assigning a default-constructed PythonImportCache over it.
void DuckDBPyModuleState::ClearImportCache() {
import_cache = PythonImportCache();
}

// Returns a pointer to the database instance cache member; the cache stays owned by this state.
DBInstanceCache *DuckDBPyModuleState::GetInstanceCache() {
return &instance_cache;
}

} // namespace duckdb
Loading
Loading