From 5496746f1aff29c3109cd251cf51b40575fd9ee1 Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Sun, 28 Sep 2025 18:20:06 +0800 Subject: [PATCH 01/22] chore: add ChunkCollectorOutputFormat.cpp --- programs/local/ChunkCollectorOutputFormat.cpp | 49 +++++++++++++++++++ programs/local/ChunkCollectorOutputFormat.h | 44 +++++++++++++++++ 2 files changed, 93 insertions(+) create mode 100644 programs/local/ChunkCollectorOutputFormat.cpp create mode 100644 programs/local/ChunkCollectorOutputFormat.h diff --git a/programs/local/ChunkCollectorOutputFormat.cpp b/programs/local/ChunkCollectorOutputFormat.cpp new file mode 100644 index 00000000000..3e4451dc09d --- /dev/null +++ b/programs/local/ChunkCollectorOutputFormat.cpp @@ -0,0 +1,49 @@ +#include "ChunkCollectorOutputFormat.h" +#include "PandasDataFrameBuilder.h" + +#include +#include +#include + +namespace DB +{ + +NullWriteBuffer ChunkCollectorOutputFormat::out; + +ChunkCollectorOutputFormat::ChunkCollectorOutputFormat( + const Block & header, + PandasDataFrameBuilder & builder) + : IOutputFormat(header, out) + , dataframe_builder(builder) +{} + +void ChunkCollectorOutputFormat::consume(Chunk chunk) +{ + chunks.emplace_back(std::move(chunk)); +} + +void ChunkCollectorOutputFormat::consumeTotals(Chunk totals) +{ + chunks.emplace_back(std::move(totals)); +} + +void ChunkCollectorOutputFormat::consumeExtremes(Chunk extremes) +{ + chunks.emplace_back(std::move(extremes)); +} + +void ChunkCollectorOutputFormat::finalizeImpl() +{ + // Add all collected chunks to the builder + for (const auto & chunk : chunks) + { + dataframe_builder.addChunk(chunk); + } + + // Finalize the DataFrame generation + dataframe_builder.finalize(); + + chunks.clear(); +} + +} diff --git a/programs/local/ChunkCollectorOutputFormat.h b/programs/local/ChunkCollectorOutputFormat.h new file mode 100644 index 00000000000..707b2ae6bb3 --- /dev/null +++ b/programs/local/ChunkCollectorOutputFormat.h @@ -0,0 +1,44 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class NullWriteBuffer; +class PandasDataFrameBuilder; + +/// OutputFormat that collects all chunks into memory for further processing +/// Does not write to WriteBuffer, instead accumulates data for conversion to pandas DataFrame objects +class ChunkCollectorOutputFormat : public IOutputFormat +{ +public: + ChunkCollectorOutputFormat(const Block & header, PandasDataFrameBuilder & builder); + + String getName() const override { return "ChunkCollectorOutputFormat"; } + + void onCancel() noexcept override + { + chunks.clear(); + } + +protected: + void consume(Chunk chunk) override; + + void consumeTotals(Chunk totals) override; + + void consumeExtremes(Chunk extremes) override; + + void finalizeImpl() override; + +private: + std::vector chunks; + + PandasDataFrameBuilder & dataframe_builder; + + /// Is not used. 
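+    /// IOutputFormat's constructor requires a WriteBuffer, so a shared
+    /// NullWriteBuffer is supplied; collected rows stay in `chunks` instead.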
+ static NullWriteBuffer out; +}; + +} From bd998e2b988d014c388daeba3e3f98d16cb04398 Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Mon, 29 Sep 2025 15:57:24 +0800 Subject: [PATCH 02/22] chore: adjust Python-C interface to support direct DataFrame return --- chdb/__init__.py | 30 +-------- chdb/state/sqlitelike.py | 44 +------------ programs/local/CMakeLists.txt | 2 + programs/local/ChunkCollectorOutputFormat.cpp | 37 +++++++++++ programs/local/ChunkCollectorOutputFormat.h | 12 ++++ programs/local/LocalChdb.cpp | 63 ++++++++++++++----- programs/local/LocalChdb.h | 2 +- programs/local/LocalServer.cpp | 4 +- programs/local/QueryResult.h | 3 + src/Client/ClientBase.cpp | 31 +++++++++ src/Client/ClientBase.h | 13 ++++ 11 files changed, 151 insertions(+), 90 deletions(-) diff --git a/chdb/__init__.py b/chdb/__init__.py index 0674a46927c..2ad5d96d720 100644 --- a/chdb/__init__.py +++ b/chdb/__init__.py @@ -38,9 +38,8 @@ class ChdbError(Exception): """ -_arrow_format = set({"dataframe", "arrowtable"}) +_arrow_format = set({"arrowtable"}) _process_result_format_funs = { - "dataframe": lambda x: to_df(x), "arrowtable": lambda x: to_arrowTable(x), } @@ -106,33 +105,6 @@ def to_arrowTable(res): return pa.RecordBatchFileReader(res.bytes()).read_all() -# return pandas dataframe -def to_df(r): - """Convert query result to pandas DataFrame. - - Converts a chDB query result to a pandas DataFrame by first converting to - PyArrow Table and then to pandas using multi-threading for better performance. - - Args: - r: chDB query result object containing binary Arrow data - - Returns: - pd.DataFrame: pandas DataFrame containing the query results - - Raises: - ImportError: If pyarrow or pandas are not installed - - Example: - >>> result = chdb.query("SELECT 1 as id, 'hello' as msg", "Arrow") - >>> df = chdb.to_df(result) - >>> print(df) - id msg - 0 1 hello - """ - t = to_arrowTable(r) - return t.to_pandas(use_threads=True) - - # global connection lock, for multi-threading use of legacy chdb.query() g_conn_lock = threading.Lock() diff --git a/chdb/state/sqlitelike.py b/chdb/state/sqlitelike.py index 7694cb42ece..e9016a8417c 100644 --- a/chdb/state/sqlitelike.py +++ b/chdb/state/sqlitelike.py @@ -10,9 +10,8 @@ raise ImportError("Failed to import pyarrow") from None -_arrow_format = set({"dataframe", "arrowtable"}) +_arrow_format = set({"arrowtable"}) _process_result_format_funs = { - "dataframe": lambda x: to_df(x), "arrowtable": lambda x: to_arrowTable(x), } @@ -65,47 +64,6 @@ def to_arrowTable(res): return pa.RecordBatchFileReader(res.bytes()).read_all() -# return pandas dataframe -def to_df(r): - """Convert query result to Pandas DataFrame. - - This function converts chdb query results to a Pandas DataFrame format - by first converting to PyArrow Table and then to DataFrame. This provides - convenient data analysis capabilities with Pandas API. - - Args: - r: Query result object from chdb containing Arrow format data - - Returns: - pandas.DataFrame: DataFrame containing the query results with - appropriate column names and data types - - Raises: - ImportError: If pyarrow or pandas packages are not installed - - .. note:: - This function uses multi-threading for the Arrow to Pandas conversion - to improve performance on large datasets. - - .. 
seealso:: - :func:`to_arrowTable` - For PyArrow Table format conversion - - Examples: - >>> import chdb - >>> result = chdb.query("SELECT 1 as num, 'hello' as text", "Arrow") - >>> df = to_df(result) - >>> print(df) - num text - 0 1 hello - >>> print(df.dtypes) - num int64 - text object - dtype: object - """ - t = to_arrowTable(r) - return t.to_pandas(use_threads=True) - - class StreamingResult: def __init__(self, c_result, conn, result_func, supports_record_batch): self._result = c_result diff --git a/programs/local/CMakeLists.txt b/programs/local/CMakeLists.txt index 83095fe2dd0..614d70f1cbc 100644 --- a/programs/local/CMakeLists.txt +++ b/programs/local/CMakeLists.txt @@ -12,6 +12,7 @@ endif() if (USE_PYTHON) set (CHDB_SOURCES chdb.cpp + ChunkCollectorOutputFormat.cpp FormatHelper.cpp ListScan.cpp LocalChdb.cpp @@ -19,6 +20,7 @@ if (USE_PYTHON) NumpyType.cpp PandasAnalyzer.cpp PandasDataFrame.cpp + PandasDataFrameBuilder.cpp PandasScan.cpp PybindWrapper.cpp PythonConversion.cpp diff --git a/programs/local/ChunkCollectorOutputFormat.cpp b/programs/local/ChunkCollectorOutputFormat.cpp index 3e4451dc09d..38d31883a60 100644 --- a/programs/local/ChunkCollectorOutputFormat.cpp +++ b/programs/local/ChunkCollectorOutputFormat.cpp @@ -3,6 +3,7 @@ #include #include +#include #include namespace DB @@ -46,4 +47,40 @@ void ChunkCollectorOutputFormat::finalizeImpl() chunks.clear(); } +/// Global dataframe builder +static std::unique_ptr g_dataframe_builder = nullptr; + +PandasDataFrameBuilder * getGlobalDataFrameBuilder() +{ + return g_dataframe_builder.get(); +} + +void setGlobalDataFrameBuilder(std::unique_ptr builder) +{ + g_dataframe_builder = std::move(builder); +} + +void resetGlobalDataFrameBuilder() +{ + g_dataframe_builder.reset(); +} + +/// create ChunkCollectorOutputFormat for use with function pointer +std::shared_ptr createDataFrameOutputFormat(const Block & header) +{ + /// Create a PandasDataFrameBuilder and set it globally + auto dataframe_builder = std::make_unique(header); + PandasDataFrameBuilder * builder_ptr = dataframe_builder.get(); + setGlobalDataFrameBuilder(std::move(dataframe_builder)); + + /// Create and return the format with the builder + return std::make_shared(header, *builder_ptr); +} + +/// Registration function to be called during initialization +void registerDataFrameOutputFormat() +{ + ClientBase::setDataFrameFormatCreator(&createDataFrameOutputFormat); +} + } diff --git a/programs/local/ChunkCollectorOutputFormat.h b/programs/local/ChunkCollectorOutputFormat.h index 707b2ae6bb3..8c588cd9711 100644 --- a/programs/local/ChunkCollectorOutputFormat.h +++ b/programs/local/ChunkCollectorOutputFormat.h @@ -41,4 +41,16 @@ class ChunkCollectorOutputFormat : public IOutputFormat static NullWriteBuffer out; }; +/// Registration function to be called during initialization +void registerDataFrameOutputFormat(); + +/// Get the global dataframe builder +PandasDataFrameBuilder * getGlobalDataFrameBuilder(); + +/// Set the global dataframe builder +void setGlobalDataFrameBuilder(std::unique_ptr builder); + +/// Reset the global dataframe builder +void resetGlobalDataFrameBuilder(); + } diff --git a/programs/local/LocalChdb.cpp b/programs/local/LocalChdb.cpp index 7d430f75bbf..e40346c4ddb 100644 --- a/programs/local/LocalChdb.cpp +++ b/programs/local/LocalChdb.cpp @@ -1,14 +1,13 @@ #include "LocalChdb.h" -#include +#include "chdb-internal.h" +#include "ChunkCollectorOutputFormat.h" +#include "PandasDataFrameBuilder.h" #include "PythonImporter.h" #include "PythonTableCache.h" 
#include "StoragePython.h" -#include "chdb-internal.h" -#include "chdb.h" #include #include - #include namespace py = pybind11; @@ -76,13 +75,31 @@ chdb_result * queryToBuffer( // Pybind11 will take over the ownership of the `query_result` object // using smart ptr will cause early free of the object -query_result * query( +py::object query( const std::string & queryStr, const std::string & output_format = "CSV", const std::string & path = {}, const std::string & udfPath = {}) { - return new query_result(queryToBuffer(queryStr, output_format, path, udfPath)); + auto * result = queryToBuffer(queryStr, output_format, path, udfPath); + + if (output_format == "dataframe") + { + chdb_destroy_query_result(result); + + auto * builder = DB::getGlobalDataFrameBuilder(); + if (builder && builder->hasData()) + { + return builder->getDataFrame(); + } + else + { + throw std::runtime_error("DataFrame not available - query may have failed"); + } + } + + // Default behavior - return query_result + return py::cast(new query_result(result)); } // The `query_result` and `memoryview_wrapper` will hold `local_result_wrapper` with shared_ptr @@ -260,17 +277,12 @@ void connection_wrapper::commit() // do nothing } -query_result * connection_wrapper::query(const std::string & query_str, const std::string & format) +py::object connection_wrapper::query(const std::string & query_str, const std::string & format) { CHDB::PythonTableCache::findQueryableObjFromQuery(query_str); py::gil_scoped_release release; auto * result = chdb_query_n(*conn, query_str.data(), query_str.size(), format.data(), format.size()); - if (chdb_result_length(result)) - { - LOG_DEBUG(getLogger("CHDB"), "Empty result returned for query: {}", query_str); - } - auto error_msg = CHDB::chdb_result_error_string(result); if (!error_msg.empty()) { @@ -278,7 +290,28 @@ query_result * connection_wrapper::query(const std::string & query_str, const st chdb_destroy_query_result(result); throw std::runtime_error(msg_copy); } - return new query_result(result, false); + + if (format == "dataframe") + { + chdb_destroy_query_result(result); + + auto * builder = DB::getGlobalDataFrameBuilder(); + if (builder && builder->hasData()) + { + return builder->getDataFrame(); + } + else + { + throw std::runtime_error("DataFrame not available - query may have failed"); + } + } + + if (chdb_result_length(result)) + { + LOG_DEBUG(getLogger("CHDB"), "Empty result returned for query: {}", query_str); + } + + return py::cast(new query_result(result, false)); } streaming_query_result * connection_wrapper::send_query(const std::string & query_str, const std::string & format) @@ -480,7 +513,7 @@ PYBIND11_MODULE(_chdb, m) &connection_wrapper::query, py::arg("query_str"), py::arg("format") = "CSV", - "Execute a query and return a query_result object") + "Execute a query and return a query_result object or DataFrame") .def( "send_query", &connection_wrapper::send_query, @@ -506,7 +539,7 @@ PYBIND11_MODULE(_chdb, m) py::kw_only(), py::arg("path") = "", py::arg("udf_path") = "", - "Query chDB and return a query_result object"); + "Query chDB and return a query_result object or DataFrame"); auto destroy_import_cache = []() { diff --git a/programs/local/LocalChdb.h b/programs/local/LocalChdb.h index 5cf30058135..076103cca19 100644 --- a/programs/local/LocalChdb.h +++ b/programs/local/LocalChdb.h @@ -30,7 +30,7 @@ class connection_wrapper cursor_wrapper * cursor(); void commit(); void close(); - query_result * query(const std::string & query_str, const std::string & format = "CSV"); + 
py::object query(const std::string & query_str, const std::string & format = "CSV"); streaming_query_result * send_query(const std::string & query_str, const std::string & format = "CSV"); query_result * streaming_fetch_result(streaming_query_result * streaming_result); void streaming_cancel_query(streaming_query_result * streaming_result); diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index edf3f67ad20..04023c82b2a 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -1,13 +1,12 @@ #include "LocalServer.h" - #include "chdb-internal.h" #if USE_PYTHON +#include "ChunkCollectorOutputFormat.h" #include "TableFunctionPython.h" #include #include #endif #include - #include #include #include @@ -656,6 +655,7 @@ try #if USE_PYTHON auto & storage_factory = StorageFactory::instance(); registerStoragePython(storage_factory); + registerDataFrameOutputFormat(); #endif registerDictionaries(); diff --git a/programs/local/QueryResult.h b/programs/local/QueryResult.h index ebd79ec042e..bbd924e3931 100644 --- a/programs/local/QueryResult.h +++ b/programs/local/QueryResult.h @@ -64,6 +64,9 @@ class MaterializedQueryResult : public QueryResult { String string() { + if (!result_buffer) + return {}; + return String(result_buffer->begin(), result_buffer->end()); } diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 0f1d0c3ae22..87f3c02d1be 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -138,6 +138,9 @@ namespace ErrorCodes extern const int CANNOT_WRITE_TO_FILE; } +/// Custom DataFrame format creator function pointer +static CustomOutputFormatCreator g_dataframe_format_creator = nullptr; + } namespace ProfileEvents @@ -615,6 +618,22 @@ try { if (!output_format) { +#if USE_PYTHON + if (default_output_format == "dataframe") + { + auto creator = getDataFrameFormatCreator(); + if (creator) + { + output_format = creator(block); + return; + } + else + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DataFrame output format creator not set"); + } + } +#endif + /// Ignore all results when fuzzing as they can be huge. if (query_fuzzer_runs) { @@ -3755,4 +3774,16 @@ void ClientBase::showClientVersion() output_stream << VERSION_NAME << " " + getName() + " version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl; } +#if USE_PYTHON +void ClientBase::setDataFrameFormatCreator(CustomOutputFormatCreator creator) +{ + g_dataframe_format_creator = std::move(creator); +} + +CustomOutputFormatCreator ClientBase::getDataFrameFormatCreator() +{ + return g_dataframe_format_creator; +} +#endif + } diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index cb3e0a939ac..4d1a5a897bd 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -88,6 +88,11 @@ struct StreamingQueryContext StreamingQueryContext() = default; }; +#if USE_PYTHON +/// Function pointer type for creating custom output formats (e.g. DataFrame) +using CustomOutputFormatCreator = std::function(const Block &)>; +#endif + /** * The base class which encapsulates the core functionality of a client. 
* Can be used in a standalone application (clickhouse-client or clickhouse-local), @@ -329,6 +334,14 @@ class ClientBase String appendSmileyIfNeeded(const String & prompt); +#if USE_PYTHON + /// Set custom DataFrame format creator + static void setDataFrameFormatCreator(CustomOutputFormatCreator creator); + + /// Get custom DataFrame format creator + static CustomOutputFormatCreator getDataFrameFormatCreator(); +#endif + /// Should be one of the first, to be destroyed the last, /// since other members can use them. /// This holder may not be initialized in case if we run the client in the embedded mode (SSH). From 4b0276b2ef3121967ea6aaf9fd21a9f5f6392672 Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Mon, 29 Sep 2025 22:18:32 +0800 Subject: [PATCH 03/22] chore: update NumpyType.cpp --- programs/local/NumpyType.cpp | 115 +++++++++++++++++++++++++++++++++++ programs/local/NumpyType.h | 1 + 2 files changed, 116 insertions(+) diff --git a/programs/local/NumpyType.cpp b/programs/local/NumpyType.cpp index cf92c8dece9..0112731e02a 100644 --- a/programs/local/NumpyType.cpp +++ b/programs/local/NumpyType.cpp @@ -231,4 +231,119 @@ std::shared_ptr NumpyToDataType(const NumpyType & col_type) } } +String DataTypeToNumpyTypeStr(const std::shared_ptr & data_type) +{ + if (!data_type) + return "object"; + + /// First, try to handle most types efficiently using getTypeId() + TypeIndex type_id = data_type->getTypeId(); + switch (type_id) + { + case TypeIndex::Int8: + return "int8"; + case TypeIndex::UInt8: + /// Special case: UInt8 could be Bool type, need to check getName() + { + const String & type_name = data_type->getName(); + return (type_name == "Bool") ? "bool" : "uint8"; + } + case TypeIndex::Int16: + return "int16"; + case TypeIndex::UInt16: + return "uint16"; + case TypeIndex::Int32: + return "int32"; + case TypeIndex::UInt32: + return "uint32"; + case TypeIndex::Int64: + return "int64"; + case TypeIndex::UInt64: + return "uint64"; + case TypeIndex::Float32: + return "float32"; + case TypeIndex::Float64: + return "float64"; + case TypeIndex::String: + case TypeIndex::FixedString: + return "object"; + case TypeIndex::DateTime: + return "datetime64[s]"; + case TypeIndex::DateTime64: + // DateTime64 needs precision info from the actual type + { + if (const auto * dt64 = typeid_cast(data_type.get())) + { + UInt32 scale = dt64->getScale(); + if (scale == 0) + return "datetime64[s]"; + else if (scale == 3) + return "datetime64[ms]"; + else if (scale == 6) + return "datetime64[us]"; + else if (scale == 9) + return "datetime64[ns]"; + else + return "datetime64[ns]"; // Default to nanoseconds + } + return "datetime64[ns]"; // Default fallback + } + case TypeIndex::Date: + case TypeIndex::Date32: + return "datetime64[D]"; + case TypeIndex::UUID: + case TypeIndex::IPv4: + case TypeIndex::IPv6: + return "object"; + case TypeIndex::Decimal32: + case TypeIndex::Decimal64: + case TypeIndex::Decimal128: + case TypeIndex::Decimal256: + return "float64"; // Decimals are converted to float64 + case TypeIndex::Array: + case TypeIndex::Tuple: + case TypeIndex::Map: + return "object"; + case TypeIndex::Nullable: + // Handle Nullable types - need to check inner type + { + const String & type_name = data_type->getName(); + if (startsWith(type_name, "Nullable(")) + { + // Extract the inner type from "Nullable(InnerType)" + size_t start = 9; // Length of "Nullable(" + size_t end = type_name.length() - 1; // Exclude the closing ")" + if (end > start) + { + String inner_type_name = type_name.substr(start, end - start); + 
// Nullable integers become float64 in pandas + if (inner_type_name == "Int64" || inner_type_name == "Int32" || + inner_type_name == "Int16" || inner_type_name == "Int8" || + inner_type_name == "UInt64" || inner_type_name == "UInt32" || + inner_type_name == "UInt16" || inner_type_name == "UInt8") + return "float64"; + else if (inner_type_name == "Float64") + return "float64"; + else if (inner_type_name == "Float32") + return "float32"; + else if (inner_type_name == "String") + return "object"; + } + } + return "object"; + } + default: + // For other complex types, fall back to getName() parsing + { + const String & type_name = data_type->getName(); + if (startsWith(type_name, "Array(") || startsWith(type_name, "Tuple(") || + startsWith(type_name, "Map(")) + return "object"; + + // Default fallback for unknown types + return "object"; + } + } +} + } // namespace CHDB diff --git a/programs/local/NumpyType.h b/programs/local/NumpyType.h index c58fee13768..91f0d3e3a85 100644 --- a/programs/local/NumpyType.h +++ b/programs/local/NumpyType.h @@ -49,5 +49,6 @@ enum class NumpyObjectType : uint8_t { NumpyType ConvertNumpyType(const py::handle & col_type); std::shared_ptr NumpyToDataType(const NumpyType & col_type); +String DataTypeToNumpyTypeStr(const std::shared_ptr & data_type); } // namespace CHDB From fbaf1c775439df6a37eb4c0e471c7108a4f636ad Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Wed, 29 Oct 2025 22:28:44 +0800 Subject: [PATCH 04/22] chore: add NumpyArray --- programs/local/LocalServer.cpp | 1 + programs/local/NumpyType.cpp | 111 +++++++++++++++++++++------------ 2 files changed, 73 insertions(+), 39 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 297f33d8040..32dac7fa278 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -1,6 +1,7 @@ #include "LocalServer.h" #include "chdb-internal.h" #if USE_PYTHON +#include "ChunkCollectorOutputFormat.h" #include "StoragePython.h" #include "TableFunctionPython.h" #else diff --git a/programs/local/NumpyType.cpp b/programs/local/NumpyType.cpp index 0112731e02a..98fb1bf76a0 100644 --- a/programs/local/NumpyType.cpp +++ b/programs/local/NumpyType.cpp @@ -1,11 +1,12 @@ #include "NumpyType.h" -#include #include #include +#include #include #include #include +#include using namespace DB; @@ -231,7 +232,7 @@ std::shared_ptr NumpyToDataType(const NumpyType & col_type) } } -String DataTypeToNumpyTypeStr(const std::shared_ptr & data_type) +String DataTypeToNumpyTypeStr(const std::shared_ptr & data_type) { if (!data_type) return "object"; @@ -270,79 +271,111 @@ String DataTypeToNumpyTypeStr(const std::shared_ptr & data_type) case TypeIndex::DateTime: return "datetime64[s]"; case TypeIndex::DateTime64: - // DateTime64 needs precision info from the actual type { if (const auto * dt64 = typeid_cast(data_type.get())) { UInt32 scale = dt64->getScale(); if (scale == 0) return "datetime64[s]"; - else if (scale == 3) + else if (scale == 3) return "datetime64[ms]"; else if (scale == 6) return "datetime64[us]"; else if (scale == 9) return "datetime64[ns]"; else - return "datetime64[ns]"; // Default to nanoseconds + return "datetime64[ns]"; } - return "datetime64[ns]"; // Default fallback + return "datetime64[ns]"; } case TypeIndex::Date: case TypeIndex::Date32: return "datetime64[D]"; + case TypeIndex::Time: + return "timedelta64[s]"; + case TypeIndex::Time64: + { + if (const auto * time64 = typeid_cast(data_type.get())) + { + UInt32 scale = time64->getScale(); + if (scale == 0) + return 
"timedelta64[s]"; + else if (scale == 3) + return "timedelta64[ms]"; + else if (scale == 6) + return "timedelta64[us]"; + else if (scale == 9) + return "timedelta64[ns]"; + else + return "timedelta64[ns]"; + } + return "timedelta64[ns]"; + } + case TypeIndex::Interval: + { + if (const auto * interval = typeid_cast(data_type.get())) + { + IntervalKind kind = interval->getKind(); + switch (kind.kind) + { + case IntervalKind::Kind::Nanosecond: + return "timedelta64[ns]"; + case IntervalKind::Kind::Microsecond: + return "timedelta64[us]"; + case IntervalKind::Kind::Millisecond: + return "timedelta64[ms]"; + case IntervalKind::Kind::Second: + return "timedelta64[s]"; + case IntervalKind::Kind::Minute: + return "timedelta64[m]"; + case IntervalKind::Kind::Hour: + return "timedelta64[h]"; + case IntervalKind::Kind::Day: + return "timedelta64[D]"; + case IntervalKind::Kind::Week: + return "timedelta64[W]"; + case IntervalKind::Kind::Month: + return "timedelta64[M]"; + case IntervalKind::Kind::Quarter: + return "object"; + case IntervalKind::Kind::Year: + return "timedelta64[Y]"; + default: + return "timedelta64[s]"; + } + } + return "timedelta64[s]"; + } + case TypeIndex::UUID: case TypeIndex::IPv4: case TypeIndex::IPv6: return "object"; + case TypeIndex::BFloat16: case TypeIndex::Decimal32: case TypeIndex::Decimal64: case TypeIndex::Decimal128: case TypeIndex::Decimal256: - return "float64"; // Decimals are converted to float64 + return "object"; case TypeIndex::Array: case TypeIndex::Tuple: case TypeIndex::Map: + case TypeIndex::Set: + case TypeIndex::Dynamic: + case TypeIndex::Variant: + case TypeIndex::Object: return "object"; case TypeIndex::Nullable: - // Handle Nullable types - need to check inner type { - const String & type_name = data_type->getName(); - if (startsWith(type_name, "Nullable(")) + if (const auto * nullable = typeid_cast(data_type.get())) { - // Extract the inner type from "Nullable(InnerType)" - size_t start = 9; // Length of "Nullable(" - size_t end = type_name.length() - 1; // Exclude the closing ")" - if (end > start) - { - String inner_type_name = type_name.substr(start, end - start); - // Nullable integers become float64 in pandas - if (inner_type_name == "Int64" || inner_type_name == "Int32" || - inner_type_name == "Int16" || inner_type_name == "Int8" || - inner_type_name == "UInt64" || inner_type_name == "UInt32" || - inner_type_name == "UInt16" || inner_type_name == "UInt8") - return "float64"; - else if (inner_type_name == "Float64") - return "float64"; - else if (inner_type_name == "Float32") - return "float32"; - else if (inner_type_name == "String") - return "object"; - } + return DataTypeToNumpyTypeStr(nullable->getNestedType()); } return "object"; } default: - // For other complex types, fall back to getName() parsing - { - const String & type_name = data_type->getName(); - if (startsWith(type_name, "Array(") || startsWith(type_name, "Tuple(") || - startsWith(type_name, "Map(")) - return "object"; - - // Default fallback for unknown types - return "object"; - } + return "object"; + } } } From 8391e2fcc31b472c1157a37f02da0c63ae220971 Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Thu, 30 Oct 2025 01:45:02 +0800 Subject: [PATCH 05/22] chore: update NumpyArray --- programs/local/NumpyArray.cpp | 231 +++++++++++++++++ programs/local/NumpyArray.h | 45 ++++ programs/local/NumpyCacheItem.h | 66 +++++ programs/local/NumpyType.cpp | 286 +++++++++++++--------- programs/local/NumpyType.h | 4 + programs/local/PandasAnalyzer.cpp | 2 +- 
 programs/local/PandasDataFrameBuilder.cpp | 112 +++++++++
 programs/local/PandasDataFrameBuilder.h | 46 ++++
 programs/local/PythonImportCache.h | 2 +
 9 files changed, 671 insertions(+), 123 deletions(-)
 create mode 100644 programs/local/NumpyArray.cpp
 create mode 100644 programs/local/NumpyArray.h
 create mode 100644 programs/local/NumpyCacheItem.h
 create mode 100644 programs/local/PandasDataFrameBuilder.cpp
 create mode 100644 programs/local/PandasDataFrameBuilder.h

diff --git a/programs/local/NumpyArray.cpp b/programs/local/NumpyArray.cpp
new file mode 100644
index 00000000000..35440e5b03d
--- /dev/null
+++ b/programs/local/NumpyArray.cpp
@@ -0,0 +1,231 @@
+#include "NumpyArray.h"
+#include "NumpyType.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int NOT_IMPLEMENTED;
+}
+
+}
+
+using namespace DB;
+
+namespace CHDB
+{
+
+struct NumpyAppendData
+{
+public:
+    explicit NumpyAppendData(const IColumn & column)
+        : column(column)
+    {
+    }
+
+    const IColumn & column;
+
+    size_t count;
+    size_t dest_offset;
+    UInt8 * target_data;
+    bool * target_mask;
+};
+
+struct RegularConvert
+{
+    template
+    static NUMPYTYPE convertValue(CHTYPE val, NumpyAppendData & append_data)
+    {
+        (void)append_data;
+        return (NUMPYTYPE)val;
+    }
+
+    template
+    static NUMPYTYPE nullValue(bool & set_mask)
+    {
+        set_mask = true;
+        return 0;
+    }
+};
+
+template
+static bool TransformColumn(NumpyAppendData & append_data)
+{
+    bool has_null = false;
+    const IColumn * data_column = &append_data.column;
+    const ColumnNullable * nullable_column = nullptr;
+
+    /// Check if column is nullable
+    if (const auto * nullable = typeid_cast(&append_data.column))
+    {
+        nullable_column = nullable;
+        data_column = &nullable->getNestedColumn();
+    }
+
+    const auto * src_ptr = static_cast(data_column)->getRawDataBegin();
+    auto * dest_ptr = reinterpret_cast(append_data.target_data);
+    auto * mask_ptr = append_data.target_mask;
+
+    for (size_t i = 0; i < append_data.count; i++) {
+        size_t offset = append_data.dest_offset + i;
+        if (nullable_column && nullable_column->isNullAt(i)) {
+            dest_ptr[offset] = CONVERT::template nullValue(mask_ptr[offset]);
+            has_null = has_null || mask_ptr[offset];
+        } else {
+            dest_ptr[offset] = CONVERT::template convertValue(src_ptr[i], append_data);
+            mask_ptr[offset] = false;
+        }
+    }
+    return has_null;
+}
+
+template
+static bool CHColumnToNumpyArray(NumpyAppendData & append_data)
+{
+    return TransformColumn(append_data);
+}
+
+InternalNumpyArray::InternalNumpyArray(const DataTypePtr & type_)
+    : data(nullptr)
+    , type(type_)
+    , count(0)
+{
+}
+
+void InternalNumpyArray::init(size_t capacity)
+{
+    String type_str = DataTypeToNumpyTypeStr(type);
+
+    array = py::array(py::dtype(type_str), capacity);
+    data = reinterpret_cast(array.mutable_data());
+}
+
+void InternalNumpyArray::resize(size_t capacity)
+{
+    std::vector new_shape {py::ssize_t(capacity)};
+
+    array.resize(new_shape, false);
+    data = reinterpret_cast(array.mutable_data());
+}
+
+NumpyArray::NumpyArray(const DataTypePtr & type_)
+    : have_null(false)
+{
+    data_array = std::make_unique(type_);
+    mask_array = std::make_unique(DataTypeFactory::instance().get("Bool"));
+}
+
+void NumpyArray::init(size_t capacity)
+{
+    data_array->init(capacity);
+    mask_array->init(capacity);
+}
+
+void NumpyArray::resize(size_t capacity)
+{
+    data_array->resize(capacity);
+    mask_array->resize(capacity);
+}
+
+void NumpyArray::append(const ColumnPtr & column)
+{
+    chassert(data_array);
+    chassert(mask_array);
+
+    auto * data_ptr = data_array->data;
+    auto * mask_ptr = reinterpret_cast(mask_array->data);
+    chassert(data_ptr);
+    chassert(mask_ptr);
+    chassert(column->getDataType() == data_array->type->getColumnType());
+
+    size_t size = column->size();
+    data_array->count += size;
+    mask_array->count += size;
+    bool may_have_null = false;
+
+    NumpyAppendData append_data(*column);
+    append_data.count = size;
+    append_data.target_data = data_ptr;
+    append_data.target_mask = mask_ptr;
+    append_data.dest_offset = data_array->count - size;
+
+    switch (data_array->type->getTypeId())
+    {
+        case TypeIndex::Int8:
+            may_have_null = CHColumnToNumpyArray(append_data);
+            break;
+        case TypeIndex::UInt8:
+        {
+            const String & type_name = data_array->type->getName();
+            if (type_name == "Bool")
+            {
+                may_have_null = CHColumnToNumpyArray(append_data);
+            }
+            else
+            {
+                may_have_null = CHColumnToNumpyArray(append_data);
+            }
+        }
+        break;
+        case TypeIndex::Int16:
+            may_have_null = CHColumnToNumpyArray(append_data);
+            break;
+        case TypeIndex::UInt16:
+            may_have_null = CHColumnToNumpyArray(append_data);
+            break;
+        case TypeIndex::Int32:
+            may_have_null = CHColumnToNumpyArray(append_data);
+            break;
+        case TypeIndex::UInt32:
+            may_have_null = CHColumnToNumpyArray(append_data);
+            break;
+        case TypeIndex::Int64:
+            may_have_null = CHColumnToNumpyArray(append_data);
+            break;
+        case TypeIndex::UInt64:
+            may_have_null = CHColumnToNumpyArray(append_data);
+            break;
+        case TypeIndex::Float32:
+            may_have_null = CHColumnToNumpyArray(append_data);
+            break;
+        case TypeIndex::Float64:
+            may_have_null = CHColumnToNumpyArray(append_data);
+            break;
+        default:
+            throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported type {}", data_array->type->getName());
+    }
+
+    if (may_have_null)
+    {
+        have_null = true;
+    }
+}
+
+py::object NumpyArray::toArray() const
+{
+    chassert(data_array && mask_array);
+
+    data_array->resize(data_array->count);
+    if (!have_null)
+    {
+        return std::move(data_array->array);
+    }
+
+    mask_array->resize(mask_array->count);
+    auto data_values = std::move(data_array->array);
+    auto null_values = std::move(mask_array->array);
+
+    auto masked_array = py::module::import("numpy.ma").attr("masked_array")(data_values, null_values);
+    return masked_array;
+}
+
+} // namespace CHDB
diff --git a/programs/local/NumpyArray.h b/programs/local/NumpyArray.h
new file mode 100644
index 00000000000..7927faf1ec0
--- /dev/null
+++ b/programs/local/NumpyArray.h
@@ -0,0 +1,45 @@
+#pragma once
+
+#include "PybindWrapper.h"
+
+#include
+#include
+#include
+
+namespace CHDB
+{
+
+class InternalNumpyArray
+{
+public:
+    explicit InternalNumpyArray(const DB::DataTypePtr & type);
+
+    void init(size_t capacity);
+
+    void resize(size_t capacity);
+
+    py::array array;
+    UInt8 * data;
+    DB::DataTypePtr type;
+    size_t count;
+};
+
+class NumpyArray {
+public:
+    explicit NumpyArray(const DB::DataTypePtr & type_);
+
+    void init(size_t capacity);
+
+    void resize(size_t capacity);
+
+    void append(const DB::ColumnPtr & column);
+
+    py::object toArray() const;
+
+private:
+    bool have_null;
+    std::unique_ptr data_array;
+    std::unique_ptr mask_array;
+};
+
+} // namespace CHDB
diff --git a/programs/local/NumpyCacheItem.h b/programs/local/NumpyCacheItem.h
new file mode 100644
index 00000000000..5d75cc5ed0a
--- /dev/null
+++ b/programs/local/NumpyCacheItem.h
@@ -0,0 +1,66 @@
+#pragma once
+
+#include "PythonImportCacheItem.h"
+
+namespace CHDB {
+
+struct NumpyMaCacheItem : public PythonImportCacheItem
+{
+public:
+    
NumpyMaCacheItem(PythonImportCacheItem * parent) + : PythonImportCacheItem("ma", parent), masked("masked", this), masked_array("masked_array", this) { + } + ~NumpyMaCacheItem() override = default; + + PythonImportCacheItem masked; + PythonImportCacheItem masked_array; +}; + +struct NumpyCacheItem : public PythonImportCacheItem +{ +public: + static constexpr const char * Name = "numpy"; + + NumpyCacheItem() + : PythonImportCacheItem("numpy"), ma(this), ndarray("ndarray", this), datetime64("datetime64", this), + generic("generic", this), int64("int64", this), bool_("bool_", this), byte("byte", this), + ubyte("ubyte", this), short_("short", this), ushort_("ushort", this), intc("intc", this), + uintc("uintc", this), int_("int_", this), uint("uint", this), longlong("longlong", this), + ulonglong("ulonglong", this), half("half", this), float16("float16", this), single("single", this), + longdouble("longdouble", this), csingle("csingle", this), cdouble("cdouble", this), + clongdouble("clongdouble", this) { + } + ~NumpyCacheItem() override = default; + + NumpyMaCacheItem ma; + PythonImportCacheItem ndarray; + PythonImportCacheItem datetime64; + PythonImportCacheItem generic; + PythonImportCacheItem int64; + PythonImportCacheItem bool_; + PythonImportCacheItem byte; + PythonImportCacheItem ubyte; + PythonImportCacheItem short_; + PythonImportCacheItem ushort_; + PythonImportCacheItem intc; + PythonImportCacheItem uintc; + PythonImportCacheItem int_; + PythonImportCacheItem uint; + PythonImportCacheItem longlong; + PythonImportCacheItem ulonglong; + PythonImportCacheItem half; + PythonImportCacheItem float16; + PythonImportCacheItem single; + PythonImportCacheItem longdouble; + PythonImportCacheItem csingle; + PythonImportCacheItem cdouble; + PythonImportCacheItem clongdouble; + +protected: + bool IsRequired() const override final + { + return false; + } +}; + +} // namespace CHDB diff --git a/programs/local/NumpyType.cpp b/programs/local/NumpyType.cpp index 98fb1bf76a0..cf487e6d644 100644 --- a/programs/local/NumpyType.cpp +++ b/programs/local/NumpyType.cpp @@ -1,4 +1,5 @@ #include "NumpyType.h" +#include "PythonImporter.h" #include #include @@ -237,146 +238,187 @@ String DataTypeToNumpyTypeStr(const std::shared_ptr & data_type if (!data_type) return "object"; - /// First, try to handle most types efficiently using getTypeId() TypeIndex type_id = data_type->getTypeId(); switch (type_id) { - case TypeIndex::Int8: - return "int8"; - case TypeIndex::UInt8: - /// Special case: UInt8 could be Bool type, need to check getName() + case TypeIndex::Int8: + return "int8"; + case TypeIndex::UInt8: + /// Special case: UInt8 could be Bool type, need to check getName() + { + const String & type_name = data_type->getName(); + return (type_name == "Bool") ? "bool" : "uint8"; + } + case TypeIndex::Int16: + return "int16"; + case TypeIndex::UInt16: + return "uint16"; + case TypeIndex::Int32: + return "int32"; + case TypeIndex::UInt32: + return "uint32"; + case TypeIndex::Int64: + return "int64"; + case TypeIndex::UInt64: + return "uint64"; + case TypeIndex::Float32: + return "float32"; + case TypeIndex::Float64: + return "float64"; + case TypeIndex::String: + case TypeIndex::FixedString: + return "object"; + case TypeIndex::DateTime: + return "datetime64[s]"; + case TypeIndex::DateTime64: + { + if (const auto * dt64 = typeid_cast(data_type.get())) { - const String & type_name = data_type->getName(); - return (type_name == "Bool") ? 
"bool" : "uint8"; + UInt32 scale = dt64->getScale(); + if (scale == 0) + return "datetime64[s]"; + else if (scale == 3) + return "datetime64[ms]"; + else if (scale == 6) + return "datetime64[us]"; + else if (scale == 9) + return "datetime64[ns]"; + else + return "datetime64[ns]"; } - case TypeIndex::Int16: - return "int16"; - case TypeIndex::UInt16: - return "uint16"; - case TypeIndex::Int32: - return "int32"; - case TypeIndex::UInt32: - return "uint32"; - case TypeIndex::Int64: - return "int64"; - case TypeIndex::UInt64: - return "uint64"; - case TypeIndex::Float32: - return "float32"; - case TypeIndex::Float64: - return "float64"; - case TypeIndex::String: - case TypeIndex::FixedString: - return "object"; - case TypeIndex::DateTime: - return "datetime64[s]"; - case TypeIndex::DateTime64: + return "datetime64[ns]"; + } + case TypeIndex::Date: + case TypeIndex::Date32: + return "datetime64[D]"; + case TypeIndex::Time: + return "timedelta64[s]"; + case TypeIndex::Time64: + { + if (const auto * time64 = typeid_cast(data_type.get())) { - if (const auto * dt64 = typeid_cast(data_type.get())) - { - UInt32 scale = dt64->getScale(); - if (scale == 0) - return "datetime64[s]"; - else if (scale == 3) - return "datetime64[ms]"; - else if (scale == 6) - return "datetime64[us]"; - else if (scale == 9) - return "datetime64[ns]"; - else - return "datetime64[ns]"; - } - return "datetime64[ns]"; + UInt32 scale = time64->getScale(); + if (scale == 0) + return "timedelta64[s]"; + else if (scale == 3) + return "timedelta64[ms]"; + else if (scale == 6) + return "timedelta64[us]"; + else if (scale == 9) + return "timedelta64[ns]"; + else + return "timedelta64[ns]"; } - case TypeIndex::Date: - case TypeIndex::Date32: - return "datetime64[D]"; - case TypeIndex::Time: - return "timedelta64[s]"; - case TypeIndex::Time64: + return "timedelta64[ns]"; + } + case TypeIndex::Interval: + { + if (const auto * interval = typeid_cast(data_type.get())) { - if (const auto * time64 = typeid_cast(data_type.get())) + IntervalKind kind = interval->getKind(); + switch (kind.kind) { - UInt32 scale = time64->getScale(); - if (scale == 0) - return "timedelta64[s]"; - else if (scale == 3) - return "timedelta64[ms]"; - else if (scale == 6) - return "timedelta64[us]"; - else if (scale == 9) - return "timedelta64[ns]"; - else + case IntervalKind::Kind::Nanosecond: return "timedelta64[ns]"; + case IntervalKind::Kind::Microsecond: + return "timedelta64[us]"; + case IntervalKind::Kind::Millisecond: + return "timedelta64[ms]"; + case IntervalKind::Kind::Second: + return "timedelta64[s]"; + case IntervalKind::Kind::Minute: + return "timedelta64[m]"; + case IntervalKind::Kind::Hour: + return "timedelta64[h]"; + case IntervalKind::Kind::Day: + return "timedelta64[D]"; + case IntervalKind::Kind::Week: + return "timedelta64[W]"; + case IntervalKind::Kind::Month: + return "timedelta64[M]"; + case IntervalKind::Kind::Quarter: + return "object"; + case IntervalKind::Kind::Year: + return "timedelta64[Y]"; + default: + return "timedelta64[s]"; } - return "timedelta64[ns]"; - } - case TypeIndex::Interval: - { - if (const auto * interval = typeid_cast(data_type.get())) - { - IntervalKind kind = interval->getKind(); - switch (kind.kind) - { - case IntervalKind::Kind::Nanosecond: - return "timedelta64[ns]"; - case IntervalKind::Kind::Microsecond: - return "timedelta64[us]"; - case IntervalKind::Kind::Millisecond: - return "timedelta64[ms]"; - case IntervalKind::Kind::Second: - return "timedelta64[s]"; - case IntervalKind::Kind::Minute: - return 
"timedelta64[m]"; - case IntervalKind::Kind::Hour: - return "timedelta64[h]"; - case IntervalKind::Kind::Day: - return "timedelta64[D]"; - case IntervalKind::Kind::Week: - return "timedelta64[W]"; - case IntervalKind::Kind::Month: - return "timedelta64[M]"; - case IntervalKind::Kind::Quarter: - return "object"; - case IntervalKind::Kind::Year: - return "timedelta64[Y]"; - default: - return "timedelta64[s]"; - } - } - return "timedelta64[s]"; } + return "timedelta64[s]"; + } - case TypeIndex::UUID: - case TypeIndex::IPv4: - case TypeIndex::IPv6: - return "object"; - case TypeIndex::BFloat16: - case TypeIndex::Decimal32: - case TypeIndex::Decimal64: - case TypeIndex::Decimal128: - case TypeIndex::Decimal256: - return "object"; - case TypeIndex::Array: - case TypeIndex::Tuple: - case TypeIndex::Map: - case TypeIndex::Set: - case TypeIndex::Dynamic: - case TypeIndex::Variant: - case TypeIndex::Object: - return "object"; - case TypeIndex::Nullable: + case TypeIndex::UUID: + case TypeIndex::IPv4: + case TypeIndex::IPv6: + return "object"; + case TypeIndex::BFloat16: + case TypeIndex::Decimal32: + case TypeIndex::Decimal64: + case TypeIndex::Decimal128: + case TypeIndex::Decimal256: + return "object"; + case TypeIndex::Array: + case TypeIndex::Tuple: + case TypeIndex::Map: + case TypeIndex::Set: + case TypeIndex::Dynamic: + case TypeIndex::Variant: + case TypeIndex::Object: + return "object"; + case TypeIndex::Nullable: + { + if (const auto * nullable = typeid_cast(data_type.get())) { - if (const auto * nullable = typeid_cast(data_type.get())) - { - return DataTypeToNumpyTypeStr(nullable->getNestedType()); - } - return "object"; + return DataTypeToNumpyTypeStr(nullable->getNestedType()); } - default: return "object"; } + default: + return "object"; } } +py::object ConvertNumpyDtype(const py::handle & numpy_array) +{ + chassert(py::gil_check()); + + auto & import_cache = PythonImporter::ImportCache(); + + auto dtype = numpy_array.attr("dtype"); + if (!py::isinstance(numpy_array, import_cache.numpy.ma.masked_array())) + { + return dtype; + } + + auto numpy_type = ConvertNumpyType(dtype); + switch (numpy_type.type) + { + case NumpyNullableType::BOOL: + return import_cache.pandas.BooleanDtype()(); + case NumpyNullableType::UINT_8: + return import_cache.pandas.UInt8Dtype()(); + case NumpyNullableType::UINT_16: + return import_cache.pandas.UInt16Dtype()(); + case NumpyNullableType::UINT_32: + return import_cache.pandas.UInt32Dtype()(); + case NumpyNullableType::UINT_64: + return import_cache.pandas.UInt64Dtype()(); + case NumpyNullableType::INT_8: + return import_cache.pandas.Int8Dtype()(); + case NumpyNullableType::INT_16: + return import_cache.pandas.Int16Dtype()(); + case NumpyNullableType::INT_32: + return import_cache.pandas.Int32Dtype()(); + case NumpyNullableType::INT_64: + return import_cache.pandas.Int64Dtype()(); + case NumpyNullableType::FLOAT_32: + return import_cache.pandas.Float32Dtype()(); + case NumpyNullableType::FLOAT_64: + return import_cache.pandas.Float64Dtype()(); + case NumpyNullableType::FLOAT_16: + default: + return dtype; + } +} + } // namespace CHDB diff --git a/programs/local/NumpyType.h b/programs/local/NumpyType.h index 91f0d3e3a85..8a72ece2bb1 100644 --- a/programs/local/NumpyType.h +++ b/programs/local/NumpyType.h @@ -48,7 +48,11 @@ enum class NumpyObjectType : uint8_t { }; NumpyType ConvertNumpyType(const py::handle & col_type); + std::shared_ptr NumpyToDataType(const NumpyType & col_type); + String DataTypeToNumpyTypeStr(const std::shared_ptr & data_type); +py::object 
ConvertNumpyDtype(const py::handle & numpy_array);
+
 } // namespace CHDB
diff --git a/programs/local/PandasAnalyzer.cpp b/programs/local/PandasAnalyzer.cpp
index f1c97c96772..57d6140c692 100644
--- a/programs/local/PandasAnalyzer.cpp
+++ b/programs/local/PandasAnalyzer.cpp
@@ -38,7 +38,7 @@ PandasAnalyzer::PandasAnalyzer(const DB::Settings & settings)
 bool PandasAnalyzer::Analyze(py::object column)
 {
 #if USE_JEMALLOC
-	::Memory::MemoryCheckScope memory_check_scope;
+    ::Memory::MemoryCheckScope memory_check_scope;
 #endif
     if (sample_size == 0)
         return false;
diff --git a/programs/local/PandasDataFrameBuilder.cpp b/programs/local/PandasDataFrameBuilder.cpp
new file mode 100644
index 00000000000..4878af0c9f3
--- /dev/null
+++ b/programs/local/PandasDataFrameBuilder.cpp
@@ -0,0 +1,112 @@
+#include "PandasDataFrameBuilder.h"
+#include "NumpyType.h"
+#include "PythonUtils.h"
+#include "PythonConversion.h"
+#include "PythonImporter.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+using namespace CHDB;
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int LOGICAL_ERROR;
+}
+
+PandasDataFrameBuilder::PandasDataFrameBuilder(const Block & sample)
+{
+    column_names.reserve(sample.columns());
+    column_types.reserve(sample.columns());
+
+    for (const auto & column : sample)
+    {
+        column_names.push_back(column.name);
+        column_types.push_back(column.type);
+    }
+}
+
+void PandasDataFrameBuilder::addChunk(const Chunk & chunk)
+{
+    if (chunk.hasRows())
+    {
+        chunks.push_back(chunk.clone());
+        total_rows += chunk.getNumRows();
+    }
+}
+
+py::object PandasDataFrameBuilder::genDataFrame(const py::handle & dict)
+{
+    auto & import_cache = PythonImporter::ImportCache();
+    auto pandas = import_cache.pandas();
+    if (!pandas)
+    {
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Pandas is not installed");
+    }
+
+    py::object items = dict.attr("items")();
+    for (const py::handle & item : items) {
+        auto key_value = py::cast(item);
+        py::handle key = key_value[0];
+        py::handle value = key_value[1];
+
+        auto dtype = ConvertNumpyDtype(value);
+        if (py::isinstance(value, import_cache.numpy.ma.masked_array()))
+        {
+            auto series = pandas.attr("Series")(value.attr("data"), py::arg("dtype") = dtype);
+            series.attr("__setitem__")(value.attr("mask"), import_cache.pandas.NA());
+            dict.attr("__setitem__")(key, series);
+        }
+    }
+
+    auto df = pandas.attr("DataFrame").attr("from_dict")(dict);
+    return df;
+}
+
+void PandasDataFrameBuilder::finalize()
+{
+    if (is_finalized)
+        return;
+
+    columns_data.reserve(column_types.size());
+    for (const auto & type : column_types)
+    {
+        columns_data.emplace_back(type);
+    }
+
+    for (auto & column_data : columns_data)
+    {
+        column_data.init(total_rows);
+    }
+
+    /// Process all chunks and append column data
+    for (const auto & chunk : chunks)
+    {
+        const auto & columns = chunk.getColumns();
+        for (size_t col_idx = 0; col_idx < columns.size(); ++col_idx)
+        {
+            columns_data[col_idx].append(columns[col_idx]);
+        }
+    }
+
+    /// Create pandas DataFrame
+    py::dict res;
+    for (size_t col_idx = 0; col_idx < column_names.size(); ++col_idx) {
+        auto & name = column_names[col_idx];
+        auto & column_data = columns_data[col_idx];
+        res[name.c_str()] = column_data.toArray();
+    }
+    final_dataframe = genDataFrame(res);
+
+    is_finalized = true;
+}
+
+}
diff --git a/programs/local/PandasDataFrameBuilder.h b/programs/local/PandasDataFrameBuilder.h
new file mode 100644
index 00000000000..cbb17811da7
--- /dev/null
+++ b/programs/local/PandasDataFrameBuilder.h
@@ -0,0 +1,46 @@
+#pragma once
+
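+/// Typical flow, as driven by ChunkCollectorOutputFormat: construct with the
+/// result header, pass each Chunk to addChunk(), call finalize() once, then
+/// retrieve the assembled object via getDataFrame().
+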
+#include "PybindWrapper.h" +#include "NumpyArray.h" + +#include +#include +#include +#include + +namespace DB +{ + +/// Builder class to convert ClickHouse Chunks to Pandas DataFrame +/// Accumulates chunks and provides conversion to Python pandas DataFrame object +class PandasDataFrameBuilder +{ +public: + explicit PandasDataFrameBuilder(const Block & sample); + + /// Add data chunk + void addChunk(const Chunk & chunk); + + /// Finalize and build pandas DataFrame from all collected chunks + void finalize(); + + /// Get the finalized pandas DataFrame + pybind11::object getDataFrame() const { return final_dataframe; } + +private: + pybind11::object genDataFrame(const pybind11::handle & dict); + + std::vector column_names; + std::vector column_types; + + std::vector chunks; + std::vector columns_data; + + size_t total_rows = 0; + bool is_finalized = false; + pybind11::object final_dataframe; + + Poco::Logger * log = &Poco::Logger::get("PandasDataFrameBuilder"); +}; + +} diff --git a/programs/local/PythonImportCache.h b/programs/local/PythonImportCache.h index 6bdf5cf7c8f..1703a5103a5 100644 --- a/programs/local/PythonImportCache.h +++ b/programs/local/PythonImportCache.h @@ -2,6 +2,7 @@ #include "DatetimeCacheItem.h" #include "DecimalCacheItem.h" +#include "NumpyCacheItem.h" #include "PandasCacheItem.h" #include "PyArrowCacheItem.h" #include "PythonImportCacheItem.h" @@ -23,6 +24,7 @@ struct PythonImportCache { PyarrowCacheItem pyarrow; DatetimeCacheItem datetime; DecimalCacheItem decimal; + NumpyCacheItem numpy; py::handle AddCache(py::object item); From 8af227408e728785cfa4e4d93d830ecde8d6cbf2 Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Thu, 30 Oct 2025 02:40:00 +0800 Subject: [PATCH 06/22] chore: update NumpyArray --- programs/local/NumpyArray.cpp | 26 ++++++++++++++++++++------ programs/local/NumpyType.cpp | 10 +++++++--- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/programs/local/NumpyArray.cpp b/programs/local/NumpyArray.cpp index 35440e5b03d..a6eca447e2c 100644 --- a/programs/local/NumpyArray.cpp +++ b/programs/local/NumpyArray.cpp @@ -165,15 +165,11 @@ void NumpyArray::append(const ColumnPtr & column) break; case TypeIndex::UInt8: { - const String & type_name = data_array->type->getName(); - if (type_name == "Bool") - { + auto is_bool = isBool(data_array->type); + if (is_bool) may_have_null = CHColumnToNumpyArray(append_data); - } else - { may_have_null = CHColumnToNumpyArray(append_data); - } } break; case TypeIndex::Int16: @@ -200,6 +196,24 @@ void NumpyArray::append(const ColumnPtr & column) case TypeIndex::Float64: may_have_null = CHColumnToNumpyArray(append_data); break; + case TypeIndex::Int128: + may_have_null = TransformColumn(append_data); + break; + case TypeIndex::Int256: + may_have_null = TransformColumn(append_data); + break; + case TypeIndex::UInt128: + may_have_null = TransformColumn(append_data); + break; + case TypeIndex::UInt256: + may_have_null = TransformColumn(append_data); + break; + case TypeIndex::BFloat16: + may_have_null = TransformColumn(append_data); + break; + /// case TypeIndex::Date: + /// may_have_null = TransformColumn(append_data); + /// break; default: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported type {}", data_array->type->getName()); } diff --git a/programs/local/NumpyType.cpp b/programs/local/NumpyType.cpp index cf487e6d644..1b5fa53c79b 100644 --- a/programs/local/NumpyType.cpp +++ b/programs/local/NumpyType.cpp @@ -246,8 +246,8 @@ String DataTypeToNumpyTypeStr(const std::shared_ptr & data_type case 
TypeIndex::UInt8: /// Special case: UInt8 could be Bool type, need to check getName() { - const String & type_name = data_type->getName(); - return (type_name == "Bool") ? "bool" : "uint8"; + auto is_bool = isBool(data_type); + return is_bool ? "bool" : "uint8"; } case TypeIndex::Int16: return "int16"; @@ -261,8 +261,13 @@ String DataTypeToNumpyTypeStr(const std::shared_ptr & data_type return "int64"; case TypeIndex::UInt64: return "uint64"; + case TypeIndex::BFloat16: case TypeIndex::Float32: return "float32"; + case TypeIndex::Int256: + case TypeIndex::UInt256: + case TypeIndex::Int128: + case TypeIndex::UInt128: case TypeIndex::Float64: return "float64"; case TypeIndex::String: @@ -351,7 +356,6 @@ String DataTypeToNumpyTypeStr(const std::shared_ptr & data_type case TypeIndex::IPv4: case TypeIndex::IPv6: return "object"; - case TypeIndex::BFloat16: case TypeIndex::Decimal32: case TypeIndex::Decimal64: case TypeIndex::Decimal128: From 0255b90dfd356caf5d51f26109c27d71eb95e130 Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Thu, 30 Oct 2025 11:00:40 +0800 Subject: [PATCH 07/22] chore: update NumpyArray --- programs/local/NumpyArray.cpp | 18 ++++++++++++++---- programs/local/NumpyType.cpp | 2 +- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/programs/local/NumpyArray.cpp b/programs/local/NumpyArray.cpp index a6eca447e2c..b7bf32ca7cf 100644 --- a/programs/local/NumpyArray.cpp +++ b/programs/local/NumpyArray.cpp @@ -71,7 +71,8 @@ static bool TransformColumn(NumpyAppendData & append_data) data_column = &nullable->getNestedColumn(); } - const auto * src_ptr = static_cast(data_column)->getRawDataBegin(); + const auto * tmp_ptr = static_cast(data_column)->getRawDataBegin(); + const auto * src_ptr = reinterpret_cast(tmp_ptr); auto * dest_ptr = reinterpret_cast(append_data.target_data); auto * mask_ptr = append_data.target_mask; @@ -211,9 +212,18 @@ void NumpyArray::append(const ColumnPtr & column) case TypeIndex::BFloat16: may_have_null = TransformColumn(append_data); break; - /// case TypeIndex::Date: - /// may_have_null = TransformColumn(append_data); - /// break; + case TypeIndex::Date: + may_have_null = TransformColumn(append_data); + break; + case TypeIndex::Date32: + may_have_null = TransformColumn(append_data); + break; + case TypeIndex::DateTime: + may_have_null = TransformColumn(append_data); + break; + case TypeIndex::DateTime64: + may_have_null = CHColumnToNumpyArray(append_data); + break; default: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported type {}", data_array->type->getName()); } diff --git a/programs/local/NumpyType.cpp b/programs/local/NumpyType.cpp index 1b5fa53c79b..45d03ac786a 100644 --- a/programs/local/NumpyType.cpp +++ b/programs/local/NumpyType.cpp @@ -378,7 +378,7 @@ String DataTypeToNumpyTypeStr(const std::shared_ptr & data_type return "object"; } default: - return "object"; + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported type {}", data_type->getName()); } } From 1b6dade168c2da7f8f43faaf6fe04851fca691af Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Fri, 31 Oct 2025 18:15:43 +0800 Subject: [PATCH 08/22] chore: support timezone --- programs/local/PandasDataFrameBuilder.cpp | 53 +++++++++++++++++++++-- programs/local/PandasDataFrameBuilder.h | 5 +++ 2 files changed, 54 insertions(+), 4 deletions(-) diff --git a/programs/local/PandasDataFrameBuilder.cpp b/programs/local/PandasDataFrameBuilder.cpp index 4878af0c9f3..5764f6eab39 100644 --- a/programs/local/PandasDataFrameBuilder.cpp +++ b/programs/local/PandasDataFrameBuilder.cpp @@ 
-1,16 +1,22 @@ #include "PandasDataFrameBuilder.h" -#include "NumpyType.h" -#include "PythonUtils.h" -#include "PythonConversion.h" #include "PythonImporter.h" +#include "NumpyType.h" +#include +#include +#include +#include +#include +#include +#include #include +#include #include #include #include #include #include -#include +#include using namespace CHDB; @@ -31,6 +37,16 @@ PandasDataFrameBuilder::PandasDataFrameBuilder(const Block & sample) { column_names.push_back(column.name); column_types.push_back(column.type); + + /// Record timezone for timezone-aware types + if (const auto * dt = typeid_cast(column.type.get())) + column_timezones[column.name] = dt->getTimeZone().getTimeZone(); + else if (const auto * dt64 = typeid_cast(column.type.get())) + column_timezones[column.name] = dt64->getTimeZone().getTimeZone(); + else if (const auto * t = typeid_cast(column.type.get())) + column_timezones[column.name] = t->getTimeZone().getTimeZone(); + else if (const auto * t64 = typeid_cast(column.type.get())) + column_timezones[column.name] = t64->getTimeZone().getTimeZone(); } } @@ -68,9 +84,38 @@ py::object PandasDataFrameBuilder::genDataFrame(const py::handle & dict) } auto df = pandas.attr("DataFrame").attr("from_dict")(dict); + + /// Apply timezone conversion for timezone-aware columns + changeToTZType(df); + return df; } +void PandasDataFrameBuilder::changeToTZType(py::object & df) +{ + if (column_timezones.empty()) + return; + + for (const auto & [column_name, timezone_str] : column_timezones) + { + /// Check if column exists in DataFrame + if (!df.attr("__contains__")(column_name).cast()) + continue; + + /// Get the column + auto column = df[column_name.c_str()]; + + /// First localize to UTC (assuming the timestamps are in UTC) + auto utc_localized = column.attr("dt").attr("tz_localize")("UTC"); + + /// Then convert to the target timezone + auto tz_converted = utc_localized.attr("dt").attr("tz_convert")(timezone_str); + + /// Update the column in DataFrame + df.attr("__setitem__")(column_name.c_str(), tz_converted); + } +} + void PandasDataFrameBuilder::finalize() { if (is_finalized) diff --git a/programs/local/PandasDataFrameBuilder.h b/programs/local/PandasDataFrameBuilder.h index cbb17811da7..2f45b08e866 100644 --- a/programs/local/PandasDataFrameBuilder.h +++ b/programs/local/PandasDataFrameBuilder.h @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB { @@ -29,10 +30,14 @@ class PandasDataFrameBuilder private: pybind11::object genDataFrame(const pybind11::handle & dict); + void changeToTZType(pybind11::object & df); std::vector column_names; std::vector column_types; + /// Map column name to timezone string for timezone-aware types + std::unordered_map column_timezones; + std::vector chunks; std::vector columns_data; From 2a09093f5174e2c2ae04d5e5fceb22dae5bed2a2 Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Sat, 1 Nov 2025 21:39:35 +0800 Subject: [PATCH 09/22] chore: add more CH types --- programs/local/NumpyArray.cpp | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/programs/local/NumpyArray.cpp b/programs/local/NumpyArray.cpp index b7bf32ca7cf..a73103eb518 100644 --- a/programs/local/NumpyArray.cpp +++ b/programs/local/NumpyArray.cpp @@ -224,6 +224,35 @@ void NumpyArray::append(const ColumnPtr & column) case TypeIndex::DateTime64: may_have_null = CHColumnToNumpyArray(append_data); break; + case TypeIndex::Time: + case TypeIndex::Time64: + case TypeIndex::String: + case TypeIndex::FixedString: + case TypeIndex::Enum8: + case 
TypeIndex::Enum16: + case TypeIndex::Decimal32: + case TypeIndex::Decimal64: + case TypeIndex::Decimal128: + case TypeIndex::Decimal256: + case TypeIndex::UUID: + case TypeIndex::Array: + case TypeIndex::Tuple: + case TypeIndex::Set: + case TypeIndex::Interval: + case TypeIndex::Map: + case TypeIndex::Object: + case TypeIndex::IPv4: + case TypeIndex::IPv6: + case TypeIndex::JSONPaths: + case TypeIndex::Variant: + case TypeIndex::Dynamic: + /// TODO + break; + + case TypeIndex::ObjectDeprecated: /// Deprecated type, should not appear in normal data processing + case TypeIndex::Function: /// Function types are not data types, should not appear here + case TypeIndex::AggregateFunction: /// Aggregate function types are not data types, should not appear here + case TypeIndex::LowCardinality: /// LowCardinality should be unwrapped before reaching this point default: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported type {}", data_array->type->getName()); } From af9cc164dbff3790f215558c2934653bef780f75 Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Sun, 2 Nov 2025 23:52:35 +0800 Subject: [PATCH 10/22] chore: support time and time64 types --- programs/local/NumpyArray.cpp | 4 ++++ programs/local/PandasDataFrameBuilder.cpp | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/programs/local/NumpyArray.cpp b/programs/local/NumpyArray.cpp index a73103eb518..c4335e699c6 100644 --- a/programs/local/NumpyArray.cpp +++ b/programs/local/NumpyArray.cpp @@ -225,7 +225,11 @@ void NumpyArray::append(const ColumnPtr & column) may_have_null = CHColumnToNumpyArray(append_data); break; case TypeIndex::Time: + may_have_null = TransformColumn(append_data); + break; case TypeIndex::Time64: + may_have_null = CHColumnToNumpyArray(append_data); + break; case TypeIndex::String: case TypeIndex::FixedString: case TypeIndex::Enum8: diff --git a/programs/local/PandasDataFrameBuilder.cpp b/programs/local/PandasDataFrameBuilder.cpp index 5764f6eab39..9d81271a563 100644 --- a/programs/local/PandasDataFrameBuilder.cpp +++ b/programs/local/PandasDataFrameBuilder.cpp @@ -43,10 +43,6 @@ PandasDataFrameBuilder::PandasDataFrameBuilder(const Block & sample) column_timezones[column.name] = dt->getTimeZone().getTimeZone(); else if (const auto * dt64 = typeid_cast(column.type.get())) column_timezones[column.name] = dt64->getTimeZone().getTimeZone(); - else if (const auto * t = typeid_cast(column.type.get())) - column_timezones[column.name] = t->getTimeZone().getTimeZone(); - else if (const auto * t64 = typeid_cast(column.type.get())) - column_timezones[column.name] = t64->getTimeZone().getTimeZone(); } } From 373bd5e3556d3980a089e31cae559ca74d454816 Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Mon, 3 Nov 2025 02:47:51 +0800 Subject: [PATCH 11/22] chore: support more types --- programs/local/IPAddressCacheItem.h | 25 +++ programs/local/NumpyArray.cpp | 304 +++++++++++++++++++++++++++- programs/local/NumpyType.cpp | 9 +- programs/local/PythonImportCache.h | 7 +- programs/local/UUIDCacheItem.h | 21 ++ 5 files changed, 355 insertions(+), 11 deletions(-) create mode 100644 programs/local/IPAddressCacheItem.h create mode 100644 programs/local/UUIDCacheItem.h diff --git a/programs/local/IPAddressCacheItem.h b/programs/local/IPAddressCacheItem.h new file mode 100644 index 00000000000..2d51a1a3e43 --- /dev/null +++ b/programs/local/IPAddressCacheItem.h @@ -0,0 +1,25 @@ +#pragma once + +#include "PythonImportCacheItem.h" + +namespace CHDB { + +struct IPAddressCacheItem : public PythonImportCacheItem +{ +public: + 
static constexpr const char * Name = "ipaddress"; + + IPAddressCacheItem() + : PythonImportCacheItem("ipaddress") + , ipv4_address("IPv4Address", this) + , ipv6_address("IPv6Address", this) + { + } + + ~IPAddressCacheItem() override = default; + + PythonImportCacheItem ipv4_address; + PythonImportCacheItem ipv6_address; +}; + +} // namespace CHDB diff --git a/programs/local/NumpyArray.cpp b/programs/local/NumpyArray.cpp index c4335e699c6..cb0c949f001 100644 --- a/programs/local/NumpyArray.cpp +++ b/programs/local/NumpyArray.cpp @@ -1,13 +1,23 @@ #include "NumpyArray.h" #include "NumpyType.h" +#include "PythonImporter.h" #include #include #include +#include #include +#include #include #include +#include +#include #include +#include +#include +#include +#include +#include namespace DB { @@ -15,6 +25,7 @@ namespace DB namespace ErrorCodes { extern const int NOT_IMPLEMENTED; + extern const int LOGICAL_ERROR } } @@ -76,16 +87,21 @@ static bool TransformColumn(NumpyAppendData & append_data) auto * dest_ptr = reinterpret_cast(append_data.target_data); auto * mask_ptr = append_data.target_mask; - for (size_t i = 0; i < append_data.count; i++) { + for (size_t i = 0; i < append_data.count; i++) + { size_t offset = append_data.dest_offset + i; - if (nullable_column && nullable_column->isNullAt(i)) { + if (nullable_column && nullable_column->isNullAt(i)) + { dest_ptr[offset] = CONVERT::template nullValue(mask_ptr[offset]); has_null = has_null || mask_ptr[offset]; - } else { + } + else + { dest_ptr[offset] = CONVERT::template convertValue(src_ptr[i], append_data); mask_ptr[offset] = false; } } + return has_null; } @@ -95,6 +111,243 @@ static bool CHColumnToNumpyArray(NumpyAppendData & append_data) return TransformColumn(append_data); } +template +static bool CHColumnDecimalToNumpyArray(NumpyAppendData & append_data, const DataTypePtr & data_type) +{ + bool has_null = false; + const IColumn * data_column = &append_data.column; + const ColumnNullable * nullable_column = nullptr; + + /// Check if column is nullable + if (const auto * nullable = typeid_cast(&append_data.column)) + { + nullable_column = nullable; + data_column = &nullable->getNestedColumn(); + } + + const auto * decimal_column = typeid_cast *>(data_column); + if (!decimal_column) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected ColumnDecimal"); + + /// Get scale from data type to convert integer to actual decimal value + const auto * decimal_type = typeid_cast *>(data_type.get()); + if (!decimal_type) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected DataTypeDecimal"); + + auto scale_multiplier = decimal_type->getScaleMultiplier(); + double scale_multiplier_double = static_cast(scale_multiplier.value); + + auto * dest_ptr = reinterpret_cast(append_data.target_data); + auto * mask_ptr = append_data.target_mask; + + for (size_t i = 0; i < append_data.count; i++) + { + size_t offset = append_data.dest_offset + i; + if (nullable_column && nullable_column->isNullAt(i)) + { + /// Set to 0.0 for null values + dest_ptr[offset] = 0.0; + mask_ptr[offset] = true; + has_null = true; + } + else + { + /// Convert decimal integer value to actual decimal by dividing by scale multiplier + auto decimal_value = decimal_column->getElement(i); + dest_ptr[offset] = static_cast(decimal_value.value) / scale_multiplier_double; + mask_ptr[offset] = false; + } + } + + return has_null; +} + +static bool CHColumnUUIDToNumpyArray(NumpyAppendData & append_data) +{ + bool has_null = false; + const IColumn * data_column = &append_data.column; + const 
ColumnNullable * nullable_column = nullptr; + + /// Check if column is nullable + if (const auto * nullable = typeid_cast(&append_data.column)) + { + nullable_column = nullable; + data_column = &nullable->getNestedColumn(); + } + + const auto * uuid_column = typeid_cast *>(data_column); + if (!uuid_column) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected ColumnVector"); + + auto * dest_ptr = reinterpret_cast(append_data.target_data); + auto * mask_ptr = append_data.target_mask; + + for (size_t i = 0; i < append_data.count; i++) + { + size_t offset = append_data.dest_offset + i; + if (nullable_column && nullable_column->isNullAt(i)) + { + Py_INCREF(Py_None); + dest_ptr[offset] = Py_None; + has_null = true; + mask_ptr[offset] = true; + } + else + { + /// Convert UUID to Python uuid.UUID object + UUID uuid_value = uuid_column->getElement(i); + const auto formatted_uuid = formatUUID(uuid_value); + const char * uuid_str = formatted_uuid.data(); + const size_t uuid_str_len = formatted_uuid.size(); + + /// Create Python uuid.UUID object + auto & import_cache = PythonImporter::ImportCache(); + py::handle uuid_handle = import_cache.uuid.UUID()(String(uuid_str, uuid_str_len)).release(); + dest_ptr[offset] = uuid_handle.ptr(); + mask_ptr[offset] = false; + } + } + + return has_null; +} + +static bool CHColumnIPv4ToNumpyArray(NumpyAppendData & append_data) +{ + bool has_null = false; + const IColumn * data_column = &append_data.column; + const ColumnNullable * nullable_column = nullptr; + + /// Check if column is nullable + if (const auto * nullable = typeid_cast(&append_data.column)) + { + nullable_column = nullable; + data_column = &nullable->getNestedColumn(); + } + + const auto * ipv4_column = typeid_cast *>(data_column); + if (!ipv4_column) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected ColumnVector"); + + auto * dest_ptr = reinterpret_cast(append_data.target_data); + + for (size_t i = 0; i < append_data.count; i++) + { + size_t offset = append_data.dest_offset + i; + if (nullable_column && nullable_column->isNullAt(i)) + { + Py_INCREF(Py_None); + dest_ptr[offset] = Py_None; + has_null = true; + } + else + { + /// Convert IPv4 to Python ipaddress.IPv4Address object + IPv4 ipv4_value = ipv4_column->getElement(i); + + char ipv4_str[IPV4_MAX_TEXT_LENGTH]; + char * ptr = ipv4_str; + formatIPv4(reinterpret_cast(&ipv4_value), ptr); + const size_t ipv4_str_len = ptr - ipv4_str; + + /// Create Python ipaddress.IPv4Address object + auto & import_cache = PythonImporter::ImportCache(); + py::handle ipv4_handle = import_cache.ipaddress.ipv4_address()(String(ipv4_str, ipv4_str_len)).release(); + dest_ptr[offset] = ipv4_handle.ptr(); + } + } + + return has_null; +} + +static bool CHColumnIPv6ToNumpyArray(NumpyAppendData & append_data) +{ + bool has_null = false; + const IColumn * data_column = &append_data.column; + const ColumnNullable * nullable_column = nullptr; + + /// Check if column is nullable + if (const auto * nullable = typeid_cast(&append_data.column)) + { + nullable_column = nullable; + data_column = &nullable->getNestedColumn(); + } + + const auto * ipv6_column = typeid_cast *>(data_column); + if (!ipv6_column) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected ColumnVector"); + + auto * dest_ptr = reinterpret_cast(append_data.target_data); + + for (size_t i = 0; i < append_data.count; i++) + { + size_t offset = append_data.dest_offset + i; + if (nullable_column && nullable_column->isNullAt(i)) + { + Py_INCREF(Py_None); + dest_ptr[offset] = Py_None; + has_null = true; + } + else 
+ { + /// Convert IPv6 to Python ipaddress.IPv6Address object + IPv6 ipv6_value = ipv6_column->getElement(i); + + /// Use ClickHouse's built-in IPv6 formatting function + char ipv6_str[IPV6_MAX_TEXT_LENGTH]; + char * ptr = ipv6_str; + formatIPv6(reinterpret_cast(&ipv6_value), ptr); + const size_t ipv6_str_len = ptr - ipv6_str; + + /// Create Python ipaddress.IPv6Address object + auto & import_cache = PythonImporter::ImportCache(); + py::handle ipv6_handle = import_cache.ipaddress.ipv6_address()(String(ipv6_str, ipv6_str_len)).release(); + dest_ptr[offset] = ipv6_handle.ptr(); + } + } + + return has_null; +} + +template +static bool CHColumnStringToNumpyArray(NumpyAppendData & append_data) +{ + bool has_null = false; + const IColumn * data_column = &append_data.column; + const ColumnNullable * nullable_column = nullptr; + + /// Check if column is nullable + if (const auto * nullable = typeid_cast(&append_data.column)) + { + nullable_column = nullable; + data_column = &nullable->getNestedColumn(); + } + + const auto * string_column = typeid_cast(data_column); + if (!string_column) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected String ColumnType"); + + auto * dest_ptr = reinterpret_cast(append_data.target_data); + + for (size_t i = 0; i < append_data.count; i++) + { + size_t offset = append_data.dest_offset + i; + if (nullable_column && nullable_column->isNullAt(i)) + { + Py_INCREF(Py_None); + dest_ptr[offset] = Py_None; + } + else + { + StringRef str_ref = string_column->getDataAt(i); + auto * str_ptr = const_cast(str_ref.data); + auto str_size = str_ref.size; + dest_ptr[offset] = PyUnicode_FromStringAndSize(str_ptr, str_size); + } + } + + return has_null; +} + InternalNumpyArray::InternalNumpyArray(const DataTypePtr & type_) : data(nullptr) , type(type_) @@ -159,7 +412,14 @@ void NumpyArray::append(const ColumnPtr & column) append_data.target_mask = mask_ptr; append_data.dest_offset = data_array->count - size; - switch (data_array->type->getTypeId()) + /// For nullable types, we need to get the nested type + DataTypePtr actual_type = data_array->type; + if (const auto * nullable_type = typeid_cast(data_array->type.get())) + { + actual_type = nullable_type->getNestedType(); + } + + switch (actual_type->getTypeId()) { case TypeIndex::Int8: may_have_null = CHColumnToNumpyArray(append_data); @@ -231,32 +491,60 @@ void NumpyArray::append(const ColumnPtr & column) may_have_null = CHColumnToNumpyArray(append_data); break; case TypeIndex::String: + may_have_null = CHColumnStringToNumpyArray(append_data); + break; case TypeIndex::FixedString: + may_have_null = CHColumnStringToNumpyArray(append_data); + break; case TypeIndex::Enum8: + may_have_null = CHColumnToNumpyArray(append_data); + break; case TypeIndex::Enum16: + may_have_null = CHColumnToNumpyArray(append_data); + break; case TypeIndex::Decimal32: + may_have_null = CHColumnDecimalToNumpyArray(append_data, actual_type); + break; case TypeIndex::Decimal64: + may_have_null = CHColumnDecimalToNumpyArray(append_data, actual_type); + break; case TypeIndex::Decimal128: + may_have_null = CHColumnDecimalToNumpyArray(append_data, actual_type); + break; case TypeIndex::Decimal256: + may_have_null = CHColumnDecimalToNumpyArray(append_data, actual_type); + break; case TypeIndex::UUID: + may_have_null = CHColumnUUIDToNumpyArray(append_data); + break; case TypeIndex::Array: case TypeIndex::Tuple: case TypeIndex::Set: case TypeIndex::Interval: + may_have_null = CHColumnToNumpyArray(append_data); + break; case TypeIndex::Map: case TypeIndex::Object: 
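+    /// Map and Object have no dedicated converters at this point; they fall through to the IPv4 branch below.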
case TypeIndex::IPv4: + may_have_null = CHColumnIPv4ToNumpyArray(append_data); + break; case TypeIndex::IPv6: + may_have_null = CHColumnIPv6ToNumpyArray(append_data); + break; case TypeIndex::JSONPaths: case TypeIndex::Variant: case TypeIndex::Dynamic: /// TODO break; - case TypeIndex::ObjectDeprecated: /// Deprecated type, should not appear in normal data processing - case TypeIndex::Function: /// Function types are not data types, should not appear here - case TypeIndex::AggregateFunction: /// Aggregate function types are not data types, should not appear here - case TypeIndex::LowCardinality: /// LowCardinality should be unwrapped before reaching this point + /// Deprecated type, should not appear in normal data processing + case TypeIndex::ObjectDeprecated: + /// Function types are not data types, should not appear here + case TypeIndex::Function: + /// Aggregate function types are not data types, should not appear here + case TypeIndex::AggregateFunction: + /// LowCardinality should be unwrapped before reaching this point + case TypeIndex::LowCardinality: default: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported type {}", data_array->type->getName()); } diff --git a/programs/local/NumpyType.cpp b/programs/local/NumpyType.cpp index 45d03ac786a..aa5f760a79c 100644 --- a/programs/local/NumpyType.cpp +++ b/programs/local/NumpyType.cpp @@ -342,7 +342,8 @@ String DataTypeToNumpyTypeStr(const std::shared_ptr & data_type case IntervalKind::Kind::Month: return "timedelta64[M]"; case IntervalKind::Kind::Quarter: - return "object"; + /// numpy doesn't have quarter type, use int64 + return "int64"; case IntervalKind::Kind::Year: return "timedelta64[Y]"; default: @@ -360,7 +361,7 @@ String DataTypeToNumpyTypeStr(const std::shared_ptr & data_type case TypeIndex::Decimal64: case TypeIndex::Decimal128: case TypeIndex::Decimal256: - return "object"; + return "float64"; case TypeIndex::Array: case TypeIndex::Tuple: case TypeIndex::Map: @@ -369,6 +370,10 @@ String DataTypeToNumpyTypeStr(const std::shared_ptr & data_type case TypeIndex::Variant: case TypeIndex::Object: return "object"; + case TypeIndex::Enum8: + return "int8"; + case TypeIndex::Enum16: + return "int16"; case TypeIndex::Nullable: { if (const auto * nullable = typeid_cast(data_type.get())) diff --git a/programs/local/PythonImportCache.h b/programs/local/PythonImportCache.h index 1703a5103a5..382bb34358d 100644 --- a/programs/local/PythonImportCache.h +++ b/programs/local/PythonImportCache.h @@ -6,6 +6,8 @@ #include "PandasCacheItem.h" #include "PyArrowCacheItem.h" #include "PythonImportCacheItem.h" +#include "UUIDCacheItem.h" +#include "IPAddressCacheItem.h" #include @@ -14,7 +16,8 @@ namespace CHDB { struct PythonImportCache; using PythonImportCachePtr = std::shared_ptr; -struct PythonImportCache { +struct PythonImportCache +{ public: explicit PythonImportCache() = default; @@ -25,6 +28,8 @@ struct PythonImportCache { DatetimeCacheItem datetime; DecimalCacheItem decimal; NumpyCacheItem numpy; + UUIDCacheItem uuid; + IPAddressCacheItem ipaddress; py::handle AddCache(py::object item); diff --git a/programs/local/UUIDCacheItem.h b/programs/local/UUIDCacheItem.h new file mode 100644 index 00000000000..ee21b48ca22 --- /dev/null +++ b/programs/local/UUIDCacheItem.h @@ -0,0 +1,21 @@ +#pragma once + +#include "PythonImportCacheItem.h" + +namespace CHDB { + +struct UUIDCacheItem : public PythonImportCacheItem +{ +public: + static constexpr const char * Name = "uuid"; + + UUIDCacheItem() : PythonImportCacheItem("uuid"), UUID("UUID", this) 
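+    /// Caches the uuid.UUID constructor so per-value conversion avoids repeated module lookups.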
+ { + } + + ~UUIDCacheItem() override = default; + + PythonImportCacheItem UUID; +}; + +} // namespace CHDB From dd55a088a1fd7f9e4177b07c2d4f1824bcd92618 Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Mon, 3 Nov 2025 17:23:55 +0800 Subject: [PATCH 12/22] chore: support nested types --- programs/local/NumpyArray.cpp | 121 +++++++++++++------ programs/local/NumpyArray.h | 17 +++ programs/local/NumpyNestedTypes.cpp | 180 ++++++++++++++++++++++++++++ programs/local/NumpyNestedTypes.h | 20 ++++ programs/local/NumpyType.cpp | 4 +- programs/local/NumpyType.h | 2 +- 6 files changed, 304 insertions(+), 40 deletions(-) create mode 100644 programs/local/NumpyNestedTypes.cpp create mode 100644 programs/local/NumpyNestedTypes.h diff --git a/programs/local/NumpyArray.cpp b/programs/local/NumpyArray.cpp index cb0c949f001..470478052ea 100644 --- a/programs/local/NumpyArray.cpp +++ b/programs/local/NumpyArray.cpp @@ -1,5 +1,6 @@ #include "NumpyArray.h" #include "NumpyType.h" +#include "NumpyNestedTypes.h" #include "PythonImporter.h" #include @@ -25,7 +26,7 @@ namespace DB namespace ErrorCodes { extern const int NOT_IMPLEMENTED; - extern const int LOGICAL_ERROR + extern const int LOGICAL_ERROR; } } @@ -35,22 +36,6 @@ using namespace DB; namespace CHDB { -struct NumpyAppendData -{ -public: - explicit NumpyAppendData(const IColumn & column) - : column(column) - { - } - - const IColumn & column; - - size_t count; - size_t dest_offset; - UInt8 * target_data; - bool * target_mask; -}; - struct RegularConvert { template @@ -87,7 +72,7 @@ static bool TransformColumn(NumpyAppendData & append_data) auto * dest_ptr = reinterpret_cast(append_data.target_data); auto * mask_ptr = append_data.target_mask; - for (size_t i = 0; i < append_data.count; i++) + for (size_t i = append_data.src_offset; i < append_data.src_offset + append_data.src_count; i++) { size_t offset = append_data.dest_offset + i; if (nullable_column && nullable_column->isNullAt(i)) @@ -140,7 +125,7 @@ static bool CHColumnDecimalToNumpyArray(NumpyAppendData & append_data, const Dat auto * dest_ptr = reinterpret_cast(append_data.target_data); auto * mask_ptr = append_data.target_mask; - for (size_t i = 0; i < append_data.count; i++) + for (size_t i = append_data.src_offset; i < append_data.src_offset + append_data.src_count; i++) { size_t offset = append_data.dest_offset + i; if (nullable_column && nullable_column->isNullAt(i)) @@ -182,13 +167,12 @@ static bool CHColumnUUIDToNumpyArray(NumpyAppendData & append_data) auto * dest_ptr = reinterpret_cast(append_data.target_data); auto * mask_ptr = append_data.target_mask; - for (size_t i = 0; i < append_data.count; i++) + for (size_t i = append_data.src_offset; i < append_data.src_offset + append_data.src_count; i++) { size_t offset = append_data.dest_offset + i; if (nullable_column && nullable_column->isNullAt(i)) { - Py_INCREF(Py_None); - dest_ptr[offset] = Py_None; + dest_ptr[offset] = nullptr; has_null = true; mask_ptr[offset] = true; } @@ -229,15 +213,16 @@ static bool CHColumnIPv4ToNumpyArray(NumpyAppendData & append_data) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected ColumnVector"); auto * dest_ptr = reinterpret_cast(append_data.target_data); + auto * mask_ptr = append_data.target_mask; - for (size_t i = 0; i < append_data.count; i++) + for (size_t i = append_data.src_offset; i < append_data.src_offset + append_data.src_count; i++) { size_t offset = append_data.dest_offset + i; if (nullable_column && nullable_column->isNullAt(i)) { - Py_INCREF(Py_None); - dest_ptr[offset] = Py_None; + 
dest_ptr[offset] = nullptr; has_null = true; + mask_ptr[offset] = true; } else { @@ -253,6 +238,7 @@ static bool CHColumnIPv4ToNumpyArray(NumpyAppendData & append_data) auto & import_cache = PythonImporter::ImportCache(); py::handle ipv4_handle = import_cache.ipaddress.ipv4_address()(String(ipv4_str, ipv4_str_len)).release(); dest_ptr[offset] = ipv4_handle.ptr(); + mask_ptr[offset] = false; } } @@ -277,15 +263,16 @@ static bool CHColumnIPv6ToNumpyArray(NumpyAppendData & append_data) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected ColumnVector"); auto * dest_ptr = reinterpret_cast(append_data.target_data); + auto * mask_ptr = append_data.target_mask; - for (size_t i = 0; i < append_data.count; i++) + for (size_t i = append_data.src_offset; i < append_data.src_offset + append_data.src_count; i++) { size_t offset = append_data.dest_offset + i; if (nullable_column && nullable_column->isNullAt(i)) { - Py_INCREF(Py_None); - dest_ptr[offset] = Py_None; + dest_ptr[offset] = nullptr; has_null = true; + mask_ptr[offset] = true; } else { @@ -302,6 +289,7 @@ static bool CHColumnIPv6ToNumpyArray(NumpyAppendData & append_data) auto & import_cache = PythonImporter::ImportCache(); py::handle ipv6_handle = import_cache.ipaddress.ipv6_address()(String(ipv6_str, ipv6_str_len)).release(); dest_ptr[offset] = ipv6_handle.ptr(); + mask_ptr[offset] = false; } } @@ -328,7 +316,7 @@ static bool CHColumnStringToNumpyArray(NumpyAppendData & append_data) auto * dest_ptr = reinterpret_cast(append_data.target_data); - for (size_t i = 0; i < append_data.count; i++) + for (size_t i = append_data.src_offset; i < append_data.src_offset + append_data.src_count; i++) { size_t offset = append_data.dest_offset + i; if (nullable_column && nullable_column->isNullAt(i)) @@ -348,6 +336,16 @@ static bool CHColumnStringToNumpyArray(NumpyAppendData & append_data) return has_null; } +NumpyAppendData::NumpyAppendData(const DB::IColumn & column) + : column(column) + , src_offset(0) + , src_count(0) + , dest_offset(0) + , target_data(nullptr) + , target_mask(nullptr) +{ +} + InternalNumpyArray::InternalNumpyArray(const DataTypePtr & type_) : data(nullptr) , type(type_) @@ -390,7 +388,34 @@ void NumpyArray::resize(size_t capacity) mask_array->resize(capacity); } +static bool CHColumnNothingToNumpyArray(NumpyAppendData & append_data) +{ + /// Nothing type represents columns with no actual values, so we fill all positions with None + bool has_null = true; + auto * dest_ptr = reinterpret_cast(append_data.target_data); + auto * mask_ptr = append_data.target_mask; + + for (size_t i = append_data.src_offset; i < append_data.src_offset + append_data.src_count; i++) + { + size_t offset = append_data.dest_offset + i; + + Py_INCREF(Py_None); + dest_ptr[offset] = Py_None; + mask_ptr[offset] = true; + } + + return has_null; +} + void NumpyArray::append(const ColumnPtr & column) +{ + append(column, 0, column->size()); +} + +void NumpyArray::append( + const ColumnPtr & column, + size_t offset, + size_t count) { chassert(data_array); chassert(mask_array); @@ -407,7 +432,8 @@ void NumpyArray::append(const ColumnPtr & column) bool may_have_null = false; NumpyAppendData append_data(*column); - append_data.count = size; + append_data.src_offset = offset; + append_data.src_offset + append_data.src_count = count; append_data.target_data = data_ptr; append_data.target_mask = mask_ptr; append_data.dest_offset = data_array->count - size; @@ -421,6 +447,9 @@ void NumpyArray::append(const ColumnPtr & column) switch (actual_type->getTypeId()) { + case 
TypeIndex::Nothing: + may_have_null = CHColumnNothingToNumpyArray(append_data); + break; case TypeIndex::Int8: may_have_null = CHColumnToNumpyArray(append_data); break; @@ -518,33 +547,49 @@ void NumpyArray::append(const ColumnPtr & column) may_have_null = CHColumnUUIDToNumpyArray(append_data); break; case TypeIndex::Array: + may_have_null = CHColumnArrayToNumpyArray(append_data, actual_type); + break; case TypeIndex::Tuple: - case TypeIndex::Set: + may_have_null = CHColumnTupleToNumpyArray(append_data, actual_type); + break; case TypeIndex::Interval: may_have_null = CHColumnToNumpyArray(append_data); break; case TypeIndex::Map: + may_have_null = CHColumnMapToNumpyArray(append_data, actual_type); + break; case TypeIndex::Object: - case TypeIndex::IPv4: + may_have_null = CHColumnObjectToNumpyArray(append_data, actual_type); + break; + case TypeIndex::IPv4: may_have_null = CHColumnIPv4ToNumpyArray(append_data); break; case TypeIndex::IPv6: may_have_null = CHColumnIPv6ToNumpyArray(append_data); break; - case TypeIndex::JSONPaths: - case TypeIndex::Variant: - case TypeIndex::Dynamic: - /// TODO + case TypeIndex::Variant: + may_have_null = CHColumnVariantToNumpyArray(append_data, actual_type); + break; + case TypeIndex::Dynamic: + may_have_null = CHColumnDynamicToNumpyArray(append_data, actual_type); break; + /// Set types are used only in WHERE clauses for IN operations, not in actual data storage + case TypeIndex::Set: + /// JSONPaths is an internal type used only for JSON schema inference, + case TypeIndex::JSONPaths: /// Deprecated type, should not appear in normal data processing case TypeIndex::ObjectDeprecated: - /// Function types are not data types, should not appear here + /// Function types are not actual data types, should not appear here case TypeIndex::Function: - /// Aggregate function types are not data types, should not appear here + /// Aggregate function types are not actual data types, should not appear here case TypeIndex::AggregateFunction: /// LowCardinality should be unwrapped before reaching this point case TypeIndex::LowCardinality: + /// Nullable cannot contain another Nullable type, so this should not appear in nested conversion + case TypeIndex::Nullable: + /// QBit type is supported in newer versions of ClickHouse + /// case TypeIndex::QBit: default: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported type {}", data_array->type->getName()); } diff --git a/programs/local/NumpyArray.h b/programs/local/NumpyArray.h index 7927faf1ec0..3c014dc79f8 100644 --- a/programs/local/NumpyArray.h +++ b/programs/local/NumpyArray.h @@ -9,6 +9,21 @@ namespace CHDB { +/// Data structure for appending column data to numpy arrays +class NumpyAppendData +{ +public: + explicit NumpyAppendData(const DB::IColumn & column); + + const DB::IColumn & column; + + size_t src_offset; + size_t src_count; + size_t dest_offset; + UInt8 * target_data; + bool * target_mask; +}; + class InternalNumpyArray { public: @@ -32,6 +47,8 @@ class NumpyArray { void resize(size_t capacity); + void append(const DB::ColumnPtr & column, size_t offset, size_t count); + void append(const DB::ColumnPtr & column); py::object toArray() const; diff --git a/programs/local/NumpyNestedTypes.cpp b/programs/local/NumpyNestedTypes.cpp new file mode 100644 index 00000000000..eafc3477fbb --- /dev/null +++ b/programs/local/NumpyNestedTypes.cpp @@ -0,0 +1,180 @@ +#include "NumpyNestedTypes.h" +#include "NumpyArray.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include 
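+/// Helpers that expand nested ClickHouse columns (Array, Tuple, Map, Object, Variant, Dynamic) into Python objects element by element; the non-Array specializations are still empty stubs here.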
+#include +#include +#include +#include + +namespace CHDB +{ + +using namespace DB; +namespace py = pybind11; + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int NOT_IMPLEMENTED; +} + +template +struct ColumnTraits; + +template <> +struct ColumnTraits +{ + using DataType = DataTypeArray; + + static py::object convertElement(const ColumnArray * column, const DataTypePtr & data_type, size_t index) + { + const auto * array_data_type = typeid_cast(data_type.get()); + if (!array_data_type) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected DataTypeArray"); + + const auto & offsets = column->getOffsets(); + const auto & nested_column = column->getDataPtr(); + + size_t start_offset = (index == 0) ? 0 : offsets[index - 1]; + size_t end_offset = offsets[index]; + size_t array_size = end_offset - start_offset; + + NumpyArray numpy_array(data_type); + numpy_array.init(array_size); + numpy_array.append(nested_column, start_offset, array_size); + + return numpy_array.toArray(); + } +}; + +template <> +struct ColumnTraits +{ + using DataType = DataTypeTuple; + + static py::object convertElement(const ColumnTuple * column, const DataTypePtr & data_type, size_t index) + { + } +}; + +template <> +struct ColumnTraits +{ + using DataType = DataTypeMap; + + static py::object convertElement(const ColumnMap * column, const DataTypePtr & data_type, size_t index) + { + } +}; + +template <> +struct ColumnTraits +{ + using DataType = DataTypeObject; + + static py::object convertElement(const ColumnObject * column, const DataTypePtr & data_type, size_t index) + { + } +}; + +template <> +struct ColumnTraits +{ + using DataType = DataTypeVariant; + + static py::object convertElement(const ColumnVariant * column, const DataTypePtr & data_type, size_t index) + { + } +}; + +template <> +struct ColumnTraits +{ + using DataType = DataTypeDynamic; + + static py::object convertElement(const ColumnDynamic * column, const DataTypePtr & data_type, size_t index) + { + } +}; + +template +bool CHNestedColumnToNumpyArray(NumpyAppendData & append_data, const DataTypePtr & data_type) +{ + bool has_null = false; + const IColumn * data_column = &append_data.column; + const ColumnNullable * nullable_column = nullptr; + + if (const auto * nullable = typeid_cast(&append_data.column)) + { + nullable_column = nullable; + data_column = &nullable->getNestedColumn(); + has_null = true; + } + + const auto * typed_column = typeid_cast(data_column); + if (!typed_column) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected specific column type"); + + auto * dest_ptr = reinterpret_cast(append_data.target_data); + auto * mask_ptr = append_data.target_mask; + + for (size_t i = append_data.src_offset; i < append_data.src_offset + append_data.src_count; i++) + { + size_t offset = append_data.dest_offset + i; + if (nullable_column && nullable_column->isNullAt(i)) + { + dest_ptr[offset] = py::none(); + mask_ptr[offset] = true; + has_null = true; + } + else + { + dest_ptr[offset] = ColumnTraits::convertElement(typed_column, data_type, i); + mask_ptr[offset] = false; + } + } + + return has_null; +} + +bool CHColumnArrayToNumpyArray(NumpyAppendData & append_data, const DataTypePtr & data_type) +{ + return CHNestedColumnToNumpyArray(append_data, data_type); +} + +bool CHColumnTupleToNumpyArray(NumpyAppendData & append_data, const DataTypePtr & data_type) +{ + return CHNestedColumnToNumpyArray(append_data, data_type); +} + +bool CHColumnMapToNumpyArray(NumpyAppendData & append_data, const DataTypePtr & data_type) +{ + return 
CHNestedColumnToNumpyArray(append_data, data_type); +} + +bool CHColumnObjectToNumpyArray(NumpyAppendData & append_data, const DataTypePtr & data_type) +{ + return CHNestedColumnToNumpyArray(append_data, data_type); +} + +bool CHColumnVariantToNumpyArray(NumpyAppendData & append_data, const DataTypePtr & data_type) +{ + return CHNestedColumnToNumpyArray(append_data, data_type); +} + +bool CHColumnDynamicToNumpyArray(NumpyAppendData & append_data, const DataTypePtr & data_type) +{ + return CHNestedColumnToNumpyArray(append_data, data_type); +} + +} // namespace CHDB diff --git a/programs/local/NumpyNestedTypes.h b/programs/local/NumpyNestedTypes.h new file mode 100644 index 00000000000..b3e0a68520e --- /dev/null +++ b/programs/local/NumpyNestedTypes.h @@ -0,0 +1,20 @@ +#pragma once + +#include "NumpyArray.h" + +namespace CHDB +{ + +bool CHColumnArrayToNumpyArray(NumpyAppendData & append_data, const DB::DataTypePtr & data_type); + +bool CHColumnTupleToNumpyArray(NumpyAppendData & append_data, const DB::DataTypePtr & data_type); + +bool CHColumnMapToNumpyArray(NumpyAppendData & append_data, const DB::DataTypePtr & data_type); + +bool CHColumnObjectToNumpyArray(NumpyAppendData & append_data, const DB::DataTypePtr & data_type); + +bool CHColumnVariantToNumpyArray(NumpyAppendData & append_data, const DB::DataTypePtr & data_type); + +bool CHColumnDynamicToNumpyArray(NumpyAppendData & append_data, const DB::DataTypePtr & data_type); + +} // namespace CHDB diff --git a/programs/local/NumpyType.cpp b/programs/local/NumpyType.cpp index aa5f760a79c..9f2bb23216a 100644 --- a/programs/local/NumpyType.cpp +++ b/programs/local/NumpyType.cpp @@ -241,6 +241,8 @@ String DataTypeToNumpyTypeStr(const std::shared_ptr & data_type TypeIndex type_id = data_type->getTypeId(); switch (type_id) { + case TypeIndex::Nothing: + return "object"; case TypeIndex::Int8: return "int8"; case TypeIndex::UInt8: @@ -380,7 +382,7 @@ String DataTypeToNumpyTypeStr(const std::shared_ptr & data_type { return DataTypeToNumpyTypeStr(nullable->getNestedType()); } - return "object"; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected nullable type {}", data_type->getName()); } default: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported type {}", data_type->getName()); diff --git a/programs/local/NumpyType.h b/programs/local/NumpyType.h index 8a72ece2bb1..787bfcd857a 100644 --- a/programs/local/NumpyType.h +++ b/programs/local/NumpyType.h @@ -51,7 +51,7 @@ NumpyType ConvertNumpyType(const py::handle & col_type); std::shared_ptr NumpyToDataType(const NumpyType & col_type); -String DataTypeToNumpyTypeStr(const std::shared_ptr & data_type); +String DataTypeToNumpyTypeStr(const std::shared_ptr & data_type); py::object ConvertNumpyDtype(py::handle & numpy_array); From afd902aca59ef27d5928614207ba4044414c0bf0 Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Tue, 4 Nov 2025 03:49:27 +0800 Subject: [PATCH 13/22] chore: support converting field to python object --- programs/local/FieldToPython.cpp | 386 ++++++++++++++++++++++++++++ programs/local/FieldToPython.h | 14 + programs/local/NumpyArray.cpp | 257 ++++++++++++++++-- programs/local/NumpyArray.h | 15 +- programs/local/NumpyNestedTypes.cpp | 19 ++ programs/local/NumpyType.cpp | 63 +++-- programs/local/PythonImportCache.h | 2 + programs/local/PytzCacheItem.h | 19 ++ 8 files changed, 717 insertions(+), 58 deletions(-) create mode 100644 programs/local/FieldToPython.cpp create mode 100644 programs/local/FieldToPython.h create mode 100644 programs/local/PytzCacheItem.h diff --git 
a/programs/local/FieldToPython.cpp b/programs/local/FieldToPython.cpp new file mode 100644 index 00000000000..e5fb03380c2 --- /dev/null +++ b/programs/local/FieldToPython.cpp @@ -0,0 +1,386 @@ +#include "FieldToPython.h" +#include "PythonImporter.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace CHDB +{ + +using namespace DB; +namespace py = pybind11; + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +static py::object convertLocalDateToPython(const LocalDate & local_date, auto & import_cache, const Field & field) +{ + auto year = local_date.year(); + auto month = local_date.month(); + auto day = local_date.day(); + + try + { + return import_cache.datetime.date()(year, month, day); + } + catch (py::error_already_set &) + { + return py::str(toString(field)); + } +} + +py::object convertFieldToPython( + const Field & field, + const DB::DataTypePtr & type) +{ + chassert(type); + + auto field_type = field.getType(); + if (field_type == Field::Types::Null) + { + return py::none(); + } + + DataTypePtr actual_type = type; + if (const auto * nullable_type = typeid_cast(type.get())) + { + actual_type = nullable_type->getNestedType(); + } + + auto & import_cache = PythonImporter::ImportCache(); + + switch (actual_type->getTypeId()) + { + case TypeIndex::Nothing: + return py::none(); + + case TypeIndex::Int8: + return py::cast(field.safeGet()); + + case TypeIndex::UInt8: + if (field_type == Field::Types::Bool) + return py::cast(field.safeGet()); + + return py::cast(field.safeGet()); + + case TypeIndex::Int16: + return py::cast(field.safeGet()); + + case TypeIndex::UInt16: + return py::cast(field.safeGet()); + + case TypeIndex::Int32: + return py::cast(field.safeGet()); + + case TypeIndex::UInt32: + return py::cast(field.safeGet()); + + case TypeIndex::Int64: + return py::cast(field.safeGet()); + + case TypeIndex::UInt64: + return py::cast(field.safeGet()); + + case TypeIndex::Float32: + return py::cast(field.safeGet()); + + case TypeIndex::Float64: + return py::cast(field.safeGet()); + + case TypeIndex::Int128: + return py::cast((double)field.safeGet()); + + case TypeIndex::Int256: + return py::cast((double)field.safeGet()); + + case TypeIndex::UInt128: + return py::cast((double)field.safeGet()); + + case TypeIndex::UInt256: + return py::cast((double)field.safeGet()); + + case TypeIndex::BFloat16: + return py::cast((double)field.safeGet()); + + case TypeIndex::Date: + { + auto days = field.safeGet(); + LocalDate local_date(static_cast(days)); + return convertLocalDateToPython(local_date, import_cache, field); + } + + case TypeIndex::Date32: + { + auto days = field.safeGet(); + LocalDate local_date(static_cast(days)); + return convertLocalDateToPython(local_date, import_cache, field); + } + + case TypeIndex::DateTime: + { + auto seconds = field.safeGet(); + + const auto * datetime_type = typeid_cast(type.get()); + const auto & time_zone = datetime_type ? 
datetime_type->getTimeZone() : DateLUT::instance("UTC"); + + time_t timestamp = static_cast(seconds); + LocalDateTime local_dt(timestamp, time_zone); + + int year = local_dt.year(); + int month = local_dt.month(); + int day = local_dt.day(); + int hour = local_dt.hour(); + int minute = local_dt.minute(); + int second = local_dt.second(); + int microsecond = 0; + + try + { + py::object timestamp_object = import_cache.datetime.datetime()( + year, month, day, hour, minute, second, microsecond + ); + + const String & tz_name = time_zone.getTimeZone(); + auto tz_obj = import_cache.pytz.timezone()(tz_name); + return tz_obj.attr("localize")(timestamp_object); + } + catch (py::error_already_set &) + { + return py::str(toString(field)); + } + } + + case TypeIndex::DateTime64: + { + auto datetime64_field = field.safeGet>(); + auto datetime64_value = datetime64_field.getValue(); + Int64 datetime64_ticks = datetime64_value.value; + + const auto * datetime64_type = typeid_cast(type.get()); + const auto & time_zone = datetime64_type ? datetime64_type->getTimeZone() : DateLUT::instance("UTC"); + + UInt32 scale = datetime64_field.getScale(); + Int64 scale_multiplier = DecimalUtils::scaleMultiplier(scale); + + auto seconds = static_cast(datetime64_ticks / scale_multiplier); + auto fractional = datetime64_ticks % scale_multiplier; + + LocalDateTime local_dt(seconds, time_zone); + + int year = local_dt.year(); + int month = local_dt.month(); + int day = local_dt.day(); + int hour = local_dt.hour(); + int minute = local_dt.minute(); + int second = local_dt.second(); + int microsecond = static_cast((fractional * 1000000) / scale_multiplier); + + try + { + py::object timestamp_object = import_cache.datetime.datetime()( + year, month, day, hour, minute, second, microsecond + ); + + const String & tz_name = time_zone.getTimeZone(); + auto tz_obj = import_cache.pytz.timezone()(tz_name); + return tz_obj.attr("localize")(timestamp_object); + } + catch (py::error_already_set &) + { + return py::str(toString(field)); + } + } + + case TypeIndex::Time: + { + auto time_seconds = field.safeGet(); + + if (time_seconds < 0) + { + return py::str(toString(field)); + } + + /// Handle time overflow (should be within 24 hours) + /// ClickHouse Time range is [-999:59:59, 999:59:59] + time_seconds = time_seconds % 86400; + + int hour = static_cast(time_seconds / 3600); + int minute = static_cast((time_seconds % 3600) / 60); + int second = static_cast(time_seconds % 60); + int microsecond = 0; + + try + { + return import_cache.datetime.time()(hour, minute, second, microsecond); + } + catch (py::error_already_set &) + { + return py::str(toString(field)); + } + } + + case TypeIndex::Time64: + { + auto time64_field = field.safeGet>(); + auto time64_value = time64_field.getValue(); + Int64 time64_ticks = time64_value.value; + + if (time64_ticks < 0) + { + return py::str(toString(field)); + } + + UInt32 scale = time64_field.getScale(); + Int64 scale_multiplier = DecimalUtils::scaleMultiplier(scale); + + /// Convert to seconds and fractional part within a day + Int64 total_seconds = time64_ticks / scale_multiplier; + Int64 fractional = time64_ticks % scale_multiplier; + + /// Handle time overflow (should be within 24 hours) + /// ClickHouse Time range is [-999:59:59, 999:59:59] + total_seconds = total_seconds % 86400; + + int hour = static_cast(total_seconds / 3600); + int minute = static_cast((total_seconds % 3600) / 60); + int second = static_cast(total_seconds % 60); + int microsecond = static_cast((fractional * 1000000) / 
scale_multiplier); + + try + { + return import_cache.datetime.time()(hour, minute, second, microsecond); + } + catch (py::error_already_set &) + { + return py::str(toString(field)); + } + } + + case TypeIndex::String: + case TypeIndex::FixedString: + return py::cast(field.safeGet()); + + case TypeIndex::Enum8: + case TypeIndex::Enum16: + return py::cast(field.safeGet()); + + case TypeIndex::Decimal32: + { + auto decimal_field = field.safeGet>(); + auto decimal_value = decimal_field.getValue(); + UInt32 scale = decimal_field.getScale(); + double result = DecimalUtils::convertTo(decimal_value, scale); + return py::cast(result); + } + + case TypeIndex::Decimal64: + { + auto decimal_field = field.safeGet>(); + auto decimal_value = decimal_field.getValue(); + UInt32 scale = decimal_field.getScale(); + double result = DecimalUtils::convertTo(decimal_value, scale); + return py::cast(result); + } + + case TypeIndex::Decimal128: + { + auto decimal_field = field.safeGet>(); + auto decimal_value = decimal_field.getValue(); + UInt32 scale = decimal_field.getScale(); + double result = DecimalUtils::convertTo(decimal_value, scale); + return py::cast(result); + } + + case TypeIndex::Decimal256: + { + auto decimal_field = field.safeGet>(); + auto decimal_value = decimal_field.getValue(); + UInt32 scale = decimal_field.getScale(); + double result = DecimalUtils::convertTo(decimal_value, scale); + return py::cast(result); + } + + case TypeIndex::UUID: + break; + + // case TypeIndex::Array: + // may_have_null = CHColumnArrayToNumpyArray(append_data, actual_type); + // break; + + // case TypeIndex::Tuple: + // may_have_null = CHColumnTupleToNumpyArray(append_data, actual_type); + // break; + + // case TypeIndex::Interval: + // { + // const auto * interval_type = typeid_cast(actual_type.get()); + // if (interval_type && interval_type->getKind() == IntervalKind::Kind::Quarter) + // { + // may_have_null = CHColumnIntervalToNumpyArray(append_data); + // } + // else + // { + // may_have_null = CHColumnToNumpyArray(append_data); + // } + // } + // break; + + // case TypeIndex::Map: + // may_have_null = CHColumnMapToNumpyArray(append_data, actual_type); + // break; + + // case TypeIndex::Object: + // may_have_null = CHColumnObjectToNumpyArray(append_data, actual_type); + // break; + + // case TypeIndex::IPv4: + // may_have_null = CHColumnIPv4ToNumpyArray(append_data); + // break; + + // case TypeIndex::IPv6: + // may_have_null = CHColumnIPv6ToNumpyArray(append_data); + // break; + + // case TypeIndex::Variant: + // may_have_null = CHColumnVariantToNumpyArray(append_data, actual_type); + // break; + + // case TypeIndex::Dynamic: + // may_have_null = CHColumnDynamicToNumpyArray(append_data, actual_type); + // break; + + /// Set types are used only in WHERE clauses for IN operations, not in actual data storage + case TypeIndex::Set: + /// JSONPaths is an internal type used only for JSON schema inference, + case TypeIndex::JSONPaths: + /// Deprecated type, should not appear in normal data processing + case TypeIndex::ObjectDeprecated: + /// Function types are not actual data types, should not appear here + case TypeIndex::Function: + /// Aggregate function types are not actual data types, should not appear here + case TypeIndex::AggregateFunction: + /// LowCardinality should be unwrapped before reaching this point + case TypeIndex::LowCardinality: + /// Nullable cannot contain another Nullable type, so this should not appear in nested conversion + case TypeIndex::Nullable: + /// QBit type is supported in newer versions 
of ClickHouse + /// case TypeIndex::QBit: + default: + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported type {}", type->getName()); + } +} + +} // namespace CHDB diff --git a/programs/local/FieldToPython.h b/programs/local/FieldToPython.h new file mode 100644 index 00000000000..a47e6d94773 --- /dev/null +++ b/programs/local/FieldToPython.h @@ -0,0 +1,14 @@ +#pragma once + +#include +#include +#include + +namespace CHDB +{ + +pybind11::object convertFieldToPython( + const DB::Field & field, + const DB::DataTypePtr & type); + +} // namespace CHDB diff --git a/programs/local/NumpyArray.cpp b/programs/local/NumpyArray.cpp index 470478052ea..f054a95d27a 100644 --- a/programs/local/NumpyArray.cpp +++ b/programs/local/NumpyArray.cpp @@ -2,6 +2,7 @@ #include "NumpyType.h" #include "NumpyNestedTypes.h" #include "PythonImporter.h" +#include "FieldToPython.h" #include #include @@ -13,10 +14,15 @@ #include #include #include +#include +#include +#include +#include #include #include #include #include +#include #include #include @@ -53,6 +59,46 @@ struct RegularConvert } }; +struct TimeConvert +{ + template + static NUMPYTYPE convertValue(CHTYPE val, NumpyAppendData & append_data) + { + chassert(append_data.type); + + Field field(static_cast(val)); + auto time_object = convertFieldToPython(field, append_data.type); + return time_object.release().ptr(); + } + + template + static NUMPYTYPE nullValue(bool & set_mask) + { + set_mask = true; + return nullptr; + } +}; + +struct Time64Convert +{ + template + static NUMPYTYPE convertValue(CHTYPE val, NumpyAppendData & append_data) + { + chassert(append_data.type); + + Field field(val); + auto time64_object = convertFieldToPython(field, append_data.type); + return time64_object.release().ptr(); + } + + template + static NUMPYTYPE nullValue(bool & set_mask) + { + set_mask = true; + return nullptr; + } +}; + template static bool TransformColumn(NumpyAppendData & append_data) { @@ -119,8 +165,7 @@ static bool CHColumnDecimalToNumpyArray(NumpyAppendData & append_data, const Dat if (!decimal_type) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected DataTypeDecimal"); - auto scale_multiplier = decimal_type->getScaleMultiplier(); - double scale_multiplier_double = static_cast(scale_multiplier.value); + UInt32 scale = decimal_type->getScale(); auto * dest_ptr = reinterpret_cast(append_data.target_data); auto * mask_ptr = append_data.target_mask; @@ -137,9 +182,100 @@ static bool CHColumnDecimalToNumpyArray(NumpyAppendData & append_data, const Dat } else { - /// Convert decimal integer value to actual decimal by dividing by scale multiplier auto decimal_value = decimal_column->getElement(i); - dest_ptr[offset] = static_cast(decimal_value.value) / scale_multiplier_double; + dest_ptr[offset] = DecimalUtils::convertTo(decimal_value, scale); + mask_ptr[offset] = false; + } + } + + return has_null; +} + +static bool CHColumnDateTime64ToNumpyArray(NumpyAppendData & append_data) +{ + bool has_null = false; + const IColumn * data_column = &append_data.column; + const ColumnNullable * nullable_column = nullptr; + + if (const auto * nullable = typeid_cast(&append_data.column)) + { + nullable_column = nullable; + data_column = &nullable->getNestedColumn(); + } + + const auto * decimal_column = typeid_cast *>(data_column); + if (!decimal_column) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected ColumnDecimal"); + + auto * dest_ptr = reinterpret_cast(append_data.target_data); + auto * mask_ptr = append_data.target_mask; + + for (size_t i = append_data.src_offset; i < 
append_data.src_offset + append_data.src_count; i++) + { + size_t offset = append_data.dest_offset + i; + if (nullable_column && nullable_column->isNullAt(i)) + { + dest_ptr[offset] = 0; + mask_ptr[offset] = true; + has_null = true; + } + else + { + /// Get the DateTime64 value and convert to nanoseconds + Int64 raw_value = decimal_column->getInt(i); + auto scale = decimal_column->getScale(); + + Int64 ns_value; + chassert(scale <= 9); + Int64 multiplier = common::exp10_i32(9 - scale); + ns_value = raw_value * multiplier; + + dest_ptr[offset] = ns_value; + mask_ptr[offset] = false; + } + } + + return has_null; +} + +static bool CHColumnIntervalToNumpyArray(NumpyAppendData & append_data) +{ + bool has_null = false; + const IColumn * data_column = &append_data.column; + const ColumnNullable * nullable_column = nullptr; + + /// Check if column is nullable + if (const auto * nullable = typeid_cast(&append_data.column)) + { + nullable_column = nullable; + data_column = &nullable->getNestedColumn(); + } + + const auto * int64_column = typeid_cast *>(data_column); + if (!int64_column) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected ColumnVector for Interval"); + + auto * dest_ptr = reinterpret_cast(append_data.target_data); + auto * mask_ptr = append_data.target_mask; + + for (size_t i = append_data.src_offset; i < append_data.src_offset + append_data.src_count; i++) + { + size_t offset = append_data.dest_offset + i; + if (nullable_column && nullable_column->isNullAt(i)) + { + dest_ptr[offset] = 0; + mask_ptr[offset] = true; + has_null = true; + } + else + { + Int64 interval_value = int64_column->getElement(i); + + /// Convert quarter to month by multiplying by 3 + /// This function is only called for Quarter intervals + interval_value *= 3; + + dest_ptr[offset] = interval_value; mask_ptr[offset] = false; } } @@ -336,8 +472,11 @@ static bool CHColumnStringToNumpyArray(NumpyAppendData & append_data) return has_null; } -NumpyAppendData::NumpyAppendData(const DB::IColumn & column) - : column(column) +NumpyAppendData::NumpyAppendData( + const DB::IColumn & column_, + const DB::DataTypePtr & type_) + : column(column_) + , type(type_) , src_offset(0) , src_count(0) , dest_offset(0) @@ -376,16 +515,24 @@ NumpyArray::NumpyArray(const DataTypePtr & type_) mask_array = std::make_unique(DataTypeFactory::instance().get("Bool")); } -void NumpyArray::init(size_t capacity) +void NumpyArray::init(size_t capacity, bool may_have_null) { data_array->init(capacity); - mask_array->init(capacity); + + if (may_have_null) + { + mask_array->init(capacity); + } } -void NumpyArray::resize(size_t capacity) +void NumpyArray::resize(size_t capacity, bool may_have_null) { data_array->resize(capacity); - mask_array->resize(capacity); + + if (may_have_null) + { + mask_array->resize(capacity); + } } static bool CHColumnNothingToNumpyArray(NumpyAppendData & append_data) @@ -431,13 +578,6 @@ void NumpyArray::append( mask_array->count += size; bool may_have_null = false; - NumpyAppendData append_data(*column); - append_data.src_offset = offset; - append_data.src_offset + append_data.src_count = count; - append_data.target_data = data_ptr; - append_data.target_mask = mask_ptr; - append_data.dest_offset = data_array->count - size; - /// For nullable types, we need to get the nested type DataTypePtr actual_type = data_array->type; if (const auto * nullable_type = typeid_cast(data_array->type.get())) @@ -445,14 +585,23 @@ void NumpyArray::append( actual_type = nullable_type->getNestedType(); } + NumpyAppendData 
append_data(*column, actual_type); + append_data.src_offset = offset; + append_data.src_count = count; + append_data.target_data = data_ptr; + append_data.target_mask = mask_ptr; + append_data.dest_offset = data_array->count - size; + switch (actual_type->getTypeId()) { case TypeIndex::Nothing: may_have_null = CHColumnNothingToNumpyArray(append_data); break; + case TypeIndex::Int8: may_have_null = CHColumnToNumpyArray(append_data); break; + case TypeIndex::UInt8: { auto is_bool = isBool(data_array->type); @@ -462,114 +611,161 @@ void NumpyArray::append( may_have_null = CHColumnToNumpyArray(append_data); } break; + case TypeIndex::Int16: may_have_null = CHColumnToNumpyArray(append_data); break; + case TypeIndex::UInt16: may_have_null = CHColumnToNumpyArray(append_data); break; + case TypeIndex::Int32: may_have_null = CHColumnToNumpyArray(append_data); break; + case TypeIndex::UInt32: may_have_null = CHColumnToNumpyArray(append_data); break; + case TypeIndex::Int64: may_have_null = CHColumnToNumpyArray(append_data); break; + case TypeIndex::UInt64: may_have_null = CHColumnToNumpyArray(append_data); break; + case TypeIndex::Float32: may_have_null = CHColumnToNumpyArray(append_data); break; + case TypeIndex::Float64: may_have_null = CHColumnToNumpyArray(append_data); break; + case TypeIndex::Int128: may_have_null = TransformColumn(append_data); break; + case TypeIndex::Int256: may_have_null = TransformColumn(append_data); break; + case TypeIndex::UInt128: may_have_null = TransformColumn(append_data); break; + case TypeIndex::UInt256: may_have_null = TransformColumn(append_data); break; + case TypeIndex::BFloat16: may_have_null = TransformColumn(append_data); break; + case TypeIndex::Date: may_have_null = TransformColumn(append_data); break; + case TypeIndex::Date32: may_have_null = TransformColumn(append_data); break; + case TypeIndex::DateTime: may_have_null = TransformColumn(append_data); break; + case TypeIndex::DateTime64: - may_have_null = CHColumnToNumpyArray(append_data); + may_have_null = CHColumnDateTime64ToNumpyArray(append_data); break; + case TypeIndex::Time: - may_have_null = TransformColumn(append_data); + may_have_null = TransformColumn(append_data); break; + case TypeIndex::Time64: - may_have_null = CHColumnToNumpyArray(append_data); + may_have_null = TransformColumn(append_data); break; + case TypeIndex::String: may_have_null = CHColumnStringToNumpyArray(append_data); break; + case TypeIndex::FixedString: may_have_null = CHColumnStringToNumpyArray(append_data); break; + case TypeIndex::Enum8: may_have_null = CHColumnToNumpyArray(append_data); break; + case TypeIndex::Enum16: may_have_null = CHColumnToNumpyArray(append_data); break; + case TypeIndex::Decimal32: may_have_null = CHColumnDecimalToNumpyArray(append_data, actual_type); break; + case TypeIndex::Decimal64: may_have_null = CHColumnDecimalToNumpyArray(append_data, actual_type); break; + case TypeIndex::Decimal128: may_have_null = CHColumnDecimalToNumpyArray(append_data, actual_type); break; + case TypeIndex::Decimal256: may_have_null = CHColumnDecimalToNumpyArray(append_data, actual_type); break; + case TypeIndex::UUID: may_have_null = CHColumnUUIDToNumpyArray(append_data); break; + case TypeIndex::Array: may_have_null = CHColumnArrayToNumpyArray(append_data, actual_type); break; + case TypeIndex::Tuple: may_have_null = CHColumnTupleToNumpyArray(append_data, actual_type); break; + case TypeIndex::Interval: - may_have_null = CHColumnToNumpyArray(append_data); + { + const auto * interval_type = 
typeid_cast(actual_type.get()); + if (interval_type && interval_type->getKind() == IntervalKind::Kind::Quarter) + { + may_have_null = CHColumnIntervalToNumpyArray(append_data); + } + else + { + may_have_null = CHColumnToNumpyArray(append_data); + } + } break; + case TypeIndex::Map: may_have_null = CHColumnMapToNumpyArray(append_data, actual_type); break; + case TypeIndex::Object: may_have_null = CHColumnObjectToNumpyArray(append_data, actual_type); break; + case TypeIndex::IPv4: may_have_null = CHColumnIPv4ToNumpyArray(append_data); break; + case TypeIndex::IPv6: may_have_null = CHColumnIPv6ToNumpyArray(append_data); break; + case TypeIndex::Variant: may_have_null = CHColumnVariantToNumpyArray(append_data, actual_type); break; + case TypeIndex::Dynamic: may_have_null = CHColumnDynamicToNumpyArray(append_data, actual_type); break; @@ -600,9 +796,24 @@ void NumpyArray::append( } } +void NumpyArray::append(const DB::Field & field, const DB::DataTypePtr & type) +{ + chassert(data_array); + chassert(!mask_array); + + auto * data_ptr = data_array->data; + chassert(data_ptr); + + auto * dest_ptr = reinterpret_cast(data_ptr) + data_array->count; + + *dest_ptr = convertFieldToPython(field, type); + + data_array->count += 1; +} + py::object NumpyArray::toArray() const { - chassert(data_array && mask_array); + chassert(data_array); data_array->resize(data_array->count); if (!hava_null) @@ -610,6 +821,8 @@ py::object NumpyArray::toArray() const return std::move(data_array->array); } + chassert(mask_array); + mask_array->resize(mask_array->count); auto data_values = std::move(data_array->array); auto null_values = std::move(mask_array->array); diff --git a/programs/local/NumpyArray.h b/programs/local/NumpyArray.h index 3c014dc79f8..03f2fd5f360 100644 --- a/programs/local/NumpyArray.h +++ b/programs/local/NumpyArray.h @@ -4,6 +4,8 @@ #include #include +#include +#include #include namespace CHDB @@ -13,9 +15,12 @@ namespace CHDB class NumpyAppendData { public: - explicit NumpyAppendData(const DB::IColumn & column); + explicit NumpyAppendData( + const DB::IColumn & column_, + const DB::DataTypePtr & type_); const DB::IColumn & column; + const DB::DataTypePtr & type; size_t src_offset; size_t src_count; @@ -43,13 +48,15 @@ class NumpyArray { public: explicit NumpyArray(const DB::DataTypePtr & type_); - void init(size_t capacity); + void init(size_t capacity, bool may_have_null = true); - void resize(size_t capacity); + void resize(size_t capacity, bool may_have_null = true); + + void append(const DB::ColumnPtr & column); void append(const DB::ColumnPtr & column, size_t offset, size_t count); - void append(const DB::ColumnPtr & column); + void append(const DB::Field & field, const DB::DataTypePtr & type); py::object toArray() const; diff --git a/programs/local/NumpyNestedTypes.cpp b/programs/local/NumpyNestedTypes.cpp index eafc3477fbb..34468320bbe 100644 --- a/programs/local/NumpyNestedTypes.cpp +++ b/programs/local/NumpyNestedTypes.cpp @@ -64,6 +64,25 @@ struct ColumnTraits static py::object convertElement(const ColumnTuple * column, const DataTypePtr & data_type, size_t index) { + const auto * tuple_data_type = typeid_cast(data_type.get()); + if (!tuple_data_type) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected DataTypeTuple"); + + const auto & element_types = tuple_data_type->getElements(); + size_t tuple_size = column->tupleSize(); + + Field tuple_field = column->operator[](index); + const Tuple & tuple_value = tuple_field.safeGet(); + + NumpyArray numpy_array({}); + 
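+        /// Each tuple element is converted to a Python object via convertFieldToPython with its declared element type.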
numpy_array.init(tuple_size); + + for (size_t i = 0; i < tuple_size; ++i) + { + numpy_array.append(tuple_value[i], element_types[i]); + } + + return numpy_array.toArray(); } }; diff --git a/programs/local/NumpyType.cpp b/programs/local/NumpyType.cpp index 9f2bb23216a..83408d13278 100644 --- a/programs/local/NumpyType.cpp +++ b/programs/local/NumpyType.cpp @@ -243,81 +243,74 @@ String DataTypeToNumpyTypeStr(const std::shared_ptr & data_type { case TypeIndex::Nothing: return "object"; + case TypeIndex::Int8: return "int8"; + case TypeIndex::UInt8: /// Special case: UInt8 could be Bool type, need to check getName() { auto is_bool = isBool(data_type); return is_bool ? "bool" : "uint8"; } + case TypeIndex::Int16: return "int16"; + case TypeIndex::UInt16: return "uint16"; + case TypeIndex::Int32: return "int32"; + case TypeIndex::UInt32: return "uint32"; + case TypeIndex::Int64: return "int64"; + case TypeIndex::UInt64: return "uint64"; + case TypeIndex::BFloat16: case TypeIndex::Float32: return "float32"; + case TypeIndex::Int256: case TypeIndex::UInt256: case TypeIndex::Int128: case TypeIndex::UInt128: case TypeIndex::Float64: return "float64"; + case TypeIndex::String: case TypeIndex::FixedString: return "object"; + case TypeIndex::DateTime: return "datetime64[s]"; + case TypeIndex::DateTime64: { if (const auto * dt64 = typeid_cast(data_type.get())) { UInt32 scale = dt64->getScale(); - if (scale == 0) - return "datetime64[s]"; - else if (scale == 3) - return "datetime64[ms]"; - else if (scale == 6) - return "datetime64[us]"; - else if (scale == 9) - return "datetime64[ns]"; - else + if (scale <= 9) return "datetime64[ns]"; + + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported type {}, scale {}", data_type->getName(), scale); } - return "datetime64[ns]"; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected datetime64 type {}", data_type->getName()); } + case TypeIndex::Date: case TypeIndex::Date32: return "datetime64[D]"; + case TypeIndex::Time: - return "timedelta64[s]"; case TypeIndex::Time64: - { - if (const auto * time64 = typeid_cast(data_type.get())) - { - UInt32 scale = time64->getScale(); - if (scale == 0) - return "timedelta64[s]"; - else if (scale == 3) - return "timedelta64[ms]"; - else if (scale == 6) - return "timedelta64[us]"; - else if (scale == 9) - return "timedelta64[ns]"; - else - return "timedelta64[ns]"; - } - return "timedelta64[ns]"; - } + return "object"; + case TypeIndex::Interval: { if (const auto * interval = typeid_cast(data_type.get())) @@ -344,26 +337,28 @@ String DataTypeToNumpyTypeStr(const std::shared_ptr & data_type case IntervalKind::Kind::Month: return "timedelta64[M]"; case IntervalKind::Kind::Quarter: - /// numpy doesn't have quarter type, use int64 - return "int64"; + /// numpy doesn't have quarter type + return "timedelta64[M]"; case IntervalKind::Kind::Year: return "timedelta64[Y]"; default: - return "timedelta64[s]"; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected interval kind {}", kind.kind); } } - return "timedelta64[s]"; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected interval type {}", data_type->getName()); } case TypeIndex::UUID: case TypeIndex::IPv4: case TypeIndex::IPv6: return "object"; + case TypeIndex::Decimal32: case TypeIndex::Decimal64: case TypeIndex::Decimal128: case TypeIndex::Decimal256: return "float64"; + case TypeIndex::Array: case TypeIndex::Tuple: case TypeIndex::Map: @@ -372,10 +367,13 @@ String DataTypeToNumpyTypeStr(const std::shared_ptr & data_type case TypeIndex::Variant: case 
TypeIndex::Object: return "object"; + case TypeIndex::Enum8: return "int8"; + case TypeIndex::Enum16: return "int16"; + case TypeIndex::Nullable: { if (const auto * nullable = typeid_cast(data_type.get())) @@ -384,6 +382,7 @@ String DataTypeToNumpyTypeStr(const std::shared_ptr & data_type } throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected nullable type {}", data_type->getName()); } + default: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported type {}", data_type->getName()); } diff --git a/programs/local/PythonImportCache.h b/programs/local/PythonImportCache.h index 382bb34358d..598069a60e2 100644 --- a/programs/local/PythonImportCache.h +++ b/programs/local/PythonImportCache.h @@ -8,6 +8,7 @@ #include "PythonImportCacheItem.h" #include "UUIDCacheItem.h" #include "IPAddressCacheItem.h" +#include "PytzCacheItem.h" #include @@ -30,6 +31,7 @@ struct PythonImportCache NumpyCacheItem numpy; UUIDCacheItem uuid; IPAddressCacheItem ipaddress; + PytzCacheItem pytz; py::handle AddCache(py::object item); diff --git a/programs/local/PytzCacheItem.h b/programs/local/PytzCacheItem.h new file mode 100644 index 00000000000..3c6fccbe858 --- /dev/null +++ b/programs/local/PytzCacheItem.h @@ -0,0 +1,19 @@ +#pragma once + +#include "PythonImportCacheItem.h" + +namespace CHDB { + +struct PytzCacheItem : public PythonImportCacheItem +{ +public: + static constexpr const char *Name = "pytz"; + + PytzCacheItem() : PythonImportCacheItem("pytz"), timezone("timezone", this) {} + + ~PytzCacheItem() override = default; + + PythonImportCacheItem timezone; +}; + +} // namespace CHDB From 670d87c53c2a22b4397a65e513ce8ba41b65bba7 Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Tue, 4 Nov 2025 14:47:27 +0800 Subject: [PATCH 14/22] chore: support more types --- programs/local/FieldToPython.cpp | 200 ++++++++++++++++++++++++++----- 1 file changed, 169 insertions(+), 31 deletions(-) diff --git a/programs/local/FieldToPython.cpp b/programs/local/FieldToPython.cpp index e5fb03380c2..140b3beeced 100644 --- a/programs/local/FieldToPython.cpp +++ b/programs/local/FieldToPython.cpp @@ -6,6 +6,10 @@ #include #include #include +#include +#include +#include +#include #include #include #include @@ -13,6 +17,7 @@ #include #include #include +#include #include namespace CHDB @@ -24,6 +29,7 @@ namespace py = pybind11; namespace ErrorCodes { extern const int NOT_IMPLEMENTED; + extern const int LOGICAL_ERROR; } static py::object convertLocalDateToPython(const LocalDate & local_date, auto & import_cache, const Field & field) @@ -133,7 +139,7 @@ py::object convertFieldToPython( { auto seconds = field.safeGet(); - const auto * datetime_type = typeid_cast(type.get()); + const auto * datetime_type = typeid_cast(actual_type.get()); const auto & time_zone = datetime_type ? datetime_type->getTimeZone() : DateLUT::instance("UTC"); time_t timestamp = static_cast(seconds); @@ -169,7 +175,7 @@ py::object convertFieldToPython( auto datetime64_value = datetime64_field.getValue(); Int64 datetime64_ticks = datetime64_value.value; - const auto * datetime64_type = typeid_cast(type.get()); + const auto * datetime64_type = typeid_cast(actual_type.get()); const auto & time_zone = datetime64_type ? 
datetime64_type->getTimeZone() : DateLUT::instance("UTC"); UInt32 scale = datetime64_field.getScale(); @@ -314,45 +320,177 @@ py::object convertFieldToPython( } case TypeIndex::UUID: - break; + { + auto uuid_value = field.safeGet(); + const auto formatted_uuid = formatUUID(uuid_value); + return import_cache.uuid.UUID()(String(formatted_uuid.data(), formatted_uuid.size())); + } - // case TypeIndex::Array: - // may_have_null = CHColumnArrayToNumpyArray(append_data, actual_type); - // break; + case TypeIndex::Array: + { + auto array_field = field.safeGet(); - // case TypeIndex::Tuple: - // may_have_null = CHColumnTupleToNumpyArray(append_data, actual_type); - // break; + const auto * array_type = typeid_cast(actual_type.get()); + chassert(array_type); - // case TypeIndex::Interval: - // { - // const auto * interval_type = typeid_cast(actual_type.get()); - // if (interval_type && interval_type->getKind() == IntervalKind::Kind::Quarter) - // { - // may_have_null = CHColumnIntervalToNumpyArray(append_data); - // } - // else - // { - // may_have_null = CHColumnToNumpyArray(append_data); - // } - // } - // break; + const auto & element_type = array_type->getNestedType(); - // case TypeIndex::Map: - // may_have_null = CHColumnMapToNumpyArray(append_data, actual_type); - // break; + py::list python_list; + for (const auto & element : array_field) + { + auto python_element = convertFieldToPython(element, element_type); + python_list.append(python_element); + } + + return python_list; + } + + case TypeIndex::Tuple: + { + const auto & tuple_field = field.safeGet(); + + const auto * tuple_type = typeid_cast(actual_type.get()); + chassert(tuple_type); + + const auto & element_types = tuple_type->getElements(); + + py::tuple python_tuple(tuple_field.size()); + for (size_t i = 0; i < tuple_field.size(); ++i) + { + auto python_element = convertFieldToPython(tuple_field[i], element_types[i]); + python_tuple[i] = python_element; + } + + return python_tuple; + } + + case TypeIndex::Interval: + { + auto interval_value = field.safeGet(); + const auto * interval_type = typeid_cast(actual_type.get()); + chassert(interval_type); + IntervalKind::Kind interval_kind = interval_type->getKind(); + + switch (interval_kind) + { + case IntervalKind::Kind::Nanosecond: + return import_cache.datetime.timedelta()(py::arg("microseconds") = interval_value / 1000); + case IntervalKind::Kind::Microsecond: + return import_cache.datetime.timedelta()(py::arg("microseconds") = interval_value); + case IntervalKind::Kind::Millisecond: + return import_cache.datetime.timedelta()(py::arg("milliseconds") = interval_value); + case IntervalKind::Kind::Second: + return import_cache.datetime.timedelta()(py::arg("seconds") = interval_value); + case IntervalKind::Kind::Minute: + return import_cache.datetime.timedelta()(py::arg("minutes") = interval_value); + case IntervalKind::Kind::Hour: + return import_cache.datetime.timedelta()(py::arg("hours") = interval_value); + case IntervalKind::Kind::Day: + return import_cache.datetime.timedelta()(py::arg("days") = interval_value); + case IntervalKind::Kind::Week: + return import_cache.datetime.timedelta()(py::arg("weeks") = interval_value); + case IntervalKind::Kind::Month: + /// Approximate: 1 month = 30 days + return import_cache.datetime.timedelta()(py::arg("days") = interval_value * 30); + case IntervalKind::Kind::Quarter: + /// 1 quarter = 3 months = 90 days + return import_cache.datetime.timedelta()(py::arg("days") = interval_value * 90); + case IntervalKind::Kind::Year: + /// 1 year = 365 days + 
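/// Note: month/quarter/year intervals map to fixed-length day counts here, so calendar-exact arithmetic is not preserved. + 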
return import_cache.datetime.timedelta()(py::arg("days") = interval_value * 365); + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unsupported interval kind"); + } + } + + case TypeIndex::Map: + { + const auto & map_field = field.safeGet(); + + const auto * map_type = typeid_cast(actual_type.get()); + chassert(map_type); + + const auto & key_type = map_type->getKeyType(); + const auto & value_type = map_type->getValueType(); + + py::list keys_list; + py::list values_list; + py::dict python_dict; + bool use_dict = true; + + for (const auto & entry : map_field) + { + const auto & entry_tuple = entry.safeGet(); + chassert(entry_tuple.size() == 2); + + auto python_key = convertFieldToPython(entry_tuple[0], key_type); + auto python_value = convertFieldToPython(entry_tuple[1], value_type); + + if (use_dict) + { + try + { + python_dict[python_key] = python_value; + keys_list.append(std::move(python_key)); + values_list.append(std::move(python_value)); + } + catch (const std::exception &) + { + // Key is not hashable, switch to list format + use_dict = false; + keys_list.clear(); + values_list.clear(); + keys_list.append(std::move(python_key)); + values_list.append(std::move(python_value)); + } + } + else + { + keys_list.append(std::move(python_key)); + values_list.append(std::move(python_value)); + } + } + + if (use_dict) + { + return python_dict; + } + else + { + py::dict result; + result["keys"] = keys_list; + result["values"] = values_list; + return result; + } + } // case TypeIndex::Object: // may_have_null = CHColumnObjectToNumpyArray(append_data, actual_type); // break; - // case TypeIndex::IPv4: - // may_have_null = CHColumnIPv4ToNumpyArray(append_data); - // break; + case TypeIndex::IPv4: + { + auto ipv4_value = field.safeGet(); - // case TypeIndex::IPv6: - // may_have_null = CHColumnIPv6ToNumpyArray(append_data); - // break; + char ipv4_str[IPV4_MAX_TEXT_LENGTH]; + char * ptr = ipv4_str; + formatIPv4(reinterpret_cast(&ipv4_value), ptr); + const size_t ipv4_str_len = ptr - ipv4_str; + + return import_cache.ipaddress.ipv4_address()(String(ipv4_str, ipv4_str_len)); + } + + case TypeIndex::IPv6: + { + auto ipv6_value = field.safeGet(); + + char ipv6_str[IPV6_MAX_TEXT_LENGTH]; + char * ptr = ipv6_str; + formatIPv6(reinterpret_cast(&ipv6_value), ptr); + const size_t ipv6_str_len = ptr - ipv6_str; + + return import_cache.ipaddress.ipv6_address()(String(ipv6_str, ipv6_str_len)); + } // case TypeIndex::Variant: // may_have_null = CHColumnVariantToNumpyArray(append_data, actual_type); From b5b4de63f8f2f16a18609dbeae4df5fc343fafa8 Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Tue, 4 Nov 2025 15:10:09 +0800 Subject: [PATCH 15/22] chore: support map type --- programs/local/FieldToPython.cpp | 59 ++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/programs/local/FieldToPython.cpp b/programs/local/FieldToPython.cpp index 140b3beeced..010f8cfcbe2 100644 --- a/programs/local/FieldToPython.cpp +++ b/programs/local/FieldToPython.cpp @@ -32,6 +32,65 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +static bool canTypeBeUsedAsDictKey(TypeIndex key_type) +{ + switch (key_type) + { + case TypeIndex::Nothing: + case TypeIndex::Int8: + case TypeIndex::UInt8: + case TypeIndex::Int16: + case TypeIndex::UInt16: + case TypeIndex::Int32: + case TypeIndex::UInt32: + case TypeIndex::Int64: + case TypeIndex::UInt64: + case TypeIndex::Float32: + case TypeIndex::Float64: + case TypeIndex::Int128: + case TypeIndex::Int256: + case TypeIndex::UInt128: + case TypeIndex::UInt256: 
+ case TypeIndex::BFloat16: + case TypeIndex::Date: + case TypeIndex::Date32: + case TypeIndex::DateTime: + case TypeIndex::DateTime64: + case TypeIndex::Time: + case TypeIndex::Time64: + case TypeIndex::String: + case TypeIndex::FixedString: + case TypeIndex::Enum8: + case TypeIndex::Enum16: + case TypeIndex::Decimal32: + case TypeIndex::Decimal64: + case TypeIndex::Decimal128: + case TypeIndex::Decimal256: + case TypeIndex::UUID: + case TypeIndex::Interval: + case TypeIndex::IPv4: + case TypeIndex::IPv6: + return true; + + // Unsupported nested types + case TypeIndex::Array: + case TypeIndex::Tuple: + case TypeIndex::Map: + return false; + + // Other unsupported types + case TypeIndex::Set: + case TypeIndex::JSONPaths: + case TypeIndex::ObjectDeprecated: + case TypeIndex::Function: + case TypeIndex::AggregateFunction: + case TypeIndex::LowCardinality: + case TypeIndex::Nullable: + default: + return false; + } +} + static py::object convertLocalDateToPython(const LocalDate & local_date, auto & import_cache, const Field & field) { auto year = local_date.year(); From f9f19706f0233f977217e30c2ecb11206dde4ffe Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Tue, 4 Nov 2025 21:27:43 +0800 Subject: [PATCH 16/22] chore: support more nested type --- programs/local/FieldToPython.cpp | 363 ++++++++++++++++++---------- programs/local/FieldToPython.h | 6 +- programs/local/NumpyNestedTypes.cpp | 20 ++ 3 files changed, 258 insertions(+), 131 deletions(-) diff --git a/programs/local/FieldToPython.cpp b/programs/local/FieldToPython.cpp index 010f8cfcbe2..cfce3708b71 100644 --- a/programs/local/FieldToPython.cpp +++ b/programs/local/FieldToPython.cpp @@ -1,7 +1,10 @@ #include "FieldToPython.h" #include "PythonImporter.h" -#include +#include +#include +#include +#include #include #include #include @@ -10,6 +13,11 @@ #include #include #include +#include +#include +#include +#include +#include #include #include #include @@ -32,63 +40,83 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -static bool canTypeBeUsedAsDictKey(TypeIndex key_type) +static bool canTypeBeUsedAsDictKey(const DataTypePtr & type) { - switch (key_type) + DataTypePtr actual_type = type; + if (const auto * nullable_type = typeid_cast(type.get())) { - case TypeIndex::Nothing: - case TypeIndex::Int8: - case TypeIndex::UInt8: - case TypeIndex::Int16: - case TypeIndex::UInt16: - case TypeIndex::Int32: - case TypeIndex::UInt32: - case TypeIndex::Int64: + actual_type = nullable_type->getNestedType(); + } + + switch (actual_type->getTypeId()) + { + case TypeIndex::Nothing: + case TypeIndex::Int8: + case TypeIndex::UInt8: + case TypeIndex::Int16: + case TypeIndex::UInt16: + case TypeIndex::Int32: + case TypeIndex::UInt32: + case TypeIndex::Int64: case TypeIndex::UInt64: - case TypeIndex::Float32: - case TypeIndex::Float64: - case TypeIndex::Int128: - case TypeIndex::Int256: - case TypeIndex::UInt128: - case TypeIndex::UInt256: - case TypeIndex::BFloat16: - case TypeIndex::Date: - case TypeIndex::Date32: - case TypeIndex::DateTime: - case TypeIndex::DateTime64: - case TypeIndex::Time: - case TypeIndex::Time64: - case TypeIndex::String: - case TypeIndex::FixedString: - case TypeIndex::Enum8: - case TypeIndex::Enum16: - case TypeIndex::Decimal32: - case TypeIndex::Decimal64: - case TypeIndex::Decimal128: - case TypeIndex::Decimal256: - case TypeIndex::UUID: + case TypeIndex::Float32: + case TypeIndex::Float64: + case TypeIndex::Int128: + case TypeIndex::Int256: + case TypeIndex::UInt128: + case TypeIndex::UInt256: + case TypeIndex::BFloat16: 
+ case TypeIndex::Date: + case TypeIndex::Date32: + case TypeIndex::DateTime: + case TypeIndex::DateTime64: + case TypeIndex::Time: + case TypeIndex::Time64: + case TypeIndex::String: + case TypeIndex::FixedString: + case TypeIndex::Enum8: + case TypeIndex::Enum16: + case TypeIndex::Decimal32: + case TypeIndex::Decimal64: + case TypeIndex::Decimal128: + case TypeIndex::Decimal256: + case TypeIndex::UUID: case TypeIndex::Interval: case TypeIndex::IPv4: - case TypeIndex::IPv6: + case TypeIndex::IPv6: return true; - // Unsupported nested types - case TypeIndex::Array: - case TypeIndex::Tuple: - case TypeIndex::Map: + case TypeIndex::Array: + case TypeIndex::Tuple: + case TypeIndex::Map: + case TypeIndex::Object: + case TypeIndex::Dynamic: return false; - // Other unsupported types - case TypeIndex::Set: - case TypeIndex::JSONPaths: - case TypeIndex::ObjectDeprecated: - case TypeIndex::Function: - case TypeIndex::AggregateFunction: - case TypeIndex::LowCardinality: - case TypeIndex::Nullable: - default: - return false; - } + case TypeIndex::Variant: + { + const auto * variant_type = typeid_cast(type.get()); + chassert(variant_type); + + const auto & variants = variant_type->getVariants(); + for (const auto & variant : variants) + { + if (!canTypeBeUsedAsDictKey(variant)) + return false; + } + return true; + } + + case TypeIndex::Set: + case TypeIndex::JSONPaths: + case TypeIndex::ObjectDeprecated: + case TypeIndex::Function: + case TypeIndex::AggregateFunction: + case TypeIndex::LowCardinality: + case TypeIndex::Nullable: + default: + return false; + } } static py::object convertLocalDateToPython(const LocalDate & local_date, auto & import_cache, const Field & field) @@ -108,13 +136,11 @@ static py::object convertLocalDateToPython(const LocalDate & local_date, auto & } py::object convertFieldToPython( - const Field & field, - const DB::DataTypePtr & type) + const ColumnPtr & column, + const DataTypePtr & type, + size_t index) { - chassert(type); - - auto filed_type = field.getType(); - if (filed_type == Field::Types::Null) + if (column->isNullAt(index)) { return py::none(); } @@ -133,55 +159,102 @@ py::object convertFieldToPython( return py::none(); case TypeIndex::Int8: - return py::cast(field.safeGet()); + { + auto field = column->operator[](index); + return py::cast(field.safeGet()); + } case TypeIndex::UInt8: - if (filed_type == Field::Types::Bool) - return py::cast(field.safeGet()); + { + auto field = column->operator[](index); + auto filed_type = field.getType(); + if (filed_type == Field::Types::Bool) + return py::cast(field.safeGet()); - return py::cast(field.safeGet()); + return py::cast(field.safeGet()); + } case TypeIndex::Int16: - return py::cast(field.safeGet()); + { + auto field = column->operator[](index); + return py::cast(field.safeGet()); + } case TypeIndex::UInt16: - return py::cast(field.safeGet()); + { + auto field = column->operator[](index); + return py::cast(field.safeGet()); + } case TypeIndex::Int32: - return py::cast(field.safeGet()); + { + auto field = column->operator[](index); + return py::cast(field.safeGet()); + } case TypeIndex::UInt32: - return py::cast(field.safeGet()); + { + auto field = column->operator[](index); + return py::cast(field.safeGet()); + } case TypeIndex::Int64: - return py::cast(field.safeGet()); + { + auto field = column->operator[](index); + return py::cast(field.safeGet()); + } case TypeIndex::UInt64: - return py::cast(field.safeGet()); + { + auto field = column->operator[](index); + return py::cast(field.safeGet()); + } case 
TypeIndex::Float32: - return py::cast(field.safeGet()); + { + auto field = column->operator[](index); + return py::cast(field.safeGet()); + } case TypeIndex::Float64: - return py::cast(field.safeGet()); + { + auto field = column->operator[](index); + return py::cast(field.safeGet()); + } case TypeIndex::Int128: - return py::cast((double)field.safeGet()); + { + auto field = column->operator[](index); + return py::cast((double)field.safeGet()); + } case TypeIndex::Int256: - return py::cast((double)field.safeGet()); + { + auto field = column->operator[](index); + return py::cast((double)field.safeGet()); + } case TypeIndex::UInt128: - return py::cast((double)field.safeGet()); + { + auto field = column->operator[](index); + return py::cast((double)field.safeGet()); + } case TypeIndex::UInt256: - return py::cast((double)field.safeGet()); + { + auto field = column->operator[](index); + return py::cast((double)field.safeGet()); + } case TypeIndex::BFloat16: - return py::cast((double)field.safeGet()); + { + auto field = column->operator[](index); + return py::cast((double)field.safeGet()); + } case TypeIndex::Date: { + auto field = column->operator[](index); auto days = field.safeGet(); LocalDate local_date(static_cast(days)); return convertLocalDateToPython(local_date, import_cache, field); @@ -189,6 +262,7 @@ py::object convertFieldToPython( case TypeIndex::Date32: { + auto field = column->operator[](index); auto days = field.safeGet(); LocalDate local_date(static_cast(days)); return convertLocalDateToPython(local_date, import_cache, field); @@ -196,6 +270,7 @@ py::object convertFieldToPython( case TypeIndex::DateTime: { + auto field = column->operator[](index); auto seconds = field.safeGet(); const auto * datetime_type = typeid_cast(actual_type.get()); @@ -230,6 +305,7 @@ py::object convertFieldToPython( case TypeIndex::DateTime64: { + auto field = column->operator[](index); auto datetime64_field = field.safeGet>(); auto datetime64_value = datetime64_field.getValue(); Int64 datetime64_ticks = datetime64_value.value; @@ -271,6 +347,7 @@ py::object convertFieldToPython( case TypeIndex::Time: { + auto field = column->operator[](index); auto time_seconds = field.safeGet(); if (time_seconds < 0) @@ -299,6 +376,7 @@ py::object convertFieldToPython( case TypeIndex::Time64: { + auto field = column->operator[](index); auto time64_field = field.safeGet>(); auto time64_value = time64_field.getValue(); Int64 time64_ticks = time64_value.value; @@ -336,14 +414,21 @@ py::object convertFieldToPython( case TypeIndex::String: case TypeIndex::FixedString: - return py::cast(field.safeGet()); + { + auto field = column->operator[](index); + return py::cast(field.safeGet()); + } case TypeIndex::Enum8: case TypeIndex::Enum16: - return py::cast(field.safeGet()); + { + auto field = column->operator[](index); + return py::cast(field.safeGet()); + } case TypeIndex::Decimal32: { + auto field = column->operator[](index); auto decimal_field = field.safeGet>(); auto decimal_value = decimal_field.getValue(); UInt32 scale = decimal_field.getScale(); @@ -353,6 +438,7 @@ py::object convertFieldToPython( case TypeIndex::Decimal64: { + auto field = column->operator[](index); auto decimal_field = field.safeGet>(); auto decimal_value = decimal_field.getValue(); UInt32 scale = decimal_field.getScale(); @@ -362,6 +448,7 @@ py::object convertFieldToPython( case TypeIndex::Decimal128: { + auto field = column->operator[](index); auto decimal_field = field.safeGet>(); auto decimal_value = decimal_field.getValue(); UInt32 scale = 
decimal_field.getScale(); @@ -371,6 +458,7 @@ py::object convertFieldToPython( case TypeIndex::Decimal256: { + auto field = column->operator[](index); auto decimal_field = field.safeGet>(); auto decimal_value = decimal_field.getValue(); UInt32 scale = decimal_field.getScale(); @@ -380,6 +468,7 @@ py::object convertFieldToPython( case TypeIndex::UUID: { + auto field = column->operator[](index); auto uuid_value = field.safeGet(); const auto formatted_uuid = formatUUID(uuid_value); return import_cache.uuid.UUID()(String(formatted_uuid.data(), formatted_uuid.size())); @@ -387,17 +476,24 @@ py::object convertFieldToPython( case TypeIndex::Array: { - auto array_field = field.safeGet(); + const auto * array_column = typeid_cast(column.get()); + if (!array_column) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected ColumnArray"); const auto * array_type = typeid_cast(actual_type.get()); chassert(array_type); const auto & element_type = array_type->getNestedType(); + const auto & offsets = array_column->getOffsets(); + const auto & nested_column = array_column->getDataPtr(); + + size_t start_offset = (index == 0) ? 0 : offsets[index - 1]; + size_t end_offset = offsets[index]; py::list python_list; - for (const auto & element : array_field) + for (size_t i = start_offset; i < end_offset; ++i) { - auto python_element = convertFieldToPython(element, element_type); + auto python_element = convertFieldToPython(nested_column, element_type, i); python_list.append(python_element); } @@ -406,17 +502,20 @@ py::object convertFieldToPython( case TypeIndex::Tuple: { - const auto & tuple_field = field.safeGet(); + const auto * tuple_column = typeid_cast(column.get()); + if (!tuple_column) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected ColumnTuple"); const auto * tuple_type = typeid_cast(actual_type.get()); chassert(tuple_type); const auto & element_types = tuple_type->getElements(); + const auto & tuple_columns = tuple_column->getColumns(); - py::tuple python_tuple(tuple_field.size()); - for (size_t i = 0; i < tuple_field.size(); ++i) + py::tuple python_tuple(tuple_columns.size()); + for (size_t i = 0; i < tuple_columns.size(); ++i) { - auto python_element = convertFieldToPython(tuple_field[i], element_types[i]); + auto python_element = convertFieldToPython(tuple_columns[i], element_types[i], index); python_tuple[i] = python_element; } @@ -425,6 +524,7 @@ py::object convertFieldToPython( case TypeIndex::Interval: { + auto field = column->operator[](index); auto interval_value = field.safeGet(); const auto * interval_type = typeid_cast(actual_type.get()); chassert(interval_type); @@ -464,7 +564,9 @@ py::object convertFieldToPython( case TypeIndex::Map: { - const auto & map_field = field.safeGet(); + const auto * map_column = typeid_cast(column.get()); + if (!map_column) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected ColumnMap"); const auto * map_type = typeid_cast(actual_type.get()); chassert(map_type); @@ -472,63 +574,73 @@ py::object convertFieldToPython( const auto & key_type = map_type->getKeyType(); const auto & value_type = map_type->getValueType(); - py::list keys_list; - py::list values_list; - py::dict python_dict; - bool use_dict = true; + /// Get the nested array column containing tuples + const auto & nested_array = map_column->getNestedColumn(); + const auto * array_column = typeid_cast(&nested_array); + if (!array_column) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected ColumnArray in ColumnMap"); - for (const auto & entry : map_field) - { - const auto & entry_tuple = 
entry.safeGet(); - chassert(entry_tuple.size() == 2); + const auto & offsets = array_column->getOffsets(); + const auto & tuple_column_ptr = array_column->getDataPtr(); + const auto * tuple_column = typeid_cast(tuple_column_ptr.get()); + if (!tuple_column) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected ColumnTuple in ColumnMap"); - auto python_key = convertFieldToPython(entry_tuple[0], key_type); - auto python_value = convertFieldToPython(entry_tuple[1], value_type); + size_t start_offset = (index == 0) ? 0 : offsets[index - 1]; + size_t end_offset = offsets[index]; - if (use_dict) + const auto & key_column_ptr = tuple_column->getColumnPtr(0); + const auto & value_column_ptr = tuple_column->getColumnPtr(1); + + bool use_dict = canTypeBeUsedAsDictKey(key_type); + + if (use_dict) + { + py::dict python_dict; + for (size_t i = start_offset; i < end_offset; ++i) { - try - { - python_dict[python_key] = python_value; - keys_list.append(std::move(python_key)); - values_list.append(std::move(python_value)); - } - catch (const std::exception &) - { - // Key is not hashable, switch to list format - use_dict = false; - keys_list.clear(); - values_list.clear(); - keys_list.append(std::move(python_key)); - values_list.append(std::move(python_value)); - } + auto python_key = convertFieldToPython(key_column_ptr, key_type, i); + auto python_value = convertFieldToPython(value_column_ptr, value_type, i); + + python_dict[std::move(python_key)] = std::move(python_value); } - else + + return python_dict; + } + else + { + py::list keys_list; + py::list values_list; + for (size_t i = start_offset; i < end_offset; ++i) { + auto python_key = convertFieldToPython(key_column_ptr, key_type, i); + auto python_value = convertFieldToPython(value_column_ptr, value_type, i); + keys_list.append(std::move(python_key)); values_list.append(std::move(python_value)); } - } - if (use_dict) - { + py::dict python_dict; + python_dict["keys"] = std::move(keys_list); + python_dict["values"] = std::move(values_list); + return python_dict; } - else - { - py::dict result; - result["keys"] = keys_list; - result["values"] = values_list; - return result; - } } + case TypeIndex::Variant: + { + + } + + + // case TypeIndex::Dynamic: + // case TypeIndex::Object: - // may_have_null = CHColumnObjectToNumpyArray(append_data, actual_type); - // break; case TypeIndex::IPv4: { + auto field = column->operator[](index); auto ipv4_value = field.safeGet(); char ipv4_str[IPV4_MAX_TEXT_LENGTH]; @@ -541,6 +653,7 @@ py::object convertFieldToPython( case TypeIndex::IPv6: { + auto field = column->operator[](index); auto ipv6_value = field.safeGet(); char ipv6_str[IPV6_MAX_TEXT_LENGTH]; @@ -551,14 +664,6 @@ py::object convertFieldToPython( return import_cache.ipaddress.ipv6_address()(String(ipv6_str, ipv6_str_len)); } - // case TypeIndex::Variant: - // may_have_null = CHColumnVariantToNumpyArray(append_data, actual_type); - // break; - - // case TypeIndex::Dynamic: - // may_have_null = CHColumnDynamicToNumpyArray(append_data, actual_type); - // break; - /// Set types are used only in WHERE clauses for IN operations, not in actual data storage case TypeIndex::Set: /// JSONPaths is an internal type used only for JSON schema inference, diff --git a/programs/local/FieldToPython.h b/programs/local/FieldToPython.h index a47e6d94773..c108d4f6479 100644 --- a/programs/local/FieldToPython.h +++ b/programs/local/FieldToPython.h @@ -2,13 +2,15 @@ #include #include +#include #include namespace CHDB { pybind11::object convertFieldToPython( - const DB::Field & field, 
- const DB::DataTypePtr & type); + const DB::ColumnPtr & column, + const DB::DataTypePtr & type, + size_t index); } // namespace CHDB diff --git a/programs/local/NumpyNestedTypes.cpp b/programs/local/NumpyNestedTypes.cpp index 34468320bbe..e35774025fe 100644 --- a/programs/local/NumpyNestedTypes.cpp +++ b/programs/local/NumpyNestedTypes.cpp @@ -1,5 +1,6 @@ #include "NumpyNestedTypes.h" #include "NumpyArray.h" +#include "FieldToPython.h" #include #include @@ -13,6 +14,9 @@ #include #include #include +#include +#include +#include #include #include @@ -93,6 +97,7 @@ struct ColumnTraits static py::object convertElement(const ColumnMap * column, const DataTypePtr & data_type, size_t index) { + return convertFieldToPython(*column, data_type, index); } }; @@ -113,6 +118,19 @@ struct ColumnTraits static py::object convertElement(const ColumnVariant * column, const DataTypePtr & data_type, size_t index) { + auto discriminator = column->globalDiscriminatorAt(index); + if (discriminator == ColumnVariant::NULL_DISCRIMINATOR) + { + return py::none(); + } + + const auto * variant_type = typeid_cast(data_type.get()); + const auto & variants = variant_type->getVariants(); + const auto & actual_type = variants[discriminator]; + Field variant_field = column->operator[](index); + + /// Nested types can be arbitrary types except Variant(...), LowCardinality(Nullable(...)) and Nullable(...) types. + return convertFieldToPython(variant_field, actual_type); } }; @@ -123,6 +141,8 @@ struct ColumnTraits static py::object convertElement(const ColumnDynamic * column, const DataTypePtr & data_type, size_t index) { + Field dynamic_field = column->operator[](index); + return convertFieldToPython(dynamic_field, data_type); } }; From dab8450bd6eed758f1abeffeea10c8909e3f9929 Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Tue, 4 Nov 2025 22:35:27 +0800 Subject: [PATCH 17/22] chore: support more nested type --- programs/local/FieldToPython.cpp | 289 ++++++++++++++++------------ programs/local/FieldToPython.h | 6 +- programs/local/NumpyArray.cpp | 11 +- programs/local/NumpyArray.h | 12 +- programs/local/NumpyNestedTypes.cpp | 28 +-- 5 files changed, 192 insertions(+), 154 deletions(-) diff --git a/programs/local/FieldToPython.cpp b/programs/local/FieldToPython.cpp index cfce3708b71..83a0569ad0e 100644 --- a/programs/local/FieldToPython.cpp +++ b/programs/local/FieldToPython.cpp @@ -5,6 +5,11 @@ #include #include #include +#include +#include +#include +#include +#include #include #include #include @@ -40,6 +45,73 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +py::object convertTimeFieldToPython(const Field & field) +{ + auto & import_cache = PythonImporter::ImportCache(); + auto time_seconds = field.safeGet(); + + if (time_seconds < 0) + { + return py::str(toString(field)); + } + + /// Handle time overflow (should be within 24 hours) + /// ClickHouse Time range is [-999:59:59, 999:59:59] + time_seconds = time_seconds % 86400; + + int hour = static_cast(time_seconds / 3600); + int minute = static_cast((time_seconds % 3600) / 60); + int second = static_cast(time_seconds % 60); + int microsecond = 0; + + try + { + return import_cache.datetime.time()(hour, minute, second, microsecond); + } + catch (py::error_already_set &) + { + return py::str(toString(field)); + } +} + +py::object convertTime64FieldToPython(const Field & field) +{ + auto & import_cache = PythonImporter::ImportCache(); + auto time64_field = field.safeGet>(); + auto time64_value = time64_field.getValue(); + Int64 time64_ticks = time64_value.value; + 
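+ /// datetime.time cannot represent negative times, so negative Time64 ticks fall back to their string representation below.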
+ if (time64_ticks < 0) + { + return py::str(toString(field)); + } + + UInt32 scale = time64_field.getScale(); + Int64 scale_multiplier = DecimalUtils::scaleMultiplier(scale); + + /// Convert to seconds and fractional part within a day + Int64 total_seconds = time64_ticks / scale_multiplier; + Int64 fractional = time64_ticks % scale_multiplier; + + /// Handle time overflow (should be within 24 hours) + /// ClickHouse Time range is [-999:59:59, 999:59:59] + total_seconds = total_seconds % 86400; + + int hour = static_cast(total_seconds / 3600); + int minute = static_cast((total_seconds % 3600) / 60); + int second = static_cast(total_seconds % 60); + int microsecond = static_cast((fractional * 1000000) / scale_multiplier); + + try + { + return import_cache.datetime.time()(hour, minute, second, microsecond); + } + catch (py::error_already_set &) + { + return py::str(toString(field)); + } +} + static bool canTypeBeUsedAsDictKey(const DataTypePtr & type) { DataTypePtr actual_type = type; @@ -136,11 +208,11 @@ static py::object convertLocalDateToPython(const LocalDate & local_date, auto & } py::object convertFieldToPython( - const ColumnPtr & column, + const IColumn & column, const DataTypePtr & type, size_t index) { - if (column->isNullAt(index)) + if (column.isNullAt(index)) { return py::none(); } @@ -160,13 +232,13 @@ py::object convertFieldToPython( case TypeIndex::Int8: { - auto field = column->operator[](index); + auto field = column[index]; return py::cast(field.safeGet()); } case TypeIndex::UInt8: { - auto field = column->operator[](index); + auto field = column[index]; auto filed_type = field.getType(); if (filed_type == Field::Types::Bool) return py::cast(field.safeGet()); @@ -176,85 +248,85 @@ py::object convertFieldToPython( case TypeIndex::Int16: { - auto field = column->operator[](index); + auto field = column[index]; return py::cast(field.safeGet()); } case TypeIndex::UInt16: { - auto field = column->operator[](index); + auto field = column[index]; return py::cast(field.safeGet()); } case TypeIndex::Int32: { - auto field = column->operator[](index); + auto field = column[index]; return py::cast(field.safeGet()); } case TypeIndex::UInt32: { - auto field = column->operator[](index); + auto field = column[index]; return py::cast(field.safeGet()); } case TypeIndex::Int64: { - auto field = column->operator[](index); + auto field = column[index]; return py::cast(field.safeGet()); } case TypeIndex::UInt64: { - auto field = column->operator[](index); + auto field = column[index]; return py::cast(field.safeGet()); } case TypeIndex::Float32: { - auto field = column->operator[](index); + auto field = column[index]; return py::cast(field.safeGet()); } case TypeIndex::Float64: { - auto field = column->operator[](index); + auto field = column[index]; return py::cast(field.safeGet()); } case TypeIndex::Int128: { - auto field = column->operator[](index); + auto field = column[index]; return py::cast((double)field.safeGet()); } case TypeIndex::Int256: { - auto field = column->operator[](index); + auto field = column[index]; return py::cast((double)field.safeGet()); } case TypeIndex::UInt128: { - auto field = column->operator[](index); + auto field = column[index]; return py::cast((double)field.safeGet()); } case TypeIndex::UInt256: { - auto field = column->operator[](index); + auto field = column[index]; return py::cast((double)field.safeGet()); } case TypeIndex::BFloat16: { - auto field = column->operator[](index); + auto field = column[index]; return py::cast((double)field.safeGet()); } case 
TypeIndex::Date: { - auto field = column->operator[](index); + auto field = column[index]; auto days = field.safeGet(); LocalDate local_date(static_cast(days)); return convertLocalDateToPython(local_date, import_cache, field); @@ -262,7 +334,7 @@ py::object convertFieldToPython( case TypeIndex::Date32: { - auto field = column->operator[](index); + auto field = column[index]; auto days = field.safeGet(); LocalDate local_date(static_cast(days)); return convertLocalDateToPython(local_date, import_cache, field); @@ -270,7 +342,7 @@ py::object convertFieldToPython( case TypeIndex::DateTime: { - auto field = column->operator[](index); + auto field = column[index]; auto seconds = field.safeGet(); const auto * datetime_type = typeid_cast(actual_type.get()); @@ -305,7 +377,7 @@ py::object convertFieldToPython( case TypeIndex::DateTime64: { - auto field = column->operator[](index); + auto field = column[index]; auto datetime64_field = field.safeGet>(); auto datetime64_value = datetime64_field.getValue(); Int64 datetime64_ticks = datetime64_value.value; @@ -347,88 +419,33 @@ py::object convertFieldToPython( case TypeIndex::Time: { - auto field = column->operator[](index); - auto time_seconds = field.safeGet(); - - if (time_seconds < 0) - { - return py::str(toString(field)); - } - - /// Handle time overflow (should be within 24 hours) - /// ClickHouse Time range is [-999:59:59, 999:59:59] - time_seconds = time_seconds % 86400; - - int hour = static_cast(time_seconds / 3600); - int minute = static_cast((time_seconds % 3600) / 60); - int second = static_cast(time_seconds % 60); - int microsecond = 0; - - try - { - return import_cache.datetime.time()(hour, minute, second, microsecond); - } - catch (py::error_already_set &) - { - return py::str(toString(field)); - } + auto field = column[index]; + return convertTimeFieldToPython(field); } case TypeIndex::Time64: { - auto field = column->operator[](index); - auto time64_field = field.safeGet>(); - auto time64_value = time64_field.getValue(); - Int64 time64_ticks = time64_value.value; - - if (time64_ticks < 0) - { - return py::str(toString(field)); - } - - UInt32 scale = time64_field.getScale(); - Int64 scale_multiplier = DecimalUtils::scaleMultiplier(scale); - - /// Convert to seconds and fractional part within a day - Int64 total_seconds = time64_ticks / scale_multiplier; - Int64 fractional = time64_ticks % scale_multiplier; - - /// Handle time overflow (should be within 24 hours) - /// ClickHouse Time range is [-999:59:59, 999:59:59] - total_seconds = total_seconds % 86400; - - int hour = static_cast(total_seconds / 3600); - int minute = static_cast((total_seconds % 3600) / 60); - int second = static_cast(total_seconds % 60); - int microsecond = static_cast((fractional * 1000000) / scale_multiplier); - - try - { - return import_cache.datetime.time()(hour, minute, second, microsecond); - } - catch (py::error_already_set &) - { - return py::str(toString(field)); - } + auto field = column[index]; + return convertTime64FieldToPython(field); } case TypeIndex::String: case TypeIndex::FixedString: { - auto field = column->operator[](index); + auto field = column[index]; return py::cast(field.safeGet()); } case TypeIndex::Enum8: case TypeIndex::Enum16: { - auto field = column->operator[](index); + auto field = column[index]; return py::cast(field.safeGet()); } case TypeIndex::Decimal32: { - auto field = column->operator[](index); + auto field = column[index]; auto decimal_field = field.safeGet>(); auto decimal_value = decimal_field.getValue(); UInt32 scale = 
decimal_field.getScale(); @@ -438,7 +455,7 @@ py::object convertFieldToPython( case TypeIndex::Decimal64: { - auto field = column->operator[](index); + auto field = column[index]; auto decimal_field = field.safeGet>(); auto decimal_value = decimal_field.getValue(); UInt32 scale = decimal_field.getScale(); @@ -448,7 +465,7 @@ py::object convertFieldToPython( case TypeIndex::Decimal128: { - auto field = column->operator[](index); + auto field = column[index]; auto decimal_field = field.safeGet>(); auto decimal_value = decimal_field.getValue(); UInt32 scale = decimal_field.getScale(); @@ -458,7 +475,7 @@ py::object convertFieldToPython( case TypeIndex::Decimal256: { - auto field = column->operator[](index); + auto field = column[index]; auto decimal_field = field.safeGet>(); auto decimal_value = decimal_field.getValue(); UInt32 scale = decimal_field.getScale(); @@ -468,7 +485,7 @@ py::object convertFieldToPython( case TypeIndex::UUID: { - auto field = column->operator[](index); + auto field = column[index]; auto uuid_value = field.safeGet(); const auto formatted_uuid = formatUUID(uuid_value); return import_cache.uuid.UUID()(String(formatted_uuid.data(), formatted_uuid.size())); @@ -476,16 +493,14 @@ py::object convertFieldToPython( case TypeIndex::Array: { - const auto * array_column = typeid_cast(column.get()); - if (!array_column) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected ColumnArray"); + const auto & array_column = typeid_cast(column); const auto * array_type = typeid_cast(actual_type.get()); chassert(array_type); const auto & element_type = array_type->getNestedType(); - const auto & offsets = array_column->getOffsets(); - const auto & nested_column = array_column->getDataPtr(); + const auto & offsets = array_column.getOffsets(); + const auto & nested_column = array_column.getDataPtr(); size_t start_offset = (index == 0) ? 
0 : offsets[index - 1]; size_t end_offset = offsets[index]; @@ -493,7 +508,7 @@ py::object convertFieldToPython( py::list python_list; for (size_t i = start_offset; i < end_offset; ++i) { - auto python_element = convertFieldToPython(nested_column, element_type, i); + auto python_element = convertFieldToPython(*nested_column, element_type, i); python_list.append(python_element); } @@ -502,20 +517,18 @@ py::object convertFieldToPython( case TypeIndex::Tuple: { - const auto * tuple_column = typeid_cast(column.get()); - if (!tuple_column) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected ColumnTuple"); + const auto & tuple_column = typeid_cast(column); const auto * tuple_type = typeid_cast(actual_type.get()); chassert(tuple_type); const auto & element_types = tuple_type->getElements(); - const auto & tuple_columns = tuple_column->getColumns(); + const auto & tuple_columns = tuple_column.getColumns(); py::tuple python_tuple(tuple_columns.size()); for (size_t i = 0; i < tuple_columns.size(); ++i) { - auto python_element = convertFieldToPython(tuple_columns[i], element_types[i], index); + auto python_element = convertFieldToPython(*(tuple_columns[i]), element_types[i], index); python_tuple[i] = python_element; } @@ -524,7 +537,7 @@ py::object convertFieldToPython( case TypeIndex::Interval: { - auto field = column->operator[](index); + auto field = column[index]; auto interval_value = field.safeGet(); const auto * interval_type = typeid_cast(actual_type.get()); chassert(interval_type); @@ -564,9 +577,7 @@ py::object convertFieldToPython( case TypeIndex::Map: { - const auto * map_column = typeid_cast(column.get()); - if (!map_column) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected ColumnMap"); + const auto & map_column = typeid_cast(column); const auto * map_type = typeid_cast(actual_type.get()); chassert(map_type); @@ -575,22 +586,18 @@ py::object convertFieldToPython( const auto & value_type = map_type->getValueType(); /// Get the nested array column containing tuples - const auto & nested_array = map_column->getNestedColumn(); - const auto * array_column = typeid_cast(&nested_array); - if (!array_column) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected ColumnArray in ColumnMap"); + const auto & nested_array = map_column.getNestedColumn(); + const auto & array_column = typeid_cast(nested_array); - const auto & offsets = array_column->getOffsets(); - const auto & tuple_column_ptr = array_column->getDataPtr(); - const auto * tuple_column = typeid_cast(tuple_column_ptr.get()); - if (!tuple_column) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected ColumnTuple in ColumnMap"); + const auto & offsets = array_column.getOffsets(); + const auto & tuple_column_ptr = array_column.getDataPtr(); + const auto & tuple_column = typeid_cast(tuple_column_ptr); size_t start_offset = (index == 0) ? 
0 : offsets[index - 1]; size_t end_offset = offsets[index]; - const auto & key_column_ptr = tuple_column->getColumnPtr(0); - const auto & value_column_ptr = tuple_column->getColumnPtr(1); + const auto & key_column = tuple_column.getColumn(0); + const auto & value_column = tuple_column.getColumn(1); bool use_dict = canTypeBeUsedAsDictKey(key_type); @@ -599,8 +606,8 @@ py::object convertFieldToPython( py::dict python_dict; for (size_t i = start_offset; i < end_offset; ++i) { - auto python_key = convertFieldToPython(key_column_ptr, key_type, i); - auto python_value = convertFieldToPython(value_column_ptr, value_type, i); + auto python_key = convertFieldToPython(key_column, key_type, i); + auto python_value = convertFieldToPython(value_column, value_type, i); python_dict[std::move(python_key)] = std::move(python_value); } @@ -613,8 +620,8 @@ py::object convertFieldToPython( py::list values_list; for (size_t i = start_offset; i < end_offset; ++i) { - auto python_key = convertFieldToPython(key_column_ptr, key_type, i); - auto python_value = convertFieldToPython(value_column_ptr, value_type, i); + auto python_key = convertFieldToPython(key_column, key_type, i); + auto python_value = convertFieldToPython(value_column, value_type, i); keys_list.append(std::move(python_key)); values_list.append(std::move(python_value)); @@ -630,17 +637,55 @@ py::object convertFieldToPython( case TypeIndex::Variant: { - + const auto & variant_column = typeid_cast(column); + auto discriminator = variant_column.globalDiscriminatorAt(index); + if (discriminator == ColumnVariant::NULL_DISCRIMINATOR) + { + return py::none(); + } + + const auto & variant_type = typeid_cast(actual_type); + const auto & variants = variant_type.getVariants(); + const auto & variant_data_type = variants[discriminator]; + + auto offset = variant_column.offsetAt(index); + const auto & variant_inner_column = variant_column.getVariantByGlobalDiscriminator(discriminator); + + return convertFieldToPython(variant_inner_column, variant_data_type, offset); } - // case TypeIndex::Dynamic: + case TypeIndex::Dynamic: + { + const auto & dynamic_column = typeid_cast(column); + const auto & variant_column = dynamic_column.getVariantColumn(); + + /// Check if this row has value in shared variant + if (variant_column.globalDiscriminatorAt(index) == dynamic_column.getSharedVariantDiscriminator()) + { + /// Get data from shared variant and deserialize it + auto value = dynamic_column.getSharedVariant().getDataAt(variant_column.offsetAt(index)); + ReadBufferFromMemory buf(value.data, value.size); + auto variant_type = decodeDataType(buf); + auto tmp_variant_column = variant_type->createColumn(); + auto variant_serialization = variant_type->getDefaultSerialization(); + variant_serialization->deserializeBinary(*tmp_variant_column, buf, FormatSettings{}); + + /// Convert the deserialized value + return convertFieldToPython(*tmp_variant_column, variant_type, 0); + } + else + { + /// Use variant conversion logic directly + return convertFieldToPython(variant_column, dynamic_column.getVariantInfo().variant_type, index); + } + } - // case TypeIndex::Object: + case TypeIndex::Object: case TypeIndex::IPv4: { - auto field = column->operator[](index); + auto field = column[index]; auto ipv4_value = field.safeGet(); char ipv4_str[IPV4_MAX_TEXT_LENGTH]; @@ -653,7 +698,7 @@ py::object convertFieldToPython( case TypeIndex::IPv6: { - auto field = column->operator[](index); + auto field = column[index]; auto ipv6_value = field.safeGet(); char ipv6_str[IPV6_MAX_TEXT_LENGTH]; diff 
--git a/programs/local/FieldToPython.h b/programs/local/FieldToPython.h index c108d4f6479..f175ceb0866 100644 --- a/programs/local/FieldToPython.h +++ b/programs/local/FieldToPython.h @@ -8,8 +8,12 @@ namespace CHDB { +pybind11::object convertTimeFieldToPython(const DB::Field & field); + +pybind11::object convertTime64FieldToPython(const DB::Field & field); + pybind11::object convertFieldToPython( - const DB::ColumnPtr & column, + const DB::IColumn & column, const DB::DataTypePtr & type, size_t index); diff --git a/programs/local/NumpyArray.cpp b/programs/local/NumpyArray.cpp index f054a95d27a..89bbc334343 100644 --- a/programs/local/NumpyArray.cpp +++ b/programs/local/NumpyArray.cpp @@ -67,7 +67,7 @@ struct TimeConvert chassert(append_data.type); Field field(static_cast(val)); - auto time_object = convertFieldToPython(field, append_data.type); + auto time_object = convertTimeFieldToPython(field); return time_object.release().ptr(); } @@ -87,7 +87,7 @@ struct Time64Convert chassert(append_data.type); Field field(val); - auto time64_object = convertFieldToPython(field, append_data.type); + auto time64_object = convertTime64FieldToPython(field); return time64_object.release().ptr(); } @@ -796,7 +796,10 @@ void NumpyArray::append( } } -void NumpyArray::append(const DB::Field & field, const DB::DataTypePtr & type) +void NumpyArray::append( + const DB::IColumn & column, + const DB::DataTypePtr & type, + size_t index) { chassert(data_array); chassert(!mask_array); @@ -806,7 +809,7 @@ void NumpyArray::append(const DB::Field & field, const DB::DataTypePtr & type) auto * dest_ptr = reinterpret_cast(data_ptr) + data_array->count; - *dest_ptr = convertFieldToPython(field, type); + *dest_ptr = convertFieldToPython(column, type, index); data_array->count += 1; } diff --git a/programs/local/NumpyArray.h b/programs/local/NumpyArray.h index 03f2fd5f360..bea70774732 100644 --- a/programs/local/NumpyArray.h +++ b/programs/local/NumpyArray.h @@ -54,9 +54,15 @@ class NumpyArray { void append(const DB::ColumnPtr & column); - void append(const DB::ColumnPtr & column, size_t offset, size_t count); - - void append(const DB::Field & field, const DB::DataTypePtr & type); + void append( + const DB::ColumnPtr & column, + size_t offset, + size_t count); + + void append( + const DB::IColumn & column, + const DB::DataTypePtr & type, + size_t index); py::object toArray() const; diff --git a/programs/local/NumpyNestedTypes.cpp b/programs/local/NumpyNestedTypes.cpp index e35774025fe..3459c65c903 100644 --- a/programs/local/NumpyNestedTypes.cpp +++ b/programs/local/NumpyNestedTypes.cpp @@ -42,10 +42,6 @@ struct ColumnTraits static py::object convertElement(const ColumnArray * column, const DataTypePtr & data_type, size_t index) { - const auto * array_data_type = typeid_cast(data_type.get()); - if (!array_data_type) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected DataTypeArray"); - const auto & offsets = column->getOffsets(); const auto & nested_column = column->getDataPtr(); @@ -75,15 +71,12 @@ struct ColumnTraits const auto & element_types = tuple_data_type->getElements(); size_t tuple_size = column->tupleSize(); - Field tuple_field = column->operator[](index); - const Tuple & tuple_value = tuple_field.safeGet(); - NumpyArray numpy_array({}); numpy_array.init(tuple_size); for (size_t i = 0; i < tuple_size; ++i) { - numpy_array.append(tuple_value[i], element_types[i]); + numpy_array.append(column->getColumn(i), element_types[i], index); } return numpy_array.toArray(); @@ -108,6 +101,7 @@ struct ColumnTraits static 
py::object convertElement(const ColumnObject * column, const DataTypePtr & data_type, size_t index) { + return convertFieldToPython(*column, data_type, index); } }; @@ -118,19 +112,7 @@ struct ColumnTraits static py::object convertElement(const ColumnVariant * column, const DataTypePtr & data_type, size_t index) { - auto discriminator = column->globalDiscriminatorAt(index); - if (discriminator == ColumnVariant::NULL_DISCRIMINATOR) - { - return py::none(); - } - - const auto * variant_type = typeid_cast(data_type.get()); - const auto & variants = variant_type->getVariants(); - const auto & actual_type = variants[discriminator]; - Field variant_field = column->operator[](index); - - /// Nested types can be arbitrary types except Variant(...), LowCardinality(Nullable(...)) and Nullable(...) types. - return convertFieldToPython(variant_field, actual_type); + return convertFieldToPython(*column, data_type, index); } }; @@ -141,8 +123,7 @@ struct ColumnTraits static py::object convertElement(const ColumnDynamic * column, const DataTypePtr & data_type, size_t index) { - Field dynamic_field = column->operator[](index); - return convertFieldToPython(dynamic_field, data_type); + return convertFieldToPython(*column, data_type, index); } }; @@ -157,7 +138,6 @@ bool CHNestedColumnToNumpyArray(NumpyAppendData & append_data, const DataTypePtr { nullable_column = nullable; data_column = &nullable->getNestedColumn(); - has_null = true; } const auto * typed_column = typeid_cast(data_column); From 9894ace29d85ce4f9f8366261ac882374af2ebc6 Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Wed, 5 Nov 2025 01:15:45 +0800 Subject: [PATCH 18/22] chore: support object type --- programs/local/FieldToPython.cpp | 6 ++ programs/local/ObjectToPython.cpp | 143 ++++++++++++++++++++++++++++++ programs/local/ObjectToPython.h | 15 ++++ 3 files changed, 164 insertions(+) create mode 100644 programs/local/ObjectToPython.cpp create mode 100644 programs/local/ObjectToPython.h diff --git a/programs/local/FieldToPython.cpp b/programs/local/FieldToPython.cpp index 83a0569ad0e..cc02eeac945 100644 --- a/programs/local/FieldToPython.cpp +++ b/programs/local/FieldToPython.cpp @@ -1,11 +1,13 @@ #include "FieldToPython.h" #include "PythonImporter.h" +#include "ObjectToPython.h" #include #include #include #include #include +#include #include #include #include @@ -20,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -682,6 +685,9 @@ py::object convertFieldToPython( } case TypeIndex::Object: + { + return convertObjectToPython(column, actual_type, index); + } case TypeIndex::IPv4: { diff --git a/programs/local/ObjectToPython.cpp b/programs/local/ObjectToPython.cpp new file mode 100644 index 00000000000..88b51f79a3e --- /dev/null +++ b/programs/local/ObjectToPython.cpp @@ -0,0 +1,143 @@ +#include "ObjectToPython.h" +#include "FieldToPython.h" + +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} +} + +namespace CHDB +{ + +using namespace DB; +namespace py = pybind11; + +struct PathElements +{ + explicit PathElements(const String & path) + { + const char * start = path.data(); + const char * end = start + path.size(); + const char * pos = start; + const char * last_dot_pos = pos - 1; + for (pos = start; pos != end; ++pos) + { + if (*pos == '.') + { + elements.emplace_back(last_dot_pos + 1, size_t(pos - last_dot_pos - 1)); + last_dot_pos = pos; + } + } + + elements.emplace_back(last_dot_pos + 1, size_t(pos - last_dot_pos - 1)); + } + + size_t 
size() const { return elements.size(); } + + std::vector elements; +}; + +py::object convertObjectToPython( + const IColumn & column, + const DataTypePtr & type, + size_t index) +{ + const auto & column_object = typeid_cast(column); + const auto & typed_paths = column_object.getTypedPaths(); + const auto & dynamic_paths = column_object.getDynamicPaths(); + const auto & shared_data_offsets = column_object.getSharedDataOffsets(); + const auto [shared_data_paths, shared_data_values] = column_object.getSharedDataPathsAndValues(); + + size_t shared_data_offset = shared_data_offsets[static_cast(index) - 1]; + size_t shared_data_end = shared_data_offsets[static_cast(index)]; + + const auto & object_type = typeid_cast(type); + const auto & specific_typed_paths = object_type.getTypedPaths(); + const auto & dynamic_data_type = object_type.getDynamicType(); + + std::vector> path_values; + path_values.reserve(typed_paths.size() + dynamic_paths.size() + (shared_data_end - shared_data_offset)); + + for (const auto & [path, column_ptr] : typed_paths) + { + auto iter = specific_typed_paths.find(path); + if (iter == specific_typed_paths.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Path {} not found in typed paths", path); + + const auto & specific_data_type = iter->second; + auto python_value = convertFieldToPython(*column_ptr, specific_data_type, index); + path_values.emplace_back(path, python_value); + } + + for (const auto & [path, dynamic_column] : dynamic_paths) + { + if (!dynamic_column->isNullAt(index)) + { + auto python_value = convertFieldToPython(*dynamic_column, dynamic_data_type, index); + path_values.emplace_back(path, python_value); + } + } + + size_t index_in_shared_data_values = shared_data_offset; + for (size_t i = shared_data_offset; i != shared_data_end; ++i) + { + auto path = shared_data_paths->getDataAt(i).toString(); + + auto tmp_dynamic_column = ColumnDynamic::create(); + tmp_dynamic_column->reserve(1); + ColumnObject::deserializeValueFromSharedData(shared_data_values, index_in_shared_data_values++, *tmp_dynamic_column); + + auto python_value = convertFieldToPython(*tmp_dynamic_column, dynamic_data_type, 0); + path_values.emplace_back(path, python_value); + } + + py::dict result; + + for (const auto & [path, value] : path_values) + { + PathElements path_elements(path); + + if (path_elements.size() == 1) + { + String key(path_elements.elements[0]); + result[key.c_str()] = value; + } + else + { + py::dict * current_dict = &result; + + for (size_t i = 0; i < path_elements.size() - 1; ++i) + { + String key(path_elements.elements[i]); + + if (current_dict->contains(key.c_str())) + { + py::object nested = (*current_dict)[key.c_str()]; + current_dict = &nested.cast(); + } + else + { + py::dict new_dict; + (*current_dict)[key.c_str()] = new_dict; + current_dict = &new_dict; + } + } + + chassert(current_dict); + String final_key(path_elements.elements[path_elements.size() - 1]); + (*current_dict)[final_key.c_str()] = value; + } + } + + return result; +} + +} // namespace CHDB diff --git a/programs/local/ObjectToPython.h b/programs/local/ObjectToPython.h new file mode 100644 index 00000000000..64d79e218fd --- /dev/null +++ b/programs/local/ObjectToPython.h @@ -0,0 +1,15 @@ +#pragma once + +#include +#include +#include + +namespace CHDB +{ + +pybind11::object convertObjectToPython( + const DB::IColumn & column, + const DB::DataTypePtr & type, + size_t index); + +} // namespace CHDB From 95ad2d4553ea51fa33872743c125a7cd1fa90f31 Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Wed, 5 Nov 
2025 04:28:57 +0800 Subject: [PATCH 19/22] fix: fix build issues --- chdb/__init__.py | 5 + programs/local/CMakeLists.txt | 4 + programs/local/ChunkCollectorOutputFormat.cpp | 27 +++--- programs/local/ChunkCollectorOutputFormat.h | 27 +++--- programs/local/FieldToPython.cpp | 20 ++-- programs/local/LocalChdb.cpp | 35 +++---- programs/local/LocalServer.cpp | 2 +- programs/local/NumpyArray.h | 1 - programs/local/NumpyNestedTypes.cpp | 16 ++-- programs/local/NumpyType.h | 2 +- programs/local/ObjectToPython.cpp | 17 ++-- programs/local/PandasDataFrameBuilder.cpp | 37 +++++++- programs/local/PandasDataFrameBuilder.h | 14 +-- src/Client/ClientBase.cpp | 6 +- src/Client/ClientBase.h | 2 +- tests/test_complex_pyobj.py | 91 +++++++++++++++---- tests/test_dataframe_column_types.py | 60 ++++++++++++ 17 files changed, 264 insertions(+), 102 deletions(-) create mode 100644 tests/test_dataframe_column_types.py diff --git a/chdb/__init__.py b/chdb/__init__.py index ecf29e43df7..6d4d516a3e7 100644 --- a/chdb/__init__.py +++ b/chdb/__init__.py @@ -194,6 +194,11 @@ def query(sql, output_format="CSV", path="", udf_path=""): with g_conn_lock: conn = _chdb.connect(conn_str) res = conn.query(sql, output_format) + + if lower_output_format == "dataframe": + conn.close() + return res + if res.has_error(): conn.close() raise ChdbError(res.error_message()) diff --git a/programs/local/CMakeLists.txt b/programs/local/CMakeLists.txt index c4e480aaac3..2cade0b59be 100644 --- a/programs/local/CMakeLists.txt +++ b/programs/local/CMakeLists.txt @@ -26,11 +26,15 @@ if (USE_PYTHON) set (CHDB_SOURCES chdb.cpp ChunkCollectorOutputFormat.cpp + FieldToPython.cpp FormatHelper.cpp ListScan.cpp LocalChdb.cpp LocalServer.cpp + NumpyArray.cpp + NumpyNestedTypes.cpp NumpyType.cpp + ObjectToPython.cpp PandasAnalyzer.cpp PandasDataFrame.cpp PandasDataFrameBuilder.cpp diff --git a/programs/local/ChunkCollectorOutputFormat.cpp b/programs/local/ChunkCollectorOutputFormat.cpp index 38d31883a60..f215f48b25d 100644 --- a/programs/local/ChunkCollectorOutputFormat.cpp +++ b/programs/local/ChunkCollectorOutputFormat.cpp @@ -6,15 +6,17 @@ #include #include -namespace DB +using namespace DB; + +namespace CHDB { NullWriteBuffer ChunkCollectorOutputFormat::out; ChunkCollectorOutputFormat::ChunkCollectorOutputFormat( - const Block & header, + SharedHeader shared_header, PandasDataFrameBuilder & builder) - : IOutputFormat(header, out) + : IOutputFormat(shared_header, out) , dataframe_builder(builder) {} @@ -48,16 +50,16 @@ void ChunkCollectorOutputFormat::finalizeImpl() } /// Global dataframe builder -static std::unique_ptr g_dataframe_builder = nullptr; +static std::shared_ptr g_dataframe_builder = nullptr; -PandasDataFrameBuilder * getGlobalDataFrameBuilder() +PandasDataFrameBuilder & getGlobalDataFrameBuilder() { - return g_dataframe_builder.get(); + return *g_dataframe_builder; } -void setGlobalDataFrameBuilder(std::unique_ptr builder) +void setGlobalDataFrameBuilder(std::shared_ptr builder) { - g_dataframe_builder = std::move(builder); + g_dataframe_builder = builder; } void resetGlobalDataFrameBuilder() @@ -66,15 +68,14 @@ void resetGlobalDataFrameBuilder() } /// create ChunkCollectorOutputFormat for use with function pointer -std::shared_ptr createDataFrameOutputFormat(const Block & header) +std::shared_ptr createDataFrameOutputFormat(SharedHeader header) { /// Create a PandasDataFrameBuilder and set it globally - auto dataframe_builder = std::make_unique(header); - PandasDataFrameBuilder * builder_ptr = dataframe_builder.get(); - 
setGlobalDataFrameBuilder(std::move(dataframe_builder)); + auto dataframe_builder = std::make_shared(*header); + setGlobalDataFrameBuilder(dataframe_builder); /// Create and return the format with the builder - return std::make_shared(header, *builder_ptr); + return std::make_shared(header, getGlobalDataFrameBuilder()); } /// Registration function to be called during initialization diff --git a/programs/local/ChunkCollectorOutputFormat.h b/programs/local/ChunkCollectorOutputFormat.h index 8c588cd9711..7dc2fe26127 100644 --- a/programs/local/ChunkCollectorOutputFormat.h +++ b/programs/local/ChunkCollectorOutputFormat.h @@ -1,20 +1,26 @@ #pragma once #include +#include #include +#include namespace DB { - class NullWriteBuffer; +} + +namespace CHDB +{ + class PandasDataFrameBuilder; /// OutputFormat that collects all chunks into memory for further processing /// Does not write to WriteBuffer, instead accumulates data for conversion to pandas DataFrame objects -class ChunkCollectorOutputFormat : public IOutputFormat +class ChunkCollectorOutputFormat : public DB::IOutputFormat { public: - ChunkCollectorOutputFormat(const Block & header, PandasDataFrameBuilder & builder); + ChunkCollectorOutputFormat(DB::SharedHeader shared_header, PandasDataFrameBuilder & builder); String getName() const override { return "ChunkCollectorOutputFormat"; } @@ -24,31 +30,30 @@ class ChunkCollectorOutputFormat : public IOutputFormat } protected: - void consume(Chunk chunk) override; + void consume(DB::Chunk chunk) override; - void consumeTotals(Chunk totals) override; + void consumeTotals(DB::Chunk totals) override; - void consumeExtremes(Chunk extremes) override; + void consumeExtremes(DB::Chunk extremes) override; void finalizeImpl() override; private: - std::vector chunks; + std::vector chunks; PandasDataFrameBuilder & dataframe_builder; - /// Is not used. - static NullWriteBuffer out; + static DB::NullWriteBuffer out; }; /// Registration function to be called during initialization void registerDataFrameOutputFormat(); /// Get the global dataframe builder -PandasDataFrameBuilder * getGlobalDataFrameBuilder(); +PandasDataFrameBuilder & getGlobalDataFrameBuilder(); /// Set the global dataframe builder -void setGlobalDataFrameBuilder(std::unique_ptr builder); +void setGlobalDataFrameBuilder(std::shared_ptr builder); /// Reset the global dataframe builder void resetGlobalDataFrameBuilder(); diff --git a/programs/local/FieldToPython.cpp b/programs/local/FieldToPython.cpp index cc02eeac945..ca76032aaf6 100644 --- a/programs/local/FieldToPython.cpp +++ b/programs/local/FieldToPython.cpp @@ -36,18 +36,22 @@ #include #include -namespace CHDB +namespace DB { -using namespace DB; -namespace py = pybind11; - namespace ErrorCodes { - extern const int NOT_IMPLEMENTED; - extern const int LOGICAL_ERROR; +extern const int NOT_IMPLEMENTED; +extern const int LOGICAL_ERROR; +} + } +namespace CHDB +{ + +using namespace DB; + py::object convertTimeFieldToPython(const Field & field) { auto & import_cache = PythonImporter::ImportCache(); @@ -594,7 +598,7 @@ py::object convertFieldToPython( const auto & offsets = array_column.getOffsets(); const auto & tuple_column_ptr = array_column.getDataPtr(); - const auto & tuple_column = typeid_cast(tuple_column_ptr); + const auto & tuple_column = typeid_cast(*tuple_column_ptr); size_t start_offset = (index == 0) ? 
0 : offsets[index - 1]; size_t end_offset = offsets[index]; @@ -647,7 +651,7 @@ py::object convertFieldToPython( return py::none(); } - const auto & variant_type = typeid_cast(actual_type); + const auto & variant_type = typeid_cast(*actual_type); const auto & variants = variant_type.getVariants(); const auto & variant_data_type = variants[discriminator]; diff --git a/programs/local/LocalChdb.cpp b/programs/local/LocalChdb.cpp index e1513d3ec4d..c2a2db34695 100644 --- a/programs/local/LocalChdb.cpp +++ b/programs/local/LocalChdb.cpp @@ -1,13 +1,14 @@ #include "LocalChdb.h" #include "chdb-internal.h" -#include "ChunkCollectorOutputFormat.h" #include "PandasDataFrameBuilder.h" +#include "ChunkCollectorOutputFormat.h" #include "PythonImporter.h" #include "PythonTableCache.h" #include "StoragePython.h" #include #include +#include #include #if USE_JEMALLOC # include @@ -86,19 +87,14 @@ py::object query( { auto * result = queryToBuffer(queryStr, output_format, path, udfPath); - if (output_format == "dataframe") + if (Poco::toLower(output_format) == "dataframe") { chdb_destroy_query_result(result); - auto * builder = DB::getGlobalDataFrameBuilder(); - if (builder && builder->hasData()) - { - return builder->getDataFrame(); - } - else - { - throw std::runtime_error("DataFrame not available - query may have failed"); - } + auto & builder = CHDB::getGlobalDataFrameBuilder(); + auto ret = builder.getDataFrame(); + CHDB::resetGlobalDataFrameBuilder(); + return ret; } // Default behavior - return query_result @@ -291,22 +287,17 @@ py::object connection_wrapper::query(const std::string & query_str, const std::s { std::string msg_copy(error_msg); chdb_destroy_query_result(result); + CHDB::resetGlobalDataFrameBuilder(); throw std::runtime_error(msg_copy); } - if (format == "dataframe") + if (Poco::toLower(format) == "dataframe") { chdb_destroy_query_result(result); - - auto * builder = DB::getGlobalDataFrameBuilder(); - if (builder && builder->hasData()) - { - return builder->getDataFrame(); - } - else - { - throw std::runtime_error("DataFrame not available - query may have failed"); - } + auto & builder = CHDB::getGlobalDataFrameBuilder(); + auto ret = builder.getDataFrame(); + CHDB::resetGlobalDataFrameBuilder(); + return ret; } if (chdb_result_length(result)) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 32dac7fa278..886fe374be0 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -670,7 +670,7 @@ try auto & storage_factory = StorageFactory::instance(); #if USE_PYTHON registerStoragePython(storage_factory); - registerDataFrameOutputFormat(); + CHDB::registerDataFrameOutputFormat(); #else registerStorageArrowStream(storage_factory); #endif diff --git a/programs/local/NumpyArray.h b/programs/local/NumpyArray.h index bea70774732..ca2af0ae6bd 100644 --- a/programs/local/NumpyArray.h +++ b/programs/local/NumpyArray.h @@ -3,7 +3,6 @@ #include "PybindWrapper.h" #include -#include #include #include #include diff --git a/programs/local/NumpyNestedTypes.cpp b/programs/local/NumpyNestedTypes.cpp index 3459c65c903..60c0c8cfc88 100644 --- a/programs/local/NumpyNestedTypes.cpp +++ b/programs/local/NumpyNestedTypes.cpp @@ -20,18 +20,22 @@ #include #include -namespace CHDB +namespace DB { -using namespace DB; -namespace py = pybind11; - namespace ErrorCodes { - extern const int LOGICAL_ERROR; - extern const int NOT_IMPLEMENTED; +extern const int LOGICAL_ERROR; +extern const int NOT_IMPLEMENTED; +} + } +namespace CHDB +{ + +using namespace DB; + 
template struct ColumnTraits; diff --git a/programs/local/NumpyType.h b/programs/local/NumpyType.h index 787bfcd857a..da8ccd5eafe 100644 --- a/programs/local/NumpyType.h +++ b/programs/local/NumpyType.h @@ -53,6 +53,6 @@ std::shared_ptr NumpyToDataType(const NumpyType & col_type); String DataTypeToNumpyTypeStr(const std::shared_ptr & data_type); -py::object ConvertNumpyDtype(py::handle & numpy_array); +py::object ConvertNumpyDtype(const py::handle & numpy_array); } // namespace CHDB diff --git a/programs/local/ObjectToPython.cpp b/programs/local/ObjectToPython.cpp index 88b51f79a3e..4ea107b9ca6 100644 --- a/programs/local/ObjectToPython.cpp +++ b/programs/local/ObjectToPython.cpp @@ -8,10 +8,12 @@ namespace DB { + namespace ErrorCodes { extern const int LOGICAL_ERROR; } + } namespace CHDB @@ -59,7 +61,7 @@ py::object convertObjectToPython( size_t shared_data_offset = shared_data_offsets[static_cast(index) - 1]; size_t shared_data_end = shared_data_offsets[static_cast(index)]; - const auto & object_type = typeid_cast(type); + const auto & object_type = typeid_cast(*type); const auto & specific_typed_paths = object_type.getTypedPaths(); const auto & dynamic_data_type = object_type.getDynamicType(); @@ -112,28 +114,27 @@ py::object convertObjectToPython( } else { - py::dict * current_dict = &result; + py::dict current_dict = result; for (size_t i = 0; i < path_elements.size() - 1; ++i) { String key(path_elements.elements[i]); - if (current_dict->contains(key.c_str())) + if (current_dict.contains(key.c_str())) { py::object nested = (*current_dict)[key.c_str()]; - current_dict = &nested.cast(); + current_dict = nested.cast(); } else { py::dict new_dict; - (*current_dict)[key.c_str()] = new_dict; - current_dict = &new_dict; + current_dict[key.c_str()] = new_dict; + current_dict = new_dict; } } - chassert(current_dict); String final_key(path_elements.elements[path_elements.size() - 1]); - (*current_dict)[final_key.c_str()] = value; + current_dict[final_key.c_str()] = value; } } diff --git a/programs/local/PandasDataFrameBuilder.cpp b/programs/local/PandasDataFrameBuilder.cpp index 9d81271a563..4992a69e994 100644 --- a/programs/local/PandasDataFrameBuilder.cpp +++ b/programs/local/PandasDataFrameBuilder.cpp @@ -17,17 +17,23 @@ #include #include #include - -using namespace CHDB; +#include namespace DB { namespace ErrorCodes { - extern const int LOGICAL_ERROR; +extern const int LOGICAL_ERROR; +} + } +using namespace DB; + +namespace CHDB +{ + PandasDataFrameBuilder::PandasDataFrameBuilder(const Block & sample) { column_names.reserve(sample.columns()); @@ -70,9 +76,9 @@ py::object PandasDataFrameBuilder::genDataFrame(const py::handle & dict) py::handle key = key_value[0]; py::handle value = key_value[1]; - auto dtype = ConvertNumpyDtype(value); if (py::isinstance(value, import_cache.numpy.ma.masked_array())) { + auto dtype = ConvertNumpyDtype(value); auto series = pandas.attr("Series")(value.attr("data"), py::arg("dtype") = dtype); series.attr("__setitem__")(value.attr("mask"), import_cache.pandas.NA()); dict.attr("__setitem__")(key, series); @@ -118,6 +124,9 @@ void PandasDataFrameBuilder::finalize() return; columns_data.reserve(column_types.size()); + + py::gil_scoped_acquire acquire; + for (const auto & type : column_types) { columns_data.emplace_back(type); @@ -134,10 +143,19 @@ void PandasDataFrameBuilder::finalize() const auto & columns = chunk.getColumns(); for (size_t col_idx = 0; col_idx < columns.size(); ++col_idx) { - columns_data[col_idx].append(columns[col_idx]); + auto column = 
columns[col_idx];
+
+            if (column->lowCardinality())
+            {
+                column = column->convertToFullColumnIfLowCardinality();
+            }
+
+            columns_data[col_idx].append(column);
         }
     }
 
+    chunks.clear();
+
     /// Create pandas DataFrame
     py::dict res;
     for (size_t col_idx = 0; col_idx < column_names.size(); ++col_idx)
     {
@@ -150,4 +168,13 @@ void PandasDataFrameBuilder::finalize()
     is_finalized = true;
 }
 
+py::object PandasDataFrameBuilder::getDataFrame()
+{
+    chassert(is_finalized);
+
+    py::gil_scoped_acquire acquire;
+
+    columns_data.clear();
+    return std::move(final_dataframe);
+}
 }
diff --git a/programs/local/PandasDataFrameBuilder.h b/programs/local/PandasDataFrameBuilder.h
index 2f45b08e866..4c6d395e0a5 100644
--- a/programs/local/PandasDataFrameBuilder.h
+++ b/programs/local/PandasDataFrameBuilder.h
@@ -9,7 +9,7 @@
 #include
 #include
 
-namespace DB
+namespace CHDB
 {
 
 /// Builder class to convert ClickHouse Chunks to Pandas DataFrame
@@ -17,28 +17,30 @@
 class PandasDataFrameBuilder
 {
 public:
-    explicit PandasDataFrameBuilder(const Block & sample);
+    explicit PandasDataFrameBuilder(const DB::Block & sample);
+
+    ~PandasDataFrameBuilder() = default;
 
     /// Add data chunk
-    void addChunk(const Chunk & chunk);
+    void addChunk(const DB::Chunk & chunk);
 
     /// Finalize and build pandas DataFrame from all collected chunks
     void finalize();
 
    /// Get the finalized pandas DataFrame
-    pybind11::object getDataFrame() const { return final_dataframe; }
+    pybind11::object getDataFrame();
 
 private:
    pybind11::object genDataFrame(const pybind11::handle & dict);
    void changeToTZType(pybind11::object & df);
 
    std::vector<String> column_names;
-    std::vector<DataTypePtr> column_types;
+    std::vector<DB::DataTypePtr> column_types;
 
    /// Map column name to timezone string for timezone-aware types
    std::unordered_map<String, String> column_timezones;
 
-    std::vector<Chunk> chunks;
+    std::vector<DB::Chunk> chunks;
    std::vector<NumpyArray> columns_data;
 
    size_t total_rows = 0;
diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp
index f767a5c50b9..1c2e8bab2e1 100644
--- a/src/Client/ClientBase.cpp
+++ b/src/Client/ClientBase.cpp
@@ -648,17 +648,17 @@ try
     if (!output_format)
     {
 #if USE_PYTHON
-        if (default_output_format == "dataframe")
+        if (Poco::toLower(default_output_format) == "dataframe")
         {
             auto creator = getDataFrameFormatCreator();
             if (creator)
             {
-                output_format = creator(block);
+                output_format = creator(std::make_shared<const Block>(block));
                 return;
             }
             else
             {
-                throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DataFrame output format creator not set");
+                throw Exception(ErrorCodes::LOGICAL_ERROR, "DataFrame output format creator not set");
             }
         }
 #endif
diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h
index 7a52e50ed40..86fdb78d798 100644
--- a/src/Client/ClientBase.h
+++ b/src/Client/ClientBase.h
@@ -96,7 +96,7 @@ struct StreamingQueryContext
 
 #if USE_PYTHON
 /// Function pointer type for creating custom output formats (e.g.
DataFrame) -using CustomOutputFormatCreator = std::function(const Block &)>; +using CustomOutputFormatCreator = std::function(SharedHeader)>; #endif /** diff --git a/tests/test_complex_pyobj.py b/tests/test_complex_pyobj.py index 241fffc0105..e393f042e70 100644 --- a/tests/test_complex_pyobj.py +++ b/tests/test_complex_pyobj.py @@ -42,14 +42,43 @@ def test_df_with_na(self): self.assertEqual(ret.dtypes["E"], "object") self.assertEqual(ret.dtypes["F"], "object") self.assertEqual(ret.dtypes["G"], "object") - self.assertEqual( - str(ret), - """ A B C D E F G -0 1 4.0 True a [1, 2] {"a": 1, "b": 2} -1 2 5.0 False b [3, 4] {"c": 3, "d": 4} -2 3 6.0 True c [5, 6] {"e": 5, "f": 6} -3 """, - ) + self.assertEqual(ret.shape, (4, 7)) + + # Row 0 + self.assertEqual(ret.iloc[0]["A"], '1') + self.assertEqual(ret.iloc[0]["B"], '4.0') + self.assertEqual(ret.iloc[0]["C"], 'True') + self.assertEqual(ret.iloc[0]["D"], 'a') + self.assertEqual(ret.iloc[0]["E"], '') + self.assertEqual(ret.iloc[0]["F"], '[1, 2]') + self.assertEqual(ret.iloc[0]["G"], '{"a": 1, "b": 2}') + + # Row 1 + self.assertEqual(ret.iloc[1]["A"], '2') + self.assertEqual(ret.iloc[1]["B"], '5.0') + self.assertEqual(ret.iloc[1]["C"], 'False') + self.assertEqual(ret.iloc[1]["D"], 'b') + self.assertEqual(ret.iloc[1]["E"], '') + self.assertEqual(ret.iloc[1]["F"], '[3, 4]') + self.assertEqual(ret.iloc[1]["G"], '{"c": 3, "d": 4}') + + # Row 2 + self.assertEqual(ret.iloc[2]["A"], '3') + self.assertEqual(ret.iloc[2]["B"], '6.0') + self.assertEqual(ret.iloc[2]["C"], 'True') + self.assertEqual(ret.iloc[2]["D"], 'c') + self.assertEqual(ret.iloc[2]["E"], '') + self.assertEqual(ret.iloc[2]["F"], '[5, 6]') + self.assertEqual(ret.iloc[2]["G"], '{"e": 5, "f": 6}') + + # Row 3 + self.assertEqual(ret.iloc[3]["A"], '') + self.assertEqual(ret.iloc[3]["B"], '') + self.assertEqual(ret.iloc[3]["C"], '') + self.assertEqual(ret.iloc[3]["D"], '') + self.assertEqual(ret.iloc[3]["E"], '') + self.assertEqual(ret.iloc[3]["F"], '') + self.assertEqual(ret.iloc[3]["G"], '') def test_df_without_na(self): ret = chdb.query( @@ -65,14 +94,44 @@ def test_df_without_na(self): self.assertEqual(ret.dtypes["E"], "object") self.assertEqual(ret.dtypes["F"], "object") self.assertEqual(ret.dtypes["G"], "object") - self.assertEqual( - str(ret), - """ A B C D E F G -0 1 4.0 1 a a [1, 2] {"a": 1, "b": 2} -1 2 5.0 0 b b [3, 4] {"c": 3, "d": 4} -2 3 6.0 1 c c [5, 6] {"e": 5, "f": 6} -3 4 7.0 0 d d [7, 8] {"g": 7, "h": 8}""", - ) + + self.assertEqual(ret.shape, (4, 7)) + + # Row 0 + self.assertEqual(ret.iloc[0]["A"], 1) + self.assertEqual(ret.iloc[0]["B"], 4.0) + self.assertEqual(ret.iloc[0]["C"], 1) + self.assertEqual(ret.iloc[0]["D"], "a") + self.assertEqual(ret.iloc[0]["E"], "a") + self.assertEqual(ret.iloc[0]["F"], '[1, 2]') + self.assertEqual(ret.iloc[0]["G"], {"a": 1, "b": 2}) + + # Row 1 + self.assertEqual(ret.iloc[1]["A"], 2) + self.assertEqual(ret.iloc[1]["B"], 5.0) + self.assertEqual(ret.iloc[1]["C"], 0) + self.assertEqual(ret.iloc[1]["D"], "b") + self.assertEqual(ret.iloc[1]["E"], "b") + self.assertEqual(ret.iloc[1]["F"], '[3, 4]') + self.assertEqual(ret.iloc[1]["G"], {"c": 3, "d": 4}) + + # Row 2 + self.assertEqual(ret.iloc[2]["A"], 3) + self.assertEqual(ret.iloc[2]["B"], 6.0) + self.assertEqual(ret.iloc[2]["C"], 1) + self.assertEqual(ret.iloc[2]["D"], "c") + self.assertEqual(ret.iloc[2]["E"], "c") + self.assertEqual(ret.iloc[2]["F"], '[5, 6]') + self.assertEqual(ret.iloc[2]["G"], {"e": 5, "f": 6}) + + # Row 3 + self.assertEqual(ret.iloc[3]["A"], 4) + 
self.assertEqual(ret.iloc[3]["B"], 7.0) + self.assertEqual(ret.iloc[3]["C"], 0) + self.assertEqual(ret.iloc[3]["D"], "d") + self.assertEqual(ret.iloc[3]["E"], "d") + self.assertEqual(ret.iloc[3]["F"], '[7, 8]') + self.assertEqual(ret.iloc[3]["G"], {"g": 7, "h": 8}) if __name__ == "__main__": diff --git a/tests/test_dataframe_column_types.py b/tests/test_dataframe_column_types.py new file mode 100644 index 00000000000..62bb11fc8c4 --- /dev/null +++ b/tests/test_dataframe_column_types.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 + +import unittest +import pandas as pd +import chdb +from datetime import datetime, date +import numpy as np + + +class TestDataFrameColumnTypes(unittest.TestCase): + + def setUp(self): + self.session = chdb.session.Session() + + def tearDown(self): + self.session.close() + + def test_integer_types(self): + ret = self.session.query(""" + SELECT + toInt8(-128) as int8_val, + toInt16(-32768) as int16_val, + toInt32(-2147483648) as int32_val, + toInt64(-9223372036854775808) as int64_val, + toUInt8(255) as uint8_val, + toUInt16(65535) as uint16_val, + toUInt32(4294967295) as uint32_val, + toUInt64(18446744073709551615) as uint64_val + """, "DataFrame") + + self.assertEqual(ret.iloc[0]["int16_val"], -32768) + self.assertEqual(ret.iloc[0]["int32_val"], -2147483648) + self.assertEqual(ret.iloc[0]["int64_val"], -9223372036854775808) + self.assertEqual(ret.iloc[0]["uint8_val"], 255) + self.assertEqual(ret.iloc[0]["uint16_val"], 65535) + self.assertEqual(ret.iloc[0]["uint32_val"], 4294967295) + self.assertEqual(ret.iloc[0]["uint64_val"], 18446744073709551615) + + for col in ret.columns: + print(f"{col}: {ret.dtypes[col]} (actual value: {ret.iloc[0][col]}, Python type: {type(ret.iloc[0][col])})") + + # Precise data type validation + expected_types = { + "int8_val": "int8", + "int16_val": "int16", + "int32_val": "int32", + "int64_val": "int64", + "uint8_val": "uint8", + "uint16_val": "uint16", + "uint32_val": "uint32", + "uint64_val": "uint64" + } + + for col, expected_type in expected_types.items(): + actual_type = str(ret.dtypes[col]) + self.assertEqual(actual_type, expected_type) + + +if __name__ == "__main__": + unittest.main() From 7ab8fcbb32a194263b23fca4bd985c26a8857a13 Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Wed, 5 Nov 2025 12:18:19 +0800 Subject: [PATCH 20/22] test: update workflow --- .../workflows/build_linux_arm64_wheels-gh.yml | 22 +++++++++---------- .../workflows/build_macos_arm64_wheels.yml | 20 ++++++++--------- .github/workflows/build_macos_x86_wheels.yml | 20 ++++++++--------- .../build_musllinux_arm64_wheels.yml | 20 ++++++++--------- .../workflows/build_musllinux_x86_wheels.yml | 20 ++++++++--------- src/Client/ClientBase.cpp | 2 ++ 6 files changed, 53 insertions(+), 51 deletions(-) diff --git a/.github/workflows/build_linux_arm64_wheels-gh.yml b/.github/workflows/build_linux_arm64_wheels-gh.yml index 5fd5c76fd83..952ef01a151 100644 --- a/.github/workflows/build_linux_arm64_wheels-gh.yml +++ b/.github/workflows/build_linux_arm64_wheels-gh.yml @@ -8,16 +8,16 @@ on: required: true release: types: [created] - push: - branches: - - main - paths-ignore: - - '**/*.md' - pull_request: - branches: - - main - paths-ignore: - - '**/*.md' + # push: + # branches: + # - main + # paths-ignore: + # - '**/*.md' + # pull_request: + # branches: + # - main + # paths-ignore: + # - '**/*.md' jobs: @@ -137,7 +137,7 @@ jobs: which clang++-19 clang++-19 --version sudo apt-get install -y make cmake ccache ninja-build yasm gawk wget - # Install WebAssembly linker (wasm-ld) 
+ # Install WebAssembly linker (wasm-ld) sudo apt-get install -y lld-19 # Create symlink for wasm-ld if ! command -v wasm-ld &> /dev/null; then diff --git a/.github/workflows/build_macos_arm64_wheels.yml b/.github/workflows/build_macos_arm64_wheels.yml index 4c7b24f1ac2..f09c06edfb5 100644 --- a/.github/workflows/build_macos_arm64_wheels.yml +++ b/.github/workflows/build_macos_arm64_wheels.yml @@ -8,16 +8,16 @@ on: required: true release: types: [created] - push: - branches: - - main - paths-ignore: - - '**/*.md' - pull_request: - branches: - - main - paths-ignore: - - '**/*.md' + # push: + # branches: + # - main + # paths-ignore: + # - '**/*.md' + # pull_request: + # branches: + # - main + # paths-ignore: + # - '**/*.md' jobs: build_universal_wheel: diff --git a/.github/workflows/build_macos_x86_wheels.yml b/.github/workflows/build_macos_x86_wheels.yml index 22d597a0f6e..47ae8e490c0 100644 --- a/.github/workflows/build_macos_x86_wheels.yml +++ b/.github/workflows/build_macos_x86_wheels.yml @@ -8,16 +8,16 @@ on: required: true release: types: [created] - push: - branches: - - main - paths-ignore: - - '**/*.md' - pull_request: - branches: - - main - paths-ignore: - - '**/*.md' + # push: + # branches: + # - main + # paths-ignore: + # - '**/*.md' + # pull_request: + # branches: + # - main + # paths-ignore: + # - '**/*.md' jobs: build_universal_wheel: diff --git a/.github/workflows/build_musllinux_arm64_wheels.yml b/.github/workflows/build_musllinux_arm64_wheels.yml index d7c2819ac0a..0cfd5d2a3a1 100644 --- a/.github/workflows/build_musllinux_arm64_wheels.yml +++ b/.github/workflows/build_musllinux_arm64_wheels.yml @@ -8,16 +8,16 @@ on: required: true release: types: [created] - push: - branches: - - main - paths-ignore: - - '**/*.md' - pull_request: - branches: - - main - paths-ignore: - - '**/*.md' + # push: + # branches: + # - main + # paths-ignore: + # - '**/*.md' + # pull_request: + # branches: + # - main + # paths-ignore: + # - '**/*.md' jobs: build_musllinux_wheels: diff --git a/.github/workflows/build_musllinux_x86_wheels.yml b/.github/workflows/build_musllinux_x86_wheels.yml index 715cc816bf6..bf077181b6b 100644 --- a/.github/workflows/build_musllinux_x86_wheels.yml +++ b/.github/workflows/build_musllinux_x86_wheels.yml @@ -8,16 +8,16 @@ on: required: true release: types: [created] - push: - branches: - - main - paths-ignore: - - '**/*.md' - pull_request: - branches: - - main - paths-ignore: - - '**/*.md' + # push: + # branches: + # - main + # paths-ignore: + # - '**/*.md' + # pull_request: + # branches: + # - main + # paths-ignore: + # - '**/*.md' jobs: diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 1c2e8bab2e1..aad4ad78e78 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -151,8 +151,10 @@ namespace ErrorCodes extern const int CANNOT_WRITE_TO_FILE; } +#if USE_PYTHON /// Custom DataFrame format creator function pointer static CustomOutputFormatCreator g_dataframe_format_creator = nullptr; +#endif } From f0406e6cae67115a0f6d4bf7e26b96505aa670ed Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Wed, 5 Nov 2025 20:17:50 +0800 Subject: [PATCH 21/22] fix: fix test issues --- programs/local/ChunkCollectorOutputFormat.cpp | 6 ++- programs/local/FieldToPython.cpp | 13 ++---- programs/local/LocalChdb.cpp | 43 ++++++++++--------- programs/local/NumpyArray.cpp | 23 +++++----- programs/local/NumpyType.cpp | 15 +++---- programs/local/ObjectToPython.cpp | 10 ++++- programs/local/PandasDataFrameBuilder.cpp | 6 +-- 
programs/local/PythonImportCache.cpp          |  2 +-
 8 files changed, 59 insertions(+), 59 deletions(-)

diff --git a/programs/local/ChunkCollectorOutputFormat.cpp b/programs/local/ChunkCollectorOutputFormat.cpp
index f215f48b25d..8faa54a7ef4 100644
--- a/programs/local/ChunkCollectorOutputFormat.cpp
+++ b/programs/local/ChunkCollectorOutputFormat.cpp
@@ -64,7 +64,11 @@ void setGlobalDataFrameBuilder(std::shared_ptr<PandasDataFrameBuilder> builder)
 
 void resetGlobalDataFrameBuilder()
 {
-    g_dataframe_builder.reset();
+    if (g_dataframe_builder)
+    {
+        py::gil_scoped_acquire acquire;
+        g_dataframe_builder.reset();
+    }
 }
 
diff --git a/programs/local/FieldToPython.cpp b/programs/local/FieldToPython.cpp
index ca76032aaf6..8863f3e79ff 100644
--- a/programs/local/FieldToPython.cpp
+++ b/programs/local/FieldToPython.cpp
@@ -8,6 +8,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -121,11 +122,7 @@ py::object convertTime64FieldToPython(const Field & field)
 
 static bool canTypeBeUsedAsDictKey(const DataTypePtr & type)
 {
-    DataTypePtr actual_type = type;
-    if (const auto * nullable_type = typeid_cast<const DataTypeNullable *>(type.get()))
-    {
-        actual_type = nullable_type->getNestedType();
-    }
+    DataTypePtr actual_type = removeLowCardinalityAndNullable(type);
 
     switch (actual_type->getTypeId())
     {
@@ -224,11 +221,7 @@ py::object convertFieldToPython(
         return py::none();
     }
 
-    DataTypePtr actual_type = type;
-    if (const auto * nullable_type = typeid_cast<const DataTypeNullable *>(type.get()))
-    {
-        actual_type = nullable_type->getNestedType();
-    }
+    DataTypePtr actual_type = removeLowCardinalityAndNullable(type);
 
     auto & import_cache = PythonImporter::ImportCache();
 
diff --git a/programs/local/LocalChdb.cpp b/programs/local/LocalChdb.cpp
index c2a2db34695..61c931270e3 100644
--- a/programs/local/LocalChdb.cpp
+++ b/programs/local/LocalChdb.cpp
@@ -280,29 +280,32 @@ py::object connection_wrapper::query(const std::string & query_str, const std::s
 {
     CHDB::PythonTableCache::findQueryableObjFromQuery(query_str);
 
-    py::gil_scoped_release release;
-    auto * result = chdb_query_n(*conn, query_str.data(), query_str.size(), format.data(), format.size());
-    auto error_msg = CHDB::chdb_result_error_string(result);
-    if (!error_msg.empty())
+    chdb_result * result = nullptr;
     {
-        std::string msg_copy(error_msg);
-        chdb_destroy_query_result(result);
-        CHDB::resetGlobalDataFrameBuilder();
-        throw std::runtime_error(msg_copy);
-    }
+        py::gil_scoped_release release;
+        result = chdb_query_n(*conn, query_str.data(), query_str.size(), format.data(), format.size());
+        auto error_msg = CHDB::chdb_result_error_string(result);
+        if (!error_msg.empty())
+        {
+            std::string msg_copy(error_msg);
+            chdb_destroy_query_result(result);
+            CHDB::resetGlobalDataFrameBuilder();
+            throw std::runtime_error(msg_copy);
+        }
 
-    if (Poco::toLower(format) == "dataframe")
-    {
-        chdb_destroy_query_result(result);
-        auto & builder = CHDB::getGlobalDataFrameBuilder();
-        auto ret = builder.getDataFrame();
-        CHDB::resetGlobalDataFrameBuilder();
-        return ret;
-    }
+        if (Poco::toLower(format) == "dataframe")
+        {
+            chdb_destroy_query_result(result);
+            auto & builder = CHDB::getGlobalDataFrameBuilder();
+            auto ret = builder.getDataFrame();
+            CHDB::resetGlobalDataFrameBuilder();
+            return ret;
+        }
 
-    if (chdb_result_length(result))
-    {
-        LOG_DEBUG(getLogger("CHDB"), "Empty result returned for query: {}", query_str);
+        if (!chdb_result_length(result))
+        {
+            LOG_DEBUG(getLogger("CHDB"), "Empty result returned for query: {}", query_str);
+        }
     }
 
     return py::cast(new
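// Scoping note: the braces introduced above bound the py::gil_scoped_release
// to the pure C++ part of the query. Helpers that touch Python state
// (getDataFrame(), resetGlobalDataFrameBuilder()) re-acquire the GIL
// internally via py::gil_scoped_acquire, which is why they remain callable
// from inside the released region.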
query_result(result, false)); diff --git a/programs/local/NumpyArray.cpp b/programs/local/NumpyArray.cpp index 89bbc334343..1b8a03568bf 100644 --- a/programs/local/NumpyArray.cpp +++ b/programs/local/NumpyArray.cpp @@ -4,6 +4,7 @@ #include "PythonImporter.h" #include "FieldToPython.h" +#include #include #include #include @@ -86,7 +87,11 @@ struct Time64Convert { chassert(append_data.type); - Field field(val); + const auto & time64_type = typeid_cast(*append_data.type); + UInt32 scale = time64_type.getScale(); + DecimalField decimal_field(static_cast(val), scale); + Field field(decimal_field); + auto time64_object = convertTime64FieldToPython(field); return time64_object.release().ptr(); } @@ -564,6 +569,9 @@ void NumpyArray::append( size_t offset, size_t count) { + auto actual_column = column->convertToFullColumnIfLowCardinality(); + DataTypePtr actual_type = removeLowCardinalityAndNullable(data_array->type); + chassert(data_array); chassert(mask_array); @@ -571,21 +579,14 @@ void NumpyArray::append( auto * mask_ptr = reinterpret_cast(mask_array->data); chassert(data_ptr); chassert(mask_ptr); - chassert(column->getDataType() == data_array->type->getColumnType()); + chassert(actual_column->getDataType() == actual_type->getColumnType()); - size_t size = column->size(); + size_t size = actual_column->size(); data_array->count += size; mask_array->count += size; bool may_have_null = false; - /// For nullable types, we need to get the nested type - DataTypePtr actual_type = data_array->type; - if (const auto * nullable_type = typeid_cast(data_array->type.get())) - { - actual_type = nullable_type->getNestedType(); - } - - NumpyAppendData append_data(*column, actual_type); + NumpyAppendData append_data(*actual_column, actual_type); append_data.src_offset = offset; append_data.src_count = count; append_data.target_data = data_ptr; diff --git a/programs/local/NumpyType.cpp b/programs/local/NumpyType.cpp index 83408d13278..682332397be 100644 --- a/programs/local/NumpyType.cpp +++ b/programs/local/NumpyType.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -238,7 +239,9 @@ String DataTypeToNumpyTypeStr(const std::shared_ptr & data_type if (!data_type) return "object"; - TypeIndex type_id = data_type->getTypeId(); + auto actual_data_type = removeLowCardinalityAndNullable(data_type); + + TypeIndex type_id = actual_data_type->getTypeId(); switch (type_id) { case TypeIndex::Nothing: @@ -305,7 +308,7 @@ String DataTypeToNumpyTypeStr(const std::shared_ptr & data_type case TypeIndex::Date: case TypeIndex::Date32: - return "datetime64[D]"; + return "datetime64[D]"; // pandas converts datetime64[D] to datetime64[s] internally case TypeIndex::Time: case TypeIndex::Time64: @@ -375,14 +378,6 @@ String DataTypeToNumpyTypeStr(const std::shared_ptr & data_type return "int16"; case TypeIndex::Nullable: - { - if (const auto * nullable = typeid_cast(data_type.get())) - { - return DataTypeToNumpyTypeStr(nullable->getNestedType()); - } - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected nullable type {}", data_type->getName()); - } - default: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported type {}", data_type->getName()); } diff --git a/programs/local/ObjectToPython.cpp b/programs/local/ObjectToPython.cpp index 4ea107b9ca6..c3caf91ef58 100644 --- a/programs/local/ObjectToPython.cpp +++ b/programs/local/ObjectToPython.cpp @@ -1,8 +1,10 @@ #include "ObjectToPython.h" #include "FieldToPython.h" +#include #include #include +#include #include #include @@ -52,7 +54,13 @@ py::object 
convertObjectToPython( const DataTypePtr & type, size_t index) { - const auto & column_object = typeid_cast(column); + const IColumn * data_column = &column; + if (const auto * nullable = typeid_cast(&column)) + { + data_column = &nullable->getNestedColumn(); + } + + const auto & column_object = typeid_cast(*data_column); const auto & typed_paths = column_object.getTypedPaths(); const auto & dynamic_paths = column_object.getDynamicPaths(); const auto & shared_data_offsets = column_object.getSharedDataOffsets(); diff --git a/programs/local/PandasDataFrameBuilder.cpp b/programs/local/PandasDataFrameBuilder.cpp index 4992a69e994..7b570cebfcb 100644 --- a/programs/local/PandasDataFrameBuilder.cpp +++ b/programs/local/PandasDataFrameBuilder.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -145,11 +146,6 @@ void PandasDataFrameBuilder::finalize() { auto column = columns[col_idx]; - if (column->lowCardinality()) - { - column = column->convertToFullColumnIfLowCardinality(); - } - columns_data[col_idx].append(column); } } diff --git a/programs/local/PythonImportCache.cpp b/programs/local/PythonImportCache.cpp index 6e24b35e934..85f30a9a732 100644 --- a/programs/local/PythonImportCache.cpp +++ b/programs/local/PythonImportCache.cpp @@ -53,7 +53,7 @@ py::handle PythonImportCacheItem::AddCache(PythonImportCache & cache, py::object void PythonImportCacheItem::LoadModule(PythonImportCache & cache) { #if USE_JEMALLOC - ::Memory::MemoryCheckScope memory_check_scope; + ::Memory::MemoryCheckScope memory_check_scope; #endif try { From c6d370d39ca18ee186e1420ae4d53b7ff99f644c Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Wed, 5 Nov 2025 20:27:02 +0800 Subject: [PATCH 22/22] test: add more test cases --- tests/test_dataframe_column_types.py | 709 ++++++++++++++++++++++++++- 1 file changed, 696 insertions(+), 13 deletions(-) diff --git a/tests/test_dataframe_column_types.py b/tests/test_dataframe_column_types.py index 62bb11fc8c4..fc1d6f6988f 100644 --- a/tests/test_dataframe_column_types.py +++ b/tests/test_dataframe_column_types.py @@ -5,39 +5,83 @@ import chdb from datetime import datetime, date import numpy as np +import math class TestDataFrameColumnTypes(unittest.TestCase): def setUp(self): - self.session = chdb.session.Session() + self.session = chdb.session.Session("./tmp") def tearDown(self): self.session.close() def test_integer_types(self): ret = self.session.query(""" - SELECT - toInt8(-128) as int8_val, - toInt16(-32768) as int16_val, - toInt32(-2147483648) as int32_val, - toInt64(-9223372036854775808) as int64_val, - toUInt8(255) as uint8_val, - toUInt16(65535) as uint16_val, - toUInt32(4294967295) as uint32_val, - toUInt64(18446744073709551615) as uint64_val + SELECT * FROM ( + SELECT + 1 as row_id, + toInt8(-128) as int8_val, + toInt16(-32768) as int16_val, + toInt32(-2147483648) as int32_val, + toInt64(-9223372036854775808) as int64_val, + toInt128('-170141183460469231731687303715884105728') as int128_val, + toInt256('-57896044618658097711785492504343953926634992332820282019728792003956564819968') as int256_val, + toUInt8(255) as uint8_val, + toUInt16(65535) as uint16_val, + toUInt32(4294967295) as uint32_val, + toUInt64(18446744073709551615) as uint64_val, + toUInt128('340282366920938463463374607431768211455') as uint128_val, + toUInt256('115792089237316195423570985008687907853269984665640564039457584007913129639935') as uint256_val + UNION ALL + SELECT + 2 as row_id, + toInt8(127) as int8_val, + toInt16(32767) as int16_val, + toInt32(2147483647) as 
int32_val, + toInt64(9223372036854775807) as int64_val, + toInt128('170141183460469231731687303715884105727') as int128_val, + toInt256('57896044618658097711785492504343953926634992332820282019728792003956564819967') as int256_val, + toUInt8(254) as uint8_val, + toUInt16(65534) as uint16_val, + toUInt32(4294967294) as uint32_val, + toUInt64(18446744073709551614) as uint64_val, + toUInt128('340282366920938463463374607431768211454') as uint128_val, + toUInt256('115792089237316195423570985008687907853269984665640564039457584007913129639934') as uint256_val + ) + ORDER BY row_id """, "DataFrame") + for col in ret.columns: + print(f"{col}: {ret.dtypes[col]} (actual value: {ret.iloc[0][col]}, Python type: {type(ret.iloc[0][col])})") + + # Test first row (minimum/maximum values) + self.assertEqual(ret.iloc[0]["int8_val"], -128) self.assertEqual(ret.iloc[0]["int16_val"], -32768) self.assertEqual(ret.iloc[0]["int32_val"], -2147483648) self.assertEqual(ret.iloc[0]["int64_val"], -9223372036854775808) + self.assertEqual(ret.iloc[0]["int128_val"], float(-170141183460469231731687303715884105728)) + self.assertEqual(ret.iloc[0]["int256_val"], float(-57896044618658097711785492504343953926634992332820282019728792003956564819968)) self.assertEqual(ret.iloc[0]["uint8_val"], 255) self.assertEqual(ret.iloc[0]["uint16_val"], 65535) self.assertEqual(ret.iloc[0]["uint32_val"], 4294967295) self.assertEqual(ret.iloc[0]["uint64_val"], 18446744073709551615) + self.assertEqual(ret.iloc[0]["uint128_val"], float(340282366920938463463374607431768211455)) + self.assertEqual(ret.iloc[0]["uint256_val"], float(115792089237316195423570985008687907853269984665640564039457584007913129639935)) - for col in ret.columns: - print(f"{col}: {ret.dtypes[col]} (actual value: {ret.iloc[0][col]}, Python type: {type(ret.iloc[0][col])})") + # Test second row (maximum/near-maximum values) + self.assertEqual(ret.iloc[1]["int8_val"], 127) + self.assertEqual(ret.iloc[1]["int16_val"], 32767) + self.assertEqual(ret.iloc[1]["int32_val"], 2147483647) + self.assertEqual(ret.iloc[1]["int64_val"], 9223372036854775807) + self.assertEqual(ret.iloc[1]["int128_val"], float(170141183460469231731687303715884105727)) + self.assertEqual(ret.iloc[1]["int256_val"], float(57896044618658097711785492504343953926634992332820282019728792003956564819967)) + self.assertEqual(ret.iloc[1]["uint8_val"], 254) + self.assertEqual(ret.iloc[1]["uint16_val"], 65534) + self.assertEqual(ret.iloc[1]["uint32_val"], 4294967294) + self.assertEqual(ret.iloc[1]["uint64_val"], 18446744073709551614) + self.assertEqual(ret.iloc[1]["uint128_val"], float(340282366920938463463374607431768211454)) + self.assertEqual(ret.iloc[1]["uint256_val"], float(115792089237316195423570985008687907853269984665640564039457584007913129639934)) # Precise data type validation expected_types = { @@ -45,10 +89,649 @@ def test_integer_types(self): "int16_val": "int16", "int32_val": "int32", "int64_val": "int64", + "int128_val": "float64", # Int128 mapped to float64 in ClickHouse->pandas conversion + "int256_val": "float64", # Int256 mapped to float64 in ClickHouse->pandas conversion "uint8_val": "uint8", "uint16_val": "uint16", "uint32_val": "uint32", - "uint64_val": "uint64" + "uint64_val": "uint64", + "uint128_val": "float64", # UInt128 mapped to float64 in ClickHouse->pandas conversion + "uint256_val": "float64" # UInt256 mapped to float64 in ClickHouse->pandas conversion + } + + for col, expected_type in expected_types.items(): + actual_type = str(ret.dtypes[col]) + self.assertEqual(actual_type, 
expected_type) + + def test_float_types(self): + ret = self.session.query(""" + SELECT * FROM ( + SELECT + 1 as row_id, + toFloat32(3.14159265) as float32_val, + toFloat32(-3.40282347e+38) as float32_min, + toFloat32(3.40282347e+38) as float32_max, + toFloat64(2.718281828459045) as float64_val, + toFloat64(-1.7976931348623157e+308) as float64_min, + toFloat64(1.7976931348623157e+308) as float64_max, + toBFloat16(1.5) as bfloat16_val, + toBFloat16(-3.389531389e+38) as bfloat16_min, + toBFloat16(3.389531389e+38) as bfloat16_max + UNION ALL + SELECT + 2 as row_id, + toFloat32(0.0) as float32_val, + toFloat32(1.175494351e-38) as float32_min, + toFloat32(-1.175494351e-38) as float32_max, + toFloat64(0.0) as float64_val, + toFloat64(2.2250738585072014e-308) as float64_min, + toFloat64(-2.2250738585072014e-308) as float64_max, + toBFloat16(0.0) as bfloat16_val, + toBFloat16(1.175494351e-38) as bfloat16_min, + toBFloat16(-1.175494351e-38) as bfloat16_max + ) + ORDER BY row_id + """, "DataFrame") + + for col in ret.columns: + print(f"{col}: {ret.dtypes[col]} (actual value: {ret.iloc[1][col]}, Python type: {type(ret.iloc[0][col])})") + + # Test first row - regular and extreme values + self.assertAlmostEqual(ret.iloc[0]["float32_val"], 3.14159265, places=6) + self.assertAlmostEqual(ret.iloc[0]["float32_min"], -3.40282347e+38, delta=1e30) + self.assertAlmostEqual(ret.iloc[0]["float32_max"], 3.40282347e+38, delta=1e30) + self.assertAlmostEqual(ret.iloc[0]["float64_val"], 2.718281828459045, places=15) + self.assertAlmostEqual(ret.iloc[0]["float64_min"], -1.7976931348623157e+308, delta=1e300) + self.assertAlmostEqual(ret.iloc[0]["float64_max"], 1.7976931348623157e+308, delta=1e300) + self.assertAlmostEqual(ret.iloc[0]["bfloat16_val"], 1.5, places=2) + self.assertAlmostEqual(ret.iloc[0]["bfloat16_min"], -3.389531389e+38, delta=1e30) + self.assertAlmostEqual(ret.iloc[0]["bfloat16_max"], 3.389531389e+38, delta=1e30) + + # Test second row - zero and small values + self.assertEqual(ret.iloc[1]["float32_val"], 0.0) + self.assertAlmostEqual(ret.iloc[1]["float32_min"], 1.175494351e-38, delta=1e-40) + self.assertAlmostEqual(ret.iloc[1]["float32_max"], -1.175494351e-38, delta=1e-40) + self.assertEqual(ret.iloc[1]["float64_val"], 0.0) + self.assertAlmostEqual(ret.iloc[1]["float64_min"], 2.2250738585072014e-308, delta=1e-310) + self.assertAlmostEqual(ret.iloc[1]["float64_max"], -2.2250738585072014e-308, delta=1e-310) + self.assertEqual(ret.iloc[1]["bfloat16_val"], 0.0) + self.assertAlmostEqual(ret.iloc[1]["bfloat16_min"], 1.175494351e-38, delta=1e-40) + self.assertAlmostEqual(ret.iloc[1]["bfloat16_max"], -1.175494351e-38, delta=1e-40) + + # Precise data type validation + expected_types = { + "float32_val": "float32", + "float32_min": "float32", + "float32_max": "float32", + "float64_val": "float64", + "float64_min": "float64", + "float64_max": "float64", + "bfloat16_val": "float32", # BFloat16 typically mapped to float32 in pandas + "bfloat16_min": "float32", + "bfloat16_max": "float32" + } + + for col, expected_type in expected_types.items(): + actual_type = str(ret.dtypes[col]) + self.assertEqual(actual_type, expected_type) + + def test_float_special_values(self): + """Test Infinity and NaN values for all float types""" + ret = self.session.query(""" + SELECT * FROM ( + SELECT + 1 as row_id, + toFloat32(1.0/0.0) as float32_pos_inf, + toFloat32(-1.0/0.0) as float32_neg_inf, + toFloat32(0.0/0.0) as float32_nan, + toFloat64(1.0/0.0) as float64_pos_inf, + toFloat64(-1.0/0.0) as float64_neg_inf, + toFloat64(0.0/0.0) as 
float64_nan, + toBFloat16(1.0/0.0) as bfloat16_pos_inf, + toBFloat16(-1.0/0.0) as bfloat16_neg_inf, + toBFloat16(0.0/0.0) as bfloat16_nan + UNION ALL + SELECT + 2 as row_id, + toFloat32(1.0/0.0) as float32_pos_inf, + toFloat32(-1.0/0.0) as float32_neg_inf, + toFloat32(0.0/0.0) as float32_nan, + toFloat64(1.0/0.0) as float64_pos_inf, + toFloat64(-1.0/0.0) as float64_neg_inf, + toFloat64(0.0/0.0) as float64_nan, + toBFloat16(1.0/0.0) as bfloat16_pos_inf, + toBFloat16(-1.0/0.0) as bfloat16_neg_inf, + toBFloat16(0.0/0.0) as bfloat16_nan + ) + ORDER BY row_id + """, "DataFrame") + + for col in ret.columns: + print(f"{col}: {ret.dtypes[col]} (actual value: {ret.iloc[0][col]}, Python type: {type(ret.iloc[0][col])})") + + # Test Float32 special values + self.assertTrue(math.isinf(ret.iloc[0]["float32_pos_inf"])) + self.assertTrue(ret.iloc[0]["float32_pos_inf"] > 0) # positive infinity + self.assertTrue(math.isinf(ret.iloc[0]["float32_neg_inf"])) + self.assertTrue(ret.iloc[0]["float32_neg_inf"] < 0) # negative infinity + self.assertTrue(math.isnan(ret.iloc[0]["float32_nan"])) + + # Test Float64 special values + self.assertTrue(math.isinf(ret.iloc[0]["float64_pos_inf"])) + self.assertTrue(ret.iloc[0]["float64_pos_inf"] > 0) # positive infinity + self.assertTrue(math.isinf(ret.iloc[0]["float64_neg_inf"])) + self.assertTrue(ret.iloc[0]["float64_neg_inf"] < 0) # negative infinity + self.assertTrue(math.isnan(ret.iloc[0]["float64_nan"])) + + # Test BFloat16 special values + self.assertTrue(math.isinf(ret.iloc[0]["bfloat16_pos_inf"])) + self.assertTrue(ret.iloc[0]["bfloat16_pos_inf"] > 0) # positive infinity + self.assertTrue(math.isinf(ret.iloc[0]["bfloat16_neg_inf"])) + self.assertTrue(ret.iloc[0]["bfloat16_neg_inf"] < 0) # negative infinity + self.assertTrue(math.isnan(ret.iloc[0]["bfloat16_nan"])) + + # Test second row (same values, consistency check) + self.assertTrue(math.isinf(ret.iloc[1]["float32_pos_inf"])) + self.assertTrue(ret.iloc[1]["float32_pos_inf"] > 0) + self.assertTrue(math.isinf(ret.iloc[1]["float32_neg_inf"])) + self.assertTrue(ret.iloc[1]["float32_neg_inf"] < 0) + self.assertTrue(math.isnan(ret.iloc[1]["float32_nan"])) + + self.assertTrue(math.isinf(ret.iloc[1]["float64_pos_inf"])) + self.assertTrue(ret.iloc[1]["float64_pos_inf"] > 0) + self.assertTrue(math.isinf(ret.iloc[1]["float64_neg_inf"])) + self.assertTrue(ret.iloc[1]["float64_neg_inf"] < 0) + self.assertTrue(math.isnan(ret.iloc[1]["float64_nan"])) + + self.assertTrue(math.isinf(ret.iloc[1]["bfloat16_pos_inf"])) + self.assertTrue(ret.iloc[1]["bfloat16_pos_inf"] > 0) + self.assertTrue(math.isinf(ret.iloc[1]["bfloat16_neg_inf"])) + self.assertTrue(ret.iloc[1]["bfloat16_neg_inf"] < 0) + self.assertTrue(math.isnan(ret.iloc[1]["bfloat16_nan"])) + + # Precise data type validation + expected_types = { + "float32_pos_inf": "float32", + "float32_neg_inf": "float32", + "float32_nan": "float32", + "float64_pos_inf": "float64", + "float64_neg_inf": "float64", + "float64_nan": "float64", + "bfloat16_pos_inf": "float32", # BFloat16 typically mapped to float32 in pandas + "bfloat16_neg_inf": "float32", + "bfloat16_nan": "float32" + } + + for col, expected_type in expected_types.items(): + actual_type = str(ret.dtypes[col]) + self.assertEqual(actual_type, expected_type) + + def test_decimal_types(self): + """Test Decimal32, Decimal64, Decimal128, Decimal256 types""" + ret = self.session.query(""" + SELECT * FROM ( + SELECT + 1 as row_id, + toDecimal32('123.456', 3) as decimal32_val, + toDecimal32('-999999.999', 3) as decimal32_min, + 
toDecimal32('999999.999', 3) as decimal32_max, + toDecimal64('123456.789012', 6) as decimal64_val, + toDecimal64('-999999999999.999999', 6) as decimal64_min, + toDecimal64('999999999999.999999', 6) as decimal64_max, + toDecimal128('12345678901234567890123456789.123456789', 9) as decimal128_val, + toDecimal128('-12345678901234567890123456789.123456789', 9) as decimal128_min, + toDecimal128('12345678901234567890123456789.123456789', 9) as decimal128_max, + toDecimal256('1234567890123456789012345678901234567890123456789012345678.123456789012345678', 18) as decimal256_val, + toDecimal256('-1234567890123456789012345678901234567890123456789012345678.123456789012345678', 18) as decimal256_min, + toDecimal256('1234567890123456789012345678901234567890123456789012345678.123456789012345678', 18) as decimal256_max + UNION ALL + SELECT + 2 as row_id, + toDecimal32('0.001', 3) as decimal32_val, + toDecimal32('0.000', 3) as decimal32_min, + toDecimal32('1.000', 3) as decimal32_max, + toDecimal64('0.000001', 6) as decimal64_val, + toDecimal64('0.000000', 6) as decimal64_min, + toDecimal64('1.000000', 6) as decimal64_max, + toDecimal128('0.000000001', 9) as decimal128_val, + toDecimal128('0.000000000', 9) as decimal128_min, + toDecimal128('1.000000000', 9) as decimal128_max, + toDecimal256('0.000000000000000001', 18) as decimal256_val, + toDecimal256('0.000000000000000000', 18) as decimal256_min, + toDecimal256('1.000000000000000000', 18) as decimal256_max + ) + ORDER BY row_id + """, "DataFrame") + + for col in ret.columns: + print(f"{col}: {ret.dtypes[col]} (actual value: {ret.iloc[0][col]}, Python type: {type(ret.iloc[0][col])})") + + # Test first row - regular and extreme decimal values (converted to float64) + self.assertAlmostEqual(ret.iloc[0]["decimal32_val"], 123.456, places=3) + self.assertAlmostEqual(ret.iloc[0]["decimal32_min"], -999999.999, places=3) + self.assertAlmostEqual(ret.iloc[0]["decimal32_max"], 999999.999, places=3) + + self.assertAlmostEqual(ret.iloc[0]["decimal64_val"], 123456.789012, places=6) + self.assertAlmostEqual(ret.iloc[0]["decimal64_min"], -999999999999.999999, places=6) + self.assertAlmostEqual(ret.iloc[0]["decimal64_max"], 999999999999.999999, places=6) + + self.assertAlmostEqual(ret.iloc[0]["decimal128_val"], 12345678901234567890123456789.123456789, delta=1e20) + self.assertAlmostEqual(ret.iloc[0]["decimal128_min"], -12345678901234567890123456789.123456789, delta=1e20) + self.assertAlmostEqual(ret.iloc[0]["decimal128_max"], 12345678901234567890123456789.123456789, delta=1e20) + + self.assertAlmostEqual(ret.iloc[0]["decimal256_val"], 1234567890123456789012345678901234567890123456789012345678.123456789012345678, delta=1e50) + self.assertAlmostEqual(ret.iloc[0]["decimal256_min"], -1234567890123456789012345678901234567890123456789012345678.123456789012345678, delta=1e50) + self.assertAlmostEqual(ret.iloc[0]["decimal256_max"], 1234567890123456789012345678901234567890123456789012345678.123456789012345678, delta=1e50) + + # Test second row - small decimal values (converted to float64) + self.assertAlmostEqual(ret.iloc[1]["decimal32_val"], 0.001, places=3) + self.assertEqual(ret.iloc[1]["decimal32_min"], 0.000) + self.assertAlmostEqual(ret.iloc[1]["decimal32_max"], 1.000, places=3) + + self.assertAlmostEqual(ret.iloc[1]["decimal64_val"], 0.000001, places=6) + self.assertEqual(ret.iloc[1]["decimal64_min"], 0.000000) + self.assertAlmostEqual(ret.iloc[1]["decimal64_max"], 1.000000, places=6) + + self.assertAlmostEqual(ret.iloc[1]["decimal128_val"], 0.000000001, places=9) + 
self.assertEqual(ret.iloc[1]["decimal128_min"], 0.000000000) + self.assertAlmostEqual(ret.iloc[1]["decimal128_max"], 1.000000000, places=9) + + self.assertAlmostEqual(ret.iloc[1]["decimal256_val"], 0.000000000000000001, places=18) + self.assertEqual(ret.iloc[1]["decimal256_min"], 0.000000000000000000) + self.assertAlmostEqual(ret.iloc[1]["decimal256_max"], 1.000000000000000000, places=18) + + # Precise data type validation + expected_types = { + "decimal32_val": "float64", # Decimal types mapped to float64 in ClickHouse->pandas conversion + "decimal32_min": "float64", + "decimal32_max": "float64", + "decimal64_val": "float64", + "decimal64_min": "float64", + "decimal64_max": "float64", + "decimal128_val": "float64", + "decimal128_min": "float64", + "decimal128_max": "float64", + "decimal256_val": "float64", + "decimal256_min": "float64", + "decimal256_max": "float64" + } + + for col, expected_type in expected_types.items(): + actual_type = str(ret.dtypes[col]) + self.assertEqual(actual_type, expected_type) + + def test_string_types(self): + """Test String, FixedString, and LowCardinality string types""" + ret = self.session.query(""" + SELECT * FROM ( + SELECT + 1 as row_id, + toString('Hello World') as string_val, + toFixedString('Fixed', 10) as fixed_string_val, + toLowCardinality('Category A') as low_cardinality_val, + toString('') as empty_string, + toString('Unicode: 🌍 éñáíóú') as unicode_string, + toString('Special chars: \\t\\n\\r\\"\\\'') as special_chars, + toString('Very long string with many characters to test maximum length handling and memory allocation behavior') as long_string, + toFixedString('ABC', 5) as fixed_string_short, + toLowCardinality('') as low_cardinality_empty + UNION ALL + SELECT + 2 as row_id, + toString('Another string') as string_val, + toFixedString('Test123', 10) as fixed_string_val, + toLowCardinality('Category B') as low_cardinality_val, + toString('Non-empty') as empty_string, + toString('More Unicode: 🚀 ñáéíóú àèìòù') as unicode_string, + toString('Line breaks:\\nTab:\\tQuote:\\"') as special_chars, + toString('Short') as long_string, + toFixedString('XYZZZ', 5) as fixed_string_short, + toLowCardinality('Option 2') as low_cardinality_empty + ) + ORDER BY row_id + """, "DataFrame") + + for col in ret.columns: + print(f"{col}: {ret.dtypes[col]} (actual value: {ret.iloc[0][col]}, Python type: {type(ret.iloc[0][col])})") + + # Test first row - various string types + self.assertEqual(ret.iloc[0]["string_val"], "Hello World") + self.assertEqual(ret.iloc[0]["fixed_string_val"], "Fixed\x00\x00\x00\x00\x00") # FixedString pads with null bytes + self.assertEqual(ret.iloc[0]["low_cardinality_val"], "Category A") + self.assertEqual(ret.iloc[0]["empty_string"], "") + self.assertEqual(ret.iloc[0]["unicode_string"], "Unicode: 🌍 éñáíóú") + self.assertEqual(ret.iloc[0]["special_chars"], "Special chars: \t\n\r\"'") # ClickHouse interprets escape sequences + self.assertEqual(ret.iloc[0]["long_string"], "Very long string with many characters to test maximum length handling and memory allocation behavior") + self.assertEqual(ret.iloc[0]["fixed_string_short"], "ABC\x00\x00") # Padded to 5 chars + self.assertEqual(ret.iloc[0]["low_cardinality_empty"], "") + + # Test second row - different string values + self.assertEqual(ret.iloc[1]["string_val"], "Another string") + self.assertEqual(ret.iloc[1]["fixed_string_val"], "Test123\x00\x00\x00") # Padded to 10 chars + self.assertEqual(ret.iloc[1]["low_cardinality_val"], "Category B") + self.assertEqual(ret.iloc[1]["empty_string"], 
"Non-empty") + self.assertEqual(ret.iloc[1]["unicode_string"], "More Unicode: 🚀 ñáéíóú àèìòù") + self.assertEqual(ret.iloc[1]["special_chars"], "Line breaks:\nTab:\tQuote:\"") # ClickHouse interprets escape sequences + self.assertEqual(ret.iloc[1]["long_string"], "Short") + self.assertEqual(ret.iloc[1]["fixed_string_short"], "XYZZZ") # Exactly 5 chars, no padding + self.assertEqual(ret.iloc[1]["low_cardinality_empty"], "Option 2") + + # Precise data type validation + expected_types = { + "string_val": "object", # String types mapped to object in pandas + "fixed_string_val": "object", + "low_cardinality_val": "object", + "empty_string": "object", + "unicode_string": "object", + "special_chars": "object", + "long_string": "object", + "fixed_string_short": "object", + "low_cardinality_empty": "object" + } + + for col, expected_type in expected_types.items(): + actual_type = str(ret.dtypes[col]) + self.assertEqual(actual_type, expected_type) + + def test_date_types(self): + """Test Date and Date32 types""" + ret = self.session.query(""" + SELECT * FROM ( + SELECT + 1 as row_id, + toDate('2023-12-25') as date_val, + toDate('1970-01-01') as date_min, + toDate('2149-06-06') as date_max, + toDate32('2023-12-25') as date32_val, + toDate32('1900-01-01') as date32_min, + toDate32('2299-12-31') as date32_max, + toDate('2000-02-29') as date_leap_year, + toDate32('2000-02-29') as date32_leap_year, + toDate32('1950-06-15') as date32_negative_1, + toDate32('1960-12-31') as date32_negative_2, + toDate32('1969-12-31') as date32_before_epoch + UNION ALL + SELECT + 2 as row_id, + toDate('1970-01-01') as date_val, + toDate('2023-01-01') as date_min, + toDate('2023-12-31') as date_max, + toDate32('1970-01-01') as date32_val, + toDate32('2023-01-01') as date32_min, + toDate32('2023-12-31') as date32_max, + toDate('2024-02-29') as date_leap_year, + toDate32('2024-02-29') as date32_leap_year, + toDate32('1945-05-08') as date32_negative_1, + toDate32('1955-03-20') as date32_negative_2, + toDate32('1968-07-20') as date32_before_epoch + ) + ORDER BY row_id + """, "DataFrame") + + for col in ret.columns: + print(f"{col}: {ret.dtypes[col]} (actual value: {ret.iloc[0][col]}, Python type: {type(ret.iloc[0][col])})") + + # Test first row - specific dates (Date types include time component 00:00:00) + self.assertIn("2023-12-25", str(ret.iloc[0]["date_val"])) + self.assertIn("1970-01-01", str(ret.iloc[0]["date_min"])) + self.assertIn("2149-06-06", str(ret.iloc[0]["date_max"])) + self.assertIn("2023-12-25", str(ret.iloc[0]["date32_val"])) + self.assertIn("1900-01-01", str(ret.iloc[0]["date32_min"])) + self.assertIn("2299-12-31", str(ret.iloc[0]["date32_max"])) + self.assertIn("2000-02-29", str(ret.iloc[0]["date_leap_year"])) + self.assertIn("2000-02-29", str(ret.iloc[0]["date32_leap_year"])) + # Test Date32 negative values (before 1970 epoch) + self.assertIn("1950-06-15", str(ret.iloc[0]["date32_negative_1"])) + self.assertIn("1960-12-31", str(ret.iloc[0]["date32_negative_2"])) + self.assertIn("1969-12-31", str(ret.iloc[0]["date32_before_epoch"])) + + # Test second row - different dates + self.assertIn("1970-01-01", str(ret.iloc[1]["date_val"])) + self.assertIn("2023-01-01", str(ret.iloc[1]["date_min"])) + self.assertIn("2023-12-31", str(ret.iloc[1]["date_max"])) + self.assertIn("1970-01-01", str(ret.iloc[1]["date32_val"])) + self.assertIn("2023-01-01", str(ret.iloc[1]["date32_min"])) + self.assertIn("2023-12-31", str(ret.iloc[1]["date32_max"])) + self.assertIn("2024-02-29", str(ret.iloc[1]["date_leap_year"])) + 
self.assertIn("2024-02-29", str(ret.iloc[1]["date32_leap_year"])) + # Test Date32 negative values (before 1970 epoch) - second row + self.assertIn("1945-05-08", str(ret.iloc[1]["date32_negative_1"])) + self.assertIn("1955-03-20", str(ret.iloc[1]["date32_negative_2"])) + self.assertIn("1968-07-20", str(ret.iloc[1]["date32_before_epoch"])) + + # Precise data type validation + expected_types = { + "date_val": "datetime64[s]", # Date types mapped to datetime64[s] in pandas + "date_min": "datetime64[s]", + "date_max": "datetime64[s]", + "date32_val": "datetime64[s]", + "date32_min": "datetime64[s]", + "date32_max": "datetime64[s]", + "date_leap_year": "datetime64[s]", + "date32_leap_year": "datetime64[s]", + "date32_negative_1": "datetime64[s]", # Date32 negative values (before 1970) + "date32_negative_2": "datetime64[s]", + "date32_before_epoch": "datetime64[s]" + } + + for col, expected_type in expected_types.items(): + actual_type = str(ret.dtypes[col]) + self.assertEqual(actual_type, expected_type) + + def test_time_types(self): + """Test Time and Time64 types""" + # Enable Time and Time64 types + self.session.query("SET enable_time_time64_type = 1") + + ret = self.session.query(""" + SELECT * FROM ( + SELECT + 1 as row_id, + CAST('14:30:45' AS Time) as time_val, + CAST('00:00:00' AS Time) as time_min, + CAST('23:59:59' AS Time) as time_max, + CAST('14:30:45.123456' AS Time64(6)) as time64_val, + CAST('00:00:00.000000' AS Time64(6)) as time64_min, + CAST('23:59:59.999999' AS Time64(6)) as time64_max, + CAST('12:00:00.123' AS Time64(3)) as time64_ms, + CAST('18:45:30.987654321' AS Time64(9)) as time64_ns + UNION ALL + SELECT + 2 as row_id, + CAST('09:15:30' AS Time) as time_val, + CAST('12:00:00' AS Time) as time_min, + CAST('18:45:15' AS Time) as time_max, + CAST('09:15:30.654321' AS Time64(6)) as time64_val, + CAST('12:30:45.500000' AS Time64(6)) as time64_min, + CAST('20:15:30.111111' AS Time64(6)) as time64_max, + CAST('08:30:15.500' AS Time64(3)) as time64_ms, + CAST('16:20:10.123456789' AS Time64(9)) as time64_ns + UNION ALL + SELECT + 3 as row_id, + CAST(-3600 AS Time) as time_val, -- -1 hour as negative seconds + CAST(-7200 AS Time) as time_min, -- -2 hours as negative seconds + CAST(-1800 AS Time) as time_max, -- -30 minutes as negative seconds + CAST(-3661.123456 AS Time64(6)) as time64_val, -- -1h 1m 1.123456s + CAST(-7322.500000 AS Time64(6)) as time64_min, -- -2h 2m 2.5s + CAST(-1801.999999 AS Time64(6)) as time64_max, -- -30m 1.999999s + CAST(-3723.500 AS Time64(3)) as time64_ms, -- -1h 2m 3.5s + CAST(-5434.123456789 AS Time64(9)) as time64_ns -- -1h 30m 34.123456789s + ) + ORDER BY row_id + """, "DataFrame") + + for col in ret.columns: + print(f"{col}: {ret.dtypes[col]} (actual value: {ret.iloc[0][col]}, Python type: {type(ret.iloc[0][col])})") + + # Test first row - time values + self.assertIn("14:30:45", str(ret.iloc[0]["time_val"])) + self.assertIn("00:00:00", str(ret.iloc[0]["time_min"])) + self.assertIn("23:59:59", str(ret.iloc[0]["time_max"])) + self.assertIn("14:30:45", str(ret.iloc[0]["time64_val"])) + self.assertIn("00:00:00", str(ret.iloc[0]["time64_min"])) + self.assertIn("23:59:59", str(ret.iloc[0]["time64_max"])) + self.assertIn("12:00:00", str(ret.iloc[0]["time64_ms"])) + self.assertIn("18:45:30", str(ret.iloc[0]["time64_ns"])) + + # Test second row - different time values + self.assertIn("09:15:30", str(ret.iloc[1]["time_val"])) + self.assertIn("12:00:00", str(ret.iloc[1]["time_min"])) + self.assertIn("18:45:15", str(ret.iloc[1]["time_max"])) + 
self.assertIn("09:15:30", str(ret.iloc[1]["time64_val"])) + self.assertIn("12:30:45", str(ret.iloc[1]["time64_min"])) + self.assertIn("20:15:30", str(ret.iloc[1]["time64_max"])) + self.assertIn("08:30:15", str(ret.iloc[1]["time64_ms"])) + self.assertIn("16:20:10", str(ret.iloc[1]["time64_ns"])) + + # Test third row - negative time values (should be returned as string numbers) + # Since Python time types don't support negative values, they are returned as numeric strings + self.assertEqual(ret.iloc[2]["time_val"], "-3600") # -1 hour + self.assertEqual(ret.iloc[2]["time_min"], "-7200") # -2 hours + self.assertEqual(ret.iloc[2]["time_max"], "-1800") # -30 minutes + self.assertEqual(ret.iloc[2]["time64_val"], "-3661.123456") # -1h 1m 1.123456s + self.assertEqual(ret.iloc[2]["time64_min"], "-7322.5") # -2h 2m 2.5s + self.assertEqual(ret.iloc[2]["time64_max"], "-1801.999999") # -30m 1.999999s + self.assertEqual(ret.iloc[2]["time64_ms"], "-3723.5") # -1h 2m 3.5s + self.assertEqual(ret.iloc[2]["time64_ns"], "-5434.123456789") # -1h 30m 34.123456789s + + # Verify negative values are returned as strings (object dtype) + for col in ["time_val", "time_min", "time_max", "time64_val", "time64_min", "time64_max", "time64_ms", "time64_ns"]: + self.assertIsInstance(ret.iloc[2][col], str, f"{col} should be string for negative values") + + # Precise data type validation + expected_types = { + "time_val": "object", # Time types mapped to object in pandas + "time_min": "object", + "time_max": "object", + "time64_val": "object", + "time64_min": "object", + "time64_max": "object", + "time64_ms": "object", + "time64_ns": "object" + } + + for col, expected_type in expected_types.items(): + actual_type = str(ret.dtypes[col]) + self.assertEqual(actual_type, expected_type) + + @unittest.skip("") + def test_datetime_types(self): + """Test DateTime and DateTime64 types""" + ret = self.session.query(""" + SELECT * FROM ( + SELECT + 1 as row_id, + toDateTime('2023-12-25 14:30:45') as datetime_val, + toDateTime('1970-01-01 00:00:00') as datetime_min, + toDateTime('2106-02-07 06:28:15') as datetime_max, + toDateTime64('2023-12-25 14:30:45.123456', 6) as datetime64_val, + toDateTime64('1900-01-01 00:00:00.000000', 6) as datetime64_min, + toDateTime64('2299-12-31 23:59:59.999999', 6) as datetime64_max, + toDateTime64('2023-12-25 14:30:45.123456789', 9) as datetime64_ns, + toDateTime('2023-06-15 12:00:00', 'UTC') as datetime_utc, + toDateTime('2023-06-15 15:30:00', 'Europe/London') as datetime_london, + toDateTime64('2023-06-15 12:00:00.123', 3, 'Asia/Shanghai') as datetime64_tz_sh, + toDateTime64('2023-06-15 12:00:00.456', 3, 'America/New_York') as datetime64_tz_ny + UNION ALL + SELECT + 2 as row_id, + toDateTime('2000-02-29 09:15:30') as datetime_val, + toDateTime('2023-01-01 12:30:45') as datetime_min, + toDateTime('2023-12-31 18:45:15') as datetime_max, + toDateTime64('2000-02-29 09:15:30.654321', 6) as datetime64_val, + toDateTime64('2023-01-01 08:00:00.111111', 6) as datetime64_min, + toDateTime64('2023-12-31 20:30:45.888888', 6) as datetime64_max, + toDateTime64('2000-02-29 09:15:30.987654321', 9) as datetime64_ns, + toDateTime('2024-01-15 08:30:00', 'UTC') as datetime_utc, + toDateTime('2024-01-15 20:00:00', 'Europe/London') as datetime_london, + toDateTime64('2024-01-15 16:45:30.789', 3, 'Asia/Shanghai') as datetime64_tz_sh, + toDateTime64('2024-01-15 09:15:45.987', 3, 'America/New_York') as datetime64_tz_ny + ) + ORDER BY row_id + """, "DataFrame") + + for col in ret.columns: + print(f"{col}: {ret.dtypes[col]} 
+
+        for col in ret.columns:
+            print(f"{col}: {ret.dtypes[col]} (actual value: {ret.iloc[0][col]}, Python type: {type(ret.iloc[0][col])})")
+
+        # Test first row - exact datetime values
+        # DateTime (second precision) columns carry the ClickHouse server timezone;
+        # this test assumes the server runs in UTC, so adjust actual_tz if it does not.
+        actual_tz = 'UTC'
+
+        self.assertEqual(ret.iloc[0]["datetime_val"], pd.Timestamp('2023-12-25 14:30:45', tz=actual_tz))
+        self.assertEqual(ret.iloc[0]["datetime_min"], pd.Timestamp('1970-01-01 00:00:00', tz=actual_tz))
+        self.assertEqual(ret.iloc[0]["datetime_max"], pd.Timestamp('2106-02-07 06:28:15', tz=actual_tz))
+
+        # DateTime64 (microsecond precision) - should use same timezone as ClickHouse server
+        self.assertEqual(ret.iloc[0]["datetime64_val"], pd.Timestamp('2023-12-25 14:30:45.123456', tz=actual_tz))
+        self.assertEqual(ret.iloc[0]["datetime64_min"], pd.Timestamp('1900-01-01 00:00:00.000000', tz=actual_tz))
+        self.assertEqual(ret.iloc[0]["datetime64_max"], pd.Timestamp('2299-12-31 23:59:59.999999', tz=actual_tz))
+
+        # DateTime64 (nanosecond precision) - should use same timezone as ClickHouse server
+        self.assertEqual(ret.iloc[0]["datetime64_ns"], pd.Timestamp('2023-12-25 14:30:45.123456789', tz=actual_tz))
+
+        # UTC timezone datetime
+        expected_utc = pd.Timestamp('2023-06-15 12:00:00', tz='UTC')
+        actual_utc = ret.iloc[0]["datetime_utc"]
+        self.assertEqual(actual_utc, expected_utc)
+
+        # Europe/London timezone datetime
+        expected_london = pd.Timestamp('2023-06-15 15:30:00', tz='Europe/London')
+        actual_london = ret.iloc[0]["datetime_london"]
+        self.assertEqual(actual_london, expected_london)
+
+        # Timezone-aware datetime64 - Asia/Shanghai
+        expected_sh = pd.Timestamp('2023-06-15 12:00:00.123', tz='Asia/Shanghai')
+        actual_sh = ret.iloc[0]["datetime64_tz_sh"]
+        self.assertEqual(actual_sh, expected_sh)
+
+        # Timezone-aware datetime64 - America/New_York
+        expected_ny = pd.Timestamp('2023-06-15 12:00:00.456', tz='America/New_York')
+        actual_ny = ret.iloc[0]["datetime64_tz_ny"]
+        self.assertEqual(actual_ny, expected_ny)
+
+        # Test second row - exact datetime values with ClickHouse server timezone
+        self.assertEqual(ret.iloc[1]["datetime_val"], pd.Timestamp('2000-02-29 09:15:30', tz=actual_tz))
+        self.assertEqual(ret.iloc[1]["datetime_min"], pd.Timestamp('2023-01-01 12:30:45', tz=actual_tz))
+        self.assertEqual(ret.iloc[1]["datetime_max"], pd.Timestamp('2023-12-31 18:45:15', tz=actual_tz))
+        self.assertEqual(ret.iloc[1]["datetime64_val"], pd.Timestamp('2000-02-29 09:15:30.654321', tz=actual_tz))
+        self.assertEqual(ret.iloc[1]["datetime64_min"], pd.Timestamp('2023-01-01 08:00:00.111111', tz=actual_tz))
+        self.assertEqual(ret.iloc[1]["datetime64_max"], pd.Timestamp('2023-12-31 20:30:45.888888', tz=actual_tz))
+        self.assertEqual(ret.iloc[1]["datetime64_ns"], pd.Timestamp('2000-02-29 09:15:30.987654321', tz=actual_tz))
+
+        # Second row timezone datetime tests
+        expected_utc_2 = pd.Timestamp('2024-01-15 08:30:00', tz='UTC')
+        actual_utc_2 = ret.iloc[1]["datetime_utc"]
+        self.assertEqual(actual_utc_2, expected_utc_2)
+
+        expected_london_2 = pd.Timestamp('2024-01-15 20:00:00', tz='Europe/London')
+        actual_london_2 = ret.iloc[1]["datetime_london"]
+        self.assertEqual(actual_london_2, expected_london_2)
+
+        # Second row timezone tests (already converted by C++ code)
+        expected_sh_2 = pd.Timestamp('2024-01-15 16:45:30.789', tz='Asia/Shanghai')
+        actual_sh_2 = ret.iloc[1]["datetime64_tz_sh"]
+        self.assertEqual(actual_sh_2, expected_sh_2)
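+
+        # Timezone-aware pandas Timestamps compare by absolute instant, so these
+        # assertions hold regardless of the local timezone of the test runner.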
+
+        expected_ny_2 = pd.Timestamp('2024-01-15 09:15:45.987', tz='America/New_York')
+        actual_ny_2 = ret.iloc[1]["datetime64_tz_ny"]
+        self.assertEqual(actual_ny_2, expected_ny_2)
+
+        # Precise data type validation
+        expected_types = {
+            "row_id": "int64",
+            "datetime_val": "datetime64[s]",  # DateTime types mapped to datetime64[s] (second precision)
+            "datetime_min": "datetime64[s]",
+            "datetime_max": "datetime64[s]",
+            "datetime64_val": "datetime64[ns]",  # DateTime64 types mapped to datetime64[ns] (nanosecond precision)
+            "datetime64_min": "datetime64[ns]",
+            "datetime64_max": "datetime64[ns]",
+            "datetime64_ns": "datetime64[ns]",  # DateTime64 with 9-digit precision (nanoseconds)
+            "datetime_utc": "datetime64[s]",  # DateTime with timezone -> datetime64[s]
+            "datetime64_tz_sh": "datetime64[ns]",  # DateTime64 with Asia/Shanghai timezone
+            "datetime64_tz_ny": "datetime64[ns]"  # DateTime64 with America/New_York timezone
+        }
+
+        for col, expected_type in expected_types.items():