From dfea5bfcd62f8a458178db2e2fcc3eca8b5f0830 Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Tue, 7 Nov 2023 12:11:52 +0100 Subject: [PATCH 1/3] Add support for a "postgres_query" function that allows you to run arbitrary queries against postgres --- src/CMakeLists.txt | 1 + src/include/postgres_scanner.hpp | 6 + src/include/postgres_type_oids.hpp | 192 ++++++++++++++++++ src/include/postgres_utils.hpp | 1 + src/postgres_extension.cpp | 3 + src/postgres_query.cpp | 87 ++++++++ src/postgres_scanner.cpp | 29 ++- src/postgres_utils.cpp | 108 ++++++++-- src/storage/postgres_insert.cpp | 3 +- test/sql/scanner/postgres_query.test | 94 +++++++++ .../attach_insert_from_scan_large.test | 2 +- 11 files changed, 499 insertions(+), 27 deletions(-) create mode 100644 src/include/postgres_type_oids.hpp create mode 100644 src/postgres_query.cpp create mode 100644 test/sql/scanner/postgres_query.test diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index bdbd14fb0..5c85a1b53 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -10,6 +10,7 @@ add_library( postgres_copy_to.cpp postgres_extension.cpp postgres_filter_pushdown.cpp + postgres_query.cpp postgres_scanner.cpp postgres_storage.cpp postgres_utils.cpp) diff --git a/src/include/postgres_scanner.hpp b/src/include/postgres_scanner.hpp index ac2e9a32e..ac3c5f083 100644 --- a/src/include/postgres_scanner.hpp +++ b/src/include/postgres_scanner.hpp @@ -21,6 +21,7 @@ struct PostgresBindData : public FunctionData { string schema_name; string table_name; + string sql; idx_t pages_approx = 0; vector postgres_types; @@ -73,4 +74,9 @@ class PostgresClearCacheFunction : public TableFunction { PostgresClearCacheFunction(); }; +class PostgresQueryFunction : public TableFunction { +public: + PostgresQueryFunction(); +}; + } // namespace duckdb diff --git a/src/include/postgres_type_oids.hpp b/src/include/postgres_type_oids.hpp new file mode 100644 index 000000000..0c3f5a3b8 --- /dev/null +++ b/src/include/postgres_type_oids.hpp @@ -0,0 +1,192 @@ +// taken from pg_type_d.h +#pragma once + +#define BOOLOID 16 +#define BYTEAOID 17 +#define CHAROID 18 +#define NAMEOID 19 +#define INT8OID 20 +#define INT2OID 21 +#define INT2VECTOROID 22 +#define INT4OID 23 +#define REGPROCOID 24 +#define TEXTOID 25 +#define OIDOID 26 +#define TIDOID 27 +#define XIDOID 28 +#define CIDOID 29 +#define OIDVECTOROID 30 +#define JSONOID 114 +#define XMLOID 142 +#define PG_NODE_TREEOID 194 +#define PG_NDISTINCTOID 3361 +#define PG_DEPENDENCIESOID 3402 +#define PG_MCV_LISTOID 5017 +#define PG_DDL_COMMANDOID 32 +#define XID8OID 5069 +#define POINTOID 600 +#define LSEGOID 601 +#define PATHOID 602 +#define BOXOID 603 +#define POLYGONOID 604 +#define LINEOID 628 +#define FLOAT4OID 700 +#define FLOAT8OID 701 +#define UNKNOWNOID 705 +#define CIRCLEOID 718 +#define MONEYOID 790 +#define MACADDROID 829 +#define INETOID 869 +#define CIDROID 650 +#define MACADDR8OID 774 +#define ACLITEMOID 1033 +#define BPCHAROID 1042 +#define VARCHAROID 1043 +#define DATEOID 1082 +#define TIMEOID 1083 +#define TIMESTAMPOID 1114 +#define TIMESTAMPTZOID 1184 +#define INTERVALOID 1186 +#define TIMETZOID 1266 +#define BITOID 1560 +#define VARBITOID 1562 +#define NUMERICOID 1700 +#define REFCURSOROID 1790 +#define REGPROCEDUREOID 2202 +#define REGOPEROID 2203 +#define REGOPERATOROID 2204 +#define REGCLASSOID 2205 +#define REGCOLLATIONOID 4191 +#define REGTYPEOID 2206 +#define REGROLEOID 4096 +#define REGNAMESPACEOID 4089 +#define UUIDOID 2950 +#define PG_LSNOID 3220 +#define TSVECTOROID 3614 +#define GTSVECTOROID 3642 +#define TSQUERYOID 3615 +#define REGCONFIGOID 3734 +#define REGDICTIONARYOID 3769 +#define JSONBOID 3802 +#define JSONPATHOID 4072 +#define TXID_SNAPSHOTOID 2970 +#define PG_SNAPSHOTOID 5038 +#define INT4RANGEOID 3904 +#define NUMRANGEOID 3906 +#define TSRANGEOID 3908 +#define TSTZRANGEOID 3910 +#define DATERANGEOID 3912 +#define INT8RANGEOID 3926 +#define INT4MULTIRANGEOID 4451 +#define NUMMULTIRANGEOID 4532 +#define TSMULTIRANGEOID 4533 +#define TSTZMULTIRANGEOID 4534 +#define DATEMULTIRANGEOID 4535 +#define INT8MULTIRANGEOID 4536 +#define RECORDOID 2249 +#define RECORDARRAYOID 2287 +#define CSTRINGOID 2275 +#define ANYOID 2276 +#define ANYARRAYOID 2277 +#define VOIDOID 2278 +#define TRIGGEROID 2279 +#define EVENT_TRIGGEROID 3838 +#define LANGUAGE_HANDLEROID 2280 +#define INTERNALOID 2281 +#define ANYELEMENTOID 2283 +#define ANYNONARRAYOID 2776 +#define ANYENUMOID 3500 +#define FDW_HANDLEROID 3115 +#define INDEX_AM_HANDLEROID 325 +#define TSM_HANDLEROID 3310 +#define TABLE_AM_HANDLEROID 269 +#define ANYRANGEOID 3831 +#define ANYCOMPATIBLEOID 5077 +#define ANYCOMPATIBLEARRAYOID 5078 +#define ANYCOMPATIBLENONARRAYOID 5079 +#define ANYCOMPATIBLERANGEOID 5080 +#define ANYMULTIRANGEOID 4537 +#define ANYCOMPATIBLEMULTIRANGEOID 4538 +#define PG_BRIN_BLOOM_SUMMARYOID 4600 +#define PG_BRIN_MINMAX_MULTI_SUMMARYOID 4601 +#define BOOLARRAYOID 1000 +#define BYTEAARRAYOID 1001 +#define CHARARRAYOID 1002 +#define NAMEARRAYOID 1003 +#define INT8ARRAYOID 1016 +#define INT2ARRAYOID 1005 +#define INT2VECTORARRAYOID 1006 +#define INT4ARRAYOID 1007 +#define REGPROCARRAYOID 1008 +#define TEXTARRAYOID 1009 +#define OIDARRAYOID 1028 +#define TIDARRAYOID 1010 +#define XIDARRAYOID 1011 +#define CIDARRAYOID 1012 +#define OIDVECTORARRAYOID 1013 +#define PG_TYPEARRAYOID 210 +#define PG_ATTRIBUTEARRAYOID 270 +#define PG_PROCARRAYOID 272 +#define PG_CLASSARRAYOID 273 +#define JSONARRAYOID 199 +#define XMLARRAYOID 143 +#define XID8ARRAYOID 271 +#define POINTARRAYOID 1017 +#define LSEGARRAYOID 1018 +#define PATHARRAYOID 1019 +#define BOXARRAYOID 1020 +#define POLYGONARRAYOID 1027 +#define LINEARRAYOID 629 +#define FLOAT4ARRAYOID 1021 +#define FLOAT8ARRAYOID 1022 +#define CIRCLEARRAYOID 719 +#define MONEYARRAYOID 791 +#define MACADDRARRAYOID 1040 +#define INETARRAYOID 1041 +#define CIDRARRAYOID 651 +#define MACADDR8ARRAYOID 775 +#define ACLITEMARRAYOID 1034 +#define BPCHARARRAYOID 1014 +#define VARCHARARRAYOID 1015 +#define DATEARRAYOID 1182 +#define TIMEARRAYOID 1183 +#define TIMESTAMPARRAYOID 1115 +#define TIMESTAMPTZARRAYOID 1185 +#define INTERVALARRAYOID 1187 +#define TIMETZARRAYOID 1270 +#define BITARRAYOID 1561 +#define VARBITARRAYOID 1563 +#define NUMERICARRAYOID 1231 +#define REFCURSORARRAYOID 2201 +#define REGPROCEDUREARRAYOID 2207 +#define REGOPERARRAYOID 2208 +#define REGOPERATORARRAYOID 2209 +#define REGCLASSARRAYOID 2210 +#define REGCOLLATIONARRAYOID 4192 +#define REGTYPEARRAYOID 2211 +#define REGROLEARRAYOID 4097 +#define REGNAMESPACEARRAYOID 4090 +#define UUIDARRAYOID 2951 +#define PG_LSNARRAYOID 3221 +#define TSVECTORARRAYOID 3643 +#define GTSVECTORARRAYOID 3644 +#define TSQUERYARRAYOID 3645 +#define REGCONFIGARRAYOID 3735 +#define REGDICTIONARYARRAYOID 3770 +#define JSONBARRAYOID 3807 +#define JSONPATHARRAYOID 4073 +#define TXID_SNAPSHOTARRAYOID 2949 +#define PG_SNAPSHOTARRAYOID 5039 +#define INT4RANGEARRAYOID 3905 +#define NUMRANGEARRAYOID 3907 +#define TSRANGEARRAYOID 3909 +#define TSTZRANGEARRAYOID 3911 +#define DATERANGEARRAYOID 3913 +#define INT8RANGEARRAYOID 3927 +#define INT4MULTIRANGEARRAYOID 6150 +#define NUMMULTIRANGEARRAYOID 6151 +#define TSMULTIRANGEARRAYOID 6152 +#define TSTZMULTIRANGEARRAYOID 6153 +#define DATEMULTIRANGEARRAYOID 6155 +#define INT8MULTIRANGEARRAYOID 6157 +#define CSTRINGARRAYOID 1263 diff --git a/src/include/postgres_utils.hpp b/src/include/postgres_utils.hpp index de8158410..72b5ffe69 100644 --- a/src/include/postgres_utils.hpp +++ b/src/include/postgres_utils.hpp @@ -41,6 +41,7 @@ class PostgresUtils { optional_ptr schema, const PostgresTypeData &input, PostgresType &postgres_type); static string TypeToString(const LogicalType &input); + static string PostgresOidToName(uint32_t oid); static uint32_t ToPostgresOid(const LogicalType &input); static bool SupportedPostgresOid(const LogicalType &input); static LogicalType RemoveAlias(const LogicalType &type); diff --git a/src/postgres_extension.cpp b/src/postgres_extension.cpp index a4f46a28f..ea632b862 100644 --- a/src/postgres_extension.cpp +++ b/src/postgres_extension.cpp @@ -49,6 +49,9 @@ static void LoadInternal(DatabaseInstance &db) { PostgresClearCacheFunction clear_cache_func; ExtensionUtil::RegisterFunction(db, clear_cache_func); + PostgresQueryFunction query_func; + ExtensionUtil::RegisterFunction(db, query_func); + auto &config = DBConfig::GetConfig(db); config.storage_extensions["postgres_scanner"] = make_uniq(); diff --git a/src/postgres_query.cpp b/src/postgres_query.cpp new file mode 100644 index 000000000..cb124a950 --- /dev/null +++ b/src/postgres_query.cpp @@ -0,0 +1,87 @@ +#include "duckdb.hpp" + +#include "duckdb/parser/parsed_data/create_table_function_info.hpp" +#include "postgres_scanner.hpp" +#include "duckdb/main/database_manager.hpp" +#include "duckdb/main/attached_database.hpp" +#include "storage/postgres_catalog.hpp" +#include "storage/postgres_transaction.hpp" + +namespace duckdb { + +static unique_ptr PGQueryBind(ClientContext &context, TableFunctionBindInput &input, + vector &return_types, vector &names) { + auto result = make_uniq(); + + // look up the database to query + auto db_name = input.inputs[0].GetValue(); + auto &db_manager = DatabaseManager::Get(context); + auto db = db_manager.GetDatabase(context, db_name); + if (!db) { + throw BinderException("Failed to find attached database \"%s\" referenced in postgres_query", db_name); + } + auto &catalog = db->GetCatalog(); + if (catalog.GetCatalogType() != "postgres") { + throw BinderException("Attached database \"%s\" does not refer to a Postgres database", db_name); + } + auto &pg_catalog = catalog.Cast(); + auto &transaction = Transaction::Get(context, catalog).Cast(); + auto sql = input.inputs[1].GetValue(); + + auto &con = transaction.GetConnection(); + auto conn = con.GetConn(); + // prepare execution of the query to figure out the result types and names + auto prepared = PQprepare(conn, "", sql.c_str(), 0, nullptr); + PostgresResult prepared_wrapper(prepared); + if (!prepared) { + throw BinderException("Failed to prepare query \"%s\" (no result returned): %s", sql, PQerrorMessage(conn)); + } + if (PQresultStatus(prepared) != PGRES_COMMAND_OK) { + throw BinderException("Failed to prepare query \"%s\": %s", sql, PQresultErrorMessage(prepared)); + } + // use describe_prepared + auto describe_prepared = PQdescribePrepared(conn, ""); + PostgresResult describe_wrapper(describe_prepared); + if (!describe_prepared || PQresultStatus(describe_prepared) != PGRES_COMMAND_OK) { + auto extended_err = describe_prepared ? PQresultErrorMessage(describe_prepared) : PQerrorMessage(conn); + throw BinderException("Failed to describe prepared statement: %s", extended_err); + } + auto nfields = PQnfields(describe_prepared); + if (nfields <= 0) { + throw BinderException("No fields returned by query \"%s\" - the query must be a SELECT statement that returns at least one column", sql); + } + for(idx_t c = 0; c < nfields; c++) { + PostgresType postgres_type; + postgres_type.oid = PQftype(describe_prepared, c); + PostgresTypeData type_data; + type_data.type_name = PostgresUtils::PostgresOidToName(postgres_type.oid); + type_data.type_modifier = PQfmod(describe_prepared, c); + auto converted_type = PostgresUtils::TypeToLogicalType(nullptr, nullptr, type_data, postgres_type); + result->postgres_types.push_back(postgres_type); + return_types.emplace_back(converted_type); + names.emplace_back(PQfname(describe_prepared, c)); + } + + // set up the bind data + result->dsn = "xxxxxxxxxxxx"; // dsn should never be used so insert a bogus string + result->transaction = &transaction; + result->connection = PostgresConnection(con.GetConnection()); + result->types = return_types; + result->names = names; + result->read_only = false; + result->in_recovery = true; + result->SetTablePages(0); + result->sql = std::move(sql); + return std::move(result); +} + +PostgresQueryFunction::PostgresQueryFunction() + : TableFunction("postgres_query", {LogicalType::VARCHAR, LogicalType::VARCHAR}, + nullptr, PGQueryBind) { + PostgresScanFunction scan_function; + init_global = scan_function.init_global; + init_local = scan_function.init_local; + function = scan_function.function; + projection_pushdown = true; +} +} diff --git a/src/postgres_scanner.cpp b/src/postgres_scanner.cpp index 50bcc765a..2565f0a78 100644 --- a/src/postgres_scanner.cpp +++ b/src/postgres_scanner.cpp @@ -127,7 +127,12 @@ static void PostgresInitInternal(ClientContext &context, const PostgresBindData col_names += ", "; } if (column_id == COLUMN_IDENTIFIER_ROW_ID) { - col_names += "ctid"; + if (bind_data->table_name.empty()) { + // count(*) over postgres_query + col_names += "NULL"; + } else { + col_names += "ctid"; + } } else { col_names += KeywordHelper::WriteQuoted(bind_data->names[column_id], '"'); if (bind_data->postgres_types[column_id].info == PostgresTypeAnnotation::CAST_TO_VARCHAR) { @@ -141,7 +146,7 @@ static void PostgresInitInternal(ClientContext &context, const PostgresBindData } } - string filter_string= PostgresFilterPushdown::TransformFilters(lstate.column_ids, lstate.filters, bind_data->names); + string filter_string = PostgresFilterPushdown::TransformFilters(lstate.column_ids, lstate.filters, bind_data->names); string filter; if (bind_data->pages_approx > 0) { @@ -155,12 +160,22 @@ static void PostgresInitInternal(ClientContext &context, const PostgresBindData } filter += filter_string; } - lstate.sql = StringUtil::Format( - R"( -COPY (SELECT %s FROM %s.%s %s) TO STDOUT (FORMAT binary); -)", - col_names, KeywordHelper::WriteQuoted(bind_data->schema_name, '"'), KeywordHelper::WriteQuoted(bind_data->table_name, '"'), filter); + if (bind_data->table_name.empty()) { + D_ASSERT(!bind_data->sql.empty()); + lstate.sql = StringUtil::Format( + R"( + COPY (SELECT %s FROM (%s) AS __unnamed_subquery %s) TO STDOUT (FORMAT binary); + )", + col_names, bind_data->sql, filter); + } else { + lstate.sql = StringUtil::Format( + R"( + COPY (SELECT %s FROM %s.%s %s) TO STDOUT (FORMAT binary); + )", + col_names, KeywordHelper::WriteQuoted(bind_data->schema_name, '"'), KeywordHelper::WriteQuoted(bind_data->table_name, '"'), filter); + + } lstate.exec = false; lstate.done = false; } diff --git a/src/postgres_utils.cpp b/src/postgres_utils.cpp index 67b554985..d5ffc1cc3 100644 --- a/src/postgres_utils.cpp +++ b/src/postgres_utils.cpp @@ -1,6 +1,7 @@ #include "postgres_utils.hpp" #include "storage/postgres_schema_entry.hpp" #include "storage/postgres_transaction.hpp" +#include "postgres_type_oids.hpp" namespace duckdb { @@ -242,24 +243,6 @@ PostgresType PostgresUtils::CreateEmptyPostgresType(const LogicalType &type) { return result; } -// taken from pg_type_d.h -#define BOOLOID 16 -#define BYTEAOID 17 -#define INT8OID 20 -#define INT2OID 21 -#define INT4OID 23 -#define FLOAT4OID 700 -#define FLOAT8OID 701 -#define VARCHAROID 1043 -#define DATEOID 1082 -#define TIMEOID 1083 -#define TIMESTAMPOID 1114 -#define TIMESTAMPTZOID 1184 -#define INTERVALOID 1186 -#define TIMETZOID 1266 -#define BITOID 1560 -#define UUIDOID 2950 - bool PostgresUtils::SupportedPostgresOid(const LogicalType &input) { switch(input.id()) { case LogicalTypeId::BOOLEAN: @@ -284,6 +267,95 @@ bool PostgresUtils::SupportedPostgresOid(const LogicalType &input) { } } +string PostgresUtils::PostgresOidToName(uint32_t oid) { + switch(oid) { + case BOOLOID: + return "bool"; + case INT2OID: + return "int2"; + case INT4OID: + return "int4"; + case INT8OID: + return "int8"; + case FLOAT4OID: + return "float4"; + case FLOAT8OID: + return "float8"; + case CHAROID: + case BPCHAROID: + return "char"; + case TEXTOID: + case VARCHAROID: + return "varchar"; + case JSONOID: + return "json"; + case BYTEAOID: + return "bytea"; + case DATEOID: + return "date"; + case TIMEOID: + return "time"; + case TIMESTAMPOID: + return "timestamp"; + case INTERVALOID: + return "interval"; + case TIMETZOID: + return "timetz"; + case TIMESTAMPTZOID: + return "timestamptz"; + case BITOID: + return "bit"; + case UUIDOID: + return "uuid"; + case NUMERICOID: + return "numeric"; + case JSONBOID: + return "jsonb"; + case BOOLARRAYOID: + return "_bool"; + case CHARARRAYOID: + case BPCHARARRAYOID: + return "_char"; + case INT8ARRAYOID: + return "_int8"; + case INT2ARRAYOID: + return "_int2"; + case INT4ARRAYOID: + return "_int4"; + case FLOAT4ARRAYOID: + return "_float4"; + case FLOAT8ARRAYOID: + return "_float8"; + case TEXTARRAYOID: + case VARCHARARRAYOID: + return "_varchar"; + case JSONARRAYOID: + return "_json"; + case JSONBARRAYOID: + return "_jsonb"; + case NUMERICARRAYOID: + return "_numeric"; + case UUIDARRAYOID: + return "_uuid"; + case DATEARRAYOID: + return "_date"; + case TIMEARRAYOID: + return "_time"; + case TIMESTAMPARRAYOID: + return "_timestamp"; + case TIMESTAMPTZARRAYOID: + return "_timestamptz"; + case INTERVALARRAYOID: + return "_interval"; + case TIMETZARRAYOID: + return "_timetz"; + case BITARRAYOID: + return "_bit"; + default: + return "unsupported_type"; + } +} + uint32_t PostgresUtils::ToPostgresOid(const LogicalType &input) { switch(input.id()) { case LogicalTypeId::BOOLEAN: diff --git a/src/storage/postgres_insert.cpp b/src/storage/postgres_insert.cpp index f92b60be8..5f6fff5c1 100644 --- a/src/storage/postgres_insert.cpp +++ b/src/storage/postgres_insert.cpp @@ -188,7 +188,8 @@ unique_ptr AddCastToPostgresTypes(ClientContext &context, uniq void PostgresCatalog::MaterializePostgresScans(PhysicalOperator &op) { if (op.type == PhysicalOperatorType::TABLE_SCAN) { auto &table_scan = op.Cast(); - if (table_scan.function.name == "postgres_scan" || table_scan.function.name == "postgres_scan_pushdown") { + if (table_scan.function.name == "postgres_scan" || table_scan.function.name == "postgres_scan_pushdown" + || table_scan.function.name == "postgres_query") { auto &bind_data = table_scan.bind_data->Cast(); bind_data.requires_materialization = true; bind_data.max_threads = 1; diff --git a/test/sql/scanner/postgres_query.test b/test/sql/scanner/postgres_query.test new file mode 100644 index 000000000..78c16ad44 --- /dev/null +++ b/test/sql/scanner/postgres_query.test @@ -0,0 +1,94 @@ +# name: test/sql/scanner/postgres_query.test +# description: Test running postgres_query +# group: [scanner] + +require postgres_scanner + +statement ok +ATTACH 'dbname=postgresscanner' AS s1 (TYPE POSTGRES) + +query III +select * from postgres_query('s1', 'SELECT * FROM cars'); +---- +ferari testarosa red +aston martin db2 blue +bentley mulsanne gray +ford T black + +# filters +query III +select * from postgres_query('s1', 'SELECT * FROM cars WHERE color=''red'''); +---- +ferari testarosa red + +query III +select * from postgres_query('s1', 'SELECT * FROM cars') WHERE color='red'; +---- +ferari testarosa red + +# projection pushdown +query I +select color from postgres_query('s1', 'SELECT color, brand FROM cars'); +---- +red +blue +gray +black + +# count(*) +query I +select count(*) from postgres_query('s1', 'SELECT * FROM cars WHERE color=''red'''); +---- +1 + +# types +query IIIIIIII +SELECT TYPEOF(COLUMNS(*)) FROM postgres_query('s1', 'SELECT * FROM pg_numtypes') LIMIT 1 +---- +BOOLEAN SMALLINT INTEGER BIGINT FLOAT DOUBLE DECIMAL(4,1) DOUBLE + +query IIIIIIIII +SELECT TYPEOF(COLUMNS(*)) FROM postgres_query('s1', 'SELECT * FROM pg_bytetypes') LIMIT 1 +---- +VARCHAR VARCHAR VARCHAR VARCHAR VARCHAR VARCHAR BLOB VARCHAR UUID + +query IIIII +SELECT TYPEOF(COLUMNS(*)) FROM postgres_query('s1', 'SELECT * FROM pg_datetypes') LIMIT 1 +---- +DATE TIME TIME WITH TIME ZONE TIMESTAMP TIMESTAMP WITH TIME ZONE + +# arrays +query IIIIIIII +SELECT TYPEOF(COLUMNS(*)) FROM postgres_query('s1', 'SELECT * FROM pg_numarraytypes') LIMIT 1 +---- +BOOLEAN[] SMALLINT[] INTEGER[] BIGINT[] FLOAT[] DOUBLE[] DECIMAL(4,1)[] DOUBLE[] + +query IIIIIIII +SELECT TYPEOF(COLUMNS(*)) FROM postgres_query('s1', 'SELECT * FROM pg_bytearraytypes') LIMIT 1 +---- +VARCHAR[] VARCHAR[] VARCHAR[] VARCHAR[] VARCHAR[] VARCHAR VARCHAR[] UUID[] + +query IIIII +SELECT TYPEOF(COLUMNS(*)) FROM postgres_query('s1', 'SELECT * FROM pg_datearraytypes') LIMIT 1 +---- +DATE[] TIME[] TIME WITH TIME ZONE[] TIMESTAMP[] TIMESTAMP WITH TIME ZONE[] + +# incorrect usage +statement error +select * from postgres_query('s1', 'CREATE TABLE my_table(i INT)'); +---- +No fields returned by query + +statement error +select * from postgres_query('xx', 'SELECT * FROM cars'); +---- +Failed to find attached database + +# what if the database is not a postgres database +statement ok +ATTACH ':memory:' AS ddb + +statement error +select * from postgres_query('ddb', 'SELECT * FROM cars'); +---- +does not refer to a Postgres database diff --git a/test/sql/storage/attach_insert_from_scan_large.test b/test/sql/storage/attach_insert_from_scan_large.test index f43f9bb9a..f8ec5cc02 100644 --- a/test/sql/storage/attach_insert_from_scan_large.test +++ b/test/sql/storage/attach_insert_from_scan_large.test @@ -27,7 +27,7 @@ SELECT COUNT(*) FROM s.tbl 200000 query I -INSERT INTO s.tbl FROM s.tbl +INSERT INTO s.tbl FROM postgres_query('s', 'SELECT * FROM tbl') ---- 200000 From 0f06336476b68ffd2e025a77477ed8ebf28ae2ac Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Tue, 7 Nov 2023 12:29:02 +0100 Subject: [PATCH 2/3] Cleanup unnecessary transaction --- src/include/postgres_scanner.hpp | 1 - src/postgres_query.cpp | 3 +-- src/postgres_scanner.cpp | 4 ++-- src/storage/postgres_table_entry.cpp | 1 - test/sql/scanner/postgres_query.test | 12 ++++++++++++ 5 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/include/postgres_scanner.hpp b/src/include/postgres_scanner.hpp index ac3c5f083..dd685b949 100644 --- a/src/include/postgres_scanner.hpp +++ b/src/include/postgres_scanner.hpp @@ -38,7 +38,6 @@ struct PostgresBindData : public FunctionData { idx_t max_threads = 1; PostgresConnection connection; - optional_ptr transaction; PostgresConnectionReservation connection_reservation; public: diff --git a/src/postgres_query.cpp b/src/postgres_query.cpp index cb124a950..9e5094fc6 100644 --- a/src/postgres_query.cpp +++ b/src/postgres_query.cpp @@ -63,8 +63,7 @@ static unique_ptr PGQueryBind(ClientContext &context, TableFunctio } // set up the bind data - result->dsn = "xxxxxxxxxxxx"; // dsn should never be used so insert a bogus string - result->transaction = &transaction; + result->dsn = con.GetDSN(); result->connection = PostgresConnection(con.GetConnection()); result->types = return_types; result->names = names; diff --git a/src/postgres_scanner.cpp b/src/postgres_scanner.cpp index 2565f0a78..b94853a6c 100644 --- a/src/postgres_scanner.cpp +++ b/src/postgres_scanner.cpp @@ -263,9 +263,9 @@ static unique_ptr GetLocalState(ClientContext &context, } local_state->column_ids = input.column_ids; - if (bind_data.transaction && !bind_data.read_only) { + if (!bind_data.read_only) { // if we have made other modifications in this transaction we have to use the main connection - local_state->connection = PostgresConnection(bind_data.transaction->GetConnection().GetConnection()); + local_state->connection = PostgresConnection(bind_data.connection.GetConnection()); } else { local_state->connection = PostgresScanConnect(bind_data.dsn, bind_data.in_recovery, bind_data.snapshot); } diff --git a/src/storage/postgres_table_entry.cpp b/src/storage/postgres_table_entry.cpp index 010bbca43..587a5692e 100644 --- a/src/storage/postgres_table_entry.cpp +++ b/src/storage/postgres_table_entry.cpp @@ -44,7 +44,6 @@ TableFunction PostgresTableEntry::GetScanFunction(ClientContext &context, unique result->schema_name = schema.name; result->table_name = name; result->dsn = conn.GetDSN(); - result->transaction = &transaction; result->connection = PostgresConnection(conn.GetConnection()); PostgresScanFunction::PrepareBind(pg_catalog.GetPostgresVersion(), context, *result); diff --git a/test/sql/scanner/postgres_query.test b/test/sql/scanner/postgres_query.test index 78c16ad44..a5331c07e 100644 --- a/test/sql/scanner/postgres_query.test +++ b/test/sql/scanner/postgres_query.test @@ -92,3 +92,15 @@ statement error select * from postgres_query('ddb', 'SELECT * FROM cars'); ---- does not refer to a Postgres database + +# syntax error +statement error +select count(*) from postgres_query('s1', 'SELEC * FROM cars'); +---- +syntax error + +# table does not exist +statement error +select count(*) from postgres_query('s1', 'SELECT * FROM nonexistent_table'); +---- +does not exist From 2770513d120daa8389161dcbb01119169bc0a603 Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Tue, 7 Nov 2023 13:03:33 +0100 Subject: [PATCH 3/3] .1 --- .github/workflows/Linux.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/Linux.yml b/.github/workflows/Linux.yml index 4889db757..4cab71ebd 100644 --- a/.github/workflows/Linux.yml +++ b/.github/workflows/Linux.yml @@ -77,7 +77,7 @@ jobs: aarch64_cross_compile: 1 - name: Setup vcpkg - uses: lukka/run-vcpkg@v11 + uses: lukka/run-vcpkg@v11.1 with: vcpkgGitCommitId: 501db0f17ef6df184fcdbfbe0f87cde2313b6ab1