diff --git a/gpcontrib/gp_exttable_fdw/input/gp_exttable_fdw.source b/gpcontrib/gp_exttable_fdw/input/gp_exttable_fdw.source index 41012e73c81..81754fbc996 100644 --- a/gpcontrib/gp_exttable_fdw/input/gp_exttable_fdw.source +++ b/gpcontrib/gp_exttable_fdw/input/gp_exttable_fdw.source @@ -53,12 +53,18 @@ OPTIONS (format_type 'c', delimiter ',', location_uris 'file://@hostname@@abs_srcdir@/data/tableless.csv', reject_limit_type 'p', reject_limit '120'); --- Error, invalid encoding +-- Error, invalid encoding (negative numeric ID) CREATE FOREIGN TABLE tableless_ext_fdw(a int, b int) SERVER gp_exttable_server OPTIONS (format_type 'c', delimiter ',', encoding '-1', location_uris 'file://@hostname@@abs_srcdir@/data/tableless.csv'); +-- Error, invalid encoding (unknown name) +CREATE FOREIGN TABLE tableless_ext_fdw(a int, b int) +SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', encoding 'bogus', + location_uris 'file://@hostname@@abs_srcdir@/data/tableless.csv'); + -- OK, no execute_on | log_errors | encoding | is_writable option CREATE FOREIGN TABLE tableless_ext_fdw(a int, b int) SERVER gp_exttable_server @@ -79,3 +85,59 @@ SELECT urilocation FROM pg_exttable WHERE reloid = 'public.ext_special_uri'::reg SELECT ftoptions FROM pg_foreign_table WHERE ftrelid='public.ext_special_uri'::regclass; \a SELECT * FROM ext_special_uri ORDER BY a; + +-- =================================================================== +-- Tests for issue #1726: FDW OPTIONS encoding accepts both numeric IDs +-- and symbolic names (UTF8, utf-8, GBK, ...). Names previously parsed +-- via atoi() and silently degraded to SQL_ASCII. +-- =================================================================== + +-- Numeric form (baseline; worked before the fix as well). +CREATE FOREIGN TABLE ext_enc_num (a int) SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', + location_uris 'file:///tmp/ext_enc_ignored.csv', + encoding '6'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_num'::regclass; + +-- Symbolic name 'UTF8' — used to be silently SQL_ASCII (the bug). +CREATE FOREIGN TABLE ext_enc_utf8 (a int) SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', + location_uris 'file:///tmp/ext_enc_ignored.csv', + encoding 'UTF8'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_utf8'::regclass; + +-- Case + dash variant resolved by pg_char_to_encoding(). +CREATE FOREIGN TABLE ext_enc_utf8_dash (a int) SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', + location_uris 'file:///tmp/ext_enc_ignored.csv', + encoding 'utf-8'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_utf8_dash'::regclass; + +-- Non-UTF8 symbolic name. +CREATE FOREIGN TABLE ext_enc_gbk (a int) SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', + location_uris 'file:///tmp/ext_enc_ignored.csv', + encoding 'GBK'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_gbk'::regclass; + +-- ALTER FOREIGN TABLE ... OPTIONS (SET encoding 'UTF8') — same code +-- path, this proves the read-side resolution works after an ALTER too. +CREATE FOREIGN TABLE ext_enc_alter (a int) SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', + location_uris 'file:///tmp/ext_enc_ignored.csv', + encoding '0'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_alter'::regclass; +ALTER FOREIGN TABLE ext_enc_alter OPTIONS (SET encoding 'UTF8'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_alter'::regclass; + +DROP FOREIGN TABLE ext_enc_num; +DROP FOREIGN TABLE ext_enc_utf8; +DROP FOREIGN TABLE ext_enc_utf8_dash; +DROP FOREIGN TABLE ext_enc_gbk; +DROP FOREIGN TABLE ext_enc_alter; diff --git a/gpcontrib/gp_exttable_fdw/option.c b/gpcontrib/gp_exttable_fdw/option.c index 04cccfe0e47..59bd6b99014 100644 --- a/gpcontrib/gp_exttable_fdw/option.c +++ b/gpcontrib/gp_exttable_fdw/option.c @@ -135,11 +135,13 @@ gp_exttable_permission_check(PG_FUNCTION_ARGS) } else if(pg_strcasecmp(def->defname, "encoding") == 0) { - char *encoding = (char *) defGetString(def); - if (!PG_VALID_ENCODING(atoi(encoding))) - ereport(ERROR, - (errcode(ERRCODE_FDW_INVALID_ATTRIBUTE_VALUE), - errmsg("%s is not a valid encoding code", encoding))); + /* + * Accept either a symbolic encoding name (e.g. 'UTF8', 'GBK') + * or a numeric encoding ID. Reject anything else explicitly, + * rather than letting atoi() silently mistranslate non-numeric + * names to SQL_ASCII. + */ + (void) parse_fdw_encoding_option((char *) defGetString(def)); } } diff --git a/gpcontrib/gp_exttable_fdw/output/gp_exttable_fdw.source b/gpcontrib/gp_exttable_fdw/output/gp_exttable_fdw.source index a3191eb0853..8f863448265 100644 --- a/gpcontrib/gp_exttable_fdw/output/gp_exttable_fdw.source +++ b/gpcontrib/gp_exttable_fdw/output/gp_exttable_fdw.source @@ -52,12 +52,18 @@ OPTIONS (format_type 'c', delimiter ',', location_uris 'file://@hostname@@abs_srcdir@/data/tableless.csv', reject_limit_type 'p', reject_limit '120'); ERROR: segment reject limit in PERCENT must be between 1 and 100 (got 120) (seg1 127.0.0.1:7003 pid=5173) --- Error, invalid encoding +-- Error, invalid encoding (negative numeric ID) CREATE FOREIGN TABLE tableless_ext_fdw(a int, b int) SERVER gp_exttable_server OPTIONS (format_type 'c', delimiter ',', encoding '-1', location_uris 'file://@hostname@@abs_srcdir@/data/tableless.csv'); -ERROR: -1 is not a valid encoding code (seg0 127.0.0.1:7002 pid=8289) +ERROR: "-1" is not a valid encoding name or code +-- Error, invalid encoding (unknown name) +CREATE FOREIGN TABLE tableless_ext_fdw(a int, b int) +SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', encoding 'bogus', + location_uris 'file://@hostname@@abs_srcdir@/data/tableless.csv'); +ERROR: "bogus" is not a valid encoding name or code -- OK, no execute_on | log_errors | encoding | is_writable option CREATE FOREIGN TABLE tableless_ext_fdw(a int, b int) SERVER gp_exttable_server @@ -96,3 +102,82 @@ SELECT * FROM ext_special_uri ORDER BY a; 3 | 3 (3 rows) +-- =================================================================== +-- Tests for issue #1726: FDW OPTIONS encoding accepts both numeric IDs +-- and symbolic names (UTF8, utf-8, GBK, ...). Names previously parsed +-- via atoi() and silently degraded to SQL_ASCII. +-- =================================================================== +-- Numeric form (baseline; worked before the fix as well). +CREATE FOREIGN TABLE ext_enc_num (a int) SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', + location_uris 'file:///tmp/ext_enc_ignored.csv', + encoding '6'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_num'::regclass; + pg_encoding_to_char +--------------------- + UTF8 +(1 row) + +-- Symbolic name 'UTF8' — used to be silently SQL_ASCII (the bug). +CREATE FOREIGN TABLE ext_enc_utf8 (a int) SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', + location_uris 'file:///tmp/ext_enc_ignored.csv', + encoding 'UTF8'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_utf8'::regclass; + pg_encoding_to_char +--------------------- + UTF8 +(1 row) + +-- Case + dash variant resolved by pg_char_to_encoding(). +CREATE FOREIGN TABLE ext_enc_utf8_dash (a int) SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', + location_uris 'file:///tmp/ext_enc_ignored.csv', + encoding 'utf-8'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_utf8_dash'::regclass; + pg_encoding_to_char +--------------------- + UTF8 +(1 row) + +-- Non-UTF8 symbolic name. +CREATE FOREIGN TABLE ext_enc_gbk (a int) SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', + location_uris 'file:///tmp/ext_enc_ignored.csv', + encoding 'GBK'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_gbk'::regclass; + pg_encoding_to_char +--------------------- + GBK +(1 row) + +-- ALTER FOREIGN TABLE ... OPTIONS (SET encoding 'UTF8') — same code +-- path, this proves the read-side resolution works after an ALTER too. +CREATE FOREIGN TABLE ext_enc_alter (a int) SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', + location_uris 'file:///tmp/ext_enc_ignored.csv', + encoding '0'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_alter'::regclass; + pg_encoding_to_char +--------------------- + SQL_ASCII +(1 row) + +ALTER FOREIGN TABLE ext_enc_alter OPTIONS (SET encoding 'UTF8'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_alter'::regclass; + pg_encoding_to_char +--------------------- + UTF8 +(1 row) + +DROP FOREIGN TABLE ext_enc_num; +DROP FOREIGN TABLE ext_enc_utf8; +DROP FOREIGN TABLE ext_enc_utf8_dash; +DROP FOREIGN TABLE ext_enc_gbk; +DROP FOREIGN TABLE ext_enc_alter; diff --git a/src/backend/access/external/external.c b/src/backend/access/external/external.c index 62cff75db53..e42b4255af3 100644 --- a/src/backend/access/external/external.c +++ b/src/backend/access/external/external.c @@ -34,6 +34,47 @@ static List *create_external_scan_uri_list(ExtTableEntry *ext, bool *ismasteronly); +/* + * parse_fdw_encoding_option + * + * Parse the value of an "encoding" FDW OPTIONS entry (whether on creation, + * during validation, or when reading back stored ftoptions) into a numeric + * encoding ID. Accepts a symbolic encoding name (e.g. "UTF8", "utf-8", "GBK") + * resolved via pg_char_to_encoding(), or a strictly numeric string (e.g. "6") + * validated via PG_VALID_ENCODING(). Anything else raises ERROR. + * + * Note: atoi() is intentionally avoided in the numeric fallback. atoi("UTF8") + * silently returns 0 (= SQL_ASCII), which is exactly the bug this helper + * exists to fix. strtol() with end-of-string and range checks is strict. + */ +int +parse_fdw_encoding_option(const char *value) +{ + int encoding; + char *endptr; + long n; + + if (value == NULL || *value == '\0') + ereport(ERROR, + (errcode(ERRCODE_FDW_INVALID_ATTRIBUTE_VALUE), + errmsg("encoding option must not be empty"))); + + encoding = pg_char_to_encoding(value); + if (encoding >= 0) + return encoding; + + errno = 0; + n = strtol(value, &endptr, 10); + if (endptr != value && *endptr == '\0' && errno == 0 && + n >= 0 && PG_VALID_ENCODING((int) n)) + return (int) n; + + ereport(ERROR, + (errcode(ERRCODE_FDW_INVALID_ATTRIBUTE_VALUE), + errmsg("\"%s\" is not a valid encoding name or code", value))); + return -1; /* unreachable, keeps compiler happy */ +} + void gfile_printf_then_putc_newline(const char *format,...) { @@ -277,7 +318,7 @@ GetExtFromForeignTableOptions(List *ftoptons, Oid relid) if (pg_strcasecmp(def->defname, "encoding") == 0) { - extentry->encoding = atoi(defGetString(def)); + extentry->encoding = parse_fdw_encoding_option(defGetString(def)); encoding_found = true; continue; } diff --git a/src/include/access/external.h b/src/include/access/external.h index 35933f54f75..453c3d59179 100644 --- a/src/include/access/external.h +++ b/src/include/access/external.h @@ -48,5 +48,10 @@ extern ExtTableEntry *GetExtFromForeignTableOptions(List *ftoptons, Oid relid); extern ExternalScanInfo *MakeExternalScanInfo(ExtTableEntry *extEntry); +/* + * Parse an "encoding" FDW OPTIONS value (symbolic name or numeric string) + * into a numeric encoding ID. ereports on invalid input. + */ +extern int parse_fdw_encoding_option(const char *value); #endif /* EXTERNAL_H */