Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 63 additions & 1 deletion gpcontrib/gp_exttable_fdw/input/gp_exttable_fdw.source
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,18 @@ OPTIONS (format_type 'c', delimiter ',',
location_uris 'file://@hostname@@abs_srcdir@/data/tableless.csv',
reject_limit_type 'p', reject_limit '120');

-- Error, invalid encoding
-- Error, invalid encoding (negative numeric ID)
CREATE FOREIGN TABLE tableless_ext_fdw(a int, b int)
SERVER gp_exttable_server
OPTIONS (format_type 'c', delimiter ',', encoding '-1',
location_uris 'file://@hostname@@abs_srcdir@/data/tableless.csv');

-- Error, invalid encoding (unknown name)
CREATE FOREIGN TABLE tableless_ext_fdw(a int, b int)
SERVER gp_exttable_server
OPTIONS (format_type 'c', delimiter ',', encoding 'bogus',
location_uris 'file://@hostname@@abs_srcdir@/data/tableless.csv');

-- OK, no execute_on | log_errors | encoding | is_writable option
CREATE FOREIGN TABLE tableless_ext_fdw(a int, b int)
SERVER gp_exttable_server
Expand All @@ -79,3 +85,59 @@ SELECT urilocation FROM pg_exttable WHERE reloid = 'public.ext_special_uri'::reg
SELECT ftoptions FROM pg_foreign_table WHERE ftrelid='public.ext_special_uri'::regclass;
\a
SELECT * FROM ext_special_uri ORDER BY a;

-- ===================================================================
-- Tests for issue #1726: FDW OPTIONS encoding accepts both numeric IDs
-- and symbolic names (UTF8, utf-8, GBK, ...). Names previously parsed
-- via atoi() and silently degraded to SQL_ASCII.
-- ===================================================================

-- Numeric form (baseline; worked before the fix as well).
CREATE FOREIGN TABLE ext_enc_num (a int) SERVER gp_exttable_server
OPTIONS (format_type 'c', delimiter ',',
location_uris 'file:///tmp/ext_enc_ignored.csv',
encoding '6');
SELECT pg_encoding_to_char(encoding) FROM pg_exttable
WHERE reloid = 'ext_enc_num'::regclass;

-- Symbolic name 'UTF8' — used to be silently SQL_ASCII (the bug).
CREATE FOREIGN TABLE ext_enc_utf8 (a int) SERVER gp_exttable_server
OPTIONS (format_type 'c', delimiter ',',
location_uris 'file:///tmp/ext_enc_ignored.csv',
encoding 'UTF8');
SELECT pg_encoding_to_char(encoding) FROM pg_exttable
WHERE reloid = 'ext_enc_utf8'::regclass;

-- Case + dash variant resolved by pg_char_to_encoding().
CREATE FOREIGN TABLE ext_enc_utf8_dash (a int) SERVER gp_exttable_server
OPTIONS (format_type 'c', delimiter ',',
location_uris 'file:///tmp/ext_enc_ignored.csv',
encoding 'utf-8');
SELECT pg_encoding_to_char(encoding) FROM pg_exttable
WHERE reloid = 'ext_enc_utf8_dash'::regclass;

-- Non-UTF8 symbolic name.
CREATE FOREIGN TABLE ext_enc_gbk (a int) SERVER gp_exttable_server
OPTIONS (format_type 'c', delimiter ',',
location_uris 'file:///tmp/ext_enc_ignored.csv',
encoding 'GBK');
SELECT pg_encoding_to_char(encoding) FROM pg_exttable
WHERE reloid = 'ext_enc_gbk'::regclass;

-- ALTER FOREIGN TABLE ... OPTIONS (SET encoding 'UTF8') — same code
-- path, this proves the read-side resolution works after an ALTER too.
CREATE FOREIGN TABLE ext_enc_alter (a int) SERVER gp_exttable_server
OPTIONS (format_type 'c', delimiter ',',
location_uris 'file:///tmp/ext_enc_ignored.csv',
encoding '0');
SELECT pg_encoding_to_char(encoding) FROM pg_exttable
WHERE reloid = 'ext_enc_alter'::regclass;
ALTER FOREIGN TABLE ext_enc_alter OPTIONS (SET encoding 'UTF8');
SELECT pg_encoding_to_char(encoding) FROM pg_exttable
WHERE reloid = 'ext_enc_alter'::regclass;

DROP FOREIGN TABLE ext_enc_num;
DROP FOREIGN TABLE ext_enc_utf8;
DROP FOREIGN TABLE ext_enc_utf8_dash;
DROP FOREIGN TABLE ext_enc_gbk;
DROP FOREIGN TABLE ext_enc_alter;
12 changes: 7 additions & 5 deletions gpcontrib/gp_exttable_fdw/option.c
Original file line number Diff line number Diff line change
Expand Up @@ -135,11 +135,13 @@ gp_exttable_permission_check(PG_FUNCTION_ARGS)
}
else if(pg_strcasecmp(def->defname, "encoding") == 0)
{
char *encoding = (char *) defGetString(def);
if (!PG_VALID_ENCODING(atoi(encoding)))
ereport(ERROR,
(errcode(ERRCODE_FDW_INVALID_ATTRIBUTE_VALUE),
errmsg("%s is not a valid encoding code", encoding)));
/*
* Accept either a symbolic encoding name (e.g. 'UTF8', 'GBK')
* or a numeric encoding ID. Reject anything else explicitly,
* rather than letting atoi() silently mistranslate non-numeric
* names to SQL_ASCII.
*/
(void) parse_fdw_encoding_option((char *) defGetString(def));
}
}

Expand Down
89 changes: 87 additions & 2 deletions gpcontrib/gp_exttable_fdw/output/gp_exttable_fdw.source
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,18 @@ OPTIONS (format_type 'c', delimiter ',',
location_uris 'file://@hostname@@abs_srcdir@/data/tableless.csv',
reject_limit_type 'p', reject_limit '120');
ERROR: segment reject limit in PERCENT must be between 1 and 100 (got 120) (seg1 127.0.0.1:7003 pid=5173)
-- Error, invalid encoding
-- Error, invalid encoding (negative numeric ID)
CREATE FOREIGN TABLE tableless_ext_fdw(a int, b int)
SERVER gp_exttable_server
OPTIONS (format_type 'c', delimiter ',', encoding '-1',
location_uris 'file://@hostname@@abs_srcdir@/data/tableless.csv');
ERROR: -1 is not a valid encoding code (seg0 127.0.0.1:7002 pid=8289)
ERROR: "-1" is not a valid encoding name or code
-- Error, invalid encoding (unknown name)
CREATE FOREIGN TABLE tableless_ext_fdw(a int, b int)
SERVER gp_exttable_server
OPTIONS (format_type 'c', delimiter ',', encoding 'bogus',
location_uris 'file://@hostname@@abs_srcdir@/data/tableless.csv');
ERROR: "bogus" is not a valid encoding name or code
-- OK, no execute_on | log_errors | encoding | is_writable option
CREATE FOREIGN TABLE tableless_ext_fdw(a int, b int)
SERVER gp_exttable_server
Expand Down Expand Up @@ -96,3 +102,82 @@ SELECT * FROM ext_special_uri ORDER BY a;
3 | 3
(3 rows)

-- ===================================================================
-- Tests for issue #1726: FDW OPTIONS encoding accepts both numeric IDs
-- and symbolic names (UTF8, utf-8, GBK, ...). Names previously parsed
-- via atoi() and silently degraded to SQL_ASCII.
-- ===================================================================
-- Numeric form (baseline; worked before the fix as well).
CREATE FOREIGN TABLE ext_enc_num (a int) SERVER gp_exttable_server
OPTIONS (format_type 'c', delimiter ',',
location_uris 'file:///tmp/ext_enc_ignored.csv',
encoding '6');
SELECT pg_encoding_to_char(encoding) FROM pg_exttable
WHERE reloid = 'ext_enc_num'::regclass;
pg_encoding_to_char
---------------------
UTF8
(1 row)

-- Symbolic name 'UTF8' — used to be silently SQL_ASCII (the bug).
CREATE FOREIGN TABLE ext_enc_utf8 (a int) SERVER gp_exttable_server
OPTIONS (format_type 'c', delimiter ',',
location_uris 'file:///tmp/ext_enc_ignored.csv',
encoding 'UTF8');
SELECT pg_encoding_to_char(encoding) FROM pg_exttable
WHERE reloid = 'ext_enc_utf8'::regclass;
pg_encoding_to_char
---------------------
UTF8
(1 row)

-- Case + dash variant resolved by pg_char_to_encoding().
CREATE FOREIGN TABLE ext_enc_utf8_dash (a int) SERVER gp_exttable_server
OPTIONS (format_type 'c', delimiter ',',
location_uris 'file:///tmp/ext_enc_ignored.csv',
encoding 'utf-8');
SELECT pg_encoding_to_char(encoding) FROM pg_exttable
WHERE reloid = 'ext_enc_utf8_dash'::regclass;
pg_encoding_to_char
---------------------
UTF8
(1 row)

-- Non-UTF8 symbolic name.
CREATE FOREIGN TABLE ext_enc_gbk (a int) SERVER gp_exttable_server
OPTIONS (format_type 'c', delimiter ',',
location_uris 'file:///tmp/ext_enc_ignored.csv',
encoding 'GBK');
SELECT pg_encoding_to_char(encoding) FROM pg_exttable
WHERE reloid = 'ext_enc_gbk'::regclass;
pg_encoding_to_char
---------------------
GBK
(1 row)

-- ALTER FOREIGN TABLE ... OPTIONS (SET encoding 'UTF8') — same code
-- path, this proves the read-side resolution works after an ALTER too.
CREATE FOREIGN TABLE ext_enc_alter (a int) SERVER gp_exttable_server
OPTIONS (format_type 'c', delimiter ',',
location_uris 'file:///tmp/ext_enc_ignored.csv',
encoding '0');
SELECT pg_encoding_to_char(encoding) FROM pg_exttable
WHERE reloid = 'ext_enc_alter'::regclass;
pg_encoding_to_char
---------------------
SQL_ASCII
(1 row)

ALTER FOREIGN TABLE ext_enc_alter OPTIONS (SET encoding 'UTF8');
SELECT pg_encoding_to_char(encoding) FROM pg_exttable
WHERE reloid = 'ext_enc_alter'::regclass;
pg_encoding_to_char
---------------------
UTF8
(1 row)

DROP FOREIGN TABLE ext_enc_num;
DROP FOREIGN TABLE ext_enc_utf8;
DROP FOREIGN TABLE ext_enc_utf8_dash;
DROP FOREIGN TABLE ext_enc_gbk;
DROP FOREIGN TABLE ext_enc_alter;
43 changes: 42 additions & 1 deletion src/backend/access/external/external.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,47 @@

static List *create_external_scan_uri_list(ExtTableEntry *ext, bool *ismasteronly);

/*
* parse_fdw_encoding_option
*
* Parse the value of an "encoding" FDW OPTIONS entry (whether on creation,
* during validation, or when reading back stored ftoptions) into a numeric
* encoding ID. Accepts a symbolic encoding name (e.g. "UTF8", "utf-8", "GBK")
* resolved via pg_char_to_encoding(), or a strictly numeric string (e.g. "6")
* validated via PG_VALID_ENCODING(). Anything else raises ERROR.
*
* Note: atoi() is intentionally avoided in the numeric fallback. atoi("UTF8")
* silently returns 0 (= SQL_ASCII), which is exactly the bug this helper
* exists to fix. strtol() with end-of-string and range checks is strict.
*/
int
parse_fdw_encoding_option(const char *value)
{
int encoding;
char *endptr;
long n;

if (value == NULL || *value == '\0')
ereport(ERROR,
(errcode(ERRCODE_FDW_INVALID_ATTRIBUTE_VALUE),
errmsg("encoding option must not be empty")));

encoding = pg_char_to_encoding(value);
if (encoding >= 0)
return encoding;

errno = 0;
n = strtol(value, &endptr, 10);
if (endptr != value && *endptr == '\0' && errno == 0 &&
n >= 0 && PG_VALID_ENCODING((int) n))
return (int) n;

ereport(ERROR,
(errcode(ERRCODE_FDW_INVALID_ATTRIBUTE_VALUE),
errmsg("\"%s\" is not a valid encoding name or code", value)));
return -1; /* unreachable, keeps compiler happy */
}

void
gfile_printf_then_putc_newline(const char *format,...)
{
Expand Down Expand Up @@ -277,7 +318,7 @@ GetExtFromForeignTableOptions(List *ftoptons, Oid relid)

if (pg_strcasecmp(def->defname, "encoding") == 0)
{
extentry->encoding = atoi(defGetString(def));
extentry->encoding = parse_fdw_encoding_option(defGetString(def));
encoding_found = true;
continue;
}
Expand Down
5 changes: 5 additions & 0 deletions src/include/access/external.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,5 +48,10 @@ extern ExtTableEntry *GetExtFromForeignTableOptions(List *ftoptons, Oid relid);

extern ExternalScanInfo *MakeExternalScanInfo(ExtTableEntry *extEntry);

/*
* Parse an "encoding" FDW OPTIONS value (symbolic name or numeric string)
* into a numeric encoding ID. ereports on invalid input.
*/
extern int parse_fdw_encoding_option(const char *value);

#endif /* EXTERNAL_H */