From 27988069375e5fc0414071850a395679425cfdb2 Mon Sep 17 00:00:00 2001 From: "Ahmad N. F." Date: Tue, 3 Jun 2025 16:33:48 +0700 Subject: [PATCH 1/2] feat: handle reserved keywords in GetOrderedColumns --- mc2mc/internal/client/odps.go | 53 ++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/mc2mc/internal/client/odps.go b/mc2mc/internal/client/odps.go index 46658e9..a9675c9 100644 --- a/mc2mc/internal/client/odps.go +++ b/mc2mc/internal/client/odps.go @@ -21,6 +21,48 @@ type odpsClient struct { isDryRun bool } +var ( + // reserved keywords https://www.alibabacloud.com/help/en/maxcompute/user-guide/reserved-words-and-keywords + reservedKeywords = []string{ + "add", "after", "all", "alter", "analyze", "and", "archive", "array", "as", "asc", + "before", "between", "bigint", "binary", "blob", "boolean", "both", "decimal", + "bucket", "buckets", "by", "cascade", "case", "cast", "cfile", "change", "cluster", + "clustered", "clusterstatus", "collection", "column", "columns", "comment", "compute", + "concatenate", "continue", "create", "cross", "current", "cursor", "data", "database", + "databases", "date", "datetime", "dbproperties", "deferred", "delete", "delimited", + "desc", "describe", "directory", "disable", "distinct", "distribute", "double", "drop", + "else", "enable", "end", "except", "escaped", "exclusive", "exists", "explain", "export", + "extended", "external", "false", "fetch", "fields", "fileformat", "first", "float", + "following", "format", "formatted", "from", "full", "function", "functions", "grant", + "group", "having", "hold_ddltime", "idxproperties", "if", "import", "in", "index", + "indexes", "inpath", "inputdriver", "inputformat", "insert", "int", "intersect", "into", + "is", "items", "join", "keys", "lateral", "left", "lifecycle", "like", "limit", "lines", + "load", "local", "location", "lock", "locks", "long", "map", "mapjoin", "materialized", + "minus", "msck", "not", "no_drop", "null", "of", "offline", "offset", "on", "option", + "or", "order", "out", "outer", "outputdriver", "outputformat", "over", "overwrite", + "partition", "partitioned", "partitionproperties", "partitions", "percent", "plus", + "preceding", "preserve", "procedure", "purge", "range", "rcfile", "read", "readonly", + "reads", "rebuild", "recordreader", "recordwriter", "reduce", "regexp", "rename", + "repair", "replace", "restrict", "revoke", "right", "rlike", "row", "rows", "schema", + "schemas", "select", "semi", "sequencefile", "serde", "serdeproperties", "set", "shared", + "show", "show_database", "smallint", "sort", "sorted", "ssl", "statistics", "status", + "stored", "streamtable", "string", "struct", "table", "tables", "tablesample", + "tblproperties", "temporary", "terminated", "textfile", "then", "timestamp", "tinyint", + "to", "touch", "transform", "trigger", "true", "type", "unarchive", "unbounded", "undo", + "union", "uniontype", "uniquejoin", "unlock", "unsigned", "update", "use", "using", + "utc", "utc_timestamp", "view", "when", "where", "while", "div", + } + + reservedKeywordsMap map[string]bool +) + +func init() { + reservedKeywordsMap = make(map[string]bool, len(reservedKeywords)) + for _, keyword := range reservedKeywords { + reservedKeywordsMap[keyword] = true + } +} + // NewODPSClient creates a new odpsClient instance func NewODPSClient(logger *slog.Logger, client *odps.Odps) *odpsClient { return &odpsClient{ @@ -95,6 +137,15 @@ func (c *odpsClient) GetPartitionNames(_ context.Context, tableID string) ([]str return partitionNames, nil } +func sanitizeColumnName(columnName string) string { + // if column name is a reserved keyword, add backticks around it + if _, ok := reservedKeywordsMap[strings.ToLower(columnName)]; ok { + return fmt.Sprintf("`%s`", columnName) + } + + return columnName +} + // GetOrderedColumns returns the ordered column names of the given table // by querying the table schema. func (c *odpsClient) GetOrderedColumns(tableID string) ([]string, error) { @@ -104,7 +155,7 @@ func (c *odpsClient) GetOrderedColumns(tableID string) ([]string, error) { } var columnNames []string for _, column := range table.Schema().Columns { - columnNames = append(columnNames, column.Name) + columnNames = append(columnNames, sanitizeColumnName(column.Name)) } return columnNames, nil From 99e01251d1b76cdcae23b741ebce535246ac7259 Mon Sep 17 00:00:00 2001 From: "Ahmad N. F." Date: Wed, 4 Jun 2025 07:52:49 +0700 Subject: [PATCH 2/2] move to new helper file --- mc2mc/internal/client/helper.go | 57 +++++++++++++++++++++++++++++++++ mc2mc/internal/client/odps.go | 51 ----------------------------- 2 files changed, 57 insertions(+), 51 deletions(-) create mode 100644 mc2mc/internal/client/helper.go diff --git a/mc2mc/internal/client/helper.go b/mc2mc/internal/client/helper.go new file mode 100644 index 0000000..add750f --- /dev/null +++ b/mc2mc/internal/client/helper.go @@ -0,0 +1,57 @@ +package client + +import ( + "fmt" + "strings" +) + +var ( + // reserved keywords https://www.alibabacloud.com/help/en/maxcompute/user-guide/reserved-words-and-keywords + reservedKeywords = []string{ + "add", "after", "all", "alter", "analyze", "and", "archive", "array", "as", "asc", + "before", "between", "bigint", "binary", "blob", "boolean", "both", "decimal", + "bucket", "buckets", "by", "cascade", "case", "cast", "cfile", "change", "cluster", + "clustered", "clusterstatus", "collection", "column", "columns", "comment", "compute", + "concatenate", "continue", "create", "cross", "current", "cursor", "data", "database", + "databases", "date", "datetime", "dbproperties", "deferred", "delete", "delimited", + "desc", "describe", "directory", "disable", "distinct", "distribute", "double", "drop", + "else", "enable", "end", "except", "escaped", "exclusive", "exists", "explain", "export", + "extended", "external", "false", "fetch", "fields", "fileformat", "first", "float", + "following", "format", "formatted", "from", "full", "function", "functions", "grant", + "group", "having", "hold_ddltime", "idxproperties", "if", "import", "in", "index", + "indexes", "inpath", "inputdriver", "inputformat", "insert", "int", "intersect", "into", + "is", "items", "join", "keys", "lateral", "left", "lifecycle", "like", "limit", "lines", + "load", "local", "location", "lock", "locks", "long", "map", "mapjoin", "materialized", + "minus", "msck", "not", "no_drop", "null", "of", "offline", "offset", "on", "option", + "or", "order", "out", "outer", "outputdriver", "outputformat", "over", "overwrite", + "partition", "partitioned", "partitionproperties", "partitions", "percent", "plus", + "preceding", "preserve", "procedure", "purge", "range", "rcfile", "read", "readonly", + "reads", "rebuild", "recordreader", "recordwriter", "reduce", "regexp", "rename", + "repair", "replace", "restrict", "revoke", "right", "rlike", "row", "rows", "schema", + "schemas", "select", "semi", "sequencefile", "serde", "serdeproperties", "set", "shared", + "show", "show_database", "smallint", "sort", "sorted", "ssl", "statistics", "status", + "stored", "streamtable", "string", "struct", "table", "tables", "tablesample", + "tblproperties", "temporary", "terminated", "textfile", "then", "timestamp", "tinyint", + "to", "touch", "transform", "trigger", "true", "type", "unarchive", "unbounded", "undo", + "union", "uniontype", "uniquejoin", "unlock", "unsigned", "update", "use", "using", + "utc", "utc_timestamp", "view", "when", "where", "while", "div", + } + + reservedKeywordsMap map[string]bool +) + +func init() { + reservedKeywordsMap = make(map[string]bool, len(reservedKeywords)) + for _, keyword := range reservedKeywords { + reservedKeywordsMap[keyword] = true + } +} + +func sanitizeColumnName(columnName string) string { + // if column name is a reserved keyword, add backticks around it + if _, ok := reservedKeywordsMap[strings.ToLower(columnName)]; ok { + return fmt.Sprintf("`%s`", columnName) + } + + return columnName +} diff --git a/mc2mc/internal/client/odps.go b/mc2mc/internal/client/odps.go index a9675c9..e90d6cf 100644 --- a/mc2mc/internal/client/odps.go +++ b/mc2mc/internal/client/odps.go @@ -21,48 +21,6 @@ type odpsClient struct { isDryRun bool } -var ( - // reserved keywords https://www.alibabacloud.com/help/en/maxcompute/user-guide/reserved-words-and-keywords - reservedKeywords = []string{ - "add", "after", "all", "alter", "analyze", "and", "archive", "array", "as", "asc", - "before", "between", "bigint", "binary", "blob", "boolean", "both", "decimal", - "bucket", "buckets", "by", "cascade", "case", "cast", "cfile", "change", "cluster", - "clustered", "clusterstatus", "collection", "column", "columns", "comment", "compute", - "concatenate", "continue", "create", "cross", "current", "cursor", "data", "database", - "databases", "date", "datetime", "dbproperties", "deferred", "delete", "delimited", - "desc", "describe", "directory", "disable", "distinct", "distribute", "double", "drop", - "else", "enable", "end", "except", "escaped", "exclusive", "exists", "explain", "export", - "extended", "external", "false", "fetch", "fields", "fileformat", "first", "float", - "following", "format", "formatted", "from", "full", "function", "functions", "grant", - "group", "having", "hold_ddltime", "idxproperties", "if", "import", "in", "index", - "indexes", "inpath", "inputdriver", "inputformat", "insert", "int", "intersect", "into", - "is", "items", "join", "keys", "lateral", "left", "lifecycle", "like", "limit", "lines", - "load", "local", "location", "lock", "locks", "long", "map", "mapjoin", "materialized", - "minus", "msck", "not", "no_drop", "null", "of", "offline", "offset", "on", "option", - "or", "order", "out", "outer", "outputdriver", "outputformat", "over", "overwrite", - "partition", "partitioned", "partitionproperties", "partitions", "percent", "plus", - "preceding", "preserve", "procedure", "purge", "range", "rcfile", "read", "readonly", - "reads", "rebuild", "recordreader", "recordwriter", "reduce", "regexp", "rename", - "repair", "replace", "restrict", "revoke", "right", "rlike", "row", "rows", "schema", - "schemas", "select", "semi", "sequencefile", "serde", "serdeproperties", "set", "shared", - "show", "show_database", "smallint", "sort", "sorted", "ssl", "statistics", "status", - "stored", "streamtable", "string", "struct", "table", "tables", "tablesample", - "tblproperties", "temporary", "terminated", "textfile", "then", "timestamp", "tinyint", - "to", "touch", "transform", "trigger", "true", "type", "unarchive", "unbounded", "undo", - "union", "uniontype", "uniquejoin", "unlock", "unsigned", "update", "use", "using", - "utc", "utc_timestamp", "view", "when", "where", "while", "div", - } - - reservedKeywordsMap map[string]bool -) - -func init() { - reservedKeywordsMap = make(map[string]bool, len(reservedKeywords)) - for _, keyword := range reservedKeywords { - reservedKeywordsMap[keyword] = true - } -} - // NewODPSClient creates a new odpsClient instance func NewODPSClient(logger *slog.Logger, client *odps.Odps) *odpsClient { return &odpsClient{ @@ -137,15 +95,6 @@ func (c *odpsClient) GetPartitionNames(_ context.Context, tableID string) ([]str return partitionNames, nil } -func sanitizeColumnName(columnName string) string { - // if column name is a reserved keyword, add backticks around it - if _, ok := reservedKeywordsMap[strings.ToLower(columnName)]; ok { - return fmt.Sprintf("`%s`", columnName) - } - - return columnName -} - // GetOrderedColumns returns the ordered column names of the given table // by querying the table schema. func (c *odpsClient) GetOrderedColumns(tableID string) ([]string, error) {