From 8f6e6f6f98483f52df2550f5ec0fa317b5ca36f9 Mon Sep 17 00:00:00 2001 From: Dery Rahman Ahaddienata Date: Tue, 27 May 2025 12:42:35 +0700 Subject: [PATCH 1/4] fix: ddl without variable --- mc2mc/internal/query/builder.go | 2 +- mc2mc/internal/query/builder_test.go | 52 ++++++++++++++++++++++++---- mc2mc/internal/query/helper.go | 23 ++++++------ 3 files changed, 59 insertions(+), 18 deletions(-) diff --git a/mc2mc/internal/query/builder.go b/mc2mc/internal/query/builder.go index a2a9ac0..8678a0e 100644 --- a/mc2mc/internal/query/builder.go +++ b/mc2mc/internal/query/builder.go @@ -200,7 +200,7 @@ func (b *Builder) constructMergeQuery(hrs, vars, queries []string) string { if headers != "" { builder.WriteString(fmt.Sprintf("%s\n", headers)) } - if variables != "" { + if variables != "" && !IsDDL(q) { // skip variables if it's ddl builder.WriteString(fmt.Sprintf("%s\n", variables)) } builder.WriteString(fmt.Sprintf("%s\n;", q)) diff --git a/mc2mc/internal/query/builder_test.go b/mc2mc/internal/query/builder_test.go index 28862d2..fc1be6f 100644 --- a/mc2mc/internal/query/builder_test.go +++ b/mc2mc/internal/query/builder_test.go @@ -475,7 +475,7 @@ SET append_test.id = 2;` DROP TABLE IF EXISTS append_tmp; @src := SELECT 1 id; -CREATE TABLE append_tmp AS SELECT * FROM @src; +CREATE TABLE append_tmp AS SELECT * FROM sample_table; MERGE INTO append_test USING (SELECT * FROM @src) source @@ -498,9 +498,7 @@ DROP TABLE IF EXISTS append_tmp --*--optimus-break-marker--*-- SET odps.table.append2.enable=true ; -@src := SELECT 1 id -; -CREATE TABLE append_tmp AS SELECT * FROM @src +CREATE TABLE append_tmp AS SELECT * FROM sample_table ; --*--optimus-break-marker--*-- SET odps.table.append2.enable=true @@ -726,7 +724,7 @@ SET append_test.id = 2 DROP TABLE IF EXISTS append_tmp; @src := SELECT 1 id; -CREATE TABLE append_tmp AS SELECT * FROM @src; +CREATE TABLE append_tmp AS SELECT * FROM sample_table; @src2 := SELECT id FROM append_tmp; @@ -750,9 +748,51 @@ DROP TABLE IF EXISTS append_tmp --*--optimus-break-marker--*-- SET odps.table.append2.enable=true ; +CREATE TABLE append_tmp AS SELECT * FROM sample_table +; +--*--optimus-break-marker--*-- +SET odps.table.append2.enable=true +; @src := SELECT 1 id ; -CREATE TABLE append_tmp AS SELECT * FROM @src +@src2 := SELECT id FROM append_tmp +; +MERGE INTO append_test +USING (SELECT * FROM @src2) source +on append_test.id = source.id +WHEN MATCHED THEN UPDATE +SET append_test.id = 2 +;`, query) + }) + t.Run("returns query for merge load method with correct ddl ordering", func(t *testing.T) { + queryToExecute := `SET odps.table.append2.enable=true; +@src := SELECT 1 id; +@src2 := SELECT id FROM append_tmp; +DROP TABLE IF EXISTS append_tmp; + +CREATE TABLE append_tmp AS SELECT * FROM sample_table; + +MERGE INTO append_test +USING (SELECT * FROM @src2) source +on append_test.id = source.id +WHEN MATCHED THEN UPDATE +SET append_test.id = 2;` + odspClient := &mockOdpsClient{} + query, err := query.NewBuilder( + logger.NewDefaultLogger(), + odspClient, + query.WithQuery(queryToExecute), + query.WithMethod(query.MERGE), + ).Build() + assert.NoError(t, err) + assert.Equal(t, `SET odps.table.append2.enable=true +; +DROP TABLE IF EXISTS append_tmp +; +--*--optimus-break-marker--*-- +SET odps.table.append2.enable=true +; +CREATE TABLE append_tmp AS SELECT * FROM sample_table ; --*--optimus-break-marker--*-- SET odps.table.append2.enable=true diff --git a/mc2mc/internal/query/helper.go b/mc2mc/internal/query/helper.go index db03183..d97bed7 100644 --- a/mc2mc/internal/query/helper.go +++ b/mc2mc/internal/query/helper.go @@ -11,15 +11,15 @@ const ( ) var ( - semicolonPattern = regexp.MustCompile(`;\s*(\n+|$)`) // regex to match semicolons - commentPattern = regexp.MustCompile(`--[^\n]*`) // regex to match comments - multiCommentPattern = regexp.MustCompile(`(?s)/\*.*?\*/`) // regex to match multi-line comments - headerPattern = regexp.MustCompile(`(?i)^set`) // regex to match header statements - variablePattern = regexp.MustCompile(`(?i)^@`) // regex to match variable statements - dropPattern = regexp.MustCompile(`(?i)^DROP\s+`) // regex to match DROP statements - udfPattern = regexp.MustCompile(`(?i)^function\s+`) // regex to match UDF statements - ddlPattern = regexp.MustCompile(`(?i)^CREATE\s+`) // regex to match DDL statements - stringPattern = regexp.MustCompile(`'[^']*'`) // regex to match SQL strings (anything inside single quotes) + semicolonPattern = regexp.MustCompile(`;\s*(\n+|$)`) // regex to match semicolons + commentPattern = regexp.MustCompile(`--[^\n]*`) // regex to match comments + multiCommentPattern = regexp.MustCompile(`(?s)/\*.*?\*/`) // regex to match multi-line comments + headerPattern = regexp.MustCompile(`(?i)^set`) // regex to match header statements + variablePattern = regexp.MustCompile(`(?i)^@`) // regex to match variable statements + dropPattern = regexp.MustCompile(`(?i)^DROP\s+`) // regex to match DROP statements + udfPattern = regexp.MustCompile(`(?i)^function\s+`) // regex to match UDF statements + ddlPattern = regexp.MustCompile(`(?i)^(CREATE|ALTER|DROP|TRUNCATE)\s+`) // regex to match DDL statements + stringPattern = regexp.MustCompile(`'[^']*'`) // regex to match SQL strings (anything inside single quotes) ) func SplitQueryComponents(query string) (headers []string, varsUDFs []string, queries []string) { @@ -227,6 +227,7 @@ func RestoreStringLiteral(query string, placeholders map[string]string) string { return query } -func IsDDL(query string) bool { - return ddlPattern.MatchString(query) +func IsDDL(stmt string) bool { + stmtWithoutComment := RemoveComments(stmt) + return ddlPattern.MatchString(strings.TrimSpace(stmtWithoutComment)) } From ca1d9f31ec9042507d7c436076bc9e6318d55ed1 Mon Sep 17 00:00:00 2001 From: Dery Rahman Ahaddienata Date: Tue, 27 May 2025 16:46:10 +0700 Subject: [PATCH 2/4] fix: create statement as dml --- mc2mc/internal/query/builder_test.go | 28 ++++++++++++++++++++++------ mc2mc/internal/query/helper.go | 4 +++- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/mc2mc/internal/query/builder_test.go b/mc2mc/internal/query/builder_test.go index fc1be6f..8120e42 100644 --- a/mc2mc/internal/query/builder_test.go +++ b/mc2mc/internal/query/builder_test.go @@ -475,7 +475,7 @@ SET append_test.id = 2;` DROP TABLE IF EXISTS append_tmp; @src := SELECT 1 id; -CREATE TABLE append_tmp AS SELECT * FROM sample_table; +CREATE TABLE append_tmp AS SELECT * FROM @src; MERGE INTO append_test USING (SELECT * FROM @src) source @@ -498,7 +498,9 @@ DROP TABLE IF EXISTS append_tmp --*--optimus-break-marker--*-- SET odps.table.append2.enable=true ; -CREATE TABLE append_tmp AS SELECT * FROM sample_table +@src := SELECT 1 id +; +CREATE TABLE append_tmp AS SELECT * FROM @src ; --*--optimus-break-marker--*-- SET odps.table.append2.enable=true @@ -724,7 +726,7 @@ SET append_test.id = 2 DROP TABLE IF EXISTS append_tmp; @src := SELECT 1 id; -CREATE TABLE append_tmp AS SELECT * FROM sample_table; +CREATE TABLE append_tmp AS SELECT * FROM @src; @src2 := SELECT id FROM append_tmp; @@ -748,7 +750,9 @@ DROP TABLE IF EXISTS append_tmp --*--optimus-break-marker--*-- SET odps.table.append2.enable=true ; -CREATE TABLE append_tmp AS SELECT * FROM sample_table +@src := SELECT 1 id +; +CREATE TABLE append_tmp AS SELECT * FROM @src ; --*--optimus-break-marker--*-- SET odps.table.append2.enable=true @@ -767,10 +771,13 @@ SET append_test.id = 2 t.Run("returns query for merge load method with correct ddl ordering", func(t *testing.T) { queryToExecute := `SET odps.table.append2.enable=true; @src := SELECT 1 id; + @src2 := SELECT id FROM append_tmp; DROP TABLE IF EXISTS append_tmp; -CREATE TABLE append_tmp AS SELECT * FROM sample_table; +CREATE TABLE append_tmp AS SELECT * FROM @src; + +CREATE TABLE append_tmp2(id bigint); MERGE INTO append_test USING (SELECT * FROM @src2) source @@ -792,7 +799,16 @@ DROP TABLE IF EXISTS append_tmp --*--optimus-break-marker--*-- SET odps.table.append2.enable=true ; -CREATE TABLE append_tmp AS SELECT * FROM sample_table +@src := SELECT 1 id +; +@src2 := SELECT id FROM append_tmp +; +CREATE TABLE append_tmp AS SELECT * FROM @src +; +--*--optimus-break-marker--*-- +SET odps.table.append2.enable=true +; +CREATE TABLE append_tmp2(id bigint) ; --*--optimus-break-marker--*-- SET odps.table.append2.enable=true diff --git a/mc2mc/internal/query/helper.go b/mc2mc/internal/query/helper.go index d97bed7..af7aeff 100644 --- a/mc2mc/internal/query/helper.go +++ b/mc2mc/internal/query/helper.go @@ -20,6 +20,8 @@ var ( udfPattern = regexp.MustCompile(`(?i)^function\s+`) // regex to match UDF statements ddlPattern = regexp.MustCompile(`(?i)^(CREATE|ALTER|DROP|TRUNCATE)\s+`) // regex to match DDL statements stringPattern = regexp.MustCompile(`'[^']*'`) // regex to match SQL strings (anything inside single quotes) + // special case for DML CREATE TABLE statements + dmlCreatePattern = regexp.MustCompile(`(?i)^CREATE\s+TABLE\s+(IF\s+NOT\s+EXISTS\s+)?[^\s]+\s+(LIFECYCLE\s+\d+\s+)?AS\s+`) // regex to match DML CREATE TABLE statements ) func SplitQueryComponents(query string) (headers []string, varsUDFs []string, queries []string) { @@ -229,5 +231,5 @@ func RestoreStringLiteral(query string, placeholders map[string]string) string { func IsDDL(stmt string) bool { stmtWithoutComment := RemoveComments(stmt) - return ddlPattern.MatchString(strings.TrimSpace(stmtWithoutComment)) + return ddlPattern.MatchString(strings.TrimSpace(stmtWithoutComment)) && !dmlCreatePattern.MatchString(strings.TrimSpace(stmtWithoutComment)) } From 2d71a1e6846d9d316752b6e566750e218f2f6611 Mon Sep 17 00:00:00 2001 From: Dery Rahman Ahaddienata Date: Tue, 27 May 2025 17:16:03 +0700 Subject: [PATCH 3/4] fix: create statement as dml with properties --- mc2mc/internal/query/helper.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mc2mc/internal/query/helper.go b/mc2mc/internal/query/helper.go index af7aeff..3096f49 100644 --- a/mc2mc/internal/query/helper.go +++ b/mc2mc/internal/query/helper.go @@ -21,7 +21,7 @@ var ( ddlPattern = regexp.MustCompile(`(?i)^(CREATE|ALTER|DROP|TRUNCATE)\s+`) // regex to match DDL statements stringPattern = regexp.MustCompile(`'[^']*'`) // regex to match SQL strings (anything inside single quotes) // special case for DML CREATE TABLE statements - dmlCreatePattern = regexp.MustCompile(`(?i)^CREATE\s+TABLE\s+(IF\s+NOT\s+EXISTS\s+)?[^\s]+\s+(LIFECYCLE\s+\d+\s+)?AS\s+`) // regex to match DML CREATE TABLE statements + dmlCreatePattern = regexp.MustCompile(`(?i)^CREATE\s+TABLE\s+(IF\s+NOT\s+EXISTS\s+)?[^\s]+\s+(TBLPROPERTIES\s*\([^\)]+\)\s+)?(LIFECYCLE\s+\d+\s+)?AS\s+`) // regex to match DML CREATE TABLE statements ) func SplitQueryComponents(query string) (headers []string, varsUDFs []string, queries []string) { From e4e2dea543bdb6889b15b7f7f066b7cf08d91d8d Mon Sep 17 00:00:00 2001 From: Dery Rahman Ahaddienata Date: Tue, 27 May 2025 17:22:49 +0700 Subject: [PATCH 4/4] fix: use regex create ddl statement --- mc2mc/internal/query/helper.go | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/mc2mc/internal/query/helper.go b/mc2mc/internal/query/helper.go index 3096f49..050ecc2 100644 --- a/mc2mc/internal/query/helper.go +++ b/mc2mc/internal/query/helper.go @@ -11,17 +11,16 @@ const ( ) var ( - semicolonPattern = regexp.MustCompile(`;\s*(\n+|$)`) // regex to match semicolons - commentPattern = regexp.MustCompile(`--[^\n]*`) // regex to match comments - multiCommentPattern = regexp.MustCompile(`(?s)/\*.*?\*/`) // regex to match multi-line comments - headerPattern = regexp.MustCompile(`(?i)^set`) // regex to match header statements - variablePattern = regexp.MustCompile(`(?i)^@`) // regex to match variable statements - dropPattern = regexp.MustCompile(`(?i)^DROP\s+`) // regex to match DROP statements - udfPattern = regexp.MustCompile(`(?i)^function\s+`) // regex to match UDF statements - ddlPattern = regexp.MustCompile(`(?i)^(CREATE|ALTER|DROP|TRUNCATE)\s+`) // regex to match DDL statements - stringPattern = regexp.MustCompile(`'[^']*'`) // regex to match SQL strings (anything inside single quotes) - // special case for DML CREATE TABLE statements - dmlCreatePattern = regexp.MustCompile(`(?i)^CREATE\s+TABLE\s+(IF\s+NOT\s+EXISTS\s+)?[^\s]+\s+(TBLPROPERTIES\s*\([^\)]+\)\s+)?(LIFECYCLE\s+\d+\s+)?AS\s+`) // regex to match DML CREATE TABLE statements + semicolonPattern = regexp.MustCompile(`;\s*(\n+|$)`) // regex to match semicolons + commentPattern = regexp.MustCompile(`--[^\n]*`) // regex to match comments + multiCommentPattern = regexp.MustCompile(`(?s)/\*.*?\*/`) // regex to match multi-line comments + headerPattern = regexp.MustCompile(`(?i)^set`) // regex to match header statements + variablePattern = regexp.MustCompile(`(?i)^@`) // regex to match variable statements + dropPattern = regexp.MustCompile(`(?i)^DROP\s+`) // regex to match DROP statements + udfPattern = regexp.MustCompile(`(?i)^function\s+`) // regex to match UDF statements + ddlPattern = regexp.MustCompile(`(?i)^(ALTER|DROP|TRUNCATE)\s+`) // regex to match DDL statements + ddlCreatePattern = regexp.MustCompile(`(?i)^(CREATE\s+TABLE\s+[^\s]+\s*\()`) // regex to match CREATE DDL statements + stringPattern = regexp.MustCompile(`'[^']*'`) // regex to match SQL strings (anything inside single quotes) ) func SplitQueryComponents(query string) (headers []string, varsUDFs []string, queries []string) { @@ -231,5 +230,5 @@ func RestoreStringLiteral(query string, placeholders map[string]string) string { func IsDDL(stmt string) bool { stmtWithoutComment := RemoveComments(stmt) - return ddlPattern.MatchString(strings.TrimSpace(stmtWithoutComment)) && !dmlCreatePattern.MatchString(strings.TrimSpace(stmtWithoutComment)) + return ddlPattern.MatchString(strings.TrimSpace(stmtWithoutComment)) || ddlCreatePattern.MatchString(strings.TrimSpace(stmtWithoutComment)) }