From 4ebf5d31cba8739597e77f19e80aa6fa0881e3de Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Fri, 9 Jun 2023 18:03:46 +0800 Subject: [PATCH] Add plan generation tests for regexp_extract_all and regexp_instr with and without regex group index --- .../spark/sql/PlanGenerationTestSuite.scala | 12 +++++-- ...extract_all_with_regex_group_index.explain | 2 ++ ...ract_all_without_regex_group_index.explain | 2 ++ ...egexp_instr_with_regex_group_index.explain | 2 ++ ...xp_instr_without_regex_group_index.explain | 2 ++ ...xp_extract_all_with_regex_group_index.json | 33 ++++++++++++++++++ ...tract_all_with_regex_group_index.proto.bin | Bin 0 -> 212 bytes ...extract_all_without_regex_group_index.json | 29 +++++++++++++++ ...ct_all_without_regex_group_index.proto.bin | Bin 0 -> 206 bytes ...n_regexp_instr_with_regex_group_index.json | 33 ++++++++++++++++++ ...exp_instr_with_regex_group_index.proto.bin | Bin 0 -> 203 bytes ...egexp_instr_without_regex_group_index.json | 29 +++++++++++++++ ..._instr_without_regex_group_index.proto.bin | Bin 0 -> 197 bytes 13 files changed, 142 insertions(+), 2 deletions(-) create mode 100644 connector/connect/common/src/test/resources/query-tests/explain-results/function_regexp_extract_all_with_regex_group_index.explain create mode 100644 connector/connect/common/src/test/resources/query-tests/explain-results/function_regexp_extract_all_without_regex_group_index.explain create mode 100644 connector/connect/common/src/test/resources/query-tests/explain-results/function_regexp_instr_with_regex_group_index.explain create mode 100644 connector/connect/common/src/test/resources/query-tests/explain-results/function_regexp_instr_without_regex_group_index.explain create mode 100644 connector/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all_with_regex_group_index.json create mode 100644 connector/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all_with_regex_group_index.proto.bin create mode 100644 
connector/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all_without_regex_group_index.json create mode 100644 connector/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all_without_regex_group_index.proto.bin create mode 100644 connector/connect/common/src/test/resources/query-tests/queries/function_regexp_instr_with_regex_group_index.json create mode 100644 connector/connect/common/src/test/resources/query-tests/queries/function_regexp_instr_with_regex_group_index.proto.bin create mode 100644 connector/connect/common/src/test/resources/query-tests/queries/function_regexp_instr_without_regex_group_index.json create mode 100644 connector/connect/common/src/test/resources/query-tests/queries/function_regexp_instr_without_regex_group_index.proto.bin diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala index df7f5fb1a3e07..ebb171af9ef33 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala @@ -1498,7 +1498,11 @@ class PlanGenerationTestSuite fn.regexp_extract(fn.col("g"), "(\\d+)-(\\d+)", 1) } - functionTest("regexp_extract_all") { + functionTest("regexp_extract_all without regex group index") { + fn.regexp_extract_all(fn.col("g"), lit("(\\d+)([a-z]+)")) + } + + functionTest("regexp_extract_all with regex group index") { fn.regexp_extract_all(fn.col("g"), lit("(\\d+)([a-z]+)"), lit(1)) } @@ -1510,7 +1514,11 @@ class PlanGenerationTestSuite fn.regexp_substr(fn.col("g"), lit("\\d{2}(a|b|m)")) } - functionTest("regexp_instr") { + functionTest("regexp_instr without regex group index") { + fn.regexp_instr(fn.col("g"), lit("\\d+(a|b|m)")) + } + + functionTest("regexp_instr with regex group index") { 
fn.regexp_instr(fn.col("g"), lit("\\d+(a|b|m)"), lit(1)) } diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_regexp_extract_all_with_regex_group_index.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_regexp_extract_all_with_regex_group_index.explain new file mode 100644 index 0000000000000..225379df43cc8 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_regexp_extract_all_with_regex_group_index.explain @@ -0,0 +1,2 @@ +Project [regexp_extract_all(g#0, (\d+)([a-z]+), 1) AS regexp_extract_all(g, (\d+)([a-z]+), 1)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_regexp_extract_all_without_regex_group_index.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_regexp_extract_all_without_regex_group_index.explain new file mode 100644 index 0000000000000..225379df43cc8 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_regexp_extract_all_without_regex_group_index.explain @@ -0,0 +1,2 @@ +Project [regexp_extract_all(g#0, (\d+)([a-z]+), 1) AS regexp_extract_all(g, (\d+)([a-z]+), 1)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_regexp_instr_with_regex_group_index.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_regexp_instr_with_regex_group_index.explain new file mode 100644 index 0000000000000..8ee6294b451fd --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_regexp_instr_with_regex_group_index.explain @@ -0,0 +1,2 @@ +Project [regexp_instr(g#0, \d+(a|b|m), 1) AS regexp_instr(g, \d+(a|b|m), 1)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] 
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_regexp_instr_without_regex_group_index.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_regexp_instr_without_regex_group_index.explain new file mode 100644 index 0000000000000..e2ff4394a340c --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_regexp_instr_without_regex_group_index.explain @@ -0,0 +1,2 @@ +Project [regexp_instr(g#0, \d+(a|b|m), 0) AS regexp_instr(g, \d+(a|b|m), 0)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all_with_regex_group_index.json b/connector/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all_with_regex_group_index.json new file mode 100644 index 0000000000000..ebe2f581e3de2 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all_with_regex_group_index.json @@ -0,0 +1,33 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "regexp_extract_all", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "g" + } + }, { + "literal": { + "string": "(\\d+)([a-z]+)" + } + }, { + "literal": { + "integer": 1 + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all_with_regex_group_index.proto.bin 
b/connector/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all_with_regex_group_index.proto.bin new file mode 100644 index 0000000000000000000000000000000000000000..2cf31e5f75f4f095e6e5bd14d84e1eda4e0e83fb GIT binary patch literal 212 zcmd;L5@3`%%g8l}k&8)yA*!2EsDrV%q^LBx#3nPvDk(EPGp|G^(F#N+S*7HcCgr5+ zq*xJ9VW*R7l~`1iSZM>)XQz{9m77>#1Jsk5m##xdtDR0d$atVqJ1H|M6E2~m)b!Mf xg80;mlA^@qlK8}&93fUAW-i8bAwe$wEMAS66m3n7=tSMBSZz%q7A__OMgXi2LZ1Kt literal 0 HcmV?d00001 diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all_without_regex_group_index.json b/connector/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all_without_regex_group_index.json new file mode 100644 index 0000000000000..84a2e378ed2e3 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all_without_regex_group_index.json @@ -0,0 +1,29 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "regexp_extract_all", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "g" + } + }, { + "literal": { + "string": "(\\d+)([a-z]+)" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all_without_regex_group_index.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all_without_regex_group_index.proto.bin new file mode 100644 index 0000000000000000000000000000000000000000..529cae91ce595978d2cba2f039a9020e3439e230 GIT binary patch literal 206 
zcmd;L5@3`%&d4>1k&8)yA*!2EsDrV%q^LBx#3nPvDk(EPGp|G^(F#N+S*7HcCgr5+ zq*xJ9VW*R7l~`1iSZM>)XQz{9m77>#1Jsk5m##xdtDR0d$atVqJ1GMxJuac5)b!Mf rg80;mlA^@qlK8}&93fUAW-i8bAwe$wEMAS66m3n7=tSMBSZz%J7BfOR literal 0 HcmV?d00001 diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_regexp_instr_with_regex_group_index.json b/connector/connect/common/src/test/resources/query-tests/queries/function_regexp_instr_with_regex_group_index.json new file mode 100644 index 0000000000000..cb44dda5ba2c2 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_regexp_instr_with_regex_group_index.json @@ -0,0 +1,33 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "regexp_instr", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "g" + } + }, { + "literal": { + "string": "\\d+(a|b|m)" + } + }, { + "literal": { + "integer": 1 + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_regexp_instr_with_regex_group_index.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_regexp_instr_with_regex_group_index.proto.bin new file mode 100644 index 0000000000000000000000000000000000000000..55cc77eb3cd1f58e24bd4012bd40075e95e73ea6 GIT binary patch literal 203 zcmd;L5@3`%!pJp=k&8)yA*!2EsDrV%q^LBx#3nPvDk(EPGp|G^(F#N+S*7HcCgr5+ zq*xJ9VW*R7l~`1iSZM>)XQz{9m77>#1Jsk5m##xdtDR0d$atVqJ1Jc$Z7!ao)b!Mf ng80llpr{b55HlBJx)2{1PZn29inc~#O;Sy+rVtAklK~?DEhInw literal 0 HcmV?d00001 diff --git 
a/connector/connect/common/src/test/resources/query-tests/queries/function_regexp_instr_without_regex_group_index.json b/connector/connect/common/src/test/resources/query-tests/queries/function_regexp_instr_without_regex_group_index.json new file mode 100644 index 0000000000000..eeab13abaa6da --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_regexp_instr_without_regex_group_index.json @@ -0,0 +1,29 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "regexp_instr", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "g" + } + }, { + "literal": { + "string": "\\d+(a|b|m)" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_regexp_instr_without_regex_group_index.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_regexp_instr_without_regex_group_index.proto.bin new file mode 100644 index 0000000000000000000000000000000000000000..3aee655d92c652950b0a924aa26e70728d57e79b GIT binary patch literal 197 zcmd;L5@3|t$H+B_k&8)yA*!2EsDrV%q^LBx#3nPvDk(EPGp|G^(F#N+S*7HcCgr5+ zq*xJ9VW*R7l~`1iSZM>)XQz{9m77>#1Jsk5m##xdtDR0d$atVqJ1KQ3RW6>Q)b!Mf hg80llpr{b55HlBJx)2{1PZn29inc~#O;Sy+CIF|QKbZgk literal 0 HcmV?d00001