Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-43935][SQL][PYTHON][CONNECT] Add xpath_* functions to Scala and Python #41470

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4282,6 +4282,93 @@ object functions {
def array_except(col1: Column, col2: Column): Column =
Column.fn("array_except", col1, col2)

/**
* Returns a string array of values within the nodes of xml that match the XPath expression.
*
* @param xml
*   column containing the XML to search
* @param path
*   column containing the XPath expression to match
* @group xml_funcs
* @since 3.5.0
*/
def xpath(xml: Column, path: Column): Column =
Column.fn("xpath", xml, path)

/**
* Returns true if the XPath expression evaluates to true, or if a matching node is found.
*
* @param xml
*   column containing the XML to search
* @param path
*   column containing the XPath expression to evaluate
* @group xml_funcs
* @since 3.5.0
*/
def xpath_boolean(xml: Column, path: Column): Column =
Column.fn("xpath_boolean", xml, path)

/**
* Returns a double value, the value zero if no match is found, or NaN if a match is found but
* the value is non-numeric.
*
* @param xml
*   column containing the XML to search
* @param path
*   column containing the XPath expression to evaluate
* @group xml_funcs
* @since 3.5.0
*/
def xpath_double(xml: Column, path: Column): Column =
Column.fn("xpath_double", xml, path)

/**
* Returns a double value, the value zero if no match is found, or NaN if a match is found but
* the value is non-numeric.
*
* @param xml
*   column containing the XML to search
* @param path
*   column containing the XPath expression to evaluate
* @group xml_funcs
* @since 3.5.0
*/
def xpath_number(xml: Column, path: Column): Column =
Column.fn("xpath_number", xml, path)

/**
* Returns a float value, the value zero if no match is found, or NaN if a match is found but
* the value is non-numeric.
*
* @param xml
*   column containing the XML to search
* @param path
*   column containing the XPath expression to evaluate
* @group xml_funcs
* @since 3.5.0
*/
def xpath_float(xml: Column, path: Column): Column =
Column.fn("xpath_float", xml, path)

/**
* Returns an integer value, or the value zero if no match is found, or a match is found but the
* value is non-numeric.
*
* @param xml
*   column containing the XML to search
* @param path
*   column containing the XPath expression to evaluate
* @group xml_funcs
* @since 3.5.0
*/
def xpath_int(xml: Column, path: Column): Column =
Column.fn("xpath_int", xml, path)

/**
* Returns a long integer value, or the value zero if no match is found, or a match is found but
* the value is non-numeric.
*
* @param xml
*   column containing the XML to search
* @param path
*   column containing the XPath expression to evaluate
* @group xml_funcs
* @since 3.5.0
*/
def xpath_long(xml: Column, path: Column): Column =
Column.fn("xpath_long", xml, path)

/**
* Returns a short integer value, or the value zero if no match is found, or a match is found
* but the value is non-numeric.
*
* @param xml
*   column containing the XML to search
* @param path
*   column containing the XPath expression to evaluate
* @group xml_funcs
* @since 3.5.0
*/
def xpath_short(xml: Column, path: Column): Column =
Column.fn("xpath_short", xml, path)

/**
* Returns the text contents of the first xml node that matches the XPath expression.
*
* @param xml
*   column containing the XML to search
* @param path
*   column containing the XPath expression to match
* @group xml_funcs
* @since 3.5.0
*/
def xpath_string(xml: Column, path: Column): Column =
Column.fn("xpath_string", xml, path)

private def newLambdaVariable(name: String): proto.Expression.UnresolvedNamedLambdaVariable = {
proto.Expression.UnresolvedNamedLambdaVariable
.newBuilder()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1655,6 +1655,42 @@ class PlanGenerationTestSuite
fn.to_date(fn.col("s"), "yyyy-MM-dd")
}

// Plan-generation cases for the xpath_* functions: each one applies a single function to
// column "s" together with a literal XPath expression.
// NOTE(review): these are registered through temporalFunctionTest although they are not
// temporal functions - presumably because that fixture exposes a string column "s"; confirm.
temporalFunctionTest("xpath") {
fn.xpath(fn.col("s"), lit("a/b/text()"))
}

temporalFunctionTest("xpath_boolean") {
fn.xpath_boolean(fn.col("s"), lit("a/b"))
}

temporalFunctionTest("xpath_double") {
fn.xpath_double(fn.col("s"), lit("a/b"))
}

temporalFunctionTest("xpath_number") {
fn.xpath_number(fn.col("s"), lit("a/b"))
}

temporalFunctionTest("xpath_float") {
fn.xpath_float(fn.col("s"), lit("a/b"))
}

temporalFunctionTest("xpath_int") {
fn.xpath_int(fn.col("s"), lit("a/b"))
}

temporalFunctionTest("xpath_long") {
fn.xpath_long(fn.col("s"), lit("a/b"))
}

temporalFunctionTest("xpath_short") {
fn.xpath_short(fn.col("s"), lit("a/b"))
}

temporalFunctionTest("xpath_string") {
fn.xpath_string(fn.col("s"), lit("a/b"))
}

temporalFunctionTest("unix_date") {
fn.unix_date(fn.to_date(fn.col("s"), "yyyy-MM-dd"))
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Project [xpath(s#0, a/b/text()) AS xpath(s, a/b/text())#0]
+- LocalRelation <empty>, [d#0, t#0, s#0, x#0L, wt#0]
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Project [xpath_boolean(s#0, a/b) AS xpath_boolean(s, a/b)#0]
+- LocalRelation <empty>, [d#0, t#0, s#0, x#0L, wt#0]
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Project [xpath_double(s#0, a/b) AS xpath_double(s, a/b)#0]
+- LocalRelation <empty>, [d#0, t#0, s#0, x#0L, wt#0]
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Project [xpath_float(s#0, a/b) AS xpath_float(s, a/b)#0]
+- LocalRelation <empty>, [d#0, t#0, s#0, x#0L, wt#0]
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Project [xpath_int(s#0, a/b) AS xpath_int(s, a/b)#0]
+- LocalRelation <empty>, [d#0, t#0, s#0, x#0L, wt#0]
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Project [xpath_long(s#0, a/b) AS xpath_long(s, a/b)#0L]
+- LocalRelation <empty>, [d#0, t#0, s#0, x#0L, wt#0]
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Project [xpath_number(s#0, a/b) AS xpath_number(s, a/b)#0]
+- LocalRelation <empty>, [d#0, t#0, s#0, x#0L, wt#0]
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Project [xpath_short(s#0, a/b) AS xpath_short(s, a/b)#0]
+- LocalRelation <empty>, [d#0, t#0, s#0, x#0L, wt#0]
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Project [xpath_string(s#0, a/b) AS xpath_string(s, a/b)#0]
+- LocalRelation <empty>, [d#0, t#0, s#0, x#0L, wt#0]
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"common": {
"planId": "1"
},
"project": {
"input": {
"common": {
"planId": "0"
},
"localRelation": {
"schema": "struct\u003cd:date,t:timestamp,s:string,x:bigint,wt:struct\u003cstart:timestamp,end:timestamp\u003e\u003e"
}
},
"expressions": [{
"unresolvedFunction": {
"functionName": "xpath",
"arguments": [{
"unresolvedAttribute": {
"unparsedIdentifier": "s"
}
}, {
"literal": {
"string": "a/b/text()"
}
}]
}
}]
}
}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"common": {
"planId": "1"
},
"project": {
"input": {
"common": {
"planId": "0"
},
"localRelation": {
"schema": "struct\u003cd:date,t:timestamp,s:string,x:bigint,wt:struct\u003cstart:timestamp,end:timestamp\u003e\u003e"
}
},
"expressions": [{
"unresolvedFunction": {
"functionName": "xpath_boolean",
"arguments": [{
"unresolvedAttribute": {
"unparsedIdentifier": "s"
}
}, {
"literal": {
"string": "a/b"
}
}]
}
}]
}
}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"common": {
"planId": "1"
},
"project": {
"input": {
"common": {
"planId": "0"
},
"localRelation": {
"schema": "struct\u003cd:date,t:timestamp,s:string,x:bigint,wt:struct\u003cstart:timestamp,end:timestamp\u003e\u003e"
}
},
"expressions": [{
"unresolvedFunction": {
"functionName": "xpath_double",
"arguments": [{
"unresolvedAttribute": {
"unparsedIdentifier": "s"
}
}, {
"literal": {
"string": "a/b"
}
}]
}
}]
}
}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"common": {
"planId": "1"
},
"project": {
"input": {
"common": {
"planId": "0"
},
"localRelation": {
"schema": "struct\u003cd:date,t:timestamp,s:string,x:bigint,wt:struct\u003cstart:timestamp,end:timestamp\u003e\u003e"
}
},
"expressions": [{
"unresolvedFunction": {
"functionName": "xpath_float",
"arguments": [{
"unresolvedAttribute": {
"unparsedIdentifier": "s"
}
}, {
"literal": {
"string": "a/b"
}
}]
}
}]
}
}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"common": {
"planId": "1"
},
"project": {
"input": {
"common": {
"planId": "0"
},
"localRelation": {
"schema": "struct\u003cd:date,t:timestamp,s:string,x:bigint,wt:struct\u003cstart:timestamp,end:timestamp\u003e\u003e"
}
},
"expressions": [{
"unresolvedFunction": {
"functionName": "xpath_int",
"arguments": [{
"unresolvedAttribute": {
"unparsedIdentifier": "s"
}
}, {
"literal": {
"string": "a/b"
}
}]
}
}]
}
}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"common": {
"planId": "1"
},
"project": {
"input": {
"common": {
"planId": "0"
},
"localRelation": {
"schema": "struct\u003cd:date,t:timestamp,s:string,x:bigint,wt:struct\u003cstart:timestamp,end:timestamp\u003e\u003e"
}
},
"expressions": [{
"unresolvedFunction": {
"functionName": "xpath_long",
"arguments": [{
"unresolvedAttribute": {
"unparsedIdentifier": "s"
}
}, {
"literal": {
"string": "a/b"
}
}]
}
}]
}
}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"common": {
"planId": "1"
},
"project": {
"input": {
"common": {
"planId": "0"
},
"localRelation": {
"schema": "struct\u003cd:date,t:timestamp,s:string,x:bigint,wt:struct\u003cstart:timestamp,end:timestamp\u003e\u003e"
}
},
"expressions": [{
"unresolvedFunction": {
"functionName": "xpath_number",
"arguments": [{
"unresolvedAttribute": {
"unparsedIdentifier": "s"
}
}, {
"literal": {
"string": "a/b"
}
}]
}
}]
}
}
Binary file not shown.