Skip to content

Commit

Permalink
[SPARK-48229][SQL] Add collation support for inputFile expressions
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?
Introduce collation awareness for inputFile expressions: input_file_name.

### Why are the changes needed?
Add collation support for inputFile expressions in Spark.

### Does this PR introduce _any_ user-facing change?
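Yes. input_file_name takes no arguments, but its result type now follows the configured default collation (`SQLConf.get.defaultStringType`) instead of always being plain `StringType`, so users can work with collated strings returned by inputFile functions: input_file_name.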

### How was this patch tested?
End-to-end SQL tests (added to `CollationSQLExpressionsSuite`).

### Was this patch authored or co-authored using generative AI tooling?
No.

Closes #46503 from uros-db/input-file-block.

Authored-by: Uros Bojanic <157381213+uros-db@users.noreply.github.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
  • Loading branch information
uros-db authored and cloud-fan committed May 14, 2024
1 parent 7974811 commit 9241b8e
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ import org.apache.spark.rdd.InputFileBlockHolder
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode, FalseLiteral}
import org.apache.spark.sql.catalyst.expressions.codegen.Block._
import org.apache.spark.sql.types.{DataType, LongType, StringType}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.{DataType, LongType}
import org.apache.spark.unsafe.types.UTF8String

// scalastyle:off whitespace.end.of.line
Expand All @@ -39,7 +40,7 @@ case class InputFileName() extends LeafExpression with Nondeterministic {

override def nullable: Boolean = false

override def dataType: DataType = StringType
override def dataType: DataType = SQLConf.get.defaultStringType

override def prettyName: String = "input_file_name"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1275,6 +1275,23 @@ class CollationSQLExpressionsSuite
})
}

test("Support InputFileName expression with collation") {
  // The query text does not depend on the collation, so it is built once up front.
  val inputFileNameQuery =
    """
      |select input_file_name()
      |""".stripMargin
  // Run the same query once per supported default collation.
  for (collation <- Seq("UTF8_BINARY", "UTF8_BINARY_LCASE", "UNICODE", "UNICODE_CI")) {
    withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
      val result = sql(inputFileNameQuery)
      // The query has no FROM clause; the expected answer is a single empty string.
      checkAnswer(result, Row(""))
      // The single output column must carry the collation configured as the default.
      val expectedType = StringType(collation)
      val actualType = result.schema.fields.head.dataType
      assert(actualType.sameType(expectedType))
    }
  }
}

// TODO: Add more tests for other SQL expressions

}
Expand Down

0 comments on commit 9241b8e

Please sign in to comment.