-
Notifications
You must be signed in to change notification settings - Fork 28.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-29359][SQL][TESTS] Better exception handling in (SQL|ThriftServer)QueryTestSuite #26028
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -135,6 +135,8 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { | |
private val notIncludedMsg = "[not included in comparison]" | ||
private val clsName = this.getClass.getCanonicalName | ||
|
||
protected val emptySchema = StructType(Seq.empty).catalogString | ||
|
||
protected override def sparkConf: SparkConf = super.sparkConf | ||
// Fewer shuffle partitions to speed up testing. | ||
.set(SQLConf.SHUFFLE_PARTITIONS, 4) | ||
|
@@ -323,11 +325,11 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { | |
} | ||
// Run the SQL queries preparing them for comparison. | ||
val outputs: Seq[QueryOutput] = queries.map { sql => | ||
val (schema, output) = getNormalizedResult(localSparkSession, sql) | ||
val (schema, output) = handleExceptions(getNormalizedResult(localSparkSession, sql)) | ||
// We might need to do some query canonicalization in the future. | ||
QueryOutput( | ||
sql = sql, | ||
schema = schema.catalogString, | ||
schema = schema, | ||
output = output.mkString("\n").replaceAll("\\s+$", "")) | ||
} | ||
|
||
|
@@ -388,49 +390,58 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { | |
} | ||
} | ||
|
||
/** Executes a query and returns the result as (schema of the output, normalized output). */ | ||
private def getNormalizedResult(session: SparkSession, sql: String): (StructType, Seq[String]) = { | ||
// Returns true if the plan is supposed to be sorted. | ||
def isSorted(plan: LogicalPlan): Boolean = plan match { | ||
case _: Join | _: Aggregate | _: Generate | _: Sample | _: Distinct => false | ||
case _: DescribeCommandBase | ||
| _: DescribeColumnCommand | ||
| _: DescribeTableStatement | ||
| _: DescribeColumnStatement => true | ||
case PhysicalOperation(_, _, Sort(_, true, _)) => true | ||
case _ => plan.children.iterator.exists(isSorted) | ||
} | ||
|
||
/** | ||
* This method handles exceptions occurred during query execution as they may need special care | ||
* to become comparable to the expected output. | ||
* | ||
* @param result a function that returns a pair of schema and output | ||
*/ | ||
protected def handleExceptions(result: => (String, Seq[String])): (String, Seq[String]) = { | ||
try { | ||
val df = session.sql(sql) | ||
val schema = df.schema | ||
// Get answer, but also get rid of the #1234 expression ids that show up in explain plans | ||
val answer = SQLExecution.withNewExecutionId(session, df.queryExecution, Some(sql)) { | ||
hiveResultString(df.queryExecution.executedPlan).map(replaceNotIncludedMsg) | ||
} | ||
|
||
// If the output is not pre-sorted, sort it. | ||
if (isSorted(df.queryExecution.analyzed)) (schema, answer) else (schema, answer.sorted) | ||
|
||
result | ||
} catch { | ||
case a: AnalysisException => | ||
// Do not output the logical plan tree which contains expression IDs. | ||
// Also implement a crude way of masking expression IDs in the error message | ||
// with a generic pattern "###". | ||
val msg = if (a.plan.nonEmpty) a.getSimpleMessage else a.getMessage | ||
(StructType(Seq.empty), Seq(a.getClass.getName, msg.replaceAll("#\\d+", "#x"))) | ||
(emptySchema, Seq(a.getClass.getName, msg.replaceAll("#\\d+", "#x"))) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you add a test case which this is required? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No particular test case. Since I touched this method and |
||
case s: SparkException if s.getCause != null => | ||
// For a runtime exception, it is hard to match because its message contains | ||
// information of stage, task ID, etc. | ||
// To make result matching simpler, here we match the cause of the exception if it exists. | ||
val cause = s.getCause | ||
(StructType(Seq.empty), Seq(cause.getClass.getName, cause.getMessage)) | ||
(emptySchema, Seq(cause.getClass.getName, cause.getMessage)) | ||
case NonFatal(e) => | ||
// If there is an exception, put the exception class followed by the message. | ||
(StructType(Seq.empty), Seq(e.getClass.getName, e.getMessage)) | ||
(emptySchema, Seq(e.getClass.getName, e.getMessage)) | ||
} | ||
} | ||
|
||
/** Executes a query and returns the result as (schema of the output, normalized output). */ | ||
private def getNormalizedResult(session: SparkSession, sql: String): (String, Seq[String]) = { | ||
// Returns true if the plan is supposed to be sorted. | ||
def isSorted(plan: LogicalPlan): Boolean = plan match { | ||
case _: Join | _: Aggregate | _: Generate | _: Sample | _: Distinct => false | ||
case _: DescribeCommandBase | ||
| _: DescribeColumnCommand | ||
| _: DescribeTableStatement | ||
| _: DescribeColumnStatement => true | ||
case PhysicalOperation(_, _, Sort(_, true, _)) => true | ||
case _ => plan.children.iterator.exists(isSorted) | ||
} | ||
|
||
val df = session.sql(sql) | ||
val schema = df.schema.catalogString | ||
// Get answer, but also get rid of the #1234 expression ids that show up in explain plans | ||
val answer = SQLExecution.withNewExecutionId(session, df.queryExecution, Some(sql)) { | ||
hiveResultString(df.queryExecution.executedPlan).map(replaceNotIncludedMsg) | ||
} | ||
|
||
// If the output is not pre-sorted, sort it. | ||
if (isSorted(df.queryExecution.analyzed)) (schema, answer) else (schema, answer.sorted) | ||
} | ||
|
||
protected def replaceNotIncludedMsg(line: String): String = { | ||
line.replaceAll("#\\d+", "#x") | ||
.replaceAll( | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you add a function description because we override this differently?
(struct<>, ...)
("", answer.sorted)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, both return a
(String, Seq[String])
tuple where the first is the schema and the second is the result. Since it's impossible to get the exact spark schema back from a java.sql.ResultSet
we use an empty string in ThriftServerQueryTestSuite
. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've added a description to it and to its override.