Skip to content

Commit

Permalink
[SPARK-12288] [SQL] Support UnsafeRow in Coalesce/Except/Intersect.
Browse files Browse the repository at this point in the history
Support UnsafeRow for the Coalesce/Except/Intersect.

Could you review if my code changes are ok? davies Thank you!

Author: gatorsmile <gatorsmile@gmail.com>

Closes #10285 from gatorsmile/unsafeSupportCIE.
  • Loading branch information
gatorsmile authored and davies committed Dec 15, 2015
1 parent d13ff82 commit 606f99b
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ case class Union(children: Seq[SparkPlan]) extends SparkPlan {
}
}
}
override def outputsUnsafeRows: Boolean = children.forall(_.outputsUnsafeRows)
override def outputsUnsafeRows: Boolean = children.exists(_.outputsUnsafeRows)
override def canProcessUnsafeRows: Boolean = true
override def canProcessSafeRows: Boolean = true
protected override def doExecute(): RDD[InternalRow] =
Expand Down Expand Up @@ -250,7 +250,9 @@ case class Coalesce(numPartitions: Int, child: SparkPlan) extends UnaryNode {
child.execute().coalesce(numPartitions, shuffle = false)
}

override def outputsUnsafeRows: Boolean = child.outputsUnsafeRows
override def canProcessUnsafeRows: Boolean = true
override def canProcessSafeRows: Boolean = true
}

/**
Expand All @@ -263,6 +265,10 @@ case class Except(left: SparkPlan, right: SparkPlan) extends BinaryNode {
protected override def doExecute(): RDD[InternalRow] = {
left.execute().map(_.copy()).subtract(right.execute().map(_.copy()))
}

override def outputsUnsafeRows: Boolean = children.exists(_.outputsUnsafeRows)
override def canProcessUnsafeRows: Boolean = true
override def canProcessSafeRows: Boolean = true
}

/**
Expand All @@ -275,6 +281,10 @@ case class Intersect(left: SparkPlan, right: SparkPlan) extends BinaryNode {
protected override def doExecute(): RDD[InternalRow] = {
left.execute().map(_.copy()).intersection(right.execute().map(_.copy()))
}

override def outputsUnsafeRows: Boolean = children.exists(_.outputsUnsafeRows)
override def canProcessUnsafeRows: Boolean = true
override def canProcessSafeRows: Boolean = true
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,41 @@ class RowFormatConvertersSuite extends SparkPlanTest with SharedSQLContext {
assert(!preparedPlan.outputsUnsafeRows)
}

test("coalesce can process unsafe rows") {
val plan = Coalesce(1, outputsUnsafe)
val preparedPlan = sqlContext.prepareForExecution.execute(plan)
assert(getConverters(preparedPlan).size === 1)
assert(preparedPlan.outputsUnsafeRows)
}

test("except can process unsafe rows") {
val plan = Except(outputsUnsafe, outputsUnsafe)
val preparedPlan = sqlContext.prepareForExecution.execute(plan)
assert(getConverters(preparedPlan).size === 2)
assert(preparedPlan.outputsUnsafeRows)
}

test("except requires all of its input rows' formats to agree") {
val plan = Except(outputsSafe, outputsUnsafe)
assert(plan.canProcessSafeRows && plan.canProcessUnsafeRows)
val preparedPlan = sqlContext.prepareForExecution.execute(plan)
assert(preparedPlan.outputsUnsafeRows)
}

test("intersect can process unsafe rows") {
val plan = Intersect(outputsUnsafe, outputsUnsafe)
val preparedPlan = sqlContext.prepareForExecution.execute(plan)
assert(getConverters(preparedPlan).size === 2)
assert(preparedPlan.outputsUnsafeRows)
}

test("intersect requires all of its input rows' formats to agree") {
val plan = Intersect(outputsSafe, outputsUnsafe)
assert(plan.canProcessSafeRows && plan.canProcessUnsafeRows)
val preparedPlan = sqlContext.prepareForExecution.execute(plan)
assert(preparedPlan.outputsUnsafeRows)
}

test("execute() fails an assertion if inputs rows are of different formats") {
val e = intercept[AssertionError] {
Union(Seq(outputsSafe, outputsUnsafe)).execute()
Expand Down

0 comments on commit 606f99b

Please sign in to comment.