Skip to content

Commit

Permalink
Correctly preserve the result attribute of python UDFs through transfo…
Browse files Browse the repository at this point in the history
…rmations
  • Loading branch information
marmbrus committed Oct 8, 2014
1 parent f18dd59 commit 9533286
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
execution.PhysicalRDD(Nil, singleRowRdd) :: Nil
case logical.Repartition(expressions, child) =>
execution.Exchange(HashPartitioning(expressions, numPartitions), planLater(child)) :: Nil
case e @ EvaluatePython(udf, child) =>
case e @ EvaluatePython(udf, child, _) =>
BatchPythonEvaluation(udf, e.output, planLater(child)) :: Nil
case LogicalRDD(output, rdd) => PhysicalRDD(output, rdd) :: Nil
case _ => Nil
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,13 +105,21 @@ private[spark] object ExtractPythonUdfs extends Rule[LogicalPlan] {
}
}

/**
 * Companion factory for [[EvaluatePython]].
 */
object EvaluatePython {
  /**
   * Builds an [[EvaluatePython]] node for `udf` on top of `child`, constructing an
   * [[AttributeReference]] named "pythonUDF" with the UDF's data type to serve as the
   * node's `resultAttribute`.
   *
   * @param udf   the Python UDF to evaluate
   * @param child the logical plan the new node is placed on top of
   * @return the new node, carrying the attribute constructed here
   */
  // Explicit result type: this is a public factory that overloads the case class's
  // generated three-argument `apply`, so the type should not be left to inference.
  def apply(udf: PythonUDF, child: LogicalPlan): EvaluatePython =
    new EvaluatePython(udf, child, AttributeReference("pythonUDF", udf.dataType)())
}

/**
 * :: DeveloperApi ::
 * Evaluates a [[PythonUDF]], appending the result to the end of the input tuple.
 *
 * The node's output is the child's output followed by `resultAttribute`.
 *
 * NOTE(review): this span comes from a diff view, so it shows two definitions back to back.
 * The two-parameter header with the `val resultAttribute` line appears to be the pre-commit
 * version; the three-parameter header that follows appears to be the post-commit version, in
 * which `resultAttribute` is a constructor parameter. Per the commit title, the intent is to
 * preserve the result attribute through transformations.
 *
 * @param udf the Python UDF to evaluate
 * @param child the child logical plan whose output is extended
 * @param resultAttribute the attribute appended after the child's output to carry the UDF
 *                        result (a constructor parameter only in the three-parameter version)
 */
@DeveloperApi
// Pre-commit definition (presumably the removed side of the diff): resultAttribute is built
// inside the class body.
case class EvaluatePython(udf: PythonUDF, child: LogicalPlan) extends logical.UnaryNode {
val resultAttribute = AttributeReference("pythonUDF", udf.dataType, nullable=true)()
// Post-commit definition (presumably the added side of the diff): resultAttribute is supplied
// by the caller.
case class EvaluatePython(
udf: PythonUDF,
child: LogicalPlan,
resultAttribute: AttributeReference)
extends logical.UnaryNode {

// Output schema: every attribute of the child, then the UDF result attribute last.
def output = child.output :+ resultAttribute
}
Expand Down

0 comments on commit 9533286

Please sign in to comment.