[SPARK-12505] [SQL] Pushdown a Limit on top of an Outer-Join #10454

Closed · wants to merge 4 commits · changes from 3 commits shown
@@ -47,6 +47,7 @@ object DefaultOptimizer extends Optimizer {
PushPredicateThroughProject,
PushPredicateThroughGenerate,
PushPredicateThroughAggregate,
PushLimitThroughOuterJoin,
ColumnPruning,
// Operator combine
ProjectCollapsing,
@@ -857,6 +858,30 @@ object PushPredicateThroughJoin extends Rule[LogicalPlan] with PredicateHelper {
}
}

/**
* Pushes [[Limit]] operators through [[Join]] operators when the join type is an outer join,
* adding extra [[Limit]] operators on top of the outer-side child/children.
*/
object PushLimitThroughOuterJoin extends Rule[LogicalPlan] with PredicateHelper {
Contributor

I feel we will generate wrong results. It is not safe to push down the limit if we are not joining the foreign key with the primary key, right? For example, for a left outer join, we push down the limit to the right table. It is possible that all rows returned by the right side have the same join column value.

Am I missing anything?

Member Author

For left outer joins, we only push down the limit to the left table. Thus, I think it should be safe?

Basically, the rule adds additional Limit node(s) on top of the outer-side child/children.

Contributor

Oh, sorry, I was looking at the wrong line. For a full outer join, is it safe? Also, with the limit, the result of a left/right outer join may not be deterministic, right?

Contributor

Hmm. Actually, for a left/right outer join, what will happen if we have A LEFT OUTER JOIN B ON (A.key = B.key) SORT BY A.key LIMIT 10?

Member Author

If there is a sort, the plan is different: we will have a Sort below the Limit, so this rule does not apply.
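
A rough plan-shape sketch of that point (hypothetical, not from this patch), written in the same Catalyst test DSL that FilterPushdownSuite below uses and assuming its testRelation with columns 'a, 'b, 'c:

import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.LeftOuter

val x = testRelation.subquery('x)
val y = testRelation.subquery('y)

// This builds Limit(10, Sort(Seq(x.a ASC), global = true, Join(x, y, LeftOuter, None))):
// the Sort sits between the Limit and the Join, so the pattern Limit(expr, Join(...))
// used by this rule does not match and nothing is pushed down.
val sortedThenLimited = x.join(y, LeftOuter).orderBy("x.a".attr.asc).limit(10)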

For the full outer join, I think the result is still correct, but it might not be the same as the result of the original plan.

In the new plan, we add an extra Limit. If the nodes below the Limit are deterministic, the result should be deterministic; if they are not, the result will not be deterministic, right? Thus, this rule does not change whether the result is deterministic. Sorry, this part is unclear to me; I am not sure my answer is correct.

Based on my understanding, when users use LIMIT, they do not expect deterministic results unless they also use ORDER BY.

Member Author

Sorry, I found a hole: a full outer join is the union of a left outer join and a right outer join, with duplicates removed. We are unable to add an extra Limit below that duplicate-removing Union.

Contributor

For example, suppose we have a table A containing 1, 2, 3, 4, 5 (say k is the column name). If we run A x FULL OUTER JOIN A y ON (x.k = y.k) LIMIT 2 and we push the limit to both sides, it is possible that we get 1, 2 from the left side and 3, 4 from the right side, right?
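
To make that concrete, here is a small self-contained sketch in plain Scala (no Spark; the table A and column k are only the hypothetical names from the example above) that simulates both plans:

object FullOuterLimitCounterexample extends App {
  // Table A with column k = 1, 2, 3, 4, 5 (hypothetical data from the example above).
  val a = Seq(1, 2, 3, 4, 5)

  // Full outer join of two key sequences on equality, returned as (left, right) pairs.
  def fullOuterJoin(left: Seq[Int], right: Seq[Int]): Seq[(Option[Int], Option[Int])] = {
    val matched   = for (l <- left; r <- right; if l == r) yield (Some(l), Some(r))
    val leftOnly  = left.filterNot(right.contains).map(l => (Some(l), None))
    val rightOnly = right.filterNot(left.contains).map(r => (None, Some(r)))
    matched ++ leftOnly ++ rightOnly
  }

  // Original plan: join first, then LIMIT 2. Every output row is a matched pair.
  println(fullOuterJoin(a, a).take(2))
  // List((Some(1),Some(1)), (Some(2),Some(2)))

  // Plan with the limit pushed to both sides: if the left limit keeps {1, 2} and the
  // right limit happens to keep {3, 4}, the join produces only null-padded rows,
  // which the original query could never return.
  println(fullOuterJoin(Seq(1, 2), Seq(3, 4)).take(2))
  // List((Some(1),None), (Some(2),None))
}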

Member Author

Let me update the code and fix the bug. Thanks!

Member Author

For the full outer join, my idea is to add the extra Limit to the side that has higher statistics (the larger estimated size). Does that sound good?

Member Author

Need to refine the idea.

For the full outer join:

  1. If both sides already have a Limit, do nothing.
  2. If only one side has a Limit, add the extra Limit to that side.
  3. If neither side has a Limit, add the extra Limit to the side that has higher statistics.

Is that better? A rough sketch of what I mean follows below.
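
An illustration-only sketch of that idea (not the final patch), imagined next to this rule in Optimizer.scala and assuming statistics.sizeInBytes as the "higher statistics" measure:

import org.apache.spark.sql.catalyst.plans.FullOuter
import org.apache.spark.sql.catalyst.plans.logical.{Join, Limit, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.Rule

object PushLimitThroughFullOuterJoin extends Rule[LogicalPlan] {
  def apply(plan: LogicalPlan): LogicalPlan = plan transform {
    case f @ Limit(expr, Join(left, right, FullOuter, joinCondition)) =>
      (left, right) match {
        // 1. Both sides already have a Limit: do nothing.
        case (Limit(_, _), Limit(_, _)) => f
        // 2. Only one side has a Limit: add the extra Limit to that side.
        case (Limit(_, _), _) =>
          Limit(expr, Join(CombineLimits(Limit(expr, left)), right, FullOuter, joinCondition))
        case (_, Limit(_, _)) =>
          Limit(expr, Join(left, CombineLimits(Limit(expr, right)), FullOuter, joinCondition))
        // 3. Neither side has a Limit: add the extra Limit to the side with the
        //    larger size estimate.
        case _ if left.statistics.sizeInBytes >= right.statistics.sizeInBytes =>
          Limit(expr, Join(CombineLimits(Limit(expr, left)), right, FullOuter, joinCondition))
        case _ =>
          Limit(expr, Join(left, CombineLimits(Limit(expr, right)), FullOuter, joinCondition))
      }
  }
}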


def apply(plan: LogicalPlan): LogicalPlan = plan transform {
case f @ Limit(expr, Join(left, right, joinType, joinCondition)) =>
Member Author

We can also push it down through a Project on top of the Join.
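
As a hypothetical illustration of that (not part of this patch), the rule could gain an extra case that matches a Project sitting between the Limit and the Join, for example:

// Hypothetical extra case; only safe when the project list is deterministic.
case Limit(expr, Project(projectList, Join(left, right, LeftOuter, joinCondition)))
    if projectList.forall(_.deterministic) =>
  Limit(expr,
    Project(projectList,
      Join(CombineLimits(Limit(expr, left)), right, LeftOuter, joinCondition)))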

joinType match {
case RightOuter =>
Limit(expr, Join(left, CombineLimits(Limit(expr, right)), joinType, joinCondition))
Contributor

We need a stop condition so that we stop pushing the Limit once it has already been pushed.

Member Author

Thank you for your review! Since we call CombineLimits here to combine the two consecutive Limits, we will not add an extra Limit in the subsequent iteration. Thus, no change will be made to the plan, and RuleExecutor will stop automatically. Am I right?

Contributor

Ah, that's right, never mind.

Member Author

Thank you!

Member

That is right. However, I think checking whether the Limit has already been pushed would avoid unnecessarily applying this rule and CombineLimits multiple times. The result should be the same, though, so it is minor.
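
For illustration (hypothetical, not in this patch), such a check could be a pair of extra cases placed before the generic Limit-over-Join case, returning the original plan unchanged when the outer side is already limited:

// Hypothetical guard cases, placed before the generic case below:
case f @ Limit(_, Join(Limit(_, _), _, LeftOuter, _)) => f   // left side already limited
case f @ Limit(_, Join(_, Limit(_, _), RightOuter, _)) => f  // right side already limited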

case LeftOuter =>
Limit(expr, Join(CombineLimits(Limit(expr, left)), right, joinType, joinCondition))
case FullOuter =>
Limit(expr,
Join(
CombineLimits(Limit(expr, left)),
CombineLimits(Limit(expr, right)),
joinType, joinCondition))
case _ => f // Do nothing for the other join types
}
}
}
Contributor

Is it possible that we add an extra limit in every iteration?

Member Author

You are right, but after this rule there is another rule, CombineLimits, which can combine the extra Limits. I think we should still fix it anyway. Thanks!


/**
* Removes [[Cast Casts]] that are unnecessary because the input is already the correct type.
*/
@@ -21,7 +21,7 @@ import org.apache.spark.sql.catalyst.analysis
import org.apache.spark.sql.catalyst.analysis.EliminateSubQueries
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.plans.{LeftSemi, PlanTest, LeftOuter, RightOuter}
import org.apache.spark.sql.catalyst.plans.{FullOuter, LeftSemi, LeftOuter, PlanTest, RightOuter}
import org.apache.spark.sql.catalyst.rules._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.dsl.expressions._
@@ -41,6 +41,7 @@ class FilterPushdownSuite extends PlanTest {
PushPredicateThroughJoin,
PushPredicateThroughGenerate,
PushPredicateThroughAggregate,
PushLimitThroughOuterJoin,
ColumnPruning,
ProjectCollapsing) :: Nil
}
@@ -750,4 +751,56 @@ class FilterPushdownSuite extends PlanTest {

comparePlans(optimized, correctAnswer)
}

test("limit: push down left outer join") {
val x = testRelation.subquery('x)
val y = testRelation.subquery('y)

val originalQuery = {
x.join(y, LeftOuter)
.limit(1)
}

val optimized = Optimize.execute(originalQuery.analyze)
val left = testRelation.limit(1)
val correctAnswer =
left.join(y, LeftOuter).limit(1).analyze

comparePlans(optimized, correctAnswer)
}

test("limit: push down right outer join") {
val x = testRelation.subquery('x)
val y = testRelation.subquery('y)

val originalQuery = {
x.join(y, RightOuter)
.limit(1)
}

val optimized = Optimize.execute(originalQuery.analyze)
val right = testRelation.limit(1)
val correctAnswer =
x.join(right, RightOuter).limit(1).analyze

comparePlans(optimized, correctAnswer)
}

test("limit: push down full outer join") {
val x = testRelation.subquery('x)
val y = testRelation.subquery('y)

val originalQuery = {
x.join(y, FullOuter)
.limit(1)
}

val optimized = Optimize.execute(originalQuery.analyze)
val left = testRelation.limit(1)
val right = testRelation.limit(1)
val correctAnswer =
left.join(right, FullOuter).limit(1).analyze

comparePlans(optimized, correctAnswer)
}
}
@@ -17,7 +17,6 @@

package org.apache.spark.sql.catalyst.optimizer

import scala.collection.immutable.HashSet
import org.apache.spark.sql.catalyst.analysis.{EliminateSubQueries, UnresolvedAttribute}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}