Skip to content

Commit

Permalink
Implement the local intersect operator
Browse files Browse the repository at this point in the history
  • Loading branch information
zsxwing committed Sep 2, 2015
1 parent d1acc2a commit 4ccca2a
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.execution.local

import scala.collection.mutable

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.Attribute

/**
 * Local operator that emits the rows of `right` that also occur in `left`.
 *
 * `open()` drains `left` into an in-memory hash set of copied rows and then opens `right`.
 * `next()` streams `right` and accepts only rows found in that set. A matched row is removed
 * from the set as it is emitted, so each distinct row is produced at most once even when
 * `right` contains duplicates (set semantics, as in SQL INTERSECT).
 *
 * @param left  child whose rows are fully buffered during `open()`
 * @param right child that is streamed and probed against the buffered rows
 */
case class IntersectNode(left: LocalNode, right: LocalNode) extends BinaryLocalNode {

  /** The output attributes are taken from the left child. */
  override def output: Seq[Attribute] = left.output

  // Distinct left rows that have not been emitted yet; (re)built by open().
  private[this] var leftRows: mutable.HashSet[InternalRow] = _

  // Row most recently accepted by next(); null when no row is available.
  private[this] var currentRow: InternalRow = _

  /** Buffers every row of `left` into a hash set, closes `left`, then opens `right`. */
  override def open(): Unit = {
    left.open()
    leftRows = mutable.HashSet[InternalRow]()
    while (left.next()) {
      // Store a copy rather than the fetched object itself.
      // NOTE(review): presumably fetch() may return a reused row instance — confirm.
      leftRows += left.fetch().copy()
    }
    left.close()
    right.open()
  }

  /**
   * Advances `right` until it yields a row that is still present in the buffered left rows.
   *
   * @return true if such a row was found (available through `fetch()`), false once `right`
   *         is exhausted
   */
  override def next(): Boolean = {
    currentRow = null
    while (currentRow == null && right.next()) {
      val candidate = right.fetch()
      // remove() returns true only if the row was present; taking it out of the set
      // guarantees a later duplicate from `right` is not emitted a second time.
      if (leftRows.remove(candidate)) {
        currentRow = candidate
      }
    }
    currentRow != null
  }

  /** Returns the row accepted by the last successful `next()`, or null if there is none. */
  override def fetch(): InternalRow = currentRow

  /** Closes both children. */
  override def close(): Unit = {
    // `left` was already closed at the end of open(); it is closed again here.
    // NOTE(review): assumes LocalNode.close() is safe to call more than once — confirm.
    left.close()
    right.close()
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.execution.local

/** Checks [[IntersectNode]] against the result of `DataFrame.intersect` on the same inputs. */
class IntersectNodeSuite extends LocalNodeTest {

  test("basic") {
    // Left side holds keys 1..10; right side holds only the even keys of that range.
    val allKeys = 1 to 10
    val evenKeys = allKeys.filter(k => k % 2 == 0)

    val leftDF = allKeys.map(k => (k, k.toString)).toDF("key", "value")
    val rightDF = evenKeys.map(k => (k, k.toString)).toDF("key", "value")

    // Expected rows come from the regular DataFrame intersect on the same two inputs.
    val expectedRows = leftDF.intersect(rightDF).collect()

    checkAnswer2(
      leftDF,
      rightDF,
      (leftNode, rightNode) => IntersectNode(leftNode, rightNode),
      expectedRows
    )
  }
}

0 comments on commit 4ccca2a

Please sign in to comment.