Commit

add org.apache.spark.sql.internal.connector.SupportsPushDownCatalystFilters
huaxingao committed Aug 31, 2021
1 parent 095a7b4 commit 68ace26
Showing 3 changed files with 41 additions and 3 deletions.

SupportsPushDownCatalystFilters.scala (new file)
@@ -0,0 +1,32 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.internal.connector

import org.apache.spark.sql.catalyst.expressions.Expression

/**
* A mix-in interface for {@link FileScanBuilder}. This can be used to push down partitionFilters
* and dataFilters to FileIndex in the format of catalyst Expression.
*/
trait SupportsPushDownCatalystFilters {
/**
* Pushes down partitionFilters and dataFilters to FileIndex in the format of catalyst
* Expression. These catalyst Expression filters are used for partition pruning. The dataFilters
* are also translated into data source filters and used for selecting records.
*/
def pushCatalystFilters(partitionFilters: Seq[Expression], dataFilters: Seq[Expression]): Unit
}
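
For orientation, a concrete scan builder mixing in the new trait would typically just capture the pushed expressions so the eventual Scan can hand them to the FileIndex. The sketch below is illustrative only; ExampleScanBuilder is a made-up class and is not part of this commit.

import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.internal.connector.SupportsPushDownCatalystFilters

// Hypothetical scan builder showing how the mix-in could be implemented:
// the pushed catalyst expressions are simply remembered so they can later be
// handed to the FileIndex for partition pruning.
class ExampleScanBuilder extends SupportsPushDownCatalystFilters {
  private var partitionFilters: Seq[Expression] = Seq.empty
  private var dataFilters: Seq[Expression] = Seq.empty

  override def pushCatalystFilters(
      partitionFilters: Seq[Expression],
      dataFilters: Seq[Expression]): Unit = {
    this.partitionFilters = partitionFilters
    this.dataFilters = dataFilters
  }
}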
FileScanBuilder.scala
@@ -22,13 +22,17 @@ import org.apache.spark.sql.{sources, SparkSession}
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.connector.read.{ScanBuilder, SupportsPushDownRequiredColumns}
import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, PartitioningAwareFileIndex, PartitioningUtils}
import org.apache.spark.sql.internal.connector.SupportsPushDownCatalystFilters
import org.apache.spark.sql.sources.Filter
import org.apache.spark.sql.types.StructType

abstract class FileScanBuilder(
sparkSession: SparkSession,
fileIndex: PartitioningAwareFileIndex,
dataSchema: StructType) extends ScanBuilder with SupportsPushDownRequiredColumns {
dataSchema: StructType)
extends ScanBuilder
with SupportsPushDownRequiredColumns
with SupportsPushDownCatalystFilters {
private val partitionSchema = fileIndex.partitionSchema
private val isCaseSensitive = sparkSession.sessionState.conf.caseSensitiveAnalysis
protected val supportsNestedSchemaPruning = false
@@ -66,7 +70,9 @@ abstract class FileScanBuilder(

// Note: The partitionFilters and dataFilters need to be pushed to FileIndex in the format of
// Expression because partition pruning uses the Expression Filters, not sources.Filters.
def pushFilters(partitionFilters: Seq[Expression], dataFilters: Seq[Expression]): Unit = {
override def pushCatalystFilters(
partitionFilters: Seq[Expression],
dataFilters: Seq[Expression]): Unit = {
this.partitionFilters = partitionFilters
this.dataFilters = dataFilters
val translatedFilters = mutable.ArrayBuffer.empty[sources.Filter]
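
The rest of this hunk is collapsed; the diff stops right after translatedFilters is created. As background, a data-filter translation step of this kind can be sketched as follows. This is not the commit's actual code: the helper name translateDataFilters is made up, and DataSourceStrategy.translateFilter(predicate, supportNestedPredicatePushdown) is assumed to be the translation utility used.

import scala.collection.mutable

import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.execution.datasources.DataSourceStrategy
import org.apache.spark.sql.sources

// Hedged sketch: convert catalyst data filters into data source filters.
// Expressions with no sources.Filter equivalent are dropped, which is safe
// because they only serve as an optimization for record selection.
def translateDataFilters(dataFilters: Seq[Expression]): Array[sources.Filter] = {
  val translated = mutable.ArrayBuffer.empty[sources.Filter]
  dataFilters.foreach { expr =>
    // The boolean flag controls nested (struct field) predicate pushdown.
    DataSourceStrategy.translateFilter(expr, true).foreach(translated += _)
  }
  translated.toArray
}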
PushDownUtils.scala
@@ -74,7 +74,7 @@ object PushDownUtils extends PredicateHelper {
val (partitionFilters, dataFilters) =
DataSourceUtils.getPartitionKeyFiltersAndDataFilters(
f.getSparkSession, scanBuilderHolder.relation, f.readPartitionSchema(), filters)
f.pushFilters(ExpressionSet(partitionFilters).toSeq, dataFilters)
f.pushCatalystFilters(ExpressionSet(partitionFilters).toSeq, dataFilters)
(Nil, dataFilters)
case _ => (Nil, filters)
}
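
One detail worth noting in the call site above: wrapping the partition filters in ExpressionSet before .toSeq deduplicates semantically equal predicates, so the same filter is not pushed down twice. A small, hedged illustration follows; the column p and the predicates are made up for the example.

import org.apache.spark.sql.catalyst.expressions.{Add, AttributeReference, ExpressionSet, GreaterThan, Literal}
import org.apache.spark.sql.types.IntegerType

// A made-up partition column.
val p = AttributeReference("p", IntegerType)()

// Structurally different but semantically equal: Add is commutative, so both
// predicates canonicalize to the same form inside ExpressionSet.
val f1 = GreaterThan(Add(p, Literal(1)), Literal(10))
val f2 = GreaterThan(Add(Literal(1), p), Literal(10))

val deduped = ExpressionSet(Seq(f1, f2))
assert(deduped.size == 1)  // only one copy of the predicate survives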
