Commit 42d11ca

Merge e89b957 into 6a9437b

karenfeng committed Apr 22, 2019
2 parents 6a9437b + e89b957
Showing 5 changed files with 35 additions and 22 deletions.
adam-cli/src/main/scala/org/bdgenomics/adam/cli/TransformAlignments.scala

@@ -17,9 +17,12 @@
  */
 package org.bdgenomics.adam.cli
 
-import htsjdk.samtools.ValidationStringency
 import java.time.Instant
-import org.apache.parquet.filter2.dsl.Dsl._
+import java.lang.{ Boolean => JBoolean }
+
+import htsjdk.samtools.ValidationStringency
+import org.apache.parquet.filter2.predicate.FilterApi
+import org.apache.parquet.filter2.predicate.Operators.BooleanColumn
 import org.apache.spark.SparkContext
 import org.apache.spark.storage.StorageLevel
 import org.bdgenomics.adam.algorithms.consensus._
@@ -478,7 +481,8 @@ class TransformAlignments(protected val args: TransformAlignmentsArgs) extends B
       args.useAlignedReadPredicate ||
       args.limitProjection) {
       val pred = if (args.useAlignedReadPredicate) {
-        Some(BooleanColumn("readMapped") === true)
+        Some(FilterApi.eq[JBoolean, BooleanColumn](
+          FilterApi.booleanColumn("readMapped"), true))
       } else if (args.regionPredicate != null) {
         Some(ReferenceRegion.createPredicate(
           ReferenceRegion.fromString(args.regionPredicate).toSeq: _*
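This hunk shows the commit's core migration pattern: the parquet-scala DSL's overloaded operators (===, &&, ||) give way to explicit calls on Parquet's Java FilterApi, whose type parameters must be the boxed java.lang types. A minimal self-contained sketch of the equivalence, using the column name from the hunk above (the wrapping object is mine, added only so the snippet compiles on its own):

    import java.lang.{ Boolean => JBoolean }

    import org.apache.parquet.filter2.predicate.FilterApi
    import org.apache.parquet.filter2.predicate.FilterPredicate
    import org.apache.parquet.filter2.predicate.Operators.BooleanColumn

    object ReadMappedPredicate {
      // Old DSL form (removed along with parquet-scala):
      //   BooleanColumn("readMapped") === true
      // New form; scala.Boolean auto-boxes to java.lang.Boolean:
      val pred: FilterPredicate = FilterApi.eq[JBoolean, BooleanColumn](
        FilterApi.booleanColumn("readMapped"), true)
    }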
adam-core/pom.xml (0 additions & 5 deletions)

@@ -242,11 +242,6 @@
       <artifactId>parquet-avro</artifactId>
       <scope>compile</scope>
     </dependency>
-    <dependency>
-      <groupId>org.apache.parquet</groupId>
-      <artifactId>parquet-scala_2.10</artifactId>
-      <scope>compile</scope>
-    </dependency>
     <dependency>
       <groupId>org.seqdoop</groupId>
       <artifactId>hadoop-bam</artifactId>
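The artifact removed above, parquet-scala_2.10, is what provided the org.apache.parquet.filter2.dsl.Dsl._ implicits, and as its id suggests it is built against Scala 2.10 only. FilterApi, by contrast, lives in parquet-column, which adam-core should already have on the classpath via parquet-avro, so the swap costs no new dependency; only the imports change:

    // Before (pulled in by the parquet-scala_2.10 artifact):
    //   import org.apache.parquet.filter2.dsl.Dsl._
    // After (ships with parquet-column, a dependency of parquet-avro):
    import org.apache.parquet.filter2.predicate.FilterApi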
adam-core/src/main/scala/org/bdgenomics/adam/models/ReferenceRegion.scala

@@ -17,10 +17,13 @@
  */
 package org.bdgenomics.adam.models
 
+import java.lang.{ Long => JLong }
+
 import com.esotericsoftware.kryo.io.{ Input, Output }
 import com.esotericsoftware.kryo.{ Kryo, Serializer }
-import org.apache.parquet.filter2.dsl.Dsl._
-import org.apache.parquet.filter2.predicate.FilterPredicate
+import org.apache.parquet.filter2.predicate.{ FilterApi, FilterPredicate }
+import org.apache.parquet.filter2.predicate.Operators.{ BinaryColumn, LongColumn }
+import org.apache.parquet.io.api.Binary
 import org.bdgenomics.formats.avro._
 import org.bdgenomics.utils.interval.array.Interval
 import scala.math.{ max, min }
@@ -373,7 +376,7 @@ object ReferenceRegion {
"Cannot create a predicate from an empty set of regions.")
regions.toIterable
.map(_.toPredicate)
.reduce(_ || _)
.reduce(FilterApi.or)
}
}

@@ -745,9 +748,12 @@ case class ReferenceRegion(
    * region.
    */
   def toPredicate: FilterPredicate = {
-    ((LongColumn("end") > start) &&
-      (LongColumn("start") <= end) &&
-      (BinaryColumn("referenceName") === referenceName))
+    FilterApi.and(
+      FilterApi.and(
+        FilterApi.gt[JLong, LongColumn](FilterApi.longColumn("end"), start),
+        FilterApi.ltEq[JLong, LongColumn](FilterApi.longColumn("start"), end)),
+      FilterApi.eq[Binary, BinaryColumn](
+        FilterApi.binaryColumn("referenceName"), Binary.fromString(referenceName)))
   }
 
   override def hashCode: Int = {
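Two things change in this file: each per-region predicate becomes a nest of FilterApi.and/gt/ltEq/eq calls, and createPredicate folds the per-region predicates together with FilterApi.or, which, being a plain binary function, can be handed directly to reduce. A hedged usage sketch; the regions are illustrative, and the ReferenceRegion(name, start, end) call is my assumption about the case-class apply:

    import org.bdgenomics.adam.models.ReferenceRegion

    // Each toPredicate keeps records overlapping its region; createPredicate
    // ORs them, i.e. it yields FilterApi.or(r1.toPredicate, r2.toPredicate).
    val r1 = ReferenceRegion("chr1", 0L, 1000L)
    val r2 = ReferenceRegion("chr2", 500L, 1500L)
    val pred = ReferenceRegion.createPredicate(r1, r2)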
adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala

@@ -23,10 +23,11 @@ import htsjdk.samtools.{
   ValidationStringency
 }
 import java.io.{ File, FileNotFoundException }
+import java.lang.{ Long => JLong }
 import com.google.common.io.Files
 import org.apache.hadoop.fs.Path
-import org.apache.parquet.filter2.dsl.Dsl._
-import org.apache.parquet.filter2.predicate.FilterPredicate
+import org.apache.parquet.filter2.predicate.Operators.LongColumn
+import org.apache.parquet.filter2.predicate.{ FilterApi, FilterPredicate }
 import org.apache.parquet.hadoop.metadata.CompressionCodecName
 import org.apache.spark.rdd.RDD
 import org.bdgenomics.adam.models._
@@ -355,7 +356,8 @@ class ADAMContextSuite extends ADAMFunSuite {
     val loc = tmpLocation()
     variants.saveAsParquet(loc, 1024, 1024) // force more than one row group (block)
 
-    val pred: FilterPredicate = (LongColumn("start") === 16097631L)
+    val pred: FilterPredicate = FilterApi.eq[JLong, LongColumn](
+      FilterApi.longColumn("start"), 16097631L)
     // the following only reads one row group
     val adamVariants = sc.loadParquetVariants(loc, optPredicate = Some(pred))
     assert(adamVariants.rdd.count === 1)
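The test above is exercising predicate pushdown: FilterApi predicates are evaluated by parquet-mr against each row group's column statistics, so row groups whose min/max range on "start" cannot contain 16097631 are never read, which is why the comment promises a single row-group read. The same pattern against a hypothetical path, assuming ADAM's implicit ADAMContext conversions are in scope on sc:

    import java.lang.{ Long => JLong }

    import org.apache.parquet.filter2.predicate.FilterApi
    import org.apache.parquet.filter2.predicate.FilterPredicate
    import org.apache.parquet.filter2.predicate.Operators.LongColumn

    val pred: FilterPredicate = FilterApi.eq[JLong, LongColumn](
      FilterApi.longColumn("start"), 16097631L)
    // Only row groups whose statistics admit the value are read from disk.
    val variants = sc.loadParquetVariants("/data/variants.adam",
      optPredicate = Some(pred))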
adam-core/src/test/scala/org/bdgenomics/adam/rdd/contig/NucleotideContigFragmentDatasetSuite.scala

@@ -18,10 +18,12 @@
 package org.bdgenomics.adam.rdd.contig
 
 import java.io.File
+import java.lang.{ Long => JLong }
 
 import com.google.common.io.Files
-import org.apache.parquet.filter2.dsl.Dsl._
-import org.apache.parquet.filter2.predicate.FilterPredicate
+import org.apache.parquet.filter2.predicate.Operators.{ BinaryColumn, LongColumn }
+import org.apache.parquet.filter2.predicate.{ FilterApi, FilterPredicate }
+import org.apache.parquet.io.api.Binary
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.{ Dataset, SQLContext }
 import org.bdgenomics.adam.models._
@@ -703,9 +705,13 @@ class NucleotideContigFragmentDatasetSuite extends ADAMFunSuite {
     val fragments3 = sc.loadContigFragments(output,
       optPredicate = Some(
         // ReferenceRegion.toPredicate uses referenceName instead of contigName
-        (BinaryColumn("contigName") === "HLA-DQB1*05:01:01:02") &&
-          (LongColumn("end") > 500L) &&
-          (LongColumn("start") <= 1500L)
+        FilterApi.and(
+          FilterApi.and(
+            FilterApi.eq[Binary, BinaryColumn](
+              FilterApi.binaryColumn("contigName"),
+              Binary.fromString("HLA-DQB1*05:01:01:02")),
+            FilterApi.gt[JLong, LongColumn](FilterApi.longColumn("end"), 500L)),
+          FilterApi.ltEq[JLong, LongColumn](FilterApi.longColumn("start"), 1500L))
       )
     )
     assert(fragments3.rdd.count === 2)
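Without the DSL's infix &&, every extra conjunct adds another level of FilterApi.and nesting, as the hunk above shows. A small hypothetical helper, not part of this commit, flattens such chains in the same spirit as the FilterApi.or reduction in ReferenceRegion.createPredicate:

    import org.apache.parquet.filter2.predicate.{ FilterApi, FilterPredicate }

    object Predicates {
      // AND together any non-empty sequence of predicates, left-associatively.
      def andAll(preds: FilterPredicate*): FilterPredicate =
        preds.reduce(FilterApi.and)
    }

The three-clause filter above would then read Predicates.andAll(contigNameEq, endGt, startLtEq).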
