Skip to content

Commit

Permalink
Support Apache Spark 3.x in build (#2255)
Browse files: browse the repository at this point in the history
* add move-to-Spark-2 and move-to-Spark-3 scripts

* fixes for removed deprecated code

* fixes for code removed in Spark 3

* add missing file

* -print_metrics was removed

* add -v to curl

* use lazy val for spark session

* Update bdg-formats to 0.3.0, remove hadoop 2.6 from jenkins-test

* move to Spark version 3.0.0

* move SCALAVER to SCALA_VERSION

* use without-hadoop binary only for Scala 2.12 and Spark 2.4.6

* fix syntax

* R build is not supported on Spark 3.0.0
  • Loading branch information
heuermh committed Jun 26, 2020
1 parent 50797dc commit de67bf2
Show file tree
Hide file tree
Showing 49 changed files with 525 additions and 750 deletions.
5 changes: 0 additions & 5 deletions adam-cli/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -143,11 +143,6 @@
<artifactId>utils-cli-spark2_${scala.version.prefix}</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.bdgenomics.utils</groupId>
<artifactId>utils-metrics-spark2_${scala.version.prefix}</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.bdgenomics.bdg-formats</groupId>
<artifactId>bdg-formats</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.storage.StorageLevel
import org.bdgenomics.adam.algorithms.consensus._
import org.bdgenomics.adam.cli.FileSystemUtils._
import org.bdgenomics.adam.instrumentation.Timers._
import org.bdgenomics.adam.io.FastqRecordReader
import org.bdgenomics.adam.models.{ ReferenceRegion, SnpTable }
import org.bdgenomics.adam.projections.{ AlignmentField, Filter }
Expand Down Expand Up @@ -334,9 +333,7 @@ class TransformAlignments(protected val args: TransformAlignmentsArgs) extends B

// create the known sites file, if one is available
val knownSnps: SnpTable = createKnownSnpsTable(ds.rdd.context)
val broadcastedSnps = BroadcastingKnownSnps.time {
ds.rdd.context.broadcast(knownSnps)
}
val broadcastedSnps = ds.rdd.context.broadcast(knownSnps)

// run bqsr
val bqsredDs = ds.recalibrateBaseQualities(
Expand Down
5 changes: 0 additions & 5 deletions adam-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -169,11 +169,6 @@
<artifactId>utils-misc-spark2_${scala.version.prefix}</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.bdgenomics.utils</groupId>
<artifactId>utils-metrics-spark2_${scala.version.prefix}</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.bdgenomics.utils</groupId>
<artifactId>utils-io-spark2_${scala.version.prefix}</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
package org.bdgenomics.adam.converters

import htsjdk.samtools.{ SAMFileHeader, SAMRecord }
import org.bdgenomics.adam.instrumentation.Timers._
import org.bdgenomics.adam.models._
import org.bdgenomics.adam.rich.RichAlignment
import org.bdgenomics.formats.avro.{ Alignment, Fragment }
Expand Down Expand Up @@ -239,7 +238,7 @@ class AlignmentConverter extends Serializable {
*/
def convert(adamRecord: Alignment,
header: SAMFileHeader,
rgd: ReadGroupDictionary): SAMRecord = ConvertToSAMRecord.time {
rgd: ReadGroupDictionary): SAMRecord = {

// attach header
val builder: SAMRecord = new SAMRecord(header)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package org.bdgenomics.adam.converters

import com.google.common.base.Splitter
import com.google.common.collect.ImmutableList
import grizzled.slf4j.Logging
import htsjdk.samtools.ValidationStringency
import htsjdk.variant.variantcontext.{
Allele,
Expand All @@ -41,7 +42,7 @@ import htsjdk.variant.vcf.{
}
import java.util.Collections
import org.apache.hadoop.conf.Configuration
import org.bdgenomics.utils.misc.{ Logging, MathUtils }
import org.bdgenomics.utils.misc.MathUtils
import org.bdgenomics.adam.models.{
SequenceDictionary,
VariantContext => ADAMVariantContext
Expand Down Expand Up @@ -410,7 +411,7 @@ class VariantContextConverter(
throw t
} else {
if (stringency == ValidationStringency.LENIENT) {
log.warn("Caught exception %s when converting %s.".format(t, vc))
logger.warn("Caught exception %s when converting %s.".format(t, vc))
}
Seq.empty
}
Expand Down Expand Up @@ -921,7 +922,7 @@ class VariantContextConverter(
gb.setGenotypeLikelihoods(likelihoods)
} catch {
case _: ArrayIndexOutOfBoundsException => {
log.warn("Ran into Array Out of Bounds when accessing indices %s of genotype %s.".format(gIndices.mkString(","), g))
logger.warn("Ran into Array Out of Bounds when accessing indices %s of genotype %s.".format(gIndices.mkString(","), g))
gb
}
}
Expand Down Expand Up @@ -1099,7 +1100,7 @@ class VariantContextConverter(
g.getVariant.getAlternateAllele != null ||
nls.isEmpty) {
if (nls.nonEmpty) {
log.warn("Expected empty non-reference likelihoods for genotype with empty likelihoods (%s).".format(g))
logger.warn("Expected empty non-reference likelihoods for genotype with empty likelihoods (%s).".format(g))
}
gb.noPL
} else {
Expand Down Expand Up @@ -1532,7 +1533,7 @@ class VariantContextConverter(
}
case (false, VCFHeaderLineCount.G) => {
if (stringency == ValidationStringency.LENIENT) {
log.warn("Ignoring INFO field with Number=G described in header row: %s".format(headerLine))
logger.warn("Ignoring INFO field with Number=G described in header row: %s".format(headerLine))
None
} else
throw new IllegalArgumentException("Number=G INFO lines are not supported in split-allelic model: %s".format(
Expand Down Expand Up @@ -1659,7 +1660,7 @@ class VariantContextConverter(
throw t
} else {
if (stringency == ValidationStringency.LENIENT) {
log.warn("Saw invalid info field %s. Ignoring...".format(t))
logger.warn("Saw invalid info field %s. Ignoring...".format(t))
}
None
}
Expand Down Expand Up @@ -1705,7 +1706,7 @@ class VariantContextConverter(
throw t
} else {
if (stringency == ValidationStringency.LENIENT) {
log.warn("Converting variant field from %s with function %s line %s failed: %s".format(vc, fn, t))
logger.warn("Converting variant field from %s with function %s line %s failed: %s".format(vc, fn, t))
}
vb
}
Expand All @@ -1730,7 +1731,7 @@ class VariantContextConverter(
throw t
} else {
if (stringency == ValidationStringency.LENIENT) {
log.warn("Generating annotation tag from line %s with function %sfailed: %s".format(vab, fn, t))
logger.warn("Generating annotation tag from line %s with function %sfailed: %s".format(vab, fn, t))
}
vab
}
Expand Down Expand Up @@ -1833,7 +1834,7 @@ class VariantContextConverter(
throw t
} else {
if (stringency == ValidationStringency.LENIENT) {
log.warn("Converting genotype field in %s with function %s failed: %s".format(
logger.warn("Converting genotype field in %s with function %s failed: %s".format(
g, fn, t))
}
gb
Expand Down Expand Up @@ -1868,7 +1869,7 @@ class VariantContextConverter(
throw t
} else {
if (stringency == ValidationStringency.LENIENT) {
log.warn("Converting genotype annotation field in %s with function %s failed: %s".format(
logger.warn("Converting genotype annotation field in %s with function %s failed: %s".format(
g, fn, t))
}
vcab
Expand Down Expand Up @@ -2166,7 +2167,7 @@ class VariantContextConverter(
throw t
} else {
if (stringency == ValidationStringency.LENIENT) {
log.warn("Generating field extractor from header line %s failed: %s".format(il, t))
logger.warn("Generating field extractor from header line %s failed: %s".format(il, t))
}
None
}
Expand Down Expand Up @@ -2198,7 +2199,7 @@ class VariantContextConverter(
throw t
} else {
if (stringency == ValidationStringency.LENIENT) {
log.warn("Applying extraction function %s to %s failed with %s.".format(fn, v, t))
logger.warn("Applying extraction function %s to %s failed with %s.".format(fn, v, t))
}
vcb
}
Expand Down Expand Up @@ -2227,7 +2228,7 @@ class VariantContextConverter(
throw t
} else {
if (stringency == ValidationStringency.LENIENT) {
log.warn("Applying annotation extraction function %s to %s failed with %s.".format(fn, v, t))
logger.warn("Applying annotation extraction function %s to %s failed with %s.".format(fn, v, t))
}
vcb
}
Expand Down Expand Up @@ -2269,7 +2270,7 @@ class VariantContextConverter(
throw t
} else {
if (stringency == ValidationStringency.LENIENT) {
log.warn("Generating field extractor from header line %s failed: %s".format(fl, t))
logger.warn("Generating field extractor from header line %s failed: %s".format(fl, t))
}
None
}
Expand Down Expand Up @@ -2297,7 +2298,7 @@ class VariantContextConverter(
throw t
} else {
if (stringency == ValidationStringency.LENIENT) {
log.warn("Applying annotation extraction function %s to %s failed with %s.".format(fn, g, t))
logger.warn("Applying annotation extraction function %s to %s failed with %s.".format(fn, g, t))
}

gb
Expand All @@ -2321,7 +2322,7 @@ class VariantContextConverter(
throw t
} else {
if (stringency == ValidationStringency.LENIENT) {
log.warn("Applying annotation extraction function %s to %s failed with %s.".format(fn, vca, t))
logger.warn("Applying annotation extraction function %s to %s failed with %s.".format(fn, vca, t))
}

gb
Expand All @@ -2347,7 +2348,7 @@ class VariantContextConverter(
throw t
} else {
if (stringency == ValidationStringency.LENIENT) {
log.warn("Applying attribute extraction function %s to %s failed with %s.".format(fn, vca, t))
logger.warn("Applying attribute extraction function %s to %s failed with %s.".format(fn, vca, t))
}

gb
Expand Down Expand Up @@ -2385,7 +2386,7 @@ class VariantContextConverter(
throw t
} else {
if (stringency == ValidationStringency.LENIENT) {
log.error(
logger.error(
"Encountered error %s when converting variant context with variant:\n%s\nand genotypes: \n%s".format(t, vc.variant.variant, vc.genotypes.mkString("\n")))
}
None
Expand Down

This file was deleted.

0 comments on commit de67bf2

Please sign in to comment.