Skip to content
Permalink
Browse files

Update Spark version to 2.4.3; add a script for moving the build to Scala 2.12.

  • Loading branch information...
heuermh committed Sep 21, 2018
1 parent 6e6ac8d commit 9841310b6e4b6dba1ca0b5e6167843f3c1c9a402
Showing with 1,562 additions and 320 deletions.
  1. +14 −0 adam-assembly/pom.xml
  2. +4 −2 adam-cli/src/main/scala/org/bdgenomics/adam/cli/ADAM2Fasta.scala
  3. +3 −1 adam-cli/src/main/scala/org/bdgenomics/adam/cli/ADAM2Fastq.scala
  4. +3 −1 adam-cli/src/main/scala/org/bdgenomics/adam/cli/CountReadKmers.scala
  5. +3 −1 adam-cli/src/main/scala/org/bdgenomics/adam/cli/Fasta2ADAM.scala
  6. +4 −2 adam-cli/src/main/scala/org/bdgenomics/adam/cli/Reads2Coverage.scala
  7. +67 −66 adam-cli/src/main/scala/org/bdgenomics/adam/cli/TransformAlignments.scala
  8. +15 −12 adam-cli/src/main/scala/org/bdgenomics/adam/cli/TransformGenotypes.scala
  9. +15 −12 adam-cli/src/main/scala/org/bdgenomics/adam/cli/TransformVariants.scala
  10. +1 −1 adam-cli/src/main/scala/org/bdgenomics/adam/cli/View.scala
  11. +1 −1 adam-cli/src/test/scala/org/bdgenomics/adam/cli/ADAM2FastqSuite.scala
  12. +471 −0 adam-core/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java
  13. +2 −1 adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMContext.scala
  14. +4 −4 adam-core/src/main/scala/org/bdgenomics/adam/rdd/GenomicDataset.scala
  15. +2 −3 adam-core/src/main/scala/org/bdgenomics/adam/rdd/contig/NucleotideContigFragmentDataset.scala
  16. +1 −1 adam-core/src/main/scala/org/bdgenomics/adam/rdd/feature/CoverageDataset.scala
  17. +12 −12 adam-core/src/main/scala/org/bdgenomics/adam/rdd/feature/FeatureDataset.scala
  18. +2 −2 adam-core/src/main/scala/org/bdgenomics/adam/rdd/fragment/FragmentDataset.scala
  19. +15 −16 adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordDataset.scala
  20. +3 −2 adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/BinQualities.scala
  21. +9 −9 adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/GenotypeDataset.scala
  22. +10 −10 adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/VariantDataset.scala
  23. +57 −44 adam-core/src/main/scala/org/bdgenomics/adam/serialization/ADAMKryoRegistrator.scala
  24. +10 −10 adam-core/src/test/scala/org/bdgenomics/adam/rdd/contig/NucleotideContigFragmentDatasetSuite.scala
  25. +1 −1 adam-core/src/test/scala/org/bdgenomics/adam/rdd/feature/CoverageDatasetSuite.scala
  26. +14 −14 adam-core/src/test/scala/org/bdgenomics/adam/rdd/feature/FeatureDatasetSuite.scala
  27. +13 −13 adam-core/src/test/scala/org/bdgenomics/adam/rdd/fragment/FragmentDatasetSuite.scala
  28. +19 −17 adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordDatasetSuite.scala
  29. +13 −13 adam-core/src/test/scala/org/bdgenomics/adam/rdd/variant/GenotypeDatasetSuite.scala
  30. +8 −7 adam-core/src/test/scala/org/bdgenomics/adam/rdd/variant/VariantContextDatasetSuite.scala
  31. +13 −13 adam-core/src/test/scala/org/bdgenomics/adam/rdd/variant/VariantDatasetSuite.scala
  32. +5 −3 adam-core/src/test/scala/org/bdgenomics/adam/util/ParallelFileMergerSuite.scala
  33. +47 −0 adam-shade/pom.xml
  34. +677 −0 adam-shade/src/main/java/org/bdgenomics/adam/shade/ParquetAvroWorkaroundShader.java
  35. +5 −20 pom.xml
  36. +2 −6 scripts/move_to_scala_2.11.sh
  37. +17 −0 scripts/move_to_scala_2.12.sh
@@ -39,6 +39,13 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<dependencies>
<dependency>
<groupId>org.bdgenomics.adam</groupId>
<artifactId>adam-shade-spark2_${scala.version.prefix}</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
<configuration>
<createDependencyReducedPom>false</createDependencyReducedPom>
<filters>
@@ -51,6 +58,12 @@
</excludes>
</filter>
</filters>
<relocations>
<relocation>
<pattern>org.apache.parquet.avro</pattern>
<shadedPattern>org.bdgenomics.adam.shaded.org.apache.parquet.avro</shadedPattern>
</relocation>
</relocations>
</configuration>
<executions>
<execution>
@@ -59,6 +72,7 @@
<goal>shade</goal>
</goals>
<configuration>
<shaderHint>workaround</shaderHint>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
</transformers>
@@ -19,8 +19,10 @@ package org.bdgenomics.adam.cli

import grizzled.slf4j.Logging
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.bdgenomics.adam.cli.FileSystemUtils._
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.formats.avro.NucleotideContigFragment
import org.bdgenomics.utils.cli._
import org.kohsuke.args4j.{ Argument, Option => Args4jOption }

@@ -63,9 +65,9 @@ class ADAM2Fasta(val args: ADAM2FastaArgs) extends BDGSparkCommand[ADAM2FastaArg

val cc = if (args.coalesce > 0) {
if (args.coalesce > contigs.rdd.partitions.length || args.forceShuffle) {
contigs.transform(_.coalesce(args.coalesce, shuffle = true))
contigs.transform((rdd: RDD[NucleotideContigFragment]) => rdd.coalesce(args.coalesce, shuffle = true))
} else {
contigs.transform(_.coalesce(args.coalesce, shuffle = false))
contigs.transform((rdd: RDD[NucleotideContigFragment]) => rdd.coalesce(args.coalesce, shuffle = false))
}
} else {
contigs
@@ -19,10 +19,12 @@ package org.bdgenomics.adam.cli

import htsjdk.samtools.ValidationStringency
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.storage.StorageLevel
import org.bdgenomics.adam.cli.FileSystemUtils._
import org.bdgenomics.adam.projections.{ AlignmentRecordField, Projection }
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.formats.avro.AlignmentRecord
import org.bdgenomics.utils.cli._
import org.kohsuke.args4j.{ Argument, Option => Args4jOption }

@@ -81,7 +83,7 @@ class ADAM2Fastq(val args: ADAM2FastqArgs) extends BDGSparkCommand[ADAM2FastqArg

if (args.repartition != -1) {
info("Repartitioning reads to to '%d' partitions".format(args.repartition))
reads = reads.transform(_.repartition(args.repartition))
reads = reads.transform((rdd: RDD[AlignmentRecord]) => rdd.repartition(args.repartition))
}

reads.saveAsFastq(
@@ -19,9 +19,11 @@ package org.bdgenomics.adam.cli

import grizzled.slf4j.Logging
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.bdgenomics.adam.cli.FileSystemUtils._
import org.bdgenomics.adam.projections.{ AlignmentRecordField, Projection }
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.formats.avro.AlignmentRecord
import org.bdgenomics.utils.cli._
import org.kohsuke.args4j.{ Argument, Option => Args4jOption }

@@ -61,7 +63,7 @@ class CountReadKmers(protected val args: CountReadKmersArgs) extends BDGSparkCom

if (args.repartition != -1) {
info("Repartitioning reads to '%d' partitions".format(args.repartition))
adamRecords = adamRecords.transform(_.repartition(args.repartition))
adamRecords = adamRecords.transform((rdd: RDD[AlignmentRecord]) => rdd.repartition(args.repartition))
}

// count kmers
@@ -19,8 +19,10 @@ package org.bdgenomics.adam.cli

import grizzled.slf4j.Logging
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.bdgenomics.adam.cli.FileSystemUtils._
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.formats.avro.NucleotideContigFragment
import org.bdgenomics.utils.cli._
import org.kohsuke.args4j.{ Argument, Option => Args4jOption }

@@ -63,7 +65,7 @@ class Fasta2ADAM(protected val args: Fasta2ADAMArgs) extends BDGSparkCommand[Fas

info("Writing records to disk.")
val finalFasta = if (args.partitions > 0) {
adamFasta.transform(_.repartition(args.partitions))
adamFasta.transform((rdd: RDD[NucleotideContigFragment]) => rdd.repartition(args.partitions))
} else {
adamFasta
}
@@ -18,11 +18,13 @@
package org.bdgenomics.adam.cli

import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.bdgenomics.adam.cli.FileSystemUtils._
import org.bdgenomics.adam.projections.AlignmentRecordField._
import org.bdgenomics.adam.projections.Projection
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.adam.rdd.read.AlignmentRecordDataset
import org.bdgenomics.formats.avro.AlignmentRecord
import org.bdgenomics.utils.cli._
import org.kohsuke.args4j.{ Argument, Option => Args4jOption }

@@ -82,9 +84,9 @@ class Reads2Coverage(protected val args: Reads2CoverageArgs) extends BDGSparkCom
val readsRdd: AlignmentRecordDataset = sc.loadAlignments(args.inputPath)

val finalReads = if (args.onlyNegativeStrands && !args.onlyPositiveStrands) {
readsRdd.transform(rdd => rdd.filter(_.getReadNegativeStrand))
readsRdd.transform((rdd: RDD[AlignmentRecord]) => rdd.filter(_.getReadNegativeStrand))
} else if (!args.onlyNegativeStrands && args.onlyPositiveStrands) {
readsRdd.transform(rdd => rdd.filter(!_.getReadNegativeStrand))
readsRdd.transform((rdd: RDD[AlignmentRecord]) => rdd.filter(!_.getReadNegativeStrand))
} else {
readsRdd
}

0 comments on commit 9841310

Please sign in to comment.
You can’t perform that action at this time.