bigdatagenomics · gunjanbaid · Oct 11, 2017 · Oct 11, 2017 · Oct 11, 2017 · Oct 11, 2017
diff --git a/adam-apis/pom.xml b/adam-apis/pom.xml
@@ -33,7 +33,7 @@
               As explained here: http://stackoverflow.com/questions/1660441/java-flag-to-enable-extended-serialization-debugging-info
               The second option allows us better debugging for serialization-based errors.
           -->
-          <argLine>-Xmx1024m -Dsun.io.serialization.extendedDebugInfo=true</argLine>
+          <argLine>-Xmx1024m -Dsun.io.serialization.extendedDebugInfo=true -Djava.io.tmpdir=/tmp/adamTestMvnNTasIKA</argLine>
           <stdout>F</stdout>
         </configuration>
         <executions>

diff --git a/adam-cli/pom.xml b/adam-cli/pom.xml
@@ -60,7 +60,7 @@
               As explained here: http://stackoverflow.com/questions/1660441/java-flag-to-enable-extended-serialization-debugging-info
               The second option allows us better debugging for serialization-based errors.
           -->
-          <argLine>-Xmx1024m -Dsun.io.serialization.extendedDebugInfo=true</argLine>
+          <argLine>-Xmx1024m -Dsun.io.serialization.extendedDebugInfo=true -Djava.io.tmpdir=/tmp/adamTestMvnNTasIKA</argLine>
           <stdout>F</stdout>
         </configuration>
         <executions>

diff --git a/adam-codegen/pom.xml b/adam-codegen/pom.xml
@@ -66,7 +66,7 @@
               As explained here: http://stackoverflow.com/questions/1660441/java-flag-to-enable-extended-serialization-debugging-info
               The second option allows us better debugging for serialization-based errors.
             -->
-          <argLine>-Xmx1024m -Dsun.io.serialization.extendedDebugInfo=true</argLine>
+          <argLine>-Xmx1024m -Dsun.io.serialization.extendedDebugInfo=true -Djava.io.tmpdir=/tmp/adamTestMvnNTasIKA</argLine>
           <stdout>F</stdout>
         </configuration>
         <executions>

diff --git a/adam-core/pom.xml b/adam-core/pom.xml
@@ -33,7 +33,7 @@
               As explained here: http://stackoverflow.com/questions/1660441/java-flag-to-enable-extended-serialization-debugging-info
               The second option allows us better debugging for serialization-based errors.
           -->
-          <argLine>-Xmx1024m -Dsun.io.serialization.extendedDebugInfo=true</argLine>
+          <argLine>-Xmx1024m -Dsun.io.serialization.extendedDebugInfo=true -Djava.io.tmpdir=/tmp/adamTestMvnNTasIKA</argLine>
           <stdout>F</stdout>
         </configuration>
         <executions>

diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/GenomicRDD.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/GenomicRDD.scala
@@ -464,11 +464,13 @@ trait GenomicRDD[T, U <: GenomicRDD[T, U]] extends Logging {
   def pipe[X, Y <: GenomicRDD[X, Y], V <: InFormatter[T, U, V]](cmd: String,
                                                                 files: Seq[String] = Seq.empty,
                                                                 environment: Map[String, String] = Map.empty,
-                                                                flankSize: Int = 0)(implicit tFormatterCompanion: InFormatterCompanion[T, U, V],
-                                                                                    xFormatter: OutFormatter[X],
-                                                                                    convFn: (U, RDD[X]) => Y,
-                                                                                    tManifest: ClassTag[T],
-                                                                                    xManifest: ClassTag[X]): Y = {
+                                                                flankSize: Int = 0,
+                                                                repartitionInput: Boolean = true,
+                                                                filterOutput: Boolean = true)(implicit tFormatterCompanion: InFormatterCompanion[T, U, V],
+                                                                                              xFormatter: OutFormatter[X],
+                                                                                              convFn: (U, RDD[X]) => Y,
+                                                                                              tManifest: ClassTag[T],
+                                                                                              xManifest: ClassTag[X]): Y = {
 
     // TODO: support broadcasting files
     files.foreach(f => {
@@ -484,7 +486,10 @@ trait GenomicRDD[T, U <: GenomicRDD[T, U]] extends Logging {
     val bins = GenomeBins(totalLength / rdd.partitions.size, seqLengths)
 
     // if the input rdd is mapped, then we need to repartition
-    val partitionedRdd = if (sequences.records.size > 0) {
+    val partitionedRdd = if (sequences.records.isEmpty ||
+      !repartitionInput) {
+      rdd
+    } else {
       // get region covered, expand region by flank size, and tag with bins
       val binKeyedRdd = rdd.flatMap(r => {
 
@@ -506,8 +511,6 @@ trait GenomicRDD[T, U <: GenomicRDD[T, U]] extends Logging {
         .repartitionAndSortWithinPartitions(
           ManualRegionPartitioner(bins.numBins))
         .values
-    } else {
-      rdd
     }
 
     // are we in local mode?
@@ -561,7 +564,8 @@ trait GenomicRDD[T, U <: GenomicRDD[T, U]] extends Logging {
 
     // if the original rdd was aligned and the final rdd is aligned, then we must filter
     if (newRdd.sequences.isEmpty ||
-      sequences.isEmpty) {
+      sequences.isEmpty ||
+      !filterOutput) {
       newRdd
     } else {
       def filterPartition(idx: Int, iter: Iterator[X]): Iterator[X] = {