Skip to content

Commit

Permalink
[ADAM-1186] Mask null from fs.globStatus.
Browse files Browse the repository at this point in the history
Resolves #1186. Check for a null pointer in ADAMContext.getFiles. Make
ADAMContext.getFsAndFiles package private so that we can unit test it.
Added unit tests to check for proper exception with empty directories and
bad globs.
  • Loading branch information
fnothaft authored and heuermh committed Sep 28, 2016
1 parent 54371cb commit f4a33a5
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ class ADAMContext private (@transient val sc: SparkContext) extends Serializable
val paths = if (fs.isDirectory(path)) fs.listStatus(path) else fs.globStatus(path)

// the path must match at least one file
if (paths.isEmpty) {
if (paths == null || paths.isEmpty) {
throw new FileNotFoundException(
s"Couldn't find any files matching ${path.toUri}. If you are trying to" +
" glob a directory of Parquet files, you need to glob inside the" +
Expand All @@ -327,7 +327,7 @@ class ADAMContext private (@transient val sc: SparkContext) extends Serializable
*
* @throws FileNotFoundException if the path does not match any files.
*/
private def getFsAndFiles(path: Path): Array[Path] = {
private[rdd] def getFsAndFiles(path: Path): Array[Path] = {

// get the underlying fs for the file
val fs = Option(path.getFileSystem(sc.hadoopConfiguration)).getOrElse(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
*/
package org.bdgenomics.adam.rdd

import java.io.File
import java.io.{ File, FileNotFoundException }
import java.util.UUID
import htsjdk.samtools.DiskBasedBAMFileIndex
import com.google.common.io.Files
Expand Down Expand Up @@ -378,8 +378,26 @@ class ADAMContextSuite extends ADAMFunSuite {
val outputPath = tmpLocation()
reads.saveAsParquet(outputPath)
reads.saveAsParquet(outputPath.replace(".adam", ".2.adam"))

val paths = new Path(outputPath.replace(".adam", "*.adam") + "/*")
assert(sc.getFsAndFiles(paths).size > 2)

val reloadedReads = sc.loadParquetAlignments(outputPath.replace(".adam", "*.adam") + "/*")
assert((2 * reads.rdd.count) === reloadedReads.rdd.count)
}

// A glob pattern that matches no files must be reported as a
// FileNotFoundException by getFsAndFiles.
sparkTest("bad glob should fail") {
  // Rewrite the extension of a known resource into a pattern ("*.sad").
  val unmatchedGlob = resourcePath("small.sam").replace(".sam", "*.sad")
  intercept[FileNotFoundException] {
    sc.getFsAndFiles(new Path(unmatchedGlob))
  }
}

// getFsAndFiles must throw FileNotFoundException for a location that
// contains no files.
// NOTE(review): tmpLocation() is defined elsewhere; confirm whether it yields
// an existing empty directory or a path that has not been created yet.
sparkTest("empty directory should fail") {
  val emptyLocation = new Path(tmpLocation())
  intercept[FileNotFoundException] {
    sc.getFsAndFiles(emptyLocation)
  }
}
}

0 comments on commit f4a33a5

Please sign in to comment.