Skip to content

Commit

Permalink
[ADAM-2143] Use fold instead of reduce when loading SAM/BAM/CRAM headers.
Browse files Browse the repository at this point in the history

Resolves #2143.
  • Loading branch information
Frank Austin Nothaft authored and heuermh committed May 1, 2019
1 parent 6e6ac8d commit e8ff4b2
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -1558,7 +1558,9 @@ class ADAMContext(@transient val sc: SparkContext) extends Serializable with Log
None
}
}
}).reduce((kv1, kv2) => {
}).fold((SequenceDictionary.empty,
ReadGroupDictionary.empty,
Seq[ProcessingStep]()))((kv1, kv2) => {
(kv1._1 ++ kv2._1, kv1._2 ++ kv2._2, kv1._3 ++ kv2._3)
})

Expand Down
25 changes: 25 additions & 0 deletions adam-core/src/test/resources/small.badheader.sam
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
@SQ SN:1 LN:249250621
@SQ SN:2 LN:243199373
@PG ID:p1 PN:myProg CL:"myProg 123" VN:1.0.0
@PG ID:p2 PN:myProg CL:"myProg 456" VN:1.0.0 PP:p1
@RG ID:badrg DT:2018-06-27 20:01:02
simread:1:26472783:false 16 1 26472784 60 75M * 0 0 GTATAAGAGCAGCCTTATTCCTATTTATAATCAGGGTGAAACACCTGTGCCAATGCCAAGACAGGGGTGCCAAGA * NM:i:0 AS:i:75 XS:i:0
simread:1:240997787:true 0 1 240997788 60 75M * 0 0 CTTTATTTTTATTTTTAAGGTTTTTTTTGTTTGTTTGTTTTGAGATGGAGTCTCGCTCCACCGCCCAGACTGGAG * NM:i:0 AS:i:75 XS:i:39
simread:1:189606653:true 0 1 189606654 60 75M * 0 0 TGTATCTTCCTCCCCTGCTGTATGTTTCCTGCCCTCAAACATCACACTCCACGTTCTTCAGCTTTAGGACTTGGA * NM:i:0 AS:i:75 XS:i:0
simread:1:207027738:true 0 1 207027739 60 75M * 0 0 TTTAATAAATGTTGATTGTCCTATTTAATTATTCTCAACTTTCCGATTTTATTTCCCATGTAACAGTGTTGTTTT * NM:i:0 AS:i:75 XS:i:0
simread:1:14397233:false 16 1 14397234 60 75M * 0 0 TAAAATGCCCCCATCTTCCCAGAGCTGCCAGCCCTCACAATGCCAACAGCTAAATGTACCCAAGTGTTACTGAAC * NM:i:0 AS:i:75 XS:i:0
simread:1:240344442:true 0 1 240344443 24 75M * 0 0 TACAGGCACCCACCATCATGCCCAGCTAATTTTTGTATTTTTGTAGAAACGGGGTTTCACCATGTTGGCCCAGCT * NM:i:0 AS:i:75 XS:i:61
simread:1:153978724:false 16 1 153978725 60 75M * 0 0 GCTCACTGCAGCCTCAACCTCCTGGGCCCAAGTGATTTCATCTTATTTTTGGAAAAAAAAACAAACTAAACCAAA * NM:i:0 AS:i:75 XS:i:0
simread:1:237728409:true 0 1 237728410 28 75M * 0 0 TTTCTTTTTCTTTCTTTCTTTCTTTCTTTCTTTTTCTTTCTTTCTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCT * NM:i:0 AS:i:75 XS:i:59
simread:1:231911906:false 16 1 231911907 60 75M * 0 0 TCATGTAGCATGCATATGGCTAACGGCAAAGTGAGGGAGGAATAATTATAGTAATAATCACAGTGATGACGTGGA * NM:i:0 AS:i:75 XS:i:0
simread:1:50683371:false 16 1 50683372 60 75M * 0 0 GCTCAGGCCTTGCAAGAATCTCTACTGCCCAACAAGTCCCTACAAGATGGCATTTAAAAGCAGTCCCTCACGCAC * NM:i:0 AS:i:75 XS:i:0
simread:1:37577445:false 16 1 37577446 60 75M * 0 0 CCTAGAGAAGCTCCCACTAGGGCTGCAGTCAATTCCCAGGTCTTAGGTGCTGAGCAGTGGGAGGTGGTGGCCATG * NM:i:0 AS:i:75 XS:i:0
simread:1:195211965:false 16 1 195211966 60 75M * 0 0 AAATAAAGTTTGGCTTTCAGTTGTAACTTTGAATATCTTTATCACAGTTATTTAAAGCCTTTAAAAAGCTTTAAT * NM:i:0 AS:i:75 XS:i:0
simread:1:163841413:false 16 1 163841414 60 75M * 0 0 TGTGTAACTAACATAATTGGCACTGTCCCTGTAAATTCAAATTGGATATCCTCCCAAATTTTATTTAAGCAATTG * NM:i:0 AS:i:75 XS:i:0
simread:1:101556378:false 16 1 101556379 60 75M * 0 0 TTTATTTTTTGAGCATGAAAGTAATATATGCTCAGTGTAAACAATTAGGTCATTATAAATATATTTAACAGGAAT * NM:i:0 AS:i:75 XS:i:0
simread:1:20101800:true 0 1 20101801 35 75M * 0 0 CTCAGGTGATCCACCCGCCTCGGCCTCCCAAAGTGCTGGGACTACAGGCATGAGGCACCGCGCCTGGCCAGGACT * NM:i:0 AS:i:75 XS:i:55
simread:1:186794283:true 0 1 186794284 60 75M * 0 0 GACAAGATAGTACTTGAGCTAAGCCTTGCAGGTTGAGTAGGATTATTCTAGTGGAATTTAGGGAAACGATGTGCA * NM:i:0 AS:i:75 XS:i:0
simread:1:165341382:true 0 1 165341383 60 75M * 0 0 CTACTCTCATTGACTGTTCAATGCCTATACAAGTAAAACTTTACCAGCACCCAAGTCAAAAAGAAAAAAAAGGGG * NM:i:0 AS:i:75 XS:i:0
simread:1:5469106:true 0 1 5469107 60 75M * 0 0 CTCATTCTCTCTCCTGCTGCACTGTGAAGAGGTGCCTGTTGCCAAGAGTATAAGTTTCCTGAGGCCTCCCAGGCC * NM:i:0 AS:i:75 XS:i:0
simread:1:89554252:false 16 1 89554253 60 75M * 0 0 AAATTAAACAGCTCGTTTAACTGATAATCCATACTATATTTGAGTAGGGCTGTCACATGGTTGGAACCTCCGGTT * NM:i:0 AS:i:75 XS:i:0
simread:1:169801933:true 0 1 169801934 40 75M * 0 0 AGACTGGGTCTCACTATGTTGCCTAGGCTGGTCTCAAACTCCTGGGCTCAAGTGATCCATCTCTGCCTTCCAAAG * NM:i:0 AS:i:75 XS:i:52
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,14 @@ class ADAMContextSuite extends ADAMFunSuite {
assert(reads.dataset.rdd.count === 20)
}

sparkTest("can read a small .SAM file with a bad header with lenient validation") {
  // Number of alignment records expected from the fixture.
  val expectedReadCount = 20

  // Load with SILENT stringency; per the test name, the fixture's header is
  // malformed and lenient validation is expected to let the load proceed.
  // NOTE(review): presumably the offending field is the @RG DT value — confirm.
  val alignments = sc.loadAlignments(
    testFile("small.badheader.sam"),
    stringency = ValidationStringency.SILENT)

  // Check the record count through each access path the test exercises:
  // the RDD, the Dataset, and the RDD derived from the Dataset.
  assert(alignments.rdd.count() === expectedReadCount)
  assert(alignments.dataset.count === expectedReadCount)
  assert(alignments.dataset.rdd.count === expectedReadCount)
}

sparkTest("loading a sam file with a bad header and strict stringency should fail") {
val path = testFile("badheader.sam")
intercept[SAMFormatException] {
Expand Down

0 comments on commit e8ff4b2

Please sign in to comment.