-
Notifications
You must be signed in to change notification settings - Fork 365
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Protection against using the wrong sample alias which produces zero L… #1242
Changes from 4 commits
232addf
3eeec7d
318cbf5
f92ff60
427a60f
b024781
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -224,7 +224,9 @@ protected int doWork() { | |
outputDetailMetricsFile = DETAIL_OUTPUT; | ||
outputSummaryMetricsFile = SUMMARY_OUTPUT; | ||
} else { | ||
if (!OUTPUT.endsWith(".")) OUTPUT = OUTPUT + "."; | ||
if (!OUTPUT.endsWith(".")) { | ||
OUTPUT = OUTPUT + "."; | ||
} | ||
outputDetailMetricsFile = new File(OUTPUT + FINGERPRINT_DETAIL_FILE_SUFFIX); | ||
outputSummaryMetricsFile = new File(OUTPUT + FINGERPRINT_SUMMARY_FILE_SUFFIX); | ||
} | ||
|
@@ -311,6 +313,7 @@ protected int doWork() { | |
final MetricsFile<FingerprintingSummaryMetrics, ?> summaryFile = getMetricsFile(); | ||
final MetricsFile<FingerprintingDetailMetrics, ?> detailsFile = getMetricsFile(); | ||
|
||
boolean anyNonzeroLod = false; | ||
for (final FingerprintResults fpr : results) { | ||
final MatchResults mr = fpr.getMatchResults().first(); | ||
|
||
|
@@ -365,11 +368,21 @@ protected int doWork() { | |
|
||
summaryFile.addMetric(metrics); | ||
log.info("Read Group: " + metrics.READ_GROUP + " / " + observedSampleAlias + " vs. " + metrics.SAMPLE + ": LOD = " + metrics.LOD_EXPECTED_SAMPLE); | ||
if (metrics.LOD_EXPECTED_SAMPLE != 0) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you could write
(also, I think the proper spelling is Conversely avoiding negatives would make this a bit nicer, IMO, e.g.
I find reading code with double negatives like |
||
anyNonzeroLod = true; | ||
} | ||
} | ||
|
||
summaryFile.write(outputSummaryMetricsFile); | ||
detailsFile.write(outputDetailMetricsFile); | ||
|
||
if (!anyNonzeroLod) { | ||
log.error("No non-zero results found. This is likely an error. " + | ||
"Probable cause: EXPECTED_SAMPLE (if provided) or the sample name from INPUT (if EXPECTED_SAMPLE isn't provided)" + | ||
"isn't a sample in GENOTYPES file."); | ||
return 1; | ||
} | ||
|
||
return 0; | ||
} | ||
|
||
|
@@ -389,10 +402,6 @@ protected String[] customCommandLineValidation() { | |
return super.customCommandLineValidation(); | ||
} | ||
|
||
static boolean isBamOrSam(final File f) { | ||
return isBamOrSam(f.toPath()); | ||
} | ||
|
||
static boolean isBamOrSam(final Path p) { | ||
return (p.toUri().getRawPath().endsWith(BamFileIoUtils.BAM_FILE_EXTENSION) || p.toUri().getRawPath().endsWith(IOUtil.SAM_FILE_EXTENSION)); | ||
} | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -268,38 +268,38 @@ protected int doWork() { | |||||
// Key: previous PG ID on a SAM Record (or null). Value: New PG ID to replace it. | ||||||
final Map<String, String> chainedPgIds = getChainedPgIds(outputHeader); | ||||||
|
||||||
final SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(outputHeader, | ||||||
try(SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(outputHeader, | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
true, | ||||||
OUTPUT); | ||||||
|
||||||
// Now copy over the file while marking all the necessary indexes as duplicates | ||||||
long recordInFileIndex = 0; | ||||||
long nextOpticalDuplicateIndex = this.opticalDuplicateIndexes != null && this.opticalDuplicateIndexes.hasNext() ? this.opticalDuplicateIndexes.next() : NO_SUCH_INDEX; | ||||||
long nextDuplicateIndex = (this.duplicateIndexes.hasNext() ? this.duplicateIndexes.next() : NO_SUCH_INDEX); | ||||||
|
||||||
// initialize variables for optional representative read tagging | ||||||
CloseableIterator<RepresentativeReadIndexer> representativeReadIterator = null; | ||||||
RepresentativeReadIndexer rri = null; | ||||||
int representativeReadIndexInFile = -1; | ||||||
int duplicateSetSize = -1; | ||||||
int nextRepresentativeIndex = -1; | ||||||
if (TAG_DUPLICATE_SET_MEMBERS) { | ||||||
representativeReadIterator = this.representativeReadIndicesForDuplicates.iterator(); | ||||||
if (representativeReadIterator.hasNext()) { | ||||||
rri = representativeReadIterator.next(); | ||||||
nextRepresentativeIndex = rri.readIndexInFile; | ||||||
representativeReadIndexInFile = rri.representativeReadIndexInFile; | ||||||
duplicateSetSize = rri.setSize; | ||||||
OUTPUT)) { | ||||||
|
||||||
// Now copy over the file while marking all the necessary indexes as duplicates | ||||||
long recordInFileIndex = 0; | ||||||
long nextOpticalDuplicateIndex = this.opticalDuplicateIndexes != null && this.opticalDuplicateIndexes.hasNext() ? this.opticalDuplicateIndexes.next() : NO_SUCH_INDEX; | ||||||
long nextDuplicateIndex = (this.duplicateIndexes.hasNext() ? this.duplicateIndexes.next() : NO_SUCH_INDEX); | ||||||
|
||||||
// initialize variables for optional representative read tagging | ||||||
CloseableIterator<RepresentativeReadIndexer> representativeReadIterator = null; | ||||||
RepresentativeReadIndexer rri = null; | ||||||
int representativeReadIndexInFile = -1; | ||||||
int duplicateSetSize = -1; | ||||||
int nextRepresentativeIndex = -1; | ||||||
if (TAG_DUPLICATE_SET_MEMBERS) { | ||||||
representativeReadIterator = this.representativeReadIndicesForDuplicates.iterator(); | ||||||
if (representativeReadIterator.hasNext()) { | ||||||
rri = representativeReadIterator.next(); | ||||||
nextRepresentativeIndex = rri.readIndexInFile; | ||||||
representativeReadIndexInFile = rri.representativeReadIndexInFile; | ||||||
duplicateSetSize = rri.setSize; | ||||||
} | ||||||
} | ||||||
} | ||||||
|
||||||
final ProgressLogger progress = new ProgressLogger(log, (int) 1e7, "Written"); | ||||||
final CloseableIterator<SAMRecord> iterator = headerAndIterator.iterator; | ||||||
String duplicateQueryName = null; | ||||||
String opticalDuplicateQueryName = null; | ||||||
final ProgressLogger progress = new ProgressLogger(log, (int) 1e7, "Written"); | ||||||
final CloseableIterator<SAMRecord> iterator = headerAndIterator.iterator; | ||||||
String duplicateQueryName = null; | ||||||
String opticalDuplicateQueryName = null; | ||||||
|
||||||
while (iterator.hasNext()) { | ||||||
final SAMRecord rec = iterator.next(); | ||||||
while (iterator.hasNext()) { | ||||||
final SAMRecord rec = iterator.next(); | ||||||
|
||||||
final String library = LibraryIdGenerator.getLibraryName(header, rec); | ||||||
DuplicationMetrics metrics = libraryIdGenerator.getMetricsByLibrary(library); | ||||||
|
@@ -312,26 +312,26 @@ protected int doWork() { | |||||
// First bring the simple metrics up to date | ||||||
if (rec.getReadUnmappedFlag()) { | ||||||
++metrics.UNMAPPED_READS; | ||||||
} else if(rec.isSecondaryOrSupplementary()) { | ||||||
} else if (rec.isSecondaryOrSupplementary()) { | ||||||
++metrics.SECONDARY_OR_SUPPLEMENTARY_RDS; | ||||||
} else if (!rec.getReadPairedFlag() || rec.getMateUnmappedFlag()) { | ||||||
++metrics.UNPAIRED_READS_EXAMINED; | ||||||
} else { | ||||||
++metrics.READ_PAIRS_EXAMINED; // will need to be divided by 2 at the end | ||||||
} | ||||||
|
||||||
// Now try and figure out the next duplicate index (if going by coordinate. if going by query name, only do this | ||||||
// if the query name has changed. | ||||||
final boolean needNextDuplicateIndex = recordInFileIndex > nextDuplicateIndex && | ||||||
(sortOrder == SAMFileHeader.SortOrder.coordinate || !rec.getReadName().equals(duplicateQueryName)); | ||||||
// Now try and figure out the next duplicate index (if going by coordinate. if going by query name, only do this | ||||||
// if the query name has changed. | ||||||
final boolean needNextDuplicateIndex = recordInFileIndex > nextDuplicateIndex && | ||||||
(sortOrder == SAMFileHeader.SortOrder.coordinate || !rec.getReadName().equals(duplicateQueryName)); | ||||||
|
||||||
if (needNextDuplicateIndex) { | ||||||
if (needNextDuplicateIndex) { | ||||||
nextDuplicateIndex = (this.duplicateIndexes.hasNext() ? this.duplicateIndexes.next() : NO_SUCH_INDEX); | ||||||
} | ||||||
} | ||||||
|
||||||
final boolean isDuplicate = recordInFileIndex == nextDuplicateIndex || | ||||||
(sortOrder == SAMFileHeader.SortOrder.queryname && | ||||||
recordInFileIndex > nextDuplicateIndex && rec.getReadName().equals(duplicateQueryName)); | ||||||
final boolean isDuplicate = recordInFileIndex == nextDuplicateIndex || | ||||||
(sortOrder == SAMFileHeader.SortOrder.queryname && | ||||||
recordInFileIndex > nextDuplicateIndex && rec.getReadName().equals(duplicateQueryName)); | ||||||
|
||||||
if (isDuplicate) { | ||||||
duplicateQueryName = rec.getReadName(); | ||||||
|
@@ -350,81 +350,81 @@ protected int doWork() { | |||||
rec.setDuplicateReadFlag(false); | ||||||
} | ||||||
|
||||||
// Manage the flagging of optical/sequencing duplicates | ||||||
final boolean needNextOpticalDuplicateIndex = recordInFileIndex > nextOpticalDuplicateIndex && | ||||||
(sortOrder == SAMFileHeader.SortOrder.coordinate || !rec.getReadName().equals(opticalDuplicateQueryName)); | ||||||
|
||||||
// Possibly figure out the next opticalDuplicate index (if going by coordinate, if going by query name, only do this | ||||||
// if the query name has changed) | ||||||
if (needNextOpticalDuplicateIndex) { | ||||||
nextOpticalDuplicateIndex = (this.opticalDuplicateIndexes.hasNext() ? this.opticalDuplicateIndexes.next() : NO_SUCH_INDEX); | ||||||
} | ||||||
// Manage the flagging of optical/sequencing duplicates | ||||||
final boolean needNextOpticalDuplicateIndex = recordInFileIndex > nextOpticalDuplicateIndex && | ||||||
(sortOrder == SAMFileHeader.SortOrder.coordinate || !rec.getReadName().equals(opticalDuplicateQueryName)); | ||||||
|
||||||
final boolean isOpticalDuplicate = sortOrder == SAMFileHeader.SortOrder.queryname && | ||||||
recordInFileIndex > nextOpticalDuplicateIndex && | ||||||
rec.getReadName().equals(opticalDuplicateQueryName) || | ||||||
recordInFileIndex == nextOpticalDuplicateIndex; | ||||||
// Possibly figure out the next opticalDuplicate index (if going by coordinate, if going by query name, only do this | ||||||
// if the query name has changed) | ||||||
if (needNextOpticalDuplicateIndex) { | ||||||
nextOpticalDuplicateIndex = (this.opticalDuplicateIndexes.hasNext() ? this.opticalDuplicateIndexes.next() : NO_SUCH_INDEX); | ||||||
} | ||||||
|
||||||
if (CLEAR_DT) { | ||||||
rec.setAttribute(DUPLICATE_TYPE_TAG, null); | ||||||
} | ||||||
final boolean isOpticalDuplicate = sortOrder == SAMFileHeader.SortOrder.queryname && | ||||||
recordInFileIndex > nextOpticalDuplicateIndex && | ||||||
rec.getReadName().equals(opticalDuplicateQueryName) || | ||||||
recordInFileIndex == nextOpticalDuplicateIndex; | ||||||
|
||||||
if (this.TAGGING_POLICY != DuplicateTaggingPolicy.DontTag && rec.getDuplicateReadFlag()) { | ||||||
if (isOpticalDuplicate) { | ||||||
opticalDuplicateQueryName = rec.getReadName(); | ||||||
rec.setAttribute(DUPLICATE_TYPE_TAG, DuplicateType.SEQUENCING.code()); | ||||||
} else if (this.TAGGING_POLICY == DuplicateTaggingPolicy.All) { | ||||||
rec.setAttribute(DUPLICATE_TYPE_TAG, DuplicateType.LIBRARY.code()); | ||||||
if (CLEAR_DT) { | ||||||
rec.setAttribute(DUPLICATE_TYPE_TAG, null); | ||||||
} | ||||||
} | ||||||
|
||||||
// Tag any read pair that was in a duplicate set with the duplicate set size and a representative read name | ||||||
if (TAG_DUPLICATE_SET_MEMBERS) { | ||||||
final boolean needNextRepresentativeIndex = recordInFileIndex > nextRepresentativeIndex; | ||||||
if (needNextRepresentativeIndex && representativeReadIterator.hasNext()) { | ||||||
rri = representativeReadIterator.next(); | ||||||
nextRepresentativeIndex = rri.readIndexInFile; | ||||||
representativeReadIndexInFile = rri.representativeReadIndexInFile; | ||||||
duplicateSetSize = rri.setSize; | ||||||
if (this.TAGGING_POLICY != DuplicateTaggingPolicy.DontTag && rec.getDuplicateReadFlag()) { | ||||||
if (isOpticalDuplicate) { | ||||||
opticalDuplicateQueryName = rec.getReadName(); | ||||||
rec.setAttribute(DUPLICATE_TYPE_TAG, DuplicateType.SEQUENCING.code()); | ||||||
} else if (this.TAGGING_POLICY == DuplicateTaggingPolicy.All) { | ||||||
rec.setAttribute(DUPLICATE_TYPE_TAG, DuplicateType.LIBRARY.code()); | ||||||
} | ||||||
} | ||||||
final boolean isInDuplicateSet = recordInFileIndex == nextRepresentativeIndex || | ||||||
(sortOrder == SAMFileHeader.SortOrder.queryname && | ||||||
recordInFileIndex > nextDuplicateIndex); | ||||||
if (isInDuplicateSet) { | ||||||
if (!rec.isSecondaryOrSupplementary() && !rec.getReadUnmappedFlag()) { | ||||||
if (TAG_DUPLICATE_SET_MEMBERS) { | ||||||
rec.setAttribute(DUPLICATE_SET_INDEX_TAG, representativeReadIndexInFile); | ||||||
rec.setAttribute(DUPLICATE_SET_SIZE_TAG, duplicateSetSize); | ||||||
|
||||||
// Tag any read pair that was in a duplicate set with the duplicate set size and a representative read name | ||||||
if (TAG_DUPLICATE_SET_MEMBERS) { | ||||||
final boolean needNextRepresentativeIndex = recordInFileIndex > nextRepresentativeIndex; | ||||||
if (needNextRepresentativeIndex && representativeReadIterator.hasNext()) { | ||||||
rri = representativeReadIterator.next(); | ||||||
nextRepresentativeIndex = rri.readIndexInFile; | ||||||
representativeReadIndexInFile = rri.representativeReadIndexInFile; | ||||||
duplicateSetSize = rri.setSize; | ||||||
} | ||||||
final boolean isInDuplicateSet = recordInFileIndex == nextRepresentativeIndex || | ||||||
(sortOrder == SAMFileHeader.SortOrder.queryname && | ||||||
recordInFileIndex > nextDuplicateIndex); | ||||||
if (isInDuplicateSet) { | ||||||
if (!rec.isSecondaryOrSupplementary() && !rec.getReadUnmappedFlag()) { | ||||||
if (TAG_DUPLICATE_SET_MEMBERS) { | ||||||
rec.setAttribute(DUPLICATE_SET_INDEX_TAG, representativeReadIndexInFile); | ||||||
rec.setAttribute(DUPLICATE_SET_SIZE_TAG, duplicateSetSize); | ||||||
} | ||||||
} | ||||||
} | ||||||
} | ||||||
} | ||||||
|
||||||
// Output the record if desired and bump the record index | ||||||
recordInFileIndex++; | ||||||
if (this.REMOVE_DUPLICATES && rec.getDuplicateReadFlag()) { | ||||||
continue; | ||||||
} | ||||||
if (this.REMOVE_SEQUENCING_DUPLICATES && isOpticalDuplicate) { | ||||||
continue; | ||||||
} | ||||||
if (PROGRAM_RECORD_ID != null && pgTagArgumentCollection.ADD_PG_TAG_TO_READS) { | ||||||
rec.setAttribute(SAMTag.PG.name(), chainedPgIds.get(rec.getStringAttribute(SAMTag.PG.name()))); | ||||||
// Output the record if desired and bump the record index | ||||||
recordInFileIndex++; | ||||||
if (this.REMOVE_DUPLICATES && rec.getDuplicateReadFlag()) { | ||||||
continue; | ||||||
} | ||||||
if (this.REMOVE_SEQUENCING_DUPLICATES && isOpticalDuplicate) { | ||||||
continue; | ||||||
} | ||||||
if (PROGRAM_RECORD_ID != null && pgTagArgumentCollection.ADD_PG_TAG_TO_READS) { | ||||||
rec.setAttribute(SAMTag.PG.name(), chainedPgIds.get(rec.getStringAttribute(SAMTag.PG.name()))); | ||||||
} | ||||||
out.addAlignment(rec); | ||||||
progress.record(rec); | ||||||
} | ||||||
out.addAlignment(rec); | ||||||
progress.record(rec); | ||||||
} | ||||||
|
||||||
// remember to close the inputs | ||||||
iterator.close(); | ||||||
// remember to close the inputs | ||||||
iterator.close(); | ||||||
|
||||||
this.duplicateIndexes.cleanup(); | ||||||
if (TAG_DUPLICATE_SET_MEMBERS) { | ||||||
this.representativeReadIndicesForDuplicates.cleanup(); | ||||||
} | ||||||
this.duplicateIndexes.cleanup(); | ||||||
if (TAG_DUPLICATE_SET_MEMBERS) { | ||||||
this.representativeReadIndicesForDuplicates.cleanup(); | ||||||
} | ||||||
|
||||||
reportMemoryStats("Before output close"); | ||||||
out.close(); | ||||||
reportMemoryStats("Before output close"); | ||||||
} | ||||||
reportMemoryStats("After output close"); | ||||||
|
||||||
// Write out the metrics | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.