Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Protection against using the wrong sample alias which produces zero L… #1242

Merged
merged 6 commits into from
Oct 23, 2018
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
19 changes: 14 additions & 5 deletions src/main/java/picard/fingerprint/CheckFingerprint.java
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,9 @@ protected int doWork() {
outputDetailMetricsFile = DETAIL_OUTPUT;
outputSummaryMetricsFile = SUMMARY_OUTPUT;
} else {
if (!OUTPUT.endsWith(".")) OUTPUT = OUTPUT + ".";
if (!OUTPUT.endsWith(".")) {
OUTPUT = OUTPUT + ".";
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
OUTPUT = OUTPUT + ".";
OUTPUT += ".";

}
outputDetailMetricsFile = new File(OUTPUT + FINGERPRINT_DETAIL_FILE_SUFFIX);
outputSummaryMetricsFile = new File(OUTPUT + FINGERPRINT_SUMMARY_FILE_SUFFIX);
}
Expand Down Expand Up @@ -311,6 +313,7 @@ protected int doWork() {
final MetricsFile<FingerprintingSummaryMetrics, ?> summaryFile = getMetricsFile();
final MetricsFile<FingerprintingDetailMetrics, ?> detailsFile = getMetricsFile();

boolean anyNonzeroLod = false;
for (final FingerprintResults fpr : results) {
final MatchResults mr = fpr.getMatchResults().first();

Expand Down Expand Up @@ -365,11 +368,21 @@ protected int doWork() {

summaryFile.addMetric(metrics);
log.info("Read Group: " + metrics.READ_GROUP + " / " + observedSampleAlias + " vs. " + metrics.SAMPLE + ": LOD = " + metrics.LOD_EXPECTED_SAMPLE);
if (metrics.LOD_EXPECTED_SAMPLE != 0) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you could write

anyNonzeroLod |= metrics.LOD_EXPECTED_SAMPLE != 0;

(also, I think the proper spelling is anyNonZeroLod as non-zero is a hyphenated word)

Conversely avoiding negatives would make this a bit nicer, IMO, e.g.

allZeroLod &= metrics.LOD_EXPECTED_SAMPLE == 0;
...
if (allZeroLod) {

I find reading code with double negatives like !anyNonZeroLod to be unnecessarily confusing. The same is true of the text No Non-zero results found.

anyNonzeroLod = true;
}
}

summaryFile.write(outputSummaryMetricsFile);
detailsFile.write(outputDetailMetricsFile);

if (!anyNonzeroLod) {
log.error("No non-zero results found. This is likely an error. " +
"Probable cause: EXPECTED_SAMPLE (if provided) or the sample name from INPUT (if EXPECTED_SAMPLE isn't provided)" +
"isn't a sample in GENOTYPES file.");
return 1;
}

return 0;
}

Expand All @@ -389,10 +402,6 @@ protected String[] customCommandLineValidation() {
return super.customCommandLineValidation();
}

static boolean isBamOrSam(final File f) {
return isBamOrSam(f.toPath());
}

static boolean isBamOrSam(final Path p) {
return (p.toUri().getRawPath().endsWith(BamFileIoUtils.BAM_FILE_EXTENSION) || p.toUri().getRawPath().endsWith(IOUtil.SAM_FILE_EXTENSION));
}
Expand Down
198 changes: 99 additions & 99 deletions src/main/java/picard/sam/markduplicates/MarkDuplicates.java
Original file line number Diff line number Diff line change
Expand Up @@ -268,38 +268,38 @@ protected int doWork() {
// Key: previous PG ID on a SAM Record (or null). Value: New PG ID to replace it.
final Map<String, String> chainedPgIds = getChainedPgIds(outputHeader);

final SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(outputHeader,
try(SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(outputHeader,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
try(SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(outputHeader,
try (SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(outputHeader,

true,
OUTPUT);

// Now copy over the file while marking all the necessary indexes as duplicates
long recordInFileIndex = 0;
long nextOpticalDuplicateIndex = this.opticalDuplicateIndexes != null && this.opticalDuplicateIndexes.hasNext() ? this.opticalDuplicateIndexes.next() : NO_SUCH_INDEX;
long nextDuplicateIndex = (this.duplicateIndexes.hasNext() ? this.duplicateIndexes.next() : NO_SUCH_INDEX);

// initialize variables for optional representative read tagging
CloseableIterator<RepresentativeReadIndexer> representativeReadIterator = null;
RepresentativeReadIndexer rri = null;
int representativeReadIndexInFile = -1;
int duplicateSetSize = -1;
int nextRepresentativeIndex = -1;
if (TAG_DUPLICATE_SET_MEMBERS) {
representativeReadIterator = this.representativeReadIndicesForDuplicates.iterator();
if (representativeReadIterator.hasNext()) {
rri = representativeReadIterator.next();
nextRepresentativeIndex = rri.readIndexInFile;
representativeReadIndexInFile = rri.representativeReadIndexInFile;
duplicateSetSize = rri.setSize;
OUTPUT)) {

// Now copy over the file while marking all the necessary indexes as duplicates
long recordInFileIndex = 0;
long nextOpticalDuplicateIndex = this.opticalDuplicateIndexes != null && this.opticalDuplicateIndexes.hasNext() ? this.opticalDuplicateIndexes.next() : NO_SUCH_INDEX;
long nextDuplicateIndex = (this.duplicateIndexes.hasNext() ? this.duplicateIndexes.next() : NO_SUCH_INDEX);

// initialize variables for optional representative read tagging
CloseableIterator<RepresentativeReadIndexer> representativeReadIterator = null;
RepresentativeReadIndexer rri = null;
int representativeReadIndexInFile = -1;
int duplicateSetSize = -1;
int nextRepresentativeIndex = -1;
if (TAG_DUPLICATE_SET_MEMBERS) {
representativeReadIterator = this.representativeReadIndicesForDuplicates.iterator();
if (representativeReadIterator.hasNext()) {
rri = representativeReadIterator.next();
nextRepresentativeIndex = rri.readIndexInFile;
representativeReadIndexInFile = rri.representativeReadIndexInFile;
duplicateSetSize = rri.setSize;
}
}
}

final ProgressLogger progress = new ProgressLogger(log, (int) 1e7, "Written");
final CloseableIterator<SAMRecord> iterator = headerAndIterator.iterator;
String duplicateQueryName = null;
String opticalDuplicateQueryName = null;
final ProgressLogger progress = new ProgressLogger(log, (int) 1e7, "Written");
final CloseableIterator<SAMRecord> iterator = headerAndIterator.iterator;
String duplicateQueryName = null;
String opticalDuplicateQueryName = null;

while (iterator.hasNext()) {
final SAMRecord rec = iterator.next();
while (iterator.hasNext()) {
final SAMRecord rec = iterator.next();

final String library = LibraryIdGenerator.getLibraryName(header, rec);
DuplicationMetrics metrics = libraryIdGenerator.getMetricsByLibrary(library);
Expand All @@ -312,26 +312,26 @@ protected int doWork() {
// First bring the simple metrics up to date
if (rec.getReadUnmappedFlag()) {
++metrics.UNMAPPED_READS;
} else if(rec.isSecondaryOrSupplementary()) {
} else if (rec.isSecondaryOrSupplementary()) {
++metrics.SECONDARY_OR_SUPPLEMENTARY_RDS;
} else if (!rec.getReadPairedFlag() || rec.getMateUnmappedFlag()) {
++metrics.UNPAIRED_READS_EXAMINED;
} else {
++metrics.READ_PAIRS_EXAMINED; // will need to be divided by 2 at the end
}

// Now try and figure out the next duplicate index (if going by coordinate. if going by query name, only do this
// if the query name has changed.
final boolean needNextDuplicateIndex = recordInFileIndex > nextDuplicateIndex &&
(sortOrder == SAMFileHeader.SortOrder.coordinate || !rec.getReadName().equals(duplicateQueryName));
// Now try and figure out the next duplicate index (if going by coordinate. if going by query name, only do this
// if the query name has changed.
final boolean needNextDuplicateIndex = recordInFileIndex > nextDuplicateIndex &&
(sortOrder == SAMFileHeader.SortOrder.coordinate || !rec.getReadName().equals(duplicateQueryName));

if (needNextDuplicateIndex) {
if (needNextDuplicateIndex) {
nextDuplicateIndex = (this.duplicateIndexes.hasNext() ? this.duplicateIndexes.next() : NO_SUCH_INDEX);
}
}

final boolean isDuplicate = recordInFileIndex == nextDuplicateIndex ||
(sortOrder == SAMFileHeader.SortOrder.queryname &&
recordInFileIndex > nextDuplicateIndex && rec.getReadName().equals(duplicateQueryName));
final boolean isDuplicate = recordInFileIndex == nextDuplicateIndex ||
(sortOrder == SAMFileHeader.SortOrder.queryname &&
recordInFileIndex > nextDuplicateIndex && rec.getReadName().equals(duplicateQueryName));

if (isDuplicate) {
duplicateQueryName = rec.getReadName();
Expand All @@ -350,81 +350,81 @@ protected int doWork() {
rec.setDuplicateReadFlag(false);
}

// Manage the flagging of optical/sequencing duplicates
final boolean needNextOpticalDuplicateIndex = recordInFileIndex > nextOpticalDuplicateIndex &&
(sortOrder == SAMFileHeader.SortOrder.coordinate || !rec.getReadName().equals(opticalDuplicateQueryName));

// Possibly figure out the next opticalDuplicate index (if going by coordinate, if going by query name, only do this
// if the query name has changed)
if (needNextOpticalDuplicateIndex) {
nextOpticalDuplicateIndex = (this.opticalDuplicateIndexes.hasNext() ? this.opticalDuplicateIndexes.next() : NO_SUCH_INDEX);
}
// Manage the flagging of optical/sequencing duplicates
final boolean needNextOpticalDuplicateIndex = recordInFileIndex > nextOpticalDuplicateIndex &&
(sortOrder == SAMFileHeader.SortOrder.coordinate || !rec.getReadName().equals(opticalDuplicateQueryName));

final boolean isOpticalDuplicate = sortOrder == SAMFileHeader.SortOrder.queryname &&
recordInFileIndex > nextOpticalDuplicateIndex &&
rec.getReadName().equals(opticalDuplicateQueryName) ||
recordInFileIndex == nextOpticalDuplicateIndex;
// Possibly figure out the next opticalDuplicate index (if going by coordinate, if going by query name, only do this
// if the query name has changed)
if (needNextOpticalDuplicateIndex) {
nextOpticalDuplicateIndex = (this.opticalDuplicateIndexes.hasNext() ? this.opticalDuplicateIndexes.next() : NO_SUCH_INDEX);
}

if (CLEAR_DT) {
rec.setAttribute(DUPLICATE_TYPE_TAG, null);
}
final boolean isOpticalDuplicate = sortOrder == SAMFileHeader.SortOrder.queryname &&
recordInFileIndex > nextOpticalDuplicateIndex &&
rec.getReadName().equals(opticalDuplicateQueryName) ||
recordInFileIndex == nextOpticalDuplicateIndex;

if (this.TAGGING_POLICY != DuplicateTaggingPolicy.DontTag && rec.getDuplicateReadFlag()) {
if (isOpticalDuplicate) {
opticalDuplicateQueryName = rec.getReadName();
rec.setAttribute(DUPLICATE_TYPE_TAG, DuplicateType.SEQUENCING.code());
} else if (this.TAGGING_POLICY == DuplicateTaggingPolicy.All) {
rec.setAttribute(DUPLICATE_TYPE_TAG, DuplicateType.LIBRARY.code());
if (CLEAR_DT) {
rec.setAttribute(DUPLICATE_TYPE_TAG, null);
}
}

// Tag any read pair that was in a duplicate set with the duplicate set size and a representative read name
if (TAG_DUPLICATE_SET_MEMBERS) {
final boolean needNextRepresentativeIndex = recordInFileIndex > nextRepresentativeIndex;
if (needNextRepresentativeIndex && representativeReadIterator.hasNext()) {
rri = representativeReadIterator.next();
nextRepresentativeIndex = rri.readIndexInFile;
representativeReadIndexInFile = rri.representativeReadIndexInFile;
duplicateSetSize = rri.setSize;
if (this.TAGGING_POLICY != DuplicateTaggingPolicy.DontTag && rec.getDuplicateReadFlag()) {
if (isOpticalDuplicate) {
opticalDuplicateQueryName = rec.getReadName();
rec.setAttribute(DUPLICATE_TYPE_TAG, DuplicateType.SEQUENCING.code());
} else if (this.TAGGING_POLICY == DuplicateTaggingPolicy.All) {
rec.setAttribute(DUPLICATE_TYPE_TAG, DuplicateType.LIBRARY.code());
}
}
final boolean isInDuplicateSet = recordInFileIndex == nextRepresentativeIndex ||
(sortOrder == SAMFileHeader.SortOrder.queryname &&
recordInFileIndex > nextDuplicateIndex);
if (isInDuplicateSet) {
if (!rec.isSecondaryOrSupplementary() && !rec.getReadUnmappedFlag()) {
if (TAG_DUPLICATE_SET_MEMBERS) {
rec.setAttribute(DUPLICATE_SET_INDEX_TAG, representativeReadIndexInFile);
rec.setAttribute(DUPLICATE_SET_SIZE_TAG, duplicateSetSize);

// Tag any read pair that was in a duplicate set with the duplicate set size and a representative read name
if (TAG_DUPLICATE_SET_MEMBERS) {
final boolean needNextRepresentativeIndex = recordInFileIndex > nextRepresentativeIndex;
if (needNextRepresentativeIndex && representativeReadIterator.hasNext()) {
rri = representativeReadIterator.next();
nextRepresentativeIndex = rri.readIndexInFile;
representativeReadIndexInFile = rri.representativeReadIndexInFile;
duplicateSetSize = rri.setSize;
}
final boolean isInDuplicateSet = recordInFileIndex == nextRepresentativeIndex ||
(sortOrder == SAMFileHeader.SortOrder.queryname &&
recordInFileIndex > nextDuplicateIndex);
if (isInDuplicateSet) {
if (!rec.isSecondaryOrSupplementary() && !rec.getReadUnmappedFlag()) {
if (TAG_DUPLICATE_SET_MEMBERS) {
rec.setAttribute(DUPLICATE_SET_INDEX_TAG, representativeReadIndexInFile);
rec.setAttribute(DUPLICATE_SET_SIZE_TAG, duplicateSetSize);
}
}
}
}
}

// Output the record if desired and bump the record index
recordInFileIndex++;
if (this.REMOVE_DUPLICATES && rec.getDuplicateReadFlag()) {
continue;
}
if (this.REMOVE_SEQUENCING_DUPLICATES && isOpticalDuplicate) {
continue;
}
if (PROGRAM_RECORD_ID != null && pgTagArgumentCollection.ADD_PG_TAG_TO_READS) {
rec.setAttribute(SAMTag.PG.name(), chainedPgIds.get(rec.getStringAttribute(SAMTag.PG.name())));
// Output the record if desired and bump the record index
recordInFileIndex++;
if (this.REMOVE_DUPLICATES && rec.getDuplicateReadFlag()) {
continue;
}
if (this.REMOVE_SEQUENCING_DUPLICATES && isOpticalDuplicate) {
continue;
}
if (PROGRAM_RECORD_ID != null && pgTagArgumentCollection.ADD_PG_TAG_TO_READS) {
rec.setAttribute(SAMTag.PG.name(), chainedPgIds.get(rec.getStringAttribute(SAMTag.PG.name())));
}
out.addAlignment(rec);
progress.record(rec);
}
out.addAlignment(rec);
progress.record(rec);
}

// remember to close the inputs
iterator.close();
// remember to close the inputs
iterator.close();

this.duplicateIndexes.cleanup();
if (TAG_DUPLICATE_SET_MEMBERS) {
this.representativeReadIndicesForDuplicates.cleanup();
}
this.duplicateIndexes.cleanup();
if (TAG_DUPLICATE_SET_MEMBERS) {
this.representativeReadIndicesForDuplicates.cleanup();
}

reportMemoryStats("Before output close");
out.close();
reportMemoryStats("Before output close");
}
reportMemoryStats("After output close");

// Write out the metrics
Expand Down