Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding mode and 95th percentile to InsertSizeMetrics. #1001

Merged
merged 3 commits into from
Dec 15, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/main/java/picard/analysis/InsertSizeMetrics.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ public class InsertSizeMetrics extends MultilevelMetrics {
/** The MEDIAN insert size of all paired end reads where both ends mapped to the same chromosome. */
public double MEDIAN_INSERT_SIZE;

/** The MODE insert size of all paired end reads where both ends mapped to the same chromosome. */
public double MODE_INSERT_SIZE;

/**
* The median absolute deviation of the distribution. If the distribution is essentially normal then
* the standard deviation can be estimated as ~1.4826 * MAD.
Expand Down Expand Up @@ -89,6 +92,8 @@ public class InsertSizeMetrics extends MultilevelMetrics {
public int WIDTH_OF_80_PERCENT;
/** The "width" of the bins, centered around the median, that encompass 90% of all read pairs. */
public int WIDTH_OF_90_PERCENT;
/** The "width" of the bins, centered around the median, that encompass 95% of all read pairs. */
public int WIDTH_OF_95_PERCENT;
/** The "width" of the bins, centered around the median, that encompass 100% of all read pairs. */
public int WIDTH_OF_99_PERCENT;
}
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ public void addMetricsToFile(final MetricsFile<InsertSizeMetrics,Integer> file)
metrics.MAX_INSERT_SIZE = (int) histogram.getMax();
metrics.MIN_INSERT_SIZE = (int) histogram.getMin();
metrics.MEDIAN_INSERT_SIZE = histogram.getMedian();
metrics.MODE_INSERT_SIZE = histogram.getMode();
metrics.MEDIAN_ABSOLUTE_DEVIATION = histogram.getMedianAbsoluteDeviation();

final double median = histogram.getMedian();
Expand Down Expand Up @@ -168,6 +169,7 @@ public void addMetricsToFile(final MetricsFile<InsertSizeMetrics,Integer> file)
if (percentCovered >= 0.7 && metrics.WIDTH_OF_70_PERCENT == 0) metrics.WIDTH_OF_70_PERCENT = distance;
if (percentCovered >= 0.8 && metrics.WIDTH_OF_80_PERCENT == 0) metrics.WIDTH_OF_80_PERCENT = distance;
if (percentCovered >= 0.9 && metrics.WIDTH_OF_90_PERCENT == 0) metrics.WIDTH_OF_90_PERCENT = distance;
if (percentCovered >= 0.95 && metrics.WIDTH_OF_95_PERCENT == 0) metrics.WIDTH_OF_95_PERCENT = distance;
if (percentCovered >= 0.99 && metrics.WIDTH_OF_99_PERCENT == 0) metrics.WIDTH_OF_99_PERCENT = distance;

--low;
Expand Down
96 changes: 96 additions & 0 deletions src/test/java/picard/analysis/CollectInsertSizeMetricsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
*/
package picard.analysis;

import htsjdk.samtools.*;
import htsjdk.samtools.metrics.MetricsFile;
import org.testng.Assert;
import org.testng.annotations.Test;
Expand All @@ -32,6 +33,7 @@
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.List;

/**
* Tests multi-level CollectInsertSizeMetrics
Expand Down Expand Up @@ -67,6 +69,7 @@ public void test() throws IOException {
Assert.assertEquals(metrics.PAIR_ORIENTATION.name(), "FR");
if (metrics.LIBRARY==null) { // SAMPLE or ALL_READS level
Assert.assertEquals((int)metrics.MEDIAN_INSERT_SIZE, 41);
Assert.assertEquals((int)metrics.MODE_INSERT_SIZE, 41);
Assert.assertEquals(metrics.MIN_INSERT_SIZE, 36);
Assert.assertEquals(metrics.MAX_INSERT_SIZE, 45);
Assert.assertEquals(metrics.READ_PAIRS, 13);
Expand All @@ -79,11 +82,13 @@ public void test() throws IOException {
Assert.assertEquals(metrics.WIDTH_OF_70_PERCENT, 9);
Assert.assertEquals(metrics.WIDTH_OF_80_PERCENT, 11);
Assert.assertEquals(metrics.WIDTH_OF_90_PERCENT, 11);
Assert.assertEquals(metrics.WIDTH_OF_95_PERCENT, 11);
Assert.assertEquals(metrics.WIDTH_OF_99_PERCENT, 11);

}
else if (metrics.LIBRARY.equals("Solexa-41753")) { // one LIBRARY and one READ_GROUP
Assert.assertEquals((int)metrics.MEDIAN_INSERT_SIZE, 44);
Assert.assertEquals((int)metrics.MODE_INSERT_SIZE, 44);
Assert.assertEquals(metrics.MIN_INSERT_SIZE, 44);
Assert.assertEquals(metrics.MAX_INSERT_SIZE, 44);
Assert.assertEquals(metrics.READ_PAIRS, 2);
Expand All @@ -96,11 +101,13 @@ else if (metrics.LIBRARY.equals("Solexa-41753")) { // one LIBRARY and one READ_G
Assert.assertEquals(metrics.WIDTH_OF_70_PERCENT, 1);
Assert.assertEquals(metrics.WIDTH_OF_80_PERCENT, 1);
Assert.assertEquals(metrics.WIDTH_OF_90_PERCENT, 1);
Assert.assertEquals(metrics.WIDTH_OF_95_PERCENT, 1);
Assert.assertEquals(metrics.WIDTH_OF_99_PERCENT, 1);

}
else if (metrics.LIBRARY.equals("Solexa-41748") && metrics.READ_GROUP == null) {
Assert.assertEquals((int)metrics.MEDIAN_INSERT_SIZE, 40);
Assert.assertEquals((int)metrics.MODE_INSERT_SIZE, 41);
Assert.assertEquals(metrics.MIN_INSERT_SIZE, 36);
Assert.assertEquals(metrics.MAX_INSERT_SIZE, 45);
Assert.assertEquals(metrics.READ_PAIRS, 9);
Expand All @@ -113,10 +120,12 @@ else if (metrics.LIBRARY.equals("Solexa-41748") && metrics.READ_GROUP == null) {
Assert.assertEquals(metrics.WIDTH_OF_70_PERCENT, 9);
Assert.assertEquals(metrics.WIDTH_OF_80_PERCENT, 9);
Assert.assertEquals(metrics.WIDTH_OF_90_PERCENT, 11);
Assert.assertEquals(metrics.WIDTH_OF_95_PERCENT, 11);
Assert.assertEquals(metrics.WIDTH_OF_99_PERCENT, 11);
}
else if (metrics.LIBRARY.equals("Solexa-41734") && metrics.READ_GROUP == null) {
Assert.assertEquals((int)metrics.MEDIAN_INSERT_SIZE, 38);
Assert.assertEquals((int)metrics.MODE_INSERT_SIZE, 36);
Assert.assertEquals(metrics.MIN_INSERT_SIZE, 36);
Assert.assertEquals(metrics.MAX_INSERT_SIZE, 41);
Assert.assertEquals(metrics.READ_PAIRS, 2);
Expand All @@ -129,10 +138,12 @@ else if (metrics.LIBRARY.equals("Solexa-41734") && metrics.READ_GROUP == null) {
Assert.assertEquals(metrics.WIDTH_OF_70_PERCENT, 7);
Assert.assertEquals(metrics.WIDTH_OF_80_PERCENT, 7);
Assert.assertEquals(metrics.WIDTH_OF_90_PERCENT, 7);
Assert.assertEquals(metrics.WIDTH_OF_95_PERCENT, 7);
Assert.assertEquals(metrics.WIDTH_OF_99_PERCENT, 7);
}
else if (metrics.READ_GROUP.equals("62A79AAXX100907.7")) {
Assert.assertEquals((int)metrics.MEDIAN_INSERT_SIZE, 37);
Assert.assertEquals((int)metrics.MODE_INSERT_SIZE, 36);
Assert.assertEquals(metrics.MIN_INSERT_SIZE, 36);
Assert.assertEquals(metrics.MAX_INSERT_SIZE, 41);
Assert.assertEquals(metrics.READ_PAIRS, 4);
Expand All @@ -145,10 +156,12 @@ else if (metrics.READ_GROUP.equals("62A79AAXX100907.7")) {
Assert.assertEquals(metrics.WIDTH_OF_70_PERCENT, 3);
Assert.assertEquals(metrics.WIDTH_OF_80_PERCENT, 9);
Assert.assertEquals(metrics.WIDTH_OF_90_PERCENT, 9);
Assert.assertEquals(metrics.WIDTH_OF_95_PERCENT, 9);
Assert.assertEquals(metrics.WIDTH_OF_99_PERCENT, 9);
}
else if (metrics.READ_GROUP.equals("62A79AAXX100907.6")) {
Assert.assertEquals((int)metrics.MEDIAN_INSERT_SIZE, 41);
Assert.assertEquals((int)metrics.MODE_INSERT_SIZE, 41);
Assert.assertEquals(metrics.MIN_INSERT_SIZE, 38);
Assert.assertEquals(metrics.MAX_INSERT_SIZE, 45);
Assert.assertEquals(metrics.READ_PAIRS, 5);
Expand All @@ -161,10 +174,12 @@ else if (metrics.READ_GROUP.equals("62A79AAXX100907.6")) {
Assert.assertEquals(metrics.WIDTH_OF_70_PERCENT, 7);
Assert.assertEquals(metrics.WIDTH_OF_80_PERCENT, 7);
Assert.assertEquals(metrics.WIDTH_OF_90_PERCENT, 9);
Assert.assertEquals(metrics.WIDTH_OF_95_PERCENT, 9);
Assert.assertEquals(metrics.WIDTH_OF_99_PERCENT, 9);
}
else if (metrics.READ_GROUP.equals("62A79AAXX100907.5")) {
Assert.assertEquals((int)metrics.MEDIAN_INSERT_SIZE, 41);
Assert.assertEquals((int)metrics.MODE_INSERT_SIZE, 41);
Assert.assertEquals(metrics.MIN_INSERT_SIZE, 41);
Assert.assertEquals(metrics.MAX_INSERT_SIZE, 41);
Assert.assertEquals(metrics.READ_PAIRS, 1);
Expand All @@ -177,10 +192,12 @@ else if (metrics.READ_GROUP.equals("62A79AAXX100907.5")) {
Assert.assertEquals(metrics.WIDTH_OF_70_PERCENT, 1);
Assert.assertEquals(metrics.WIDTH_OF_80_PERCENT, 1);
Assert.assertEquals(metrics.WIDTH_OF_90_PERCENT, 1);
Assert.assertEquals(metrics.WIDTH_OF_95_PERCENT, 1);
Assert.assertEquals(metrics.WIDTH_OF_99_PERCENT, 1);
}
else if (metrics.READ_GROUP.equals("62A79AAXX100907.3")) {
Assert.assertEquals((int)metrics.MEDIAN_INSERT_SIZE, 36);
Assert.assertEquals((int)metrics.MODE_INSERT_SIZE, 36);
Assert.assertEquals(metrics.MIN_INSERT_SIZE, 36);
Assert.assertEquals(metrics.MAX_INSERT_SIZE, 36);
Assert.assertEquals(metrics.READ_PAIRS, 1);
Expand All @@ -193,6 +210,7 @@ else if (metrics.READ_GROUP.equals("62A79AAXX100907.3")) {
Assert.assertEquals(metrics.WIDTH_OF_70_PERCENT, 1);
Assert.assertEquals(metrics.WIDTH_OF_80_PERCENT, 1);
Assert.assertEquals(metrics.WIDTH_OF_90_PERCENT, 1);
Assert.assertEquals(metrics.WIDTH_OF_95_PERCENT, 1);
Assert.assertEquals(metrics.WIDTH_OF_99_PERCENT, 1);
}
else {
Expand Down Expand Up @@ -242,4 +260,82 @@ public void testMultipleOrientationsForHistogram() throws IOException {

Assert.assertEquals(rResult, 0);
}

@Test
public void testWdithOfMetrics() throws IOException {
final File testSamFile = File.createTempFile("CollectInsertSizeMetrics", ".bam", TEST_DATA_DIR);
testSamFile.deleteOnExit();

final SAMRecordSetBuilder setBuilder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate, true, 100);
setBuilder.setReadLength(10);

final int insertBy = 3; // the # of bases to increase the insert by in the records below.
int queryIndex = 0;

// Create records such that we have 10 records in the 10th through the 90th percentiles, and 5 for the 95th and 99th percentiles.
// WIDTH_OF_10_PERCENT through WIDTH_OF_90_PERCENT (90 pairs total))
for (int j = 0; j < 9; j++) {
for (int i = 0; i < 5; i++, queryIndex++) {
setBuilder.addPair("query:" + queryIndex, 0, 1, 50 + j*insertBy, false, false, "10M", "10M", false, true, 60);
}
for (int i = 0; i < 5; i++, queryIndex++) {
setBuilder.addPair("query:" + queryIndex, 0, 1, 50 - j*insertBy, false, false, "10M", "10M", false, true, 60);
}
}
// WIDTH_OF_95_PERCENT through WIDTH_OF_99_PERCENT (10 pairs total)
for (int j = 9; j < 11; j++) {
for (int i = 0; i < 3; i++, queryIndex++) {
setBuilder.addPair("query:" + queryIndex, 0, 1, 50 + j*insertBy, false, false, "10M", "10M", false, true, 60);
}
for (int i = 0; i < 2; i++, queryIndex++) {
setBuilder.addPair("query:" + queryIndex, 0, 1, 50 - j*insertBy, false, false, "10M", "10M", false, true, 60);
}
}

// Add one to make the an odd # of pairs for the median
setBuilder.addPair("query:" + queryIndex, 0, 1, 50, false, false, "10M", "10M", false, true, 60);

final SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true).makeBAMWriter(setBuilder.getHeader(), false, testSamFile);
setBuilder.forEach(writer::addAlignment);
writer.close();


final File outfile = File.createTempFile("test", ".insert_size_metrics");
final File pdf = File.createTempFile("test", ".pdf");
outfile.deleteOnExit();
pdf.deleteOnExit();
final String[] args = new String[] {
"INPUT=" + testSamFile.getAbsolutePath(),
"OUTPUT=" + outfile.getAbsolutePath(),
"Histogram_FILE=" + pdf.getAbsolutePath()
};
Assert.assertEquals(runPicardCommandLine(args), 0);

final MetricsFile<InsertSizeMetrics, Comparable<?>> output = new MetricsFile<InsertSizeMetrics, Comparable<?>>();
output.read(new FileReader(outfile));

final List<InsertSizeMetrics> metrics = output.getMetrics();

Assert.assertEquals(metrics.size(), 1);

final InsertSizeMetrics metric = metrics.get(0);

Assert.assertEquals(metric.PAIR_ORIENTATION.name(), "FR");
Assert.assertEquals((int) metric.MEDIAN_INSERT_SIZE, 59);
Assert.assertEquals((int) metric.MODE_INSERT_SIZE, 59);
Assert.assertEquals(metric.MIN_INSERT_SIZE, 29);
Assert.assertEquals(metric.MAX_INSERT_SIZE, 89);
Assert.assertEquals(metric.READ_PAIRS, 101);
Assert.assertEquals(metric.WIDTH_OF_10_PERCENT, 1);
Assert.assertEquals(metric.WIDTH_OF_20_PERCENT, 1 + insertBy*2);
Assert.assertEquals(metric.WIDTH_OF_30_PERCENT, 1 + insertBy*4);
Assert.assertEquals(metric.WIDTH_OF_40_PERCENT, 1 + insertBy*6);
Assert.assertEquals(metric.WIDTH_OF_50_PERCENT, 1 + insertBy*8);
Assert.assertEquals(metric.WIDTH_OF_60_PERCENT, 1 + insertBy*10);
Assert.assertEquals(metric.WIDTH_OF_70_PERCENT, 1 + insertBy*12);
Assert.assertEquals(metric.WIDTH_OF_80_PERCENT, 1 + insertBy*14);
Assert.assertEquals(metric.WIDTH_OF_90_PERCENT, 1 + insertBy*16);
Assert.assertEquals(metric.WIDTH_OF_95_PERCENT, 1 + insertBy*18);
Assert.assertEquals(metric.WIDTH_OF_99_PERCENT, 1 + insertBy*20);
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Look Ma, different values for each!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

}
}