Permalink
Browse files

Adding custom adapter pairs to IlluminaBasecallsToSam (#795)

  • Loading branch information...
1 parent 3c23ba9 commit 7a85c957ab05af1bf4a5b227325ca77a5fcd64c4 @nh13 nh13 committed on GitHub Apr 19, 2017
@@ -89,7 +89,7 @@
public ClusterDataToSamConverter(final String runBarcode,
final String readGroupId,
final ReadStructure readStructure,
- final List<IlluminaUtil.IlluminaAdapterPair> adapters) {
+ final List<AdapterPair> adapters) {
this.readGroupId = readGroupId;
this.readNameEncoder = new IlluminaReadNameEncoder(runBarcode);
@@ -0,0 +1,63 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.illumina;
+
+import htsjdk.samtools.util.SequenceUtil;
+import htsjdk.samtools.util.StringUtil;
+import picard.util.AdapterPair;
+
+class CustomAdapterPair implements AdapterPair {
+
+ private final String fivePrime, threePrime, fivePrimeReadOrder;
+ private final byte[] fivePrimeBytes, threePrimeBytes, fivePrimeReadOrderBytes;
+
+ CustomAdapterPair(final String fivePrime, final String threePrime) {
+ this.threePrime = threePrime;
+ this.threePrimeBytes = StringUtil.stringToBytes(threePrime);
+
+ this.fivePrime = fivePrime;
+ this.fivePrimeReadOrder = SequenceUtil.reverseComplement(fivePrime);
+ this.fivePrimeBytes = StringUtil.stringToBytes(fivePrime);
+ this.fivePrimeReadOrderBytes = StringUtil.stringToBytes(fivePrimeReadOrder);
+ }
+
+ public String get3PrimeAdapter() { return threePrime; }
+
+ public String get5PrimeAdapter() { return fivePrime; }
+
+ public String get3PrimeAdapterInReadOrder() { return threePrime; }
+
+ public String get5PrimeAdapterInReadOrder() { return fivePrimeReadOrder; }
+
+ public byte[] get3PrimeAdapterBytes() { return threePrimeBytes; }
+
+ public byte[] get5PrimeAdapterBytes() { return fivePrimeBytes; }
+
+ public byte[] get3PrimeAdapterBytesInReadOrder() { return threePrimeBytes; }
+
+ public byte[] get5PrimeAdapterBytesInReadOrder() { return fivePrimeReadOrderBytes; }
+
+ public String getName() { return "Custom adapter pair"; }
+}
@@ -150,12 +150,9 @@
mutex = {"OUTPUT_PREFIX"})
public File MULTIPLEX_PARAMS;
- @Option(doc = "Which adapters to look for in the read.")
- public List<IlluminaUtil.IlluminaAdapterPair> ADAPTERS_TO_CHECK = new ArrayList<>(
- Arrays.asList(IlluminaUtil.IlluminaAdapterPair.INDEXED,
- IlluminaUtil.IlluminaAdapterPair.DUAL_INDEXED,
- IlluminaUtil.IlluminaAdapterPair.NEXTERA_V2,
- IlluminaUtil.IlluminaAdapterPair.FLUIDIGM));
+ @Deprecated
+ @Option(doc = "Deprecated (No longer used). Which adapters to look for in the read.")
+ public List<IlluminaUtil.IlluminaAdapterPair> ADAPTERS_TO_CHECK = null;
@Option(doc = "The number of threads to run in parallel. If NUM_PROCESSORS = 0, number of cores is automatically set to " +
"the number of cores available on the machine. If NUM_PROCESSORS < 0, then the number of cores used will" +
@@ -233,6 +230,10 @@ protected int doWork() {
if (READ_NAME_FORMAT == ReadNameFormat.CASAVA_1_8 && FLOWCELL_BARCODE == null) {
errors.add("FLOWCELL_BARCODE is required when using Casava1.8-style read name headers.");
}
+
+ if (ADAPTERS_TO_CHECK != null) {
+ log.warn("ADAPTERS_TO_CHECK is not used");
+ }
if (errors.isEmpty()) {
return null;
@@ -45,6 +45,7 @@
import picard.cmdline.StandardOptionDefinitions;
import picard.illumina.parser.ReadStructure;
import picard.illumina.parser.readers.BclQualityEvaluationStrategy;
+import picard.util.AdapterPair;
import picard.util.IlluminaUtil;
import picard.util.IlluminaUtil.IlluminaAdapterPair;
import picard.util.TabbedTextFileWithHeaderParser;
@@ -201,6 +202,12 @@
IlluminaAdapterPair.NEXTERA_V2,
IlluminaAdapterPair.FLUIDIGM));
+ @Option(doc = "For specifying adapters other than standard Illumina", optional = true)
+ public String FIVE_PRIME_ADAPTER;
+
+ @Option(doc = "For specifying adapters other than standard Illumina", optional = true)
+ public String THREE_PRIME_ADAPTER;
+
@Option(doc = "The number of threads to run in parallel. If NUM_PROCESSORS = 0, number of cores is automatically set to " +
"the number of cores available on the machine. If NUM_PROCESSORS < 0, then the number of cores used will" +
" be the number available on the machine less NUM_PROCESSORS.")
@@ -289,12 +296,19 @@ private void initialize() {
log.info("DONE_READING STRUCTURE IS " + readStructure.toString());
+ // Combine any adapters and custom adapter pairs from the command line into an array for use in clipping
+ final List<AdapterPair> adapters = new ArrayList<>();
+ adapters.addAll(ADAPTERS_TO_CHECK);
+ if (FIVE_PRIME_ADAPTER != null && THREE_PRIME_ADAPTER != null) {
+ adapters.add(new CustomAdapterPair(FIVE_PRIME_ADAPTER, THREE_PRIME_ADAPTER));
+ }
+
/**
* Be sure to pass the outputReadStructure to ClusterDataToSamConverter, which reflects the structure of the output cluster
* data which may be different from the input read structure (specifically if there are skips).
*/
final ClusterDataToSamConverter converter = new ClusterDataToSamConverter(RUN_BARCODE, READ_GROUP_ID,
- basecallsConverter.getFactory().getOutputReadStructure(), ADAPTERS_TO_CHECK)
+ basecallsConverter.getFactory().getOutputReadStructure(), adapters)
.withMolecularIndexTag(MOLECULAR_INDEX_TAG)
.withMolecularIndexQualityTag(MOLECULAR_INDEX_BASE_QUALITY_TAG)
.withTagPerMolecularIndex(TAG_PER_MOLECULAR_INDEX);
@@ -492,6 +506,10 @@ public static void main(final String[] args) {
messages.add("The number of tags given in TAG_PER_MOLECULAR_INDEX does not match the number of molecular indexes in READ_STRUCTURE");
}
+ if ((FIVE_PRIME_ADAPTER == null) != (THREE_PRIME_ADAPTER == null)) {
+ messages.add("THREE_PRIME_ADAPTER and FIVE_PRIME_ADAPTER must either both be null or both be set.");
+ }
+
if (messages.isEmpty()) {
return null;
}
@@ -146,7 +146,7 @@ public static void main(final String[] args) {
@Override
protected String[] customCommandLineValidation() {
if ((FIVE_PRIME_ADAPTER != null && THREE_PRIME_ADAPTER == null) || (THREE_PRIME_ADAPTER != null && FIVE_PRIME_ADAPTER == null)) {
- return new String[]{"Either both or neither of THREE_PRIME_ADAPTER and FIVE_PRIME_ADAPTER must be set."};
+ return new String[]{"THREE_PRIME_ADAPTER and FIVE_PRIME_ADAPTER must either both be null or both be set."};
} else {
return null;
}
@@ -251,38 +251,4 @@ protected int doWork() {
CloserUtil.close(in);
return 0;
}
-
- private final class CustomAdapterPair implements AdapterPair {
-
- final String fivePrime, threePrime, fivePrimeReadOrder;
- final byte[] fivePrimeBytes, threePrimeBytes, fivePrimeReadOrderBytes;
-
- private CustomAdapterPair(final String fivePrime, final String threePrime) {
- this.threePrime = threePrime;
- this.threePrimeBytes = StringUtil.stringToBytes(threePrime);
-
- this.fivePrime = fivePrime;
- this.fivePrimeReadOrder = SequenceUtil.reverseComplement(fivePrime);
- this.fivePrimeBytes = StringUtil.stringToBytes(fivePrime);
- this.fivePrimeReadOrderBytes = StringUtil.stringToBytes(fivePrimeReadOrder);
- }
-
- public String get3PrimeAdapter() { return threePrime; }
-
- public String get5PrimeAdapter() { return fivePrime; }
-
- public String get3PrimeAdapterInReadOrder() { return threePrime; }
-
- public String get5PrimeAdapterInReadOrder() { return fivePrimeReadOrder; }
-
- public byte[] get3PrimeAdapterBytes() { return threePrimeBytes; }
-
- public byte[] get5PrimeAdapterBytes() { return fivePrimeBytes; }
-
- public byte[] get3PrimeAdapterBytesInReadOrder() { return threePrimeBytes; }
-
- public byte[] get5PrimeAdapterBytesInReadOrder() { return fivePrimeReadOrderBytes; }
-
- public String getName() { return "Custom adapter pair"; }
- }
}
@@ -27,12 +27,16 @@
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
+import htsjdk.samtools.util.CollectionUtil;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import picard.cmdline.CommandLineProgramTest;
import java.io.File;
+import java.util.List;
+
+import static picard.util.IlluminaUtil.IlluminaAdapterPair.*;
/**
@@ -52,22 +56,28 @@ public String getCommandLineProgramName() {
* Run IlluminaBasecallsToSam on a few test cases, and verify that results agree with hand-checked expectation.
*/
@Test(dataProvider="data")
- public void testBasic(final String LANE, final String readStructure) throws Exception {
+ public void testBasic(final String LANE, final String readStructure,
+ final String fivePrimerAdapter, final String threePrimerAdapter,
+ final int expectedNumClippedRecords) throws Exception {
// Create the SAM file from Gerald output
final File samFile = File.createTempFile("." + LANE + ".illuminaBasecallsToSam", ".sam");
samFile.deleteOnExit();
- final String[] illuminaArgv = {
- "BASECALLS_DIR=" + TEST_DATA_DIR,
+ final List<String> illuminaArgv = CollectionUtil.makeList("BASECALLS_DIR=" + TEST_DATA_DIR,
"LANE=" + LANE,
"RUN_BARCODE=" + RUN_BARCODE,
"READ_STRUCTURE=" + readStructure,
"OUTPUT=" + samFile,
"ALIAS=" + ALIAS
- };
+ );
+ if (fivePrimerAdapter != null) {
+ illuminaArgv.addAll(CollectionUtil.makeList(
+ "ADAPTERS_TO_CHECK=null",
+ "FIVE_PRIME_ADAPTER=" + fivePrimerAdapter,
+ "THREE_PRIME_ADAPTER=" + threePrimerAdapter
+ ));
+ }
Assert.assertEquals(runPicardCommandLine(illuminaArgv), 0);
- System.out.println ("Ouput Sam file is in " + samFile.getAbsolutePath());
-
// Read the file and confirm it contains what is expected
final SamReader samReader = SamReaderFactory.makeDefault().open(samFile);
@@ -83,17 +93,40 @@ public void testBasic(final String LANE, final String readStructure) throws Exce
}
}
}
+
+ // Check the total number of clipped records
+ Assert.assertEquals(count, expectedNumClippedRecords);
+
samReader.close();
samFile.delete();
}
@DataProvider(name="data")
private Object[][] getIlluminaBasecallsToSamTestData(){
return new Object[][] {
- {"1", "125T125T"},
- {"2", "125T125T"},
- };
- }
+ // Use the default set of adapters
+ {"1", "125T125T", null, null, 32},
+ {"2", "125T125T", null, null, 108},
+
+ // Use adapters that don't match
+ {"1", "125T125T", "ACGTACGTACGTACGT", "ACGTACGTACGTACGT", 0},
+ {"2", "125T125T", "ACGTACGTACGTACGT", "ACGTACGTACGTACGT", 0},
+
+ // Add just the "nextera v2" adapters, which should not match
+ {"1", "125T125T", NEXTERA_V2.get5PrimeAdapter(), NEXTERA_V2.get3PrimeAdapter(), 0},
+ {"2", "125T125T", NEXTERA_V2.get5PrimeAdapter(), NEXTERA_V2.get3PrimeAdapter(), 0},
+ // Add just the "fludigm" adapters, which should not match
+ {"1", "125T125T", FLUIDIGM.get5PrimeAdapter(), FLUIDIGM.get3PrimeAdapter(), 0},
+ {"2", "125T125T", FLUIDIGM.get5PrimeAdapter(), FLUIDIGM.get3PrimeAdapter(), 0},
+ // Add just the "dual indexed" adapters, which should match
+ {"1", "125T125T", DUAL_INDEXED.get5PrimeAdapter(), DUAL_INDEXED.get3PrimeAdapter(), 32},
+ {"2", "125T125T", DUAL_INDEXED.get5PrimeAdapter(), DUAL_INDEXED.get3PrimeAdapter(), 108},
+
+ // Add just the "indexed" adapters, which should match
+ {"1", "125T125T", INDEXED.get5PrimeAdapter(), INDEXED.get3PrimeAdapter(), 32},
+ {"2", "125T125T", INDEXED.get5PrimeAdapter(), INDEXED.get3PrimeAdapter(), 108}
+ };
+ }
}

0 comments on commit 7a85c95

Please sign in to comment.