Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactoring #1868

Merged
merged 5 commits into from
Jun 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
27 changes: 17 additions & 10 deletions src/main/java/picard/illumina/IlluminaBasecallsToFastq.java
Original file line number Diff line number Diff line change
Expand Up @@ -441,23 +441,30 @@ private Writer<ClusterData> buildWriter(final File outputPrefix, final int numSa
final File[] templateFiles = new File[inputReadStructure.templates.length()];
final File[] sampleBarcodeFiles = new File[inputReadStructure.sampleBarcodes.length()];
final File[] molecularBarcodeFiles = new File[inputReadStructure.molecularBarcode.length()];
final String templateFormat = "%s.%d.%s";
final String sampleBarcodeFormat = "%s.barcode_%d.%s";
final String molecularBarcodeFormat = "%s.index_%d.%s";

for (int i = 0; i < templateFiles.length; ++i) {
templateFiles[i] = new File(outputDir, String.format("%s.%d.%s", prefixString, i + 1, suffixString));
}
// populate templateFiles with the template output file paths
writeFileWithFormat(outputDir, templateFormat, prefixString, suffixString, templateFiles);

for (int i = 0; i < sampleBarcodeFiles.length; ++i) {
sampleBarcodeFiles[i] = new File(outputDir, String.format("%s.barcode_%d.%s", prefixString, i + 1, suffixString));
}
// populate sampleBarcodeFiles with the sample barcode output file paths
writeFileWithFormat(outputDir, sampleBarcodeFormat, prefixString, suffixString, sampleBarcodeFiles);

for (int i = 0; i < molecularBarcodeFiles.length; ++i) {
molecularBarcodeFiles[i] = new File(outputDir, String.format("%s.index_%d.%s", prefixString, i + 1, suffixString));
}
// populate molecularBarcodeFiles with the molecular barcode output file paths
writeFileWithFormat(outputDir, molecularBarcodeFormat, prefixString, suffixString, molecularBarcodeFiles);

int queueSize = (MAX_RECORDS_IN_RAM / 2) / numSamples;
return writerPool.pool(new ClusterToFastqWriter(templateFiles, sampleBarcodeFiles, molecularBarcodeFiles, TRIMMING_QUALITY, adapters), new LinkedBlockingQueue<>(queueSize), (int) (queueSize * 0.5));
}

/**
 * Fills {@code files} with file handles whose names are built from {@code format}.
 * <p>
 * Despite the method name, nothing is written to disk here: every slot of the array is
 * simply assigned a new {@link File} located under {@code outputDir}.
 *
 * @param outputDir    directory used as the parent of every generated file
 * @param format       {@link String#format} pattern that receives, in order, the prefix,
 *                     the 1-based position of the file and the suffix
 * @param prefixString first argument handed to the pattern
 * @param suffixString third argument handed to the pattern
 * @param files        array populated in place; its length decides how many entries are created
 */
private void writeFileWithFormat(File outputDir, String format, String prefixString, String suffixString, File[] files) {
    // File numbering in the generated names starts at 1, while array slots start at 0.
    int position = 1;
    for (int slot = 0; slot < files.length; slot++, position++) {
        final String fileName = String.format(format, prefixString, position, suffixString);
        files[slot] = new File(outputDir, fileName);
    }
}
/**
* Trivial class to avoid converting ClusterData to another type when not sorting outputs.
*/
Expand Down
12 changes: 10 additions & 2 deletions src/main/java/picard/illumina/MarkIlluminaAdapters.java
Original file line number Diff line number Diff line change
Expand Up @@ -140,13 +140,17 @@ public class MarkIlluminaAdapters extends CommandLineProgram {

@Override
protected String[] customCommandLineValidation() {
if ((FIVE_PRIME_ADAPTER != null && THREE_PRIME_ADAPTER == null) || (THREE_PRIME_ADAPTER != null && FIVE_PRIME_ADAPTER == null)) {
if (hasEitherAdapter()) {
return new String[]{"THREE_PRIME_ADAPTER and FIVE_PRIME_ADAPTER must either both be null or both be set."};
} else {
return null;
}
}

/**
 * Reports whether exactly one of FIVE_PRIME_ADAPTER and THREE_PRIME_ADAPTER is set.
 *
 * @return {@code true} when one adapter is non-null and the other is null;
 *         {@code false} when both are null or both are non-null
 */
private boolean hasEitherAdapter() {
    final boolean fivePrimeMissing = FIVE_PRIME_ADAPTER == null;
    final boolean threePrimeMissing = THREE_PRIME_ADAPTER == null;
    // The two flags differ only when a single adapter was supplied.
    return fivePrimeMissing != threePrimeMissing;
}

@Override
protected int doWork() {
IOUtil.assertFileIsReadable(INPUT);
Expand All @@ -167,7 +171,7 @@ protected int doWork() {
{
final List<AdapterPair> tmp = new ArrayList<AdapterPair>();
tmp.addAll(ADAPTERS);
if (FIVE_PRIME_ADAPTER != null && THREE_PRIME_ADAPTER != null) {
if (hasBothAdapters()) {
tmp.add(new CustomAdapterPair(FIVE_PRIME_ADAPTER, THREE_PRIME_ADAPTER));
}
adapters = tmp.toArray(new AdapterPair[tmp.size()]);
Expand Down Expand Up @@ -246,4 +250,8 @@ protected int doWork() {
CloserUtil.close(in);
return 0;
}

/**
 * Reports whether both FIVE_PRIME_ADAPTER and THREE_PRIME_ADAPTER are set.
 *
 * @return {@code true} only when neither adapter is null
 */
private boolean hasBothAdapters() {
    if (FIVE_PRIME_ADAPTER == null) {
        return false;
    }
    return THREE_PRIME_ADAPTER != null;
}
}
5 changes: 4 additions & 1 deletion src/main/java/picard/sam/markduplicates/MarkDuplicates.java
Original file line number Diff line number Diff line change
Expand Up @@ -709,7 +709,7 @@ public ReadEndsForMarkDuplicates buildReadEnds(final SAMFileHeader header, final
* Goes through the accumulated ReadEndsForMarkDuplicates objects and determines which of them are
* to be marked as duplicates.
*/
public void generateDuplicateIndexes(final boolean useBarcodes, final boolean indexOpticalDuplicates) {
protected void sortIndicesForDuplicates(final boolean indexOpticalDuplicates){
final int entryOverhead;
if (TAG_DUPLICATE_SET_MEMBERS) {
// Memory requirements for RepresentativeReadIndexer:
Expand All @@ -735,6 +735,9 @@ public void generateDuplicateIndexes(final boolean useBarcodes, final boolean in
maxInMemory,
TMP_DIR);
}
}
public void generateDuplicateIndexes(final boolean useBarcodes, final boolean indexOpticalDuplicates) {
sortIndicesForDuplicates(indexOpticalDuplicates);

ReadEndsForMarkDuplicates firstOfNextChunk = null;
final List<ReadEndsForMarkDuplicates> nextChunk = new ArrayList<>(200);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,9 @@
import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.SortingCollection;
import htsjdk.samtools.util.SortingLongCollection;
import picard.sam.markduplicates.util.ReadEndsForMarkDuplicates;
import picard.sam.markduplicates.util.RepresentativeReadIndexerCodec;
import picard.sam.util.RepresentativeReadIndexer;

import java.io.File;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

/**
Expand Down Expand Up @@ -86,32 +80,7 @@ private void validateFlowParameteres() {
* applicable for flow mode invocation.
*/
public void generateDuplicateIndexes(final boolean useBarcodes, final boolean indexOpticalDuplicates) {
final int entryOverhead;
if (md.TAG_DUPLICATE_SET_MEMBERS) {
// Memory requirements for RepresentativeReadIndexer:
// three int entries + overhead: (3 * 4) + 4 = 16 bytes
entryOverhead = 16;
} else {
entryOverhead = SortingLongCollection.SIZEOF;
}
// Keep this number from getting too large even if there is a huge heap.
int maxInMemory = (int) Math.min((Runtime.getRuntime().maxMemory() * 0.25) / entryOverhead, (double) (Integer.MAX_VALUE - 5));
// If we're also tracking optical duplicates, reduce maxInMemory, since we'll need two sorting collections
if (indexOpticalDuplicates) {
maxInMemory /= ((entryOverhead + SortingLongCollection.SIZEOF) / entryOverhead);
md.opticalDuplicateIndexes = new SortingLongCollection(maxInMemory, md.TMP_DIR.toArray(new File[md.TMP_DIR.size()]));
}
log.info("Will retain up to " + maxInMemory + " duplicate indices before spilling to disk.");
md.duplicateIndexes = new SortingLongCollection(maxInMemory, md.TMP_DIR.toArray(new File[md.TMP_DIR.size()]));
if (md.TAG_DUPLICATE_SET_MEMBERS) {
final RepresentativeReadIndexerCodec representativeIndexCodec = new RepresentativeReadIndexerCodec();
md.representativeReadIndicesForDuplicates = SortingCollection.newInstance(RepresentativeReadIndexer.class,
representativeIndexCodec,
Comparator.comparing(read -> read.readIndexInFile),
maxInMemory,
md.TMP_DIR);
}

md.sortIndicesForDuplicates(indexOpticalDuplicates);
// this code does not support pairs at this time
if ( md.pairSort.iterator().hasNext() ) {
throw new IllegalArgumentException("Flow based code does not support paired reads");
Expand Down