Skip to content

Commit

Permalink
Unit test for GATKReadCoder
Browse files Browse the repository at this point in the history
Added a unit test to verify that GATKReads are being coded correctly
regardless of backing implementation.
  • Loading branch information
droazen committed Jul 13, 2015
1 parent 182d9c3 commit 6b3a0f1
Show file tree
Hide file tree
Showing 4 changed files with 105 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,49 @@ public static GATKRead createArtificialRead(final Cigar cigar) {
return new SAMRecordToGATKReadAdapter(createArtificialSAMRecord(cigar));
}

/**
 * Builds a synthetic GATKRead wrapping a SAMRecord and carrying a caller-chosen UUID.
 *
 * Every base of the read is 'A' with quality 30, and the cigar is {@code <length>M}.
 * The read name and alignment start are set explicitly from the arguments; the contig
 * is whatever createArtificialSAMRecord assigns by default (documented by the original
 * author as contig "1").
 *
 * @param uuid UUID to attach to the new read
 * @param name read name to assign
 * @param start alignment start to assign
 * @param length how many bases the read should contain
 * @return a SAMRecord-backed artificial GATKRead with the given UUID
 */
public static GATKRead createSamBackedReadWithUUID( final UUID uuid, final String name, final int start, final int length ) {
    final SAMFileHeader samHeader = createArtificialSamHeader();
    final byte[] readBases = Utils.dupBytes((byte)'A', length);
    final byte[] baseQualities = Utils.dupBytes((byte) 30, length);
    final String cigarString = length + "M";

    final SAMRecord record = createArtificialSAMRecord(samHeader, readBases, baseQualities, cigarString);
    // The two setters are independent of each other; both override the artificial defaults.
    record.setAlignmentStart(start);
    record.setReadName(name);

    return new SAMRecordToGATKReadAdapter(record, uuid);
}

/**
 * Builds a synthetic GATKRead wrapping a Google Genomics read and carrying a caller-chosen UUID.
 *
 * Every base of the read is 'A' with quality 30, the cigar is {@code <length>M}, and the
 * read is placed on contig "1" at the requested start position.
 *
 * @param uuid UUID to attach to the new read
 * @param name read name to assign
 * @param start start position to assign
 * @param length how many bases the read should contain
 * @return a Google-Genomics-backed artificial GATKRead with the given UUID
 */
public static GATKRead createGoogleBackedReadWithUUID( final UUID uuid, final String name, final int start, final int length ) {
    final byte[] readBases = Utils.dupBytes((byte)'A', length);
    final byte[] baseQualities = Utils.dupBytes((byte) 30, length);
    final String cigarString = length + "M";

    return new GoogleGenomicsReadToGATKReadAdapter(
            createArtificialGoogleGenomicsRead(name, "1", start, readBases, baseQualities, cigarString),
            uuid);
}

/**
* Create an artificial SAMRecord based on the parameters. The cigar string will be *M, where * is the length of the read
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,11 @@ public GoogleGenomicsReadToGATKReadAdapter( final Read genomicsRead ) {
this(genomicsRead, UUID.randomUUID());
}

private GoogleGenomicsReadToGATKReadAdapter( final Read genomicsRead, final UUID uuid ) {
/**
 * Wraps the given Google Genomics read and tags the adapter with an explicitly supplied UUID.
 *
 * Deliberately package-private (no access modifier): intended only for internal use
 * and for test classes.
 *
 * @param genomicsRead the Google Genomics read to wrap; stored as-is
 * @param uuid the UUID to associate with this adapter; stored as-is
 */
GoogleGenomicsReadToGATKReadAdapter( final Read genomicsRead, final UUID uuid ) {
    this.uuid = uuid;
    this.genomicsRead = genomicsRead;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@ public SAMRecordToGATKReadAdapter( final SAMRecord samRecord ) {
this(samRecord, UUID.randomUUID());
}

private SAMRecordToGATKReadAdapter( final SAMRecord samRecord, final UUID uuid ) {
/**
 * Wraps the given SAMRecord and tags the adapter with an explicitly supplied UUID.
 *
 * Deliberately package-private (no access modifier): intended only for internal use
 * and for test classes.
 *
 * @param samRecord the SAMRecord to wrap; stored as-is
 * @param uuid the UUID to associate with this adapter; stored as-is
 */
SAMRecordToGATKReadAdapter( final SAMRecord samRecord, final UUID uuid ) {
    this.uuid = uuid;
    this.samRecord = samRecord;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package org.broadinstitute.hellbender.engine.dataflow.coders;

import com.google.cloud.dataflow.sdk.Pipeline;
import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
import com.google.cloud.dataflow.sdk.testing.TestPipeline;
import com.google.cloud.dataflow.sdk.transforms.Create;
import com.google.cloud.dataflow.sdk.values.PCollection;
import org.broadinstitute.hellbender.utils.dataflow.DataflowUtils;
import org.broadinstitute.hellbender.utils.read.ArtificialReadUtils;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.test.BaseTest;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.util.Arrays;
import java.util.List;
import java.util.UUID;


/**
 * Checks that lists of GATKReads can be pushed through a Dataflow pipeline with the
 * GATK coders registered, for both Google-Genomics-backed and SAMRecord-backed reads.
 */
public class GATKReadCoderUnitTest extends BaseTest {

    /**
     * Creates a Google-Genomics-backed artificial read whose UUID has most-significant
     * bits 0 and least-significant bits equal to {@code uuid}.
     */
    private GATKRead makeGoogleRead( final int uuid, final String name, final int start, final int length ) {
        final UUID readId = new UUID(0, uuid);
        return ArtificialReadUtils.createGoogleBackedReadWithUUID(readId, name, start, length);
    }

    /**
     * Creates a SAMRecord-backed artificial read whose UUID has most-significant
     * bits 0 and least-significant bits equal to {@code uuid}.
     */
    public GATKRead makeSamRead( final int uuid, final String name, final int start, final int length ) {
        final UUID readId = new UUID(0, uuid);
        return ArtificialReadUtils.createSamBackedReadWithUUID(readId, name, start, length);
    }

    /**
     * Supplies two test cases: a list of two Google-backed reads and a list of two SAM-backed reads.
     */
    @DataProvider(name = "reads")
    public Object[][] makeReads() {
        final List<GATKRead> googleBacked = Arrays.asList(
                makeGoogleRead(1, "google1", 1, 10),
                makeGoogleRead(2, "google2", 20, 40));
        final List<GATKRead> samBacked = Arrays.asList(
                makeSamRead(1, "sam1", 1, 10),
                makeSamRead(2, "sam2", 20, 40));

        // One row per backing implementation; each row's single argument is the read list.
        final Object[][] testCases = {
                { googleBacked },
                { samBacked }
        };
        return testCases;
    }

    /**
     * Turns the supplied reads into a PCollection and asserts that it contains exactly
     * those reads (in any order) once the pipeline runs.
     */
    @Test(dataProvider = "reads")
    public void testGATKReadCoding( final List<GATKRead> reads ) {
        // Creating a PCollection of the type and comparing it against the source List
        // is the simplest way to find out whether the type is coded correctly.
        final Pipeline pipeline = TestPipeline.create();
        DataflowUtils.registerGATKCoders(pipeline);

        final PCollection<GATKRead> codedReads = pipeline.apply(Create.of(reads));
        DataflowAssert.that(codedReads).containsInAnyOrder(reads);

        pipeline.run();
    }
}

0 comments on commit 6b3a0f1

Please sign in to comment.