diff --git a/src/main/java/picard/arrays/VcfToAdpc.java b/src/main/java/picard/arrays/VcfToAdpc.java index 1db244335a..bc6ea772df 100644 --- a/src/main/java/picard/arrays/VcfToAdpc.java +++ b/src/main/java/picard/arrays/VcfToAdpc.java @@ -31,6 +31,7 @@ import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.vcf.VCFFileReader; +import htsjdk.variant.vcf.VCFHeader; import org.apache.commons.lang.StringUtils; import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; @@ -42,7 +43,12 @@ import picard.cmdline.CommandLineProgram; import picard.cmdline.StandardOptionDefinitions; +import java.io.BufferedWriter; import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; @@ -62,55 +68,85 @@ public class VcfToAdpc extends CommandLineProgram { "An adpc.bin file is a binary file containing genotyping array intensity data that can be exported " + "by Illumina's GenomeStudio and Beadstudio analysis tools. The adpc.bin file is used as an input to " + "VerifyIDintensity a tool for " + - "detecting and estimating sample contamination of Illumina genotyping array data." + + "detecting and estimating sample contamination of Illumina genotyping array data. " + + "If more than one VCF is used, they must all have the same number of loci." + "

Usage example:

" + "
" +
                     "java -jar picard.jar VcfToAdpc \\
" + " VCF=input.vcf \\
" + - " OUTPUT=output.adpc.bin" + + " OUTPUT=output.adpc.bin \\
" + + " SAMPLES_FILE=output.samples.txt \\
" + + " NUM_MARKERS_FILE=output.num_markers.txt
" + "
"; private final Log log = Log.getInstance(VcfToAdpc.class); - @Argument(doc = "The Input VCF") - public File VCF; + @Argument(doc = "One or more VCF files containing array intensity data.") + public List VCF; @Argument(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "The output (adpc.bin) file to write.") public File OUTPUT; + @Argument(shortName = "SF", doc = "A text file into which the names of the samples will be written. " + + "These will be in the same order as the data in the adpc.bin file.") + public File SAMPLES_FILE; + + @Argument(shortName = "NMF", doc = "A text file into which the number of loci in the VCF will be written. " + + "This is useful for calling verifyIDIntensity.") + public File NUM_MARKERS_FILE; + @Override protected int doWork() { - IOUtil.assertFileIsReadable(VCF); + final List inputs = IOUtil.unrollFiles(VCF, IOUtil.VCF_EXTENSIONS); + IOUtil.assertFilesAreReadable(inputs); + IOUtil.assertFileIsWritable(SAMPLES_FILE); IOUtil.assertFileIsWritable(OUTPUT); - VCFFileReader vcfFileReader = new VCFFileReader(VCF, false); + final List sampleNames = new ArrayList<>(); + + Integer numberOfLoci = null; + try (IlluminaAdpcFileWriter adpcFileWriter = new IlluminaAdpcFileWriter(OUTPUT)) { + for (final File inputVcf : inputs) { + VCFFileReader vcfFileReader = new VCFFileReader(inputVcf, false); + final VCFHeader header = vcfFileReader.getFileHeader(); + for (int sampleNumber = 0; sampleNumber < header.getNGenotypeSamples(); sampleNumber++) { + final String sampleName = header.getGenotypeSamples().get(sampleNumber); + sampleNames.add(sampleName); + log.info("Processing sample: " + sampleName + " from VCF: " + inputVcf.getAbsolutePath()); - try (CloseableIterator variants = vcfFileReader.iterator(); - IlluminaAdpcFileWriter adpcFileWriter = new IlluminaAdpcFileWriter(OUTPUT)) { + CloseableIterator variants = vcfFileReader.iterator(); + int lociCount = 0; + while (variants.hasNext()) { + final VariantContext context = variants.next(); + 
final float gcScore = getFloatAttribute(context, InfiniumVcfFields.GC_SCORE); - final List adpcRecordList = new ArrayList<>(); - while (variants.hasNext()) { - final VariantContext context = variants.next(); - final float gcScore = getFloatAttribute(context, InfiniumVcfFields.GC_SCORE); + final Genotype genotype = context.getGenotype(sampleNumber); + final IlluminaGenotype illuminaGenotype = getIlluminaGenotype(genotype, context); - for (final Genotype genotype : context.getGenotypes()) { - final IlluminaGenotype illuminaGenotype = getIlluminaGenotype(genotype, context); + final int rawXIntensity = getUnsignedShortAttributeAsInt(genotype, InfiniumVcfFields.X); + final int rawYIntensity = getUnsignedShortAttributeAsInt(genotype, InfiniumVcfFields.Y); - final short rawXIntensity = getShortAttribute(genotype, InfiniumVcfFields.X); - final short rawYIntensity = getShortAttribute(genotype, InfiniumVcfFields.Y); + final Float normalizedXIntensity = getFloatAttribute(genotype, InfiniumVcfFields.NORMX); + final Float normalizedYIntensity = getFloatAttribute(genotype, InfiniumVcfFields.NORMY); - final Float normalizedXIntensity = getFloatAttribute(genotype, InfiniumVcfFields.NORMX); - final Float normalizedYIntensity = getFloatAttribute(genotype, InfiniumVcfFields.NORMY); - if ((normalizedXIntensity != null) && (normalizedYIntensity != null)) { final IlluminaAdpcFileWriter.Record record = new IlluminaAdpcFileWriter.Record(rawXIntensity, rawYIntensity, normalizedXIntensity, normalizedYIntensity, gcScore, illuminaGenotype); - adpcRecordList.add(record); + adpcFileWriter.write(record); + lociCount++; + } + if (lociCount == 0) { + throw new PicardException("Found no records in VCF '" + inputVcf.getAbsolutePath() + "'"); + } + if (numberOfLoci == null) { + numberOfLoci = lociCount; + } else { + if (lociCount != numberOfLoci) { + throw new PicardException("VCFs have differing number of loci"); + } } } } - if (adpcRecordList.isEmpty()) { - throw new PicardException("No valid 
records found in VCF!"); - } - adpcFileWriter.write(adpcRecordList); + writeTextToFile(SAMPLES_FILE, StringUtils.join(sampleNames, "\n")); + writeTextToFile(NUM_MARKERS_FILE, "" + numberOfLoci); } catch (Exception e) { log.error(e); return 1; @@ -119,6 +155,13 @@ protected int doWork() { return 0; } + private void writeTextToFile(final File output, final String text) throws IOException { + try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter( + new FileOutputStream(output), StandardCharsets.UTF_8))) { + writer.write(text); + } + } + private IlluminaGenotype getIlluminaGenotype(final Genotype genotype, final VariantContext context) { final IlluminaGenotype illuminaGenotype; if (genotype.isCalled()) { @@ -157,16 +200,16 @@ private IlluminaGenotype getIlluminaGenotype(final Genotype genotype, final Vari return illuminaGenotype; } - private short getShortAttribute(final Genotype genotype, final String key) { + private int getUnsignedShortAttributeAsInt(final Genotype genotype, final String key) { final int attributeAsInt = Integer.parseInt(getRequiredAttribute(genotype, key).toString()); - final short returnedAttribute; - if (attributeAsInt <= Short.MAX_VALUE) { - returnedAttribute = (short) attributeAsInt; - } else { - log.warn("Value for key " + key + " (" + attributeAsInt + ") is > " + Short.MAX_VALUE + " (truncating it)"); - returnedAttribute = Short.MAX_VALUE; + if (attributeAsInt < 0) { + throw new PicardException("Value for key " + key + " (" + attributeAsInt + ") is < 0! 
Invalid value for unsigned int"); + } + if (attributeAsInt > picard.arrays.illumina.InfiniumDataFile.MAX_UNSIGNED_SHORT) { + log.warn("Value for key " + key + " (" + attributeAsInt + ") is > " + picard.arrays.illumina.InfiniumDataFile.MAX_UNSIGNED_SHORT + " (truncating it)"); + return picard.arrays.illumina.InfiniumDataFile.MAX_UNSIGNED_SHORT; } - return returnedAttribute; + return attributeAsInt; } private Float getFloatAttribute(final Genotype genotype, final String key) { diff --git a/src/main/java/picard/arrays/illumina/IlluminaAdpcFileWriter.java b/src/main/java/picard/arrays/illumina/IlluminaAdpcFileWriter.java index c92202bba4..43a6a50c00 100644 --- a/src/main/java/picard/arrays/illumina/IlluminaAdpcFileWriter.java +++ b/src/main/java/picard/arrays/illumina/IlluminaAdpcFileWriter.java @@ -59,6 +59,8 @@ */ public class IlluminaAdpcFileWriter implements AutoCloseable { + private final String HEADER = "1234567890123456"; + private final DataOutputStream outputStream; public IlluminaAdpcFileWriter(final File adpcFile) throws IOException { @@ -67,44 +69,48 @@ public IlluminaAdpcFileWriter(final File adpcFile) throws IOException { } private void writeHeaderData() throws IOException { - outputStream.write("1234567890123456".getBytes()); + outputStream.write(HEADER.getBytes()); } public void write(Iterable illuminaAdpcRecords) throws IOException { for (Record illuminaAdpcRecord : illuminaAdpcRecords) { - illuminaAdpcRecord.write(outputStream); + write(illuminaAdpcRecord); } } + public void write(Record illuminaAdpcRecord) throws IOException { + illuminaAdpcRecord.write(outputStream); + } + @Override public void close() throws Exception { outputStream.close(); } public static class Record { - final short aIntensity; - final short bIntensity; + final int aIntensity; + final int bIntensity; final float aNormalizedIntensity; final float bNormalizedIntensity; final float gcScore; final IlluminaGenotype genotype; - public Record(short aIntensity, short bIntensity, float 
aNormalizedIntensity, float bNormalizedIntensity, float gcScore, IlluminaGenotype genotype) { + public Record(int aIntensity, int bIntensity, Float aNormalizedIntensity, Float bNormalizedIntensity, float gcScore, IlluminaGenotype genotype) { this.aIntensity = aIntensity; this.bIntensity = bIntensity; - this.aNormalizedIntensity = aNormalizedIntensity; - this.bNormalizedIntensity = bNormalizedIntensity; + this.aNormalizedIntensity = aNormalizedIntensity != null ? aNormalizedIntensity : Float.NaN; + this.bNormalizedIntensity = bNormalizedIntensity != null ? bNormalizedIntensity : Float.NaN; this.gcScore = gcScore; this.genotype = genotype; } public void write(final DataOutputStream outputStream) throws IOException { - InfiniumDataFile.writeShort(outputStream, aIntensity); - InfiniumDataFile.writeShort(outputStream, bIntensity); + InfiniumDataFile.writeUnsignedShort(outputStream, aIntensity); + InfiniumDataFile.writeUnsignedShort(outputStream, bIntensity); InfiniumDataFile.writeFloat(outputStream, aNormalizedIntensity); InfiniumDataFile.writeFloat(outputStream, bNormalizedIntensity); InfiniumDataFile.writeFloat(outputStream, gcScore); - InfiniumDataFile.writeShort(outputStream, genotype.value); + InfiniumDataFile.writeUnsignedShort(outputStream, genotype.value); } } } diff --git a/src/main/java/picard/arrays/illumina/InfiniumDataFile.java b/src/main/java/picard/arrays/illumina/InfiniumDataFile.java index 5c25bd31c0..21b0dd8baf 100644 --- a/src/main/java/picard/arrays/illumina/InfiniumDataFile.java +++ b/src/main/java/picard/arrays/illumina/InfiniumDataFile.java @@ -25,6 +25,7 @@ package picard.arrays.illumina; import org.apache.commons.io.IOUtils; +import picard.PicardException; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -38,6 +39,8 @@ */ public abstract class InfiniumDataFile { + public static final int MAX_UNSIGNED_SHORT = 65535; + private String identifier; private int numberOfEntries; private int fileVersion; @@ -178,11 +181,15 
@@ float parseFloat() throws IOException { } /** - * Utility method for writing a short value to an outputStream. + * Utility method for writing an unsigned short value to an outputStream. + * Note that Java has no unsigned short value, so we pass it as an int and size-validate here. * Writes in Illumina (little-endian) format */ - static void writeShort(final DataOutputStream outputStream, final short value) throws IOException { - final byte[] byteArray = shortToByteArray(value); + static void writeUnsignedShort(final DataOutputStream outputStream, final int value) throws IOException { + if (value < 0 || value > MAX_UNSIGNED_SHORT) { + throw new PicardException("Value " + value + " is out of range for an unsigned short"); + } + final byte[] byteArray = shortToByteArray((short) (value & 0x0000ffff)); outputStream.write(byteArray); } diff --git a/src/test/java/picard/arrays/illumina/IlluminaAdpcFileWriterTest.java b/src/test/java/picard/arrays/illumina/IlluminaAdpcFileWriterTest.java index 773e17d1eb..53abc7e4a5 100644 --- a/src/test/java/picard/arrays/illumina/IlluminaAdpcFileWriterTest.java +++ b/src/test/java/picard/arrays/illumina/IlluminaAdpcFileWriterTest.java @@ -11,7 +11,7 @@ public class IlluminaAdpcFileWriterTest { private static final File TEST_DATA_DIR = new File("testdata/picard/arrays/illumina"); - private static final File TEST_EXPECTED_ADPC_BIN_FILE = new File(TEST_DATA_DIR, "TestIlluminaAdpcFileWriter.adpc.bin"); + private static final File TEST_EXPECTED_ADPC_BIN_FILE = new File(TEST_DATA_DIR, "TestVcfToAdpc.adpc.bin"); @Test public void testWriteIlluminaAdpcFile() throws Exception { @@ -20,11 +20,12 @@ public void testWriteIlluminaAdpcFile() throws Exception { try (final IlluminaAdpcFileWriter adpcFileWriter = new IlluminaAdpcFileWriter(output)) { final List adpcRecordList = new ArrayList<>(); - adpcRecordList.add(new IlluminaAdpcFileWriter.Record((short) 11352, (short) 405, 1.444f, 0.088f, 0.705f, IlluminaGenotype.AA)); - adpcRecordList.add(new 
IlluminaAdpcFileWriter.Record((short) 458, (short) 2743, 0.043f, 0.852f, 0.818f, IlluminaGenotype.BB)); - adpcRecordList.add(new IlluminaAdpcFileWriter.Record((short) 7548, (short) 303, 1.072f, 0.076f, 0.0f, IlluminaGenotype.NN)); - adpcRecordList.add(new IlluminaAdpcFileWriter.Record((short) 7414, (short) 2158, 0.805f, 0.597f, 0.881f, IlluminaGenotype.AB)); - adpcRecordList.add(new IlluminaAdpcFileWriter.Record((short) 222, (short) 215, 0.0f, 0.0f, 0.91f, IlluminaGenotype.NN)); + adpcRecordList.add(new IlluminaAdpcFileWriter.Record(11352, 405, 1.444f, 0.088f, 0.705f, IlluminaGenotype.AA)); + adpcRecordList.add(new IlluminaAdpcFileWriter.Record(458, 2743, 0.043f, 0.852f, 0.818f, IlluminaGenotype.BB)); + adpcRecordList.add(new IlluminaAdpcFileWriter.Record(7548, 303, 1.072f, 0.076f, 0.0f, IlluminaGenotype.NN)); + adpcRecordList.add(new IlluminaAdpcFileWriter.Record(7414, 2158, 0.805f, 0.597f, 0.881f, IlluminaGenotype.AB)); + adpcRecordList.add(new IlluminaAdpcFileWriter.Record(222, 215, 0.0f, 0.0f, 0.91f, IlluminaGenotype.NN)); + adpcRecordList.add(new IlluminaAdpcFileWriter.Record(232, 246, null, null, 0.926f, IlluminaGenotype.NN)); adpcFileWriter.write(adpcRecordList); } IOUtil.assertFilesEqual(TEST_EXPECTED_ADPC_BIN_FILE, output); diff --git a/src/test/java/picard/arrays/illumina/VcfToAdpcTest.java b/src/test/java/picard/arrays/illumina/VcfToAdpcTest.java index b5e957ae41..ee6877224a 100644 --- a/src/test/java/picard/arrays/illumina/VcfToAdpcTest.java +++ b/src/test/java/picard/arrays/illumina/VcfToAdpcTest.java @@ -2,29 +2,93 @@ import htsjdk.samtools.util.IOUtil; import org.testng.Assert; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import picard.PicardException; import picard.arrays.VcfToAdpc; import java.io.File; import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; public class VcfToAdpcTest { private static final File TEST_DATA_DIR = new 
File("testdata/picard/arrays/illumina"); private static final File TEST_VCF = new File(TEST_DATA_DIR, "TestVcfToAdpc.vcf"); - private static final File TEST_EXPECTED_ADPC_BIN_FILE = new File(TEST_DATA_DIR, "TestIlluminaAdpcFileWriter.adpc.bin"); + private static final File SINGLE_SAMPLE_VCF = new File(TEST_DATA_DIR, "TestAdpc1.vcf"); + private static final File MULTI_SAMPLE_VCF = new File(TEST_DATA_DIR, "TestAdpc23.vcf"); - @Test - public void testVcfToAdpc() throws IOException { + private static final File EXPECTED_ADPC_BIN_FILE = new File(TEST_DATA_DIR, "TestVcfToAdpc.adpc.bin"); + private static final File EXPECTED_SAMPLES_FILE = new File(TEST_DATA_DIR, "TestVcfToAdpc.samples.txt"); + private static final File EXPECTED_NUM_MARKERS_FILE = new File(TEST_DATA_DIR, "TestVcfToAdpc.num_markers.txt"); + + // Test with a single sample VCF as input. + private static final File EXPECTED_SINGLE_SAMPLE_ADPC_BIN_FILE = new File(TEST_DATA_DIR, "TestAdpc1.adpc.bin"); + private static final File EXPECTED_SINGLE_SAMPLE_SAMPLES_FILE = new File(TEST_DATA_DIR, "TestAdpc1.samples.txt"); + private static final File EXPECTED_SINGLE_SAMPLE_NUM_MARKERS_FILE = new File(TEST_DATA_DIR, "TestAdpc.num_markers.txt"); + // Test with a multi sample VCF as input + private static final File EXPECTED_MULTI_SAMPLE_ADPC_BIN_FILE = new File(TEST_DATA_DIR, "TestAdpc23.adpc.bin"); + private static final File EXPECTED_MULTI_SAMPLE_SAMPLES_FILE = new File(TEST_DATA_DIR, "TestAdpc23.samples.txt"); + private static final File EXPECTED_MULTI_SAMPLE_NUM_MARKERS_FILE = new File(TEST_DATA_DIR, "TestAdpc.num_markers.txt"); + // Test with both a single sample VCF and multi sample VCF as input. 
In that order + private static final File EXPECTED_S_M_ADPC_BIN_FILE = new File(TEST_DATA_DIR, "TestAdpc1_23.adpc.bin"); + private static final File EXPECTED_S_M_SAMPLES_FILE = new File(TEST_DATA_DIR, "TestAdpc1_23.samples.txt"); + private static final File EXPECTED_S_M_NUM_MARKERS_FILE = new File(TEST_DATA_DIR, "TestAdpc.num_markers.txt"); + // Test with both a multi sample VCF and single sample VCF as input. In that order + private static final File EXPECTED_M_S_ADPC_BIN_FILE = new File(TEST_DATA_DIR, "TestAdpc23_1.adpc.bin"); + private static final File EXPECTED_M_S_SAMPLES_FILE = new File(TEST_DATA_DIR, "TestAdpc23_1.samples.txt"); + private static final File EXPECTED_M_S_NUM_MARKERS_FILE = new File(TEST_DATA_DIR, "TestAdpc.num_markers.txt"); + + @DataProvider(name = "vcfToAdpcBinCombinations") + public Object[][] vcfToAdpcBinCombinations() { + return new Object[][]{ + {Collections.singletonList(TEST_VCF), EXPECTED_ADPC_BIN_FILE, EXPECTED_SAMPLES_FILE, EXPECTED_NUM_MARKERS_FILE}, + {Collections.singletonList(SINGLE_SAMPLE_VCF), EXPECTED_SINGLE_SAMPLE_ADPC_BIN_FILE, EXPECTED_SINGLE_SAMPLE_SAMPLES_FILE, EXPECTED_SINGLE_SAMPLE_NUM_MARKERS_FILE}, + {Collections.singletonList(MULTI_SAMPLE_VCF), EXPECTED_MULTI_SAMPLE_ADPC_BIN_FILE, EXPECTED_MULTI_SAMPLE_SAMPLES_FILE, EXPECTED_MULTI_SAMPLE_NUM_MARKERS_FILE}, + {Arrays.asList(SINGLE_SAMPLE_VCF, MULTI_SAMPLE_VCF), EXPECTED_S_M_ADPC_BIN_FILE, EXPECTED_S_M_SAMPLES_FILE, EXPECTED_S_M_NUM_MARKERS_FILE}, + {Arrays.asList(MULTI_SAMPLE_VCF, SINGLE_SAMPLE_VCF), EXPECTED_M_S_ADPC_BIN_FILE, EXPECTED_M_S_SAMPLES_FILE, EXPECTED_M_S_NUM_MARKERS_FILE} + }; + } + + @Test(dataProvider = "vcfToAdpcBinCombinations") + public void testVcfToAdpc(final List vcfs, final File expectedAdpcBinFile, final File expectedSamplesFile, final File expectedNumMarkersFile) throws IOException { final File output = File.createTempFile("testIlluminaAdpcFileWriter.", ".adpc.bin"); output.deleteOnExit(); + final File samplesFile = 
File.createTempFile("testIlluminaAdpcFileWriter.", ".samples.txt"); + samplesFile.deleteOnExit(); + final File numMarkersFile = File.createTempFile("testIlluminaAdpcFileWriter.", ".num_markers.txt"); + numMarkersFile.deleteOnExit(); final VcfToAdpc vcfToAdpc = new VcfToAdpc(); - vcfToAdpc.VCF = TEST_VCF; + vcfToAdpc.VCF = vcfs; vcfToAdpc.OUTPUT = output; + vcfToAdpc.SAMPLES_FILE = samplesFile; + vcfToAdpc.NUM_MARKERS_FILE = numMarkersFile; Assert.assertEquals(vcfToAdpc.instanceMain(new String[0]), 0); - IOUtil.assertFilesEqual(TEST_EXPECTED_ADPC_BIN_FILE, output); + IOUtil.assertFilesEqual(expectedAdpcBinFile, output); + IOUtil.assertFilesEqual(expectedSamplesFile, samplesFile); + IOUtil.assertFilesEqual(expectedNumMarkersFile, numMarkersFile); + } + + @Test + public void testVcfToAdpcFailOnDifferingNumberOfLoci() throws IOException { + final File output = File.createTempFile("testIlluminaAdpcFileWriter.", ".adpc.bin"); + output.deleteOnExit(); + final File samplesFile = File.createTempFile("testIlluminaAdpcFileWriter.", ".samples.txt"); + samplesFile.deleteOnExit(); + final File numMarkersFile = File.createTempFile("testIlluminaAdpcFileWriter.", ".num_markers.txt"); + numMarkersFile.deleteOnExit(); + + final VcfToAdpc vcfToAdpc = new VcfToAdpc(); + vcfToAdpc.VCF = Arrays.asList(TEST_VCF, SINGLE_SAMPLE_VCF); + vcfToAdpc.OUTPUT = output; + vcfToAdpc.SAMPLES_FILE = samplesFile; + vcfToAdpc.NUM_MARKERS_FILE = numMarkersFile; + + Assert.assertEquals(vcfToAdpc.instanceMain(new String[0]), 1); } } diff --git a/testdata/picard/arrays/illumina/TestAdpc.num_markers.txt b/testdata/picard/arrays/illumina/TestAdpc.num_markers.txt new file mode 100644 index 0000000000..3cacc0b93c --- /dev/null +++ b/testdata/picard/arrays/illumina/TestAdpc.num_markers.txt @@ -0,0 +1 @@ +12 \ No newline at end of file diff --git a/testdata/picard/arrays/illumina/TestAdpc1.adpc.bin b/testdata/picard/arrays/illumina/TestAdpc1.adpc.bin new file mode 100644 index 0000000000..a340c9d1cb Binary files 
/dev/null and b/testdata/picard/arrays/illumina/TestAdpc1.adpc.bin differ diff --git a/testdata/picard/arrays/illumina/TestAdpc1.samples.txt b/testdata/picard/arrays/illumina/TestAdpc1.samples.txt new file mode 100644 index 0000000000..f48e29bdbc --- /dev/null +++ b/testdata/picard/arrays/illumina/TestAdpc1.samples.txt @@ -0,0 +1 @@ +203469020115_R01C01 \ No newline at end of file diff --git a/testdata/picard/arrays/illumina/TestAdpc1.vcf b/testdata/picard/arrays/illumina/TestAdpc1.vcf new file mode 100644 index 0000000000..9f7f7b41a3 --- /dev/null +++ b/testdata/picard/arrays/illumina/TestAdpc1.vcf @@ -0,0 +1,200 @@ +##fileformat=VCFv4.2 +##Biotin(Bgnd)=Biotin(Bgnd)|Staining|488|97 +##Biotin(High)=Biotin(High)|Staining|1367|12751 +##DNP(Bgnd)=DNP(Bgnd)|Staining|591|348 +##DNP(High)=DNP(High)|Staining|39060|309 +##Extension(A)=Extension(A)|Extension|40282|630 +##Extension(C)=Extension(C)|Extension|2503|18619 +##Extension(G)=Extension(G)|Extension|2937|17960 +##Extension(T)=Extension(T)|Extension|41073|496 +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##Hyb(High)=Hyb(High)|Hybridization|2841|16054 +##Hyb(Low)=Hyb(Low)|Hybridization|2820|3996 +##Hyb(Medium)=Hyb(Medium)|Hybridization|966|10518 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##NP(A)=NP(A)|Non-Polymorphic|12697|441 +##NP(C)=NP(C)|Non-Polymorphic|1083|9309 +##NP(G)=NP(G)|Non-Polymorphic|1041|7455 +##NP(T)=NP(T)|Non-Polymorphic|15318|408 +##NSB(Bgnd)Blue=NSB(Bgnd)Blue|Non-SpecificBinding|706|395 +##NSB(Bgnd)Green=NSB(Bgnd)Green|Non-SpecificBinding|638|335 
+##NSB(Bgnd)Purple=NSB(Bgnd)Purple|Non-SpecificBinding|588|324 +##NSB(Bgnd)Red=NSB(Bgnd)Red|Non-SpecificBinding|635|388 +##Restore=Restore|Restoration|700|427 +##String(MM)=String(MM)|Stringency|4134|355 +##String(PM)=String(PM)|Stringency|25371|644 +##TargetRemoval=TargetRemoval|TargetRemoval|1696|337 +##analysisVersionNumber=6 +##arrayType=MEG_AllofUs_20002558X351448_A2 +##autocallDate=12/16/2019 3:48 PM +##autocallGender=F +##autocallVersion=3.0.0 +##chipWellBarcode=203469020115_R01C01 +##clusterFile=MEG_AllofUs_A1_Gentrain_1299_edited_prevalidation_081419update.egt +##content=MEG_AllofUs_20002558X351448_A2.1.4.extended.csv +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##expectedGender=Unknown +##extendedIlluminaManifestVersion=1.4 +##extendedManifestFile=MEG_AllofUs_20002558X351448_A2.1.4.extended.csv +##fileDate=Mon Dec 16 15:51:50 UTC 2019 +##fingerprintGender=Unknown +##imagingDate=10/3/2019 6:06:48 PM +##manifestFile=MEG_AllofUs_20002558X351448_A2.bpm +##p95Green=10073 +##p95Red=17625 +##picardVersion=Version:2.21.2-SNAPSHOT 
+##reference=/cromwell_root/broad-references/hg19/v0/Homo_sapiens_assembly19.fasta +##sampleAlias=NA12891_2_NA12892_98_A +##scannerName=N1052 +##source=GtcToVcf +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 203469020115_R01C01 +1 49554 1:49554-G-A A G . . AC=0;AF=0.00;ALLELE_A=A*;ALLELE_B=G;AN=2;BEADSET_ID=1165;GC_SCORE=0.705;ILLUMINA_BUILD=37;ILLUMINA_CHR=1;ILLUMINA_POS=49554;ILLUMINA_STRAND=TOP;N_AA=1159;N_AB=140;N_BB=0;PROBE_A=AAAGTCAAAAAGAAAAAGAAAAAAAGAAAAATCCATGCATATGATACATC;PROBE_B=.;SOURCE=PAGE;devR_AA=0.156;devR_AB=0.178;devR_BB=0.128;devTHETA_AA=0.007;devTHETA_AB=0.037;devTHETA_BB=0.022;devX_AA=0.147;devX_AB=0.139;devX_BB=0.047;devY_AA=0.019;devY_AB=0.087;devY_BB=0.129;meanR_AA=1.615;meanR_AB=2.007;meanR_BB=1.462;meanTHETA_AA=0.043;meanTHETA_AB=0.273;meanTHETA_BB=0.964;meanX_AA=1.512;meanX_AB=1.378;meanX_BB=0.079;meanY_AA=0.103;meanY_AB=0.629;meanY_BB=1.383 GT:BAF:IGC:LRR:NORMX:NORMY:R:THETA:X:Y 0/0:0:0.581:-0.343:1.201:0.046:1.247:0.024:14733:519 +1 115746 JHU_1.115745 C T . . AC=0;AF=0.00;ALLELE_A=T;ALLELE_B=C*;AN=2;BEADSET_ID=1040;GC_SCORE=0.641;ILLUMINA_BUILD=37;ILLUMINA_CHR=1;ILLUMINA_POS=115746;ILLUMINA_STRAND=BOT;N_AA=0;N_AB=52;N_BB=1245;PROBE_A=TCTATAAACTCACAAGAAATTTCTGCCCACCCAGCACACAGTTTGTCCAG;PROBE_B=.;SOURCE=1;devR_AA=0.141;devR_AB=0.172;devR_BB=0.126;devTHETA_AA=0.022;devTHETA_AB=0.018;devTHETA_BB=0.009;devX_AA=0.14;devX_AB=0.054;devX_BB=0.026;devY_AA=0.047;devY_AB=0.137;devY_BB=0.117;meanR_AA=1.481;meanR_AB=2.048;meanR_BB=1.875;meanTHETA_AA=0.041;meanTHETA_AB=0.816;meanTHETA_BB=0.937;meanX_AA=1.392;meanX_AB=0.469;meanX_BB=0.168;meanY_AA=0.089;meanY_AB=1.579;meanY_BB=1.706;refSNP=rs147538909 GT:BAF:IGC:LRR:NORMX:NORMY:R:THETA:X:Y 0/0:1:0.469:0.183:0.177:1.944:2.121:0.942:2413:12648 +1 534238 JHU_1.534237 A C . . 
AC=0;AF=0.00;ALLELE_A=A*;ALLELE_B=C;AN=2;BEADSET_ID=1040;GC_SCORE=0.838;ILLUMINA_BUILD=37;ILLUMINA_CHR=1;ILLUMINA_POS=534238;ILLUMINA_STRAND=TOP;N_AA=1299;N_AB=0;N_BB=0;PROBE_A=TTCGCTTCACCGCCTTGACAGCTTTGCAGAGTGCTGCTCAGGTATTCTGC;PROBE_B=.;SOURCE=1;devR_AA=0.131;devR_AB=0.182;devR_BB=0.107;devTHETA_AA=0.014;devTHETA_AB=0.022;devTHETA_BB=0.022;devX_AA=0.128;devX_AB=0.095;devX_BB=0.04;devY_AA=0.027;devY_AB=0.096;devY_BB=0.111;meanR_AA=1.299;meanR_AB=1.611;meanR_BB=1.226;meanTHETA_AA=0.03;meanTHETA_AB=0.503;meanTHETA_BB=0.976;meanX_AA=1.241;meanX_AB=0.802;meanX_BB=0.045;meanY_AA=0.059;meanY_AB=0.809;meanY_BB=1.181 GT:BAF:IGC:LRR:NORMX:NORMY:R:THETA:X:Y 0/0:0:0.43:0.195:1.436:0.043:1.479:0.019:15799:487 +1 565433 rs9701055 C T . ZEROED_OUT_ASSAY AC=0;AF=0.00;ALLELE_A=T;ALLELE_B=C*;AN=0;BEADSET_ID=1618;GC_SCORE=0;ILLUMINA_BUILD=37;ILLUMINA_CHR=1;ILLUMINA_POS=565433;ILLUMINA_STRAND=TOP;N_AA=21;N_AB=103;N_BB=1172;PROBE_A=AGGAGAAGGCTTACGTTTAATGAGGGAGAAATTTGGTATATGATTGAGAT;PROBE_B=.;SOURCE=dbSNP;devR_AA=0.248;devR_AB=0.196;devR_BB=0.146;devTHETA_AA=0.019;devTHETA_AB=0.059;devTHETA_BB=0.016;devX_AA=0.213;devX_AB=0.142;devX_BB=0.032;devY_AA=0.049;devY_AB=0.113;devY_BB=0.128;meanR_AA=1.366;meanR_AB=1.669;meanR_BB=1.274;meanTHETA_AA=0.113;meanTHETA_AB=0.381;meanTHETA_BB=0.893;meanX_AA=1.158;meanX_AB=0.992;meanX_BB=0.185;meanY_AA=0.208;meanY_AB=0.677;meanY_BB=1.089;refSNP=rs9701055 GT:BAF:IGC:LRR:NORMX:NORMY:R:THETA:X:Y ./.:1:0:-0.056:0.169:1.053:1.222:0.899:2336:6427 +1 565476 JHU_1.565475 C G . . 
AC=0;AF=0.00;ALLELE_A=G;ALLELE_B=C*;AN=2;BEADSET_ID=1040;GC_SCORE=0.683;ILLUMINA_BUILD=37;ILLUMINA_CHR=1;ILLUMINA_POS=565476;ILLUMINA_STRAND=BOT;N_AA=0;N_AB=0;N_BB=1289;PROBE_A=AGCCCCCATCTCAATCATATACCAAATTTCTCCCTCATTAAACGTAAGCG;PROBE_B=AGCCCCCATCTCAATCATATACCAAATTTCTCCCTCATTAAACGTAAGCC;SOURCE=1;devR_AA=0.1;devR_AB=0.1;devR_BB=0.125;devTHETA_AA=0.022;devTHETA_AB=0.014;devTHETA_BB=0.014;devX_AA=0.101;devX_AB=0.027;devX_BB=0.02;devY_AA=0.032;devY_AB=0.08;devY_BB=0.12;meanR_AA=0.976;meanR_AB=1.07;meanR_BB=0.959;meanTHETA_AA=0.024;meanTHETA_AB=0.83;meanTHETA_BB=0.965;meanX_AA=0.94;meanX_AB=0.23;meanX_BB=0.05;meanY_AA=0.036;meanY_AB=0.84;meanY_BB=0.908 GT:BAF:IGC:LRR:NORMX:NORMY:R:THETA:X:Y 0/0:0.984:0.272:0.523:0.081:1.302:1.383:0.96:1182:13568 +1 566140 M:5591-G-A G A . . AC=0;AF=0.00;ALLELE_A=A;ALLELE_B=G*;AN=2;BEADSET_ID=1080;GC_SCORE=0.728;ILLUMINA_BUILD=37;ILLUMINA_CHR=1;ILLUMINA_POS=566140;ILLUMINA_STRAND=TOP;N_AA=0;N_AB=0;N_BB=1273;PROBE_A=GCCTTCAAAGCCCTCAGTAAGTTGCAATACTTAATTTCTGCAACAGCTAA;PROBE_B=.;SOURCE=PAGE;devR_AA=0.152;devR_AB=0.229;devR_BB=0.113;devTHETA_AA=0.022;devTHETA_AB=0.022;devTHETA_BB=0.013;devX_AA=0.156;devX_AB=0.126;devX_BB=0.027;devY_AA=0.053;devY_AB=0.114;devY_BB=0.102;meanR_AA=1.604;meanR_AB=2.03;meanR_BB=1.5;meanTHETA_AA=0.023;meanTHETA_AB=0.466;meanTHETA_BB=0.909;meanX_AA=1.548;meanX_AB=1.069;meanX_BB=0.189;meanY_AA=0.055;meanY_AB=0.961;meanY_BB=1.312 GT:BAF:IGC:LRR:NORMX:NORMY:R:THETA:X:Y 0/0:0.985:0.312:0.163:0.24:1.457:1.697:0.896:3424:10417 +1 568208 ilmnseq_rs9701872 T C . . 
AC=0;AF=0.00;ALLELE_A=T*;ALLELE_B=C;AN=2;BEADSET_ID=1710;GC_SCORE=0.714;ILLUMINA_BUILD=37;ILLUMINA_CHR=1;ILLUMINA_POS=568208;ILLUMINA_STRAND=TOP;N_AA=1287;N_AB=12;N_BB=0;PROBE_A=GCGTACAGGACTAGGAAGCAGATAAGGAAAATGATTATGAGGGCGTGATC;PROBE_B=.;SOURCE=dbSNP;devR_AA=0.445;devR_AB=0.284;devR_BB=0.274;devTHETA_AA=0.01;devTHETA_AB=0.025;devTHETA_BB=0.022;devX_AA=0.386;devX_AB=0.122;devX_BB=0.105;devY_AA=0.071;devY_AB=0.19;devY_BB=0.285;meanR_AA=2.991;meanR_AB=2.994;meanR_BB=3.174;meanTHETA_AA=0.1;meanTHETA_AB=0.665;meanTHETA_BB=0.978;meanX_AA=2.583;meanX_AB=1.101;meanX_BB=0.105;meanY_AA=0.409;meanY_AB=1.893;meanY_BB=3.069 GT:BAF:IGC:LRR:NORMX:NORMY:R:THETA:X:Y 0/0:0:0.599:0.195:2.971:0.453:3.425:0.096:30460:3049 +1 568208 ilmnseq_rs9701872_ilmndup1 T C . DUPE AC=0;AF=0.00;ALLELE_A=T*;ALLELE_B=C;AN=2;BEADSET_ID=1710;GC_SCORE=0.711;ILLUMINA_BUILD=37;ILLUMINA_CHR=1;ILLUMINA_POS=568208;ILLUMINA_STRAND=TOP;N_AA=1288;N_AB=11;N_BB=0;PROBE_A=GCGTACAGGACTAGGAAGCAGATAAGGAAAATGATTATGAGGGCGTGATC;PROBE_B=.;SOURCE=dbSNP;devR_AA=0.455;devR_AB=0.347;devR_BB=0.167;devTHETA_AA=0.01;devTHETA_AB=0.037;devTHETA_BB=0.022;devX_AA=0.395;devX_AB=0.162;devX_BB=0.064;devY_AA=0.072;devY_AB=0.237;devY_BB=0.175;meanR_AA=3.021;meanR_AB=3.13;meanR_BB=1.917;meanTHETA_AA=0.099;meanTHETA_AB=0.655;meanTHETA_BB=0.983;meanX_AA=2.611;meanX_AB=1.177;meanX_BB=0.051;meanY_AA=0.41;meanY_AB=1.953;meanY_BB=1.867 GT:BAF:IGC:LRR:NORMX:NORMY:R:THETA:X:Y 0/0:0.003:0.593:0.232:3.054:0.495:3.549:0.102:31297:3294 +1 568527 ilmnseq_rs11497407 G A . . 
AC=0;AF=0.00;ALLELE_A=A;ALLELE_B=G*;AN=2;BEADSET_ID=1621;GC_SCORE=0.672;ILLUMINA_BUILD=37;ILLUMINA_CHR=1;ILLUMINA_POS=568527;ILLUMINA_STRAND=BOT;N_AA=0;N_AB=0;N_BB=1292;PROBE_A=TTCAACCGGGAGTACTACTCGATTGTCAACGTCAAGGAGTCGCAGGTCGC;PROBE_B=.;SOURCE=dbSNP;devR_AA=0.218;devR_AB=0.337;devR_BB=0.204;devTHETA_AA=0.022;devTHETA_AB=0.022;devTHETA_BB=0.019;devX_AA=0.224;devX_AB=0.191;devX_BB=0.057;devY_AA=0.076;devY_AB=0.162;devY_BB=0.173;meanR_AA=2.3;meanR_AB=2.983;meanR_BB=2.076;meanTHETA_AA=0.022;meanTHETA_AB=0.441;meanTHETA_BB=0.861;meanX_AA=2.225;meanX_AB=1.629;meanX_BB=0.377;meanY_AA=0.075;meanY_AB=1.354;meanY_BB=1.7 GT:BAF:IGC:LRR:NORMX:NORMY:R:THETA:X:Y 0/0:0.975:0.264:0.349:0.552:2.152:2.703:0.84:5585:11443 +1 569418 MitoA8870G A G . . AC=0;AF=0.00;ALLELE_A=A*;ALLELE_B=G;AN=2;BEADSET_ID=1124;GC_SCORE=0.639;ILLUMINA_BUILD=37;ILLUMINA_CHR=1;ILLUMINA_POS=569418;ILLUMINA_STRAND=TOP;N_AA=1296;N_AB=0;N_BB=2;PROBE_A=TATCTATAAACCTAGCCATGGCCATCCCCTTATGAGCGGGCGCAGTGATT;PROBE_B=.;SOURCE=1000genomes;devR_AA=0.305;devR_AB=0.36;devR_BB=0.182;devTHETA_AA=0.01;devTHETA_AB=0.022;devTHETA_BB=0.021;devX_AA=0.272;devX_AB=0.194;devX_BB=0.06;devY_AA=0.047;devY_AB=0.183;devY_BB=0.161;meanR_AA=2.51;meanR_AB=3.188;meanR_BB=2.135;meanTHETA_AA=0.081;meanTHETA_AB=0.479;meanTHETA_BB=0.877;meanX_AA=2.225;meanX_AB=1.646;meanX_BB=0.348;meanY_AA=0.285;meanY_AB=1.542;meanY_BB=1.786 GT:BAF:IGC:LRR:NORMX:NORMY:R:THETA:X:Y 0/0:0:0.466:0.298:2.755:0.31:3.065:0.071:39610:2910 +1 721124 JHU_1.721123 A G . . 
AC=0;AF=0.00;ALLELE_A=A*;ALLELE_B=G;AN=2;BEADSET_ID=1040;GC_SCORE=0.517;ILLUMINA_BUILD=37;ILLUMINA_CHR=1;ILLUMINA_POS=721124;ILLUMINA_STRAND=BOT;N_AA=1299;N_AB=0;N_BB=0;PROBE_A=AATTGGTCTTGAGAGCTAGAAACTCTGAGGTCAAGTTTTATTGTGATATA;PROBE_B=.;SOURCE=1;devR_AA=0.127;devR_AB=0.216;devR_BB=0.123;devTHETA_AA=0.037;devTHETA_AB=0.022;devTHETA_BB=0.022;devX_AA=0.083;devX_AB=0.08;devX_BB=0.048;devY_AA=0.073;devY_AB=0.15;devY_BB=0.129;meanR_AA=1.533;meanR_AB=1.913;meanR_BB=1.41;meanTHETA_AA=0.435;meanTHETA_AB=0.711;meanTHETA_BB=0.986;meanX_AA=0.845;meanX_AB=0.628;meanX_BB=0.031;meanY_AA=0.688;meanY_AB=1.285;meanY_BB=1.38;refSNP=rs3131983 GT:BAF:IGC:LRR:NORMX:NORMY:R:THETA:X:Y 0/0:0.021:0.161:-0.168:0.747:0.631:1.379:0.447:8380:4215 +1 737263 JHU_1.737262 G A . . AC=0;AF=0.00;ALLELE_A=A;ALLELE_B=G*;AN=2;BEADSET_ID=1065;GC_SCORE=0.732;ILLUMINA_BUILD=37;ILLUMINA_CHR=1;ILLUMINA_POS=737263;ILLUMINA_STRAND=TOP;N_AA=0;N_AB=92;N_BB=1207;PROBE_A=GCCCAGGGACACAGAAACTTTTGAATAAAGACAAGAGAGACCGACTCTTC;PROBE_B=.;SOURCE=1;devR_AA=0.13;devR_AB=0.138;devR_BB=0.1;devTHETA_AA=0.022;devTHETA_AB=0.036;devTHETA_BB=0.009;devX_AA=0.131;devX_AB=0.072;devX_BB=0.021;devY_AA=0.044;devY_AB=0.111;devY_BB=0.093;meanR_AA=1.373;meanR_AB=1.815;meanR_BB=1.545;meanTHETA_AA=0.036;meanTHETA_AB=0.723;meanTHETA_BB=0.94;meanX_AA=1.299;meanX_AB=0.575;meanX_BB=0.134;meanY_AA=0.074;meanY_AB=1.24;meanY_BB=1.411;refSNP=rs369986014 GT:BAF:IGC:LRR:NORMX:NORMY:R:THETA:X:Y 0/0:0.998:0.63:0.012:0.137:1.422:1.559:0.939:2183:10041 diff --git a/testdata/picard/arrays/illumina/TestAdpc1_23.adpc.bin b/testdata/picard/arrays/illumina/TestAdpc1_23.adpc.bin new file mode 100644 index 0000000000..e892050e62 Binary files /dev/null and b/testdata/picard/arrays/illumina/TestAdpc1_23.adpc.bin differ diff --git a/testdata/picard/arrays/illumina/TestAdpc1_23.samples.txt b/testdata/picard/arrays/illumina/TestAdpc1_23.samples.txt new file mode 100644 index 0000000000..e4add1887e --- /dev/null +++ 
b/testdata/picard/arrays/illumina/TestAdpc1_23.samples.txt @@ -0,0 +1,3 @@ +203469020115_R01C01 +203469020115_R02C01 +203469020115_R03C01 \ No newline at end of file diff --git a/testdata/picard/arrays/illumina/TestAdpc23.adpc.bin b/testdata/picard/arrays/illumina/TestAdpc23.adpc.bin new file mode 100644 index 0000000000..0c0aec25d0 Binary files /dev/null and b/testdata/picard/arrays/illumina/TestAdpc23.adpc.bin differ diff --git a/testdata/picard/arrays/illumina/TestAdpc23.samples.txt b/testdata/picard/arrays/illumina/TestAdpc23.samples.txt new file mode 100644 index 0000000000..6595040494 --- /dev/null +++ b/testdata/picard/arrays/illumina/TestAdpc23.samples.txt @@ -0,0 +1,2 @@ +203469020115_R02C01 +203469020115_R03C01 \ No newline at end of file diff --git a/testdata/picard/arrays/illumina/TestAdpc23.vcf b/testdata/picard/arrays/illumina/TestAdpc23.vcf new file mode 100644 index 0000000000..aa3396368d --- /dev/null +++ b/testdata/picard/arrays/illumina/TestAdpc23.vcf @@ -0,0 +1,165 @@ +##fileformat=VCFv4.2 +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##arrayType=MEG_AllofUs_20002558X351448_A2 +##autocallVersion=3.0.0 +##clusterFile=MEG_AllofUs_A1_Gentrain_1299_edited_prevalidation_081419update.egt +##content=MEG_AllofUs_20002558X351448_A2.1.4.extended.csv +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= 
+##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##extendedIlluminaManifestVersion=1.4 +##extendedManifestFile=MEG_AllofUs_20002558X351448_A2.1.4.extended.csv +##manifestFile=MEG_AllofUs_20002558X351448_A2.bpm +##picardVersion=Version:2.21.2-SNAPSHOT +##reference=/cromwell_root/broad-references/hg19/v0/Homo_sapiens_assembly19.fasta +##source=GtcToVcf +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 203469020115_R02C01 203469020115_R03C01 +1 49554 1:49554-G-A A G . . AC=0;AF=0.00;ALLELE_A=A*;ALLELE_B=G;AN=4;BEADSET_ID=1165;GC_SCORE=0.705;ILLUMINA_BUILD=37;ILLUMINA_CHR=1;ILLUMINA_POS=49554;ILLUMINA_STRAND=TOP;N_AA=1159;N_AB=140;N_BB=0;PROBE_A=AAAGTCAAAAAGAAAAAGAAAAAAAGAAAAATCCATGCATATGATACATC;PROBE_B=.;SOURCE=PAGE;devR_AA=0.156;devR_AB=0.178;devR_BB=0.128;devTHETA_AA=0.007;devTHETA_AB=0.037;devTHETA_BB=0.022;devX_AA=0.147;devX_AB=0.139;devX_BB=0.047;devY_AA=0.019;devY_AB=0.087;devY_BB=0.129;meanR_AA=1.615;meanR_AB=2.007;meanR_BB=1.462;meanTHETA_AA=0.043;meanTHETA_AB=0.273;meanTHETA_BB=0.964;meanX_AA=1.512;meanX_AB=1.378;meanX_BB=0.079;meanY_AA=0.103;meanY_AB=0.629;meanY_BB=1.383 GT:BAF:IGC:LRR:NORMX:NORMY:R:THETA:X:Y 0/0:0:0.581:-0.274:1.26:0.051:1.311:0.026:15803:616 0/0:0:0.581:-0.084:1.43:0.083:1.513:0.037:18748:869 +1 115746 JHU_1.115745 C T . . 
AC=0;AF=0.00;ALLELE_A=T;ALLELE_B=C*;AN=4;BEADSET_ID=1040;GC_SCORE=0.641;ILLUMINA_BUILD=37;ILLUMINA_CHR=1;ILLUMINA_POS=115746;ILLUMINA_STRAND=BOT;N_AA=0;N_AB=52;N_BB=1245;PROBE_A=TCTATAAACTCACAAGAAATTTCTGCCCACCCAGCACACAGTTTGTCCAG;PROBE_B=.;SOURCE=1;devR_AA=0.141;devR_AB=0.172;devR_BB=0.126;devTHETA_AA=0.022;devTHETA_AB=0.018;devTHETA_BB=0.009;devX_AA=0.14;devX_AB=0.054;devX_BB=0.026;devY_AA=0.047;devY_AB=0.137;devY_BB=0.117;meanR_AA=1.481;meanR_AB=2.048;meanR_BB=1.875;meanTHETA_AA=0.041;meanTHETA_AB=0.816;meanTHETA_BB=0.937;meanX_AA=1.392;meanX_AB=0.469;meanX_BB=0.168;meanY_AA=0.089;meanY_AB=1.579;meanY_BB=1.706;refSNP=rs147538909 GT:BAF:IGC:LRR:NORMX:NORMY:R:THETA:X:Y 0/0:0.979:0.469:0.136:0.199:1.869:2.068:0.932:2915:13140 0/0:1:0.469:0.172:0.177:1.928:2.105:0.942:2502:13449 +1 534238 JHU_1.534237 A C . . AC=0;AF=0.00;ALLELE_A=A*;ALLELE_B=C;AN=4;BEADSET_ID=1040;GC_SCORE=0.838;ILLUMINA_BUILD=37;ILLUMINA_CHR=1;ILLUMINA_POS=534238;ILLUMINA_STRAND=TOP;N_AA=1299;N_AB=0;N_BB=0;PROBE_A=TTCGCTTCACCGCCTTGACAGCTTTGCAGAGTGCTGCTCAGGTATTCTGC;PROBE_B=.;SOURCE=1;devR_AA=0.131;devR_AB=0.182;devR_BB=0.107;devTHETA_AA=0.014;devTHETA_AB=0.022;devTHETA_BB=0.022;devX_AA=0.128;devX_AB=0.095;devX_BB=0.04;devY_AA=0.027;devY_AB=0.096;devY_BB=0.111;meanR_AA=1.299;meanR_AB=1.611;meanR_BB=1.226;meanTHETA_AA=0.03;meanTHETA_AB=0.503;meanTHETA_BB=0.976;meanX_AA=1.241;meanX_AB=0.802;meanX_BB=0.045;meanY_AA=0.059;meanY_AB=0.809;meanY_BB=1.181 GT:BAF:IGC:LRR:NORMX:NORMY:R:THETA:X:Y 0/0:0:0.43:0.178:1.41:0.057:1.467:0.026:16898:628 0/0:0:0.43:0.179:1.414:0.052:1.466:0.024:16641:563 +1 565433 rs9701055 C T . 
ZEROED_OUT_ASSAY AC=0;AF=0.00;ALLELE_A=T;ALLELE_B=C*;AN=0;BEADSET_ID=1618;GC_SCORE=0;ILLUMINA_BUILD=37;ILLUMINA_CHR=1;ILLUMINA_POS=565433;ILLUMINA_STRAND=TOP;N_AA=21;N_AB=103;N_BB=1172;PROBE_A=AGGAGAAGGCTTACGTTTAATGAGGGAGAAATTTGGTATATGATTGAGAT;PROBE_B=.;SOURCE=dbSNP;devR_AA=0.248;devR_AB=0.196;devR_BB=0.146;devTHETA_AA=0.019;devTHETA_AB=0.059;devTHETA_BB=0.016;devX_AA=0.213;devX_AB=0.142;devX_BB=0.032;devY_AA=0.049;devY_AB=0.113;devY_BB=0.128;meanR_AA=1.366;meanR_AB=1.669;meanR_BB=1.274;meanTHETA_AA=0.113;meanTHETA_AB=0.381;meanTHETA_BB=0.893;meanX_AA=1.158;meanX_AB=0.992;meanX_BB=0.185;meanY_AA=0.208;meanY_AB=0.677;meanY_BB=1.089;refSNP=rs9701055 GT:BAF:IGC:LRR:NORMX:NORMY:R:THETA:X:Y ./.:0.995:0:-0.29:0.158:0.887:1.046:0.888:2332:5795 ./.:1:0:-0.331:0.09:0.896:0.987:0.936:1519:5880 +1 565476 JHU_1.565475 C G . . AC=0;AF=0.00;ALLELE_A=G;ALLELE_B=C*;AN=4;BEADSET_ID=1040;GC_SCORE=0.683;ILLUMINA_BUILD=37;ILLUMINA_CHR=1;ILLUMINA_POS=565476;ILLUMINA_STRAND=BOT;N_AA=0;N_AB=0;N_BB=1289;PROBE_A=AGCCCCCATCTCAATCATATACCAAATTTCTCCCTCATTAAACGTAAGCG;PROBE_B=AGCCCCCATCTCAATCATATACCAAATTTCTCCCTCATTAAACGTAAGCC;SOURCE=1;devR_AA=0.1;devR_AB=0.1;devR_BB=0.125;devTHETA_AA=0.022;devTHETA_AB=0.014;devTHETA_BB=0.014;devX_AA=0.101;devX_AB=0.027;devX_BB=0.02;devY_AA=0.032;devY_AB=0.08;devY_BB=0.12;meanR_AA=0.976;meanR_AB=1.07;meanR_BB=0.959;meanTHETA_AA=0.024;meanTHETA_AB=0.83;meanTHETA_BB=0.965;meanX_AA=0.94;meanX_AB=0.23;meanX_BB=0.05;meanY_AA=0.036;meanY_AB=0.84;meanY_BB=0.908 GT:BAF:IGC:LRR:NORMX:NORMY:R:THETA:X:Y 0/0:1:0.272:0.262:0.043:1.096:1.139:0.975:916:12446 0/0:1:0.272:0.257:0.054:1.088:1.142:0.968:965:12079 +1 566140 M:5591-G-A G A . . 
AC=0;AF=0.00;ALLELE_A=A;ALLELE_B=G*;AN=4;BEADSET_ID=1080;GC_SCORE=0.728;ILLUMINA_BUILD=37;ILLUMINA_CHR=1;ILLUMINA_POS=566140;ILLUMINA_STRAND=TOP;N_AA=0;N_AB=0;N_BB=1273;PROBE_A=GCCTTCAAAGCCCTCAGTAAGTTGCAATACTTAATTTCTGCAACAGCTAA;PROBE_B=.;SOURCE=PAGE;devR_AA=0.152;devR_AB=0.229;devR_BB=0.113;devTHETA_AA=0.022;devTHETA_AB=0.022;devTHETA_BB=0.013;devX_AA=0.156;devX_AB=0.126;devX_BB=0.027;devY_AA=0.053;devY_AB=0.114;devY_BB=0.102;meanR_AA=1.604;meanR_AB=2.03;meanR_BB=1.5;meanTHETA_AA=0.023;meanTHETA_AB=0.466;meanTHETA_BB=0.909;meanX_AA=1.548;meanX_AB=1.069;meanX_BB=0.189;meanY_AA=0.055;meanY_AB=0.961;meanY_BB=1.312 GT:BAF:IGC:LRR:NORMX:NORMY:R:THETA:X:Y 0/0:0.984:0.312:0.147:0.24:1.44:1.68:0.895:3798:11002 0/0:1:0.312:0.145:0.207:1.451:1.658:0.91:3287:11240 +1 568208 ilmnseq_rs9701872 T C . . AC=0;AF=0.00;ALLELE_A=T*;ALLELE_B=C;AN=4;BEADSET_ID=1710;GC_SCORE=0.714;ILLUMINA_BUILD=37;ILLUMINA_CHR=1;ILLUMINA_POS=568208;ILLUMINA_STRAND=TOP;N_AA=1287;N_AB=12;N_BB=0;PROBE_A=GCGTACAGGACTAGGAAGCAGATAAGGAAAATGATTATGAGGGCGTGATC;PROBE_B=.;SOURCE=dbSNP;devR_AA=0.445;devR_AB=0.284;devR_BB=0.274;devTHETA_AA=0.01;devTHETA_AB=0.025;devTHETA_BB=0.022;devX_AA=0.386;devX_AB=0.122;devX_BB=0.105;devY_AA=0.071;devY_AB=0.19;devY_BB=0.285;meanR_AA=2.991;meanR_AB=2.994;meanR_BB=3.174;meanTHETA_AA=0.1;meanTHETA_AB=0.665;meanTHETA_BB=0.978;meanX_AA=2.583;meanX_AB=1.101;meanX_BB=0.105;meanY_AA=0.409;meanY_AB=1.893;meanY_BB=3.069 GT:BAF:IGC:LRR:NORMX:NORMY:R:THETA:X:Y 0/0:0:0.599:0.24:3.085:0.449:3.534:0.092:32159:3193 0/0:0:0.599:0.066:2.713:0.418:3.131:0.097:30396:3083 +1 568208 ilmnseq_rs9701872_ilmndup1 T C . 
DUPE AC=0;AF=0.00;ALLELE_A=T*;ALLELE_B=C;AN=4;BEADSET_ID=1710;GC_SCORE=0.711;ILLUMINA_BUILD=37;ILLUMINA_CHR=1;ILLUMINA_POS=568208;ILLUMINA_STRAND=TOP;N_AA=1288;N_AB=11;N_BB=0;PROBE_A=GCGTACAGGACTAGGAAGCAGATAAGGAAAATGATTATGAGGGCGTGATC;PROBE_B=.;SOURCE=dbSNP;devR_AA=0.455;devR_AB=0.347;devR_BB=0.167;devTHETA_AA=0.01;devTHETA_AB=0.037;devTHETA_BB=0.022;devX_AA=0.395;devX_AB=0.162;devX_BB=0.064;devY_AA=0.072;devY_AB=0.237;devY_BB=0.175;meanR_AA=3.021;meanR_AB=3.13;meanR_BB=1.917;meanTHETA_AA=0.099;meanTHETA_AB=0.655;meanTHETA_BB=0.983;meanX_AA=2.611;meanX_AB=1.177;meanX_BB=0.051;meanY_AA=0.41;meanY_AB=1.953;meanY_BB=1.867 GT:BAF:IGC:LRR:NORMX:NORMY:R:THETA:X:Y 0/0:0:0.593:0.257:3.159:0.45:3.609:0.09:32916:3210 0/0:0:0.593:0.03:2.711:0.371:3.082:0.087:30365:2798 +1 568527 ilmnseq_rs11497407 G A . . AC=0;AF=0.00;ALLELE_A=A;ALLELE_B=G*;AN=4;BEADSET_ID=1621;GC_SCORE=0.672;ILLUMINA_BUILD=37;ILLUMINA_CHR=1;ILLUMINA_POS=568527;ILLUMINA_STRAND=BOT;N_AA=0;N_AB=0;N_BB=1292;PROBE_A=TTCAACCGGGAGTACTACTCGATTGTCAACGTCAAGGAGTCGCAGGTCGC;PROBE_B=.;SOURCE=dbSNP;devR_AA=0.218;devR_AB=0.337;devR_BB=0.204;devTHETA_AA=0.022;devTHETA_AB=0.022;devTHETA_BB=0.019;devX_AA=0.224;devX_AB=0.191;devX_BB=0.057;devY_AA=0.076;devY_AB=0.162;devY_BB=0.173;meanR_AA=2.3;meanR_AB=2.983;meanR_BB=2.076;meanTHETA_AA=0.022;meanTHETA_AB=0.441;meanTHETA_BB=0.861;meanX_AA=2.225;meanX_AB=1.629;meanX_BB=0.377;meanY_AA=0.075;meanY_AB=1.354;meanY_BB=1.7 GT:BAF:IGC:LRR:NORMX:NORMY:R:THETA:X:Y 0/0:0.982:0.264:0.307:0.516:2.093:2.609:0.846:5511:11742 0/0:1:0.264:0.281:0.42:2.073:2.493:0.873:4663:11857 +1 569418 MitoA8870G A G . . 
AC=0;AF=0.00;ALLELE_A=A*;ALLELE_B=G;AN=4;BEADSET_ID=1124;GC_SCORE=0.639;ILLUMINA_BUILD=37;ILLUMINA_CHR=1;ILLUMINA_POS=569418;ILLUMINA_STRAND=TOP;N_AA=1296;N_AB=0;N_BB=2;PROBE_A=TATCTATAAACCTAGCCATGGCCATCCCCTTATGAGCGGGCGCAGTGATT;PROBE_B=.;SOURCE=1000genomes;devR_AA=0.305;devR_AB=0.36;devR_BB=0.182;devTHETA_AA=0.01;devTHETA_AB=0.022;devTHETA_BB=0.021;devX_AA=0.272;devX_AB=0.194;devX_BB=0.06;devY_AA=0.047;devY_AB=0.183;devY_BB=0.161;meanR_AA=2.51;meanR_AB=3.188;meanR_BB=2.135;meanTHETA_AA=0.081;meanTHETA_AB=0.479;meanTHETA_BB=0.877;meanX_AA=2.225;meanX_AB=1.646;meanX_BB=0.348;meanY_AA=0.285;meanY_AB=1.542;meanY_BB=1.786 GT:BAF:IGC:LRR:NORMX:NORMY:R:THETA:X:Y 0/0:0.002:0.466:0.21:2.571:0.336:2.907:0.083:40785:3453 0/0:0:0.466:0.267:2.685:0.328:3.013:0.077:41067:3216 +1 721124 JHU_1.721123 A G . . AC=0;AF=0.00;ALLELE_A=A*;ALLELE_B=G;AN=4;BEADSET_ID=1040;GC_SCORE=0.517;ILLUMINA_BUILD=37;ILLUMINA_CHR=1;ILLUMINA_POS=721124;ILLUMINA_STRAND=BOT;N_AA=1299;N_AB=0;N_BB=0;PROBE_A=AATTGGTCTTGAGAGCTAGAAACTCTGAGGTCAAGTTTTATTGTGATATA;PROBE_B=.;SOURCE=1;devR_AA=0.127;devR_AB=0.216;devR_BB=0.123;devTHETA_AA=0.037;devTHETA_AB=0.022;devTHETA_BB=0.022;devX_AA=0.083;devX_AB=0.08;devX_BB=0.048;devY_AA=0.073;devY_AB=0.15;devY_BB=0.129;meanR_AA=1.533;meanR_AB=1.913;meanR_BB=1.41;meanTHETA_AA=0.435;meanTHETA_AB=0.711;meanTHETA_BB=0.986;meanX_AA=0.845;meanX_AB=0.628;meanX_BB=0.031;meanY_AA=0.688;meanY_AB=1.285;meanY_BB=1.38;refSNP=rs3131983 GT:BAF:IGC:LRR:NORMX:NORMY:R:THETA:X:Y 0/0:0.124:0.156:-0.11:0.75:0.758:1.508:0.503:9207:5445 0/0:0.077:0.161:-0.085:0.777:0.724:1.501:0.478:9308:5159 +1 737263 JHU_1.737262 G A . . 
AC=0;AF=0.00;ALLELE_A=A;ALLELE_B=G*;AN=4;BEADSET_ID=1065;GC_SCORE=0.732;ILLUMINA_BUILD=37;ILLUMINA_CHR=1;ILLUMINA_POS=737263;ILLUMINA_STRAND=TOP;N_AA=0;N_AB=92;N_BB=1207;PROBE_A=GCCCAGGGACACAGAAACTTTTGAATAAAGACAAGAGAGACCGACTCTTC;PROBE_B=.;SOURCE=1;devR_AA=0.13;devR_AB=0.138;devR_BB=0.1;devTHETA_AA=0.022;devTHETA_AB=0.036;devTHETA_BB=0.009;devX_AA=0.131;devX_AB=0.072;devX_BB=0.021;devY_AA=0.044;devY_AB=0.111;devY_BB=0.093;meanR_AA=1.373;meanR_AB=1.815;meanR_BB=1.545;meanTHETA_AA=0.036;meanTHETA_AB=0.723;meanTHETA_BB=0.94;meanX_AA=1.299;meanX_AB=0.575;meanX_BB=0.134;meanY_AA=0.074;meanY_AB=1.24;meanY_BB=1.411;refSNP=rs369986014 GT:BAF:IGC:LRR:NORMX:NORMY:R:THETA:X:Y 0/0:0.998:0.63:0.049:0.14:1.459:1.6:0.939:2496:11165 0/0:0.988:0.63:0.034:0.148:1.44:1.589:0.935:2401:10999 diff --git a/testdata/picard/arrays/illumina/TestAdpc23_1.adpc.bin b/testdata/picard/arrays/illumina/TestAdpc23_1.adpc.bin new file mode 100644 index 0000000000..0aef50b0ec Binary files /dev/null and b/testdata/picard/arrays/illumina/TestAdpc23_1.adpc.bin differ diff --git a/testdata/picard/arrays/illumina/TestAdpc23_1.samples.txt b/testdata/picard/arrays/illumina/TestAdpc23_1.samples.txt new file mode 100644 index 0000000000..eafc4003af --- /dev/null +++ b/testdata/picard/arrays/illumina/TestAdpc23_1.samples.txt @@ -0,0 +1,3 @@ +203469020115_R02C01 +203469020115_R03C01 +203469020115_R01C01 \ No newline at end of file diff --git a/testdata/picard/arrays/illumina/TestIlluminaAdpcFileWriter.adpc.bin b/testdata/picard/arrays/illumina/TestVcfToAdpc.adpc.bin similarity index 82% rename from testdata/picard/arrays/illumina/TestIlluminaAdpcFileWriter.adpc.bin rename to testdata/picard/arrays/illumina/TestVcfToAdpc.adpc.bin index 3f66ec1438..17c454419f 100644 Binary files a/testdata/picard/arrays/illumina/TestIlluminaAdpcFileWriter.adpc.bin and b/testdata/picard/arrays/illumina/TestVcfToAdpc.adpc.bin differ diff --git a/testdata/picard/arrays/illumina/TestVcfToAdpc.num_markers.txt 
b/testdata/picard/arrays/illumina/TestVcfToAdpc.num_markers.txt new file mode 100644 index 0000000000..62f9457511 --- /dev/null +++ b/testdata/picard/arrays/illumina/TestVcfToAdpc.num_markers.txt @@ -0,0 +1 @@ +6 \ No newline at end of file diff --git a/testdata/picard/arrays/illumina/TestVcfToAdpc.samples.txt b/testdata/picard/arrays/illumina/TestVcfToAdpc.samples.txt new file mode 100644 index 0000000000..a11bff8587 --- /dev/null +++ b/testdata/picard/arrays/illumina/TestVcfToAdpc.samples.txt @@ -0,0 +1 @@ +203078500006_R01C01 \ No newline at end of file