From af24c41c1caf84cd623dd5f979cd9eb4dd4120f8 Mon Sep 17 00:00:00 2001 From: David Adams Date: Thu, 2 Jul 2015 13:51:55 -0400 Subject: [PATCH] Removing ReadConverter as its functionality is now in utils-java. --- pom.xml | 2 +- .../dataflow/readers/bam/ReadConverter.java | 150 ------------------ .../genomics/dataflow/readers/bam/Reader.java | 5 +- .../readers/bam/ReadConverterTest.java | 77 --------- 4 files changed, 4 insertions(+), 230 deletions(-) delete mode 100644 src/main/java/com/google/cloud/genomics/dataflow/readers/bam/ReadConverter.java delete mode 100644 src/test/java/com/google/cloud/genomics/dataflow/readers/bam/ReadConverterTest.java diff --git a/pom.xml b/pom.xml index 4f74df6..92a6c2d 100644 --- a/pom.xml +++ b/pom.xml @@ -127,7 +127,7 @@ com.google.cloud.genomics google-genomics-utils - v1beta2-0.27 + v1beta2-0.29 diff --git a/src/main/java/com/google/cloud/genomics/dataflow/readers/bam/ReadConverter.java b/src/main/java/com/google/cloud/genomics/dataflow/readers/bam/ReadConverter.java deleted file mode 100644 index f321a70..0000000 --- a/src/main/java/com/google/cloud/genomics/dataflow/readers/bam/ReadConverter.java +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Copyright (C) 2015 Google Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. 
- */ -package com.google.cloud.genomics.dataflow.readers.bam; - -import com.google.api.client.util.Maps; -import com.google.api.services.genomics.model.CigarUnit; -import com.google.api.services.genomics.model.LinearAlignment; -import com.google.api.services.genomics.model.Position; -import com.google.api.services.genomics.model.Read; -import com.google.common.base.Function; -import com.google.common.collect.BiMap; -import com.google.common.collect.HashBiMap; -import com.google.common.collect.Lists; -import htsjdk.samtools.*; -import htsjdk.samtools.util.SequenceUtil; - -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -/** - * Converts SAMRecords to Reads. - */ -public class ReadConverter { - static HashBiMap CIGAR_OPERATIONS; - static BiMap CIGAR_OPERATIONS_INV; - - static { - CIGAR_OPERATIONS = HashBiMap.create(); - CIGAR_OPERATIONS.put("ALIGNMENT_MATCH", "M"); - CIGAR_OPERATIONS.put("CLIP_HARD", "H"); - CIGAR_OPERATIONS.put("CLIP_SOFT", "S"); - CIGAR_OPERATIONS.put("DELETE", "D"); - CIGAR_OPERATIONS.put("INSERT", "I"); - CIGAR_OPERATIONS.put("PAD", "P"); - CIGAR_OPERATIONS.put("SEQUENCE_MATCH", "="); - CIGAR_OPERATIONS.put("SEQUENCE_MISMATCH", "X"); - CIGAR_OPERATIONS.put("SKIP", "N"); - CIGAR_OPERATIONS_INV = CIGAR_OPERATIONS.inverse(); - } - - /** - * Generates a Read from a SAMRecord. - */ - public static final Read makeRead(final SAMRecord record) { - Read read = new Read(); - read.setId(record.getReadName()); // TODO: make more unique - read.setFragmentName(record.getReadName()); - read.setReadGroupId(getAttr(record, "RG")); - read.setNumberReads(record.getReadPairedFlag() ? 
2 : 1); - read.setProperPlacement(record.getReadPairedFlag() && record.getProperPairFlag()); - if (!record.getReadUnmappedFlag() && record.getAlignmentStart() > 0) { - LinearAlignment alignment = new LinearAlignment(); - - Position position = new Position(); - position.setPosition((long) record.getAlignmentStart() - 1); - position.setReferenceName(record.getReferenceName()); - position.setReverseStrand(record.getReadNegativeStrandFlag()); - alignment.setPosition(position); - - alignment.setMappingQuality(record.getMappingQuality()); - - final String referenceSequence = (record.getAttribute("MD") != null) ? new String( - SequenceUtil.makeReferenceFromAlignment(record, true)) - : null; - List cigar = Lists.transform(record.getCigar().getCigarElements(), - new Function() { - @Override - public CigarUnit apply(CigarElement c) { - CigarUnit u = new CigarUnit(); - CigarOperator o = c.getOperator(); - u.setOperation(CIGAR_OPERATIONS_INV.get(o.toString())); - u.setOperationLength((long) c.getLength()); - if (referenceSequence != null && (u.getOperation().equals("SEQUENCE_MISMATCH") - || u.getOperation().equals("DELETE"))) { - u.setReferenceSequence(referenceSequence); - } - return u; - } - }); - alignment.setCigar(cigar); - read.setAlignment(alignment); - } - read.setDuplicateFragment(record.getDuplicateReadFlag()); - read.setFragmentLength(record.getInferredInsertSize()); - if (record.getReadPairedFlag()) { - if (record.getFirstOfPairFlag()) { - read.setReadNumber(0); - } else if (record.getSecondOfPairFlag()) { - read.setReadNumber(1); - } - - if (!record.getMateUnmappedFlag()) { - Position matePosition = new Position(); - matePosition.setPosition((long) record.getMateAlignmentStart() - 1); - matePosition.setReferenceName(record.getMateReferenceName()); - matePosition.setReverseStrand(record.getMateNegativeStrandFlag()); - read.setNextMatePosition(matePosition); - } - } - read.setFailedVendorQualityChecks(record.getReadFailsVendorQualityCheckFlag()); - 
read.setSecondaryAlignment(record.getNotPrimaryAlignmentFlag()); - read.setSupplementaryAlignment(record.getSupplementaryAlignmentFlag()); - read.setAlignedSequence(record.getReadString()); - byte[] baseQualities = record.getBaseQualities(); - if (baseQualities.length > 0) { - List readBaseQualities = new ArrayList(baseQualities.length); - for (byte b : baseQualities) { - readBaseQualities.add(new Integer(b)); - } - read.setAlignedQuality(readBaseQualities); - } - - Map> attributes = Maps.newHashMap(); - for( SAMRecord.SAMTagAndValue tagAndValue: record.getAttributes()) { - String s = tagAndValue.value.toString(); - if (tagAndValue.value instanceof byte[]) { - // It's possible for client code of SamRecord to pass byte[] - // to setAttribute. toString is not defined for byte[], so - // it produces garbage. The solution to create a string directly. - s = new String(((byte[]) tagAndValue.value)); - } - attributes.put(tagAndValue.tag, Lists.newArrayList(s)); - } - read.setInfo(attributes); - - return read; - } - - public static String getAttr(SAMRecord record, String attributeName) { - try { - return record.getStringAttribute(attributeName); - } catch (SAMException ex) { - return ""; - } - } -} diff --git a/src/main/java/com/google/cloud/genomics/dataflow/readers/bam/Reader.java b/src/main/java/com/google/cloud/genomics/dataflow/readers/bam/Reader.java index a3fd1fa..fa3f066 100644 --- a/src/main/java/com/google/cloud/genomics/dataflow/readers/bam/Reader.java +++ b/src/main/java/com/google/cloud/genomics/dataflow/readers/bam/Reader.java @@ -20,6 +20,7 @@ import com.google.api.services.storage.Storage.Objects; import com.google.cloud.dataflow.sdk.transforms.DoFn; import com.google.cloud.genomics.utils.Contig; +import com.google.cloud.genomics.utils.ReadUtils; import com.google.common.base.Stopwatch; import htsjdk.samtools.SAMRecord; @@ -118,7 +119,7 @@ void processRecord(SAMRecord record) { recordsAfterEnd++; return; } - c.output(ReadConverter.makeRead(record)); + 
c.output(ReadUtils.makeRead(record)); recordsProcessed++; } @@ -167,7 +168,7 @@ public static Iterable readSequentiallyForTesting(Objects storageClient, S recordsAfterEnd++; continue; } - reads.add(ReadConverter.makeRead(record)); + reads.add(ReadUtils.makeRead(record)); recordsProcessed++; } timer.stop(); diff --git a/src/test/java/com/google/cloud/genomics/dataflow/readers/bam/ReadConverterTest.java b/src/test/java/com/google/cloud/genomics/dataflow/readers/bam/ReadConverterTest.java deleted file mode 100644 index 2b04789..0000000 --- a/src/test/java/com/google/cloud/genomics/dataflow/readers/bam/ReadConverterTest.java +++ /dev/null @@ -1,77 +0,0 @@ -package com.google.cloud.genomics.dataflow.readers.bam; - -import com.google.api.services.genomics.model.Read; -import com.google.cloud.genomics.gatk.common.GenomicsConverter; -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SamReaderFactory; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -import java.io.File; -import java.io.IOException; - -import static org.junit.Assert.assertEquals; - -@RunWith(JUnit4.class) -public class ReadConverterTest { - @Test - public void testConversion() { - SAMRecord record = new SAMRecord(null); - record.setReferenceName("chr20"); - record.setAlignmentStart(1); - record.setCigarString(String.format("%dM", 10)); - record.setMateReferenceName("chr20"); - record.setMateAlignmentStart(100); - record.setReadPairedFlag(true); - record.setFirstOfPairFlag(true); - record.setMateNegativeStrandFlag(true); - - Read read = ReadConverter.makeRead(record); - assertEquals((long)0, (long)read.getAlignment().getPosition().getPosition()); - assertEquals((long)1, (long)read.getAlignment().getCigar().size()); - assertEquals("chr20", read.getAlignment().getPosition().getReferenceName()); - assertEquals((int)0, (int)read.getReadNumber()); - assertEquals((long)99, 
(long)read.getNextMatePosition().getPosition()); - assertEquals("chr20", read.getNextMatePosition().getReferenceName()); - assertEquals((Boolean)true, read.getNextMatePosition().getReverseStrand()); - } - @Test - public void testByteArrayAttributes() { - // Client code of SamRecord can pass anything to setAttribute including - // byte[] (which doesn't have toString defined). This verifies - // we handle that case correctly. - SAMRecord record = new SAMRecord(null); - record.setReferenceName("chr20"); - record.setAlignmentStart(1); - record.setCigarString(String.format("%dM", 10)); - String s = "123456"; - record.setAttribute("FZ", s.getBytes()); - - Read read = ReadConverter.makeRead(record); - assertEquals((long)0, (long)read.getAlignment().getPosition().getPosition()); - assertEquals((long)1, (long)read.getAlignment().getCigar().size()); - assertEquals("chr20", read.getAlignment().getPosition().getReferenceName()); - assertEquals(s, read.getInfo().get("FZ").get(0)); - } - - @Test - public void SamToReadToSamTest() throws IOException { - String filePath = "src/test/resources/com/google/cloud/genomics/dataflow/readers/bam/conversion_test.sam"; - File samInput = new File(filePath); - SamReader reads = SamReaderFactory.makeDefault().open(samInput); - SAMFileHeader header = reads.getFileHeader(); - - int numReads = 0; - for (SAMRecord sam : reads){ - Read read = ReadConverter.makeRead(sam); - SAMRecord newSam = GenomicsConverter.makeSAMRecord(read, header ); - assertEquals(newSam.getSAMString(), sam.getSAMString()); - numReads++; - } - assertEquals(19, numReads);//sanity check to make sure we actually read the file - } - -}