Skip to content
This repository was archived by the owner on Oct 29, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
<groupId>com.google.cloud.genomics</groupId>
<artifactId>google-genomics-dataflow</artifactId>
<packaging>jar</packaging>
<version>v1beta2-0.22-SNAPSHOT</version>
<version>v1-0.1-SNAPSHOT</version>

<organization>
<name>Google</name>
Expand Down Expand Up @@ -114,7 +114,7 @@
<dependency>
<groupId>com.google.apis</groupId>
<artifactId>google-api-services-genomics</artifactId>
<version>v1beta2-rev87-1.20.0</version>
<version>v1-rev73-1.21.0</version>
<exclusions>
<!-- Exclude an old version of guava which is being pulled
in by a transitive dependency google-api-client 1.19.0 -->
Expand All @@ -127,7 +127,7 @@
<dependency>
<groupId>com.google.cloud.genomics</groupId>
<artifactId>google-genomics-utils</artifactId>
<version>v1beta2-0.44</version>
<version>v1-0.1</version>
<exclusions>
<!-- Exclude an old version of guava which is being pulled
in by a transitive dependency google-api-client 1.19.0 -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
*/
package com.google.cloud.genomics.dataflow.functions.transmission;

import com.google.api.services.genomics.model.Call;
import com.google.api.services.genomics.model.Variant;
import com.google.api.services.genomics.model.VariantCall;
import com.google.cloud.dataflow.sdk.transforms.DoFn;
import com.google.cloud.dataflow.sdk.values.KV;
import com.google.cloud.genomics.dataflow.model.Allele;
Expand Down Expand Up @@ -93,8 +93,8 @@ public void processElement(ProcessContext c) {
}

@VisibleForTesting
Call getSample(Variant variant, String sampleName) {
for (Call call : variant.getCalls()) {
VariantCall getSample(Variant variant, String sampleName) {
for (VariantCall call : variant.getCalls()) {
if (call.getCallSetName() == sampleName)
return call;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@
import com.google.api.services.genomics.model.Annotation;
import com.google.api.services.genomics.model.AnnotationSet;
import com.google.api.services.genomics.model.ListBasesResponse;
import com.google.api.services.genomics.model.QueryRange;
import com.google.api.services.genomics.model.RangePosition;
import com.google.api.services.genomics.model.SearchAnnotationsRequest;
import com.google.api.services.genomics.model.SearchVariantsRequest;
import com.google.api.services.genomics.model.Variant;
Expand Down Expand Up @@ -219,17 +217,15 @@ private ListMultimap<Range<Long>, Annotation> retrieveVariantAnnotations(
Paginator.Annotations.create(genomics, ShardBoundary.Requirement.OVERLAPS).search(
new SearchAnnotationsRequest()
.setAnnotationSetIds(variantAnnotationSetIds)
.setRange(new QueryRange()
.setReferenceName(canonicalizeRefName(request.getReferenceName()))
.setStart(request.getStart())
.setEnd(request.getEnd())));
.setReferenceName(canonicalizeRefName(request.getReferenceName()))
.setStart(request.getStart())
.setEnd(request.getEnd()));
for (Annotation annotation : annotationIter) {
RangePosition pos = annotation.getPosition();
long start = 0;
if (pos.getStart() != null) {
start = pos.getStart();
if (annotation.getStart() != null) {
start = annotation.getStart();
}
annotationMap.put(Range.closedOpen(start, pos.getEnd()), annotation);
annotationMap.put(Range.closedOpen(start, annotation.getEnd()), annotation);
}
LOG.info(String.format("read %d variant annotations in %s (%.2f / s)", annotationMap.size(),
stopwatch, (double)annotationMap.size() / stopwatch.elapsed(TimeUnit.SECONDS)));
Expand All @@ -243,13 +239,11 @@ private IntervalTree<Annotation> retrieveTranscripts(Genomics genomics, SearchVa
Paginator.Annotations.create(genomics, ShardBoundary.Requirement.OVERLAPS).search(
new SearchAnnotationsRequest()
.setAnnotationSetIds(transcriptSetIds)
.setRange(new QueryRange()
.setReferenceName(canonicalizeRefName(request.getReferenceName()))
.setStart(request.getStart())
.setEnd(request.getEnd())));
.setReferenceName(canonicalizeRefName(request.getReferenceName()))
.setStart(request.getStart())
.setEnd(request.getEnd()));
for (Annotation annotation : transcriptIter) {
RangePosition pos = annotation.getPosition();
transcripts.put(pos.getStart().intValue(), pos.getEnd().intValue(), annotation);
transcripts.put(annotation.getStart().intValue(), annotation.getEnd().intValue(), annotation);
}
LOG.info(String.format("read %d transcripts in %s (%.2f / s)", transcripts.size(),
stopwatch, (double)transcripts.size() / stopwatch.elapsed(TimeUnit.SECONDS)));
Expand All @@ -258,23 +252,22 @@ private IntervalTree<Annotation> retrieveTranscripts(Genomics genomics, SearchVa

/**
 * Returns the reference bases for a transcript, consulting {@code refBaseCache} first.
 *
 * <p>The cache key is the closed-open range built from the transcript's start and end. On a
 * miss, the bases are fetched with {@code retrieveReferenceBases} and stored under that key
 * before being returned.
 *
 * @param genomics client handed to {@code retrieveReferenceBases} on a cache miss
 * @param transcript annotation whose start and end define the range to look up
 * @return the value held in {@code refBaseCache} for the transcript's range
 * @throws IOException declared because {@code retrieveReferenceBases} declares it
 */
private String getCachedTranscriptBases(Genomics genomics, Annotation transcript)
throws IOException {
RangePosition pos = transcript.getPosition();
Range<Long> rng = Range.closedOpen(pos.getStart(), pos.getEnd());
// NOTE(review): the key is built from start/end only, while the lookup in
// retrieveReferenceBases also passes a reference id. If transcripts on different
// references can share coordinates they would share one cache entry -- confirm.
Range<Long> rng = Range.closedOpen(transcript.getStart(), transcript.getEnd());
if (!refBaseCache.containsKey(rng)) {
// Cache miss: fetch the bases once and remember them for later calls with this range.
refBaseCache.put(rng, retrieveReferenceBases(genomics, pos));
refBaseCache.put(rng, retrieveReferenceBases(genomics, transcript));
}
return refBaseCache.get(rng);
}

private String retrieveReferenceBases(Genomics genomics, RangePosition pos) throws IOException {
private String retrieveReferenceBases(Genomics genomics, Annotation annotation) throws IOException {
StringBuilder b = new StringBuilder();
String pageToken = "";
while (true) {
// TODO: Support full request parameterization for Paginator.References.Bases.
ListBasesResponse response = genomics.references().bases()
.list(pos.getReferenceId())
.setStart(pos.getStart())
.setEnd(pos.getEnd())
.list(annotation.getReferenceId())
.setStart(annotation.getStart())
.setEnd(annotation.getEnd())
.setPageToken(pageToken)
.execute();
b.append(response.getSequence());
Expand Down Expand Up @@ -337,7 +330,7 @@ private static void validateRefsetForAnnotationSets(
Genomics genomics, List<String> annosetIds) throws IOException {
String refsetId = null;
for (String annosetId : annosetIds) {
String gotId = genomics.annotationSets().get(annosetId).execute().getReferenceSetId();
String gotId = genomics.annotationsets().get(annosetId).execute().getReferenceSetId();
if (refsetId == null) {
refsetId = gotId;
} else if (!refsetId.equals(gotId)) {
Expand All @@ -351,7 +344,7 @@ private static List<String> validateAnnotationSetsFlag(
Genomics genomics, String flagValue, String wantType) throws IOException {
List<String> annosetIds = ImmutableList.copyOf(flagValue.split(","));
for (String annosetId : annosetIds) {
AnnotationSet annoset = genomics.annotationSets().get(annosetId).execute();
AnnotationSet annoset = genomics.annotationsets().get(annosetId).execute();
if (!wantType.equals(annoset.getType())) {
throw new IllegalArgumentException("annotation set " + annosetId + " has type " +
annoset.getType() + ", wanted type " + wantType);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@

import com.google.api.client.util.Strings;
import com.google.api.services.genomics.Genomics;
import com.google.api.services.genomics.Genomics.Annotationsets;
import com.google.api.services.genomics.model.Annotation;
import com.google.api.services.genomics.model.AnnotationSet;
import com.google.api.services.genomics.model.BatchCreateAnnotationsRequest;
import com.google.api.services.genomics.model.Position;
import com.google.api.services.genomics.model.RangePosition;
import com.google.cloud.dataflow.sdk.Pipeline;
import com.google.cloud.dataflow.sdk.options.Default;
import com.google.cloud.dataflow.sdk.options.Description;
Expand Down Expand Up @@ -421,17 +421,16 @@ public void processElement(ProcessContext c) throws GeneralSecurityException, IO
Position bucket = c.element().getKey();
Annotation a = new Annotation()
.setAnnotationSetId(asId)
.setPosition(new RangePosition()
.setStart(bucket.getPosition())
.setEnd(bucket.getPosition() + pOptions.getBucketWidth())
.setReferenceName(bucket.getReferenceName()))
.setStart(bucket.getPosition())
.setEnd(bucket.getPosition() + pOptions.getBucketWidth())
.setReferenceName(bucket.getReferenceName())
.setType("GENERIC")
.setInfo(new HashMap<String, List<String>>());
.setInfo(new HashMap<String, List<Object>>());
for (KV<PosRgsMq.MappingQuality, List<Double>> mappingQualityKV : c.element().getValue()) {
List<String> output = Lists.newArrayList();
List<Object> output = Lists.newArrayList();
for (int i = 0; i < mappingQualityKV.getValue().size(); i++) {
double value = Math.round(mappingQualityKV.getValue().get(i) * 1000000.0) / 1000000.0;
output.add(Double.toString(value));
output.add(value);
}
a.getInfo().put(mappingQualityKV.getKey().toString(), output);
}
Expand Down Expand Up @@ -485,7 +484,7 @@ private static AnnotationSet createAnnotationSet(String referenceSetId)
as.setReferenceSetId(referenceSetId);
as.setType("GENERIC");
Genomics genomics = GenomicsFactory.builder().build().fromOfflineAuth(auth);
Genomics.AnnotationSets.Create asRequest = genomics.annotationSets().create(as);
Annotationsets.Create asRequest = genomics.annotationsets().create(as);
AnnotationSet asWithId = asRequest.execute();
return asWithId;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

import com.google.api.client.util.Preconditions;
import com.google.api.services.genomics.model.Annotation;
import com.google.api.services.genomics.model.TranscriptExon;
import com.google.api.services.genomics.model.Exon;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Range;

Expand Down Expand Up @@ -77,7 +77,7 @@ public enum VariantEffect { SYNONYMOUS_SNP, STOP_GAIN, STOP_LOSS, NONSYNONYMOUS_
*/
public static VariantEffect determineVariantTranscriptEffect(
long variantStart, String allele, Annotation transcript, String transcriptBases) {
long txLen = transcript.getPosition().getEnd() - transcript.getPosition().getStart();
long txLen = transcript.getEnd() - transcript.getStart();
Preconditions.checkArgument(transcriptBases.length() == txLen,
"transcriptBases must have equal length to the transcript; got " +
transcriptBases.length() + " and " + txLen + ", respectively");
Expand All @@ -96,22 +96,22 @@ public static VariantEffect determineVariantTranscriptEffect(
Range<Long> codingRange = Range.closedOpen(
transcript.getTranscript().getCodingSequence().getStart(),
transcript.getTranscript().getCodingSequence().getEnd());
if (Boolean.TRUE.equals(transcript.getPosition().getReverseStrand())) {
if (Boolean.TRUE.equals(transcript.getReverseStrand())) {
allele = SequenceUtil.reverseComplement(allele);
}
for (TranscriptExon exon : transcript.getTranscript().getExons()) {
for (Exon exon : transcript.getTranscript().getExons()) {
// For now, only compute effects on variants within the coding region of an exon.
Range<Long> exonRange = Range.closedOpen(exon.getStart(), exon.getEnd());
if (exonRange.isConnected(codingRange) &&
exonRange.intersection(codingRange).isConnected(variantRange) &&
!exonRange.intersection(codingRange).intersection(variantRange).isEmpty()) {
// Get the bases which correspond to this exon.
int txOffset = transcript.getPosition().getStart().intValue();
int txOffset = transcript.getStart().intValue();
String exonBases = transcriptBases.substring(
exon.getStart().intValue() - txOffset, exon.getEnd().intValue() - txOffset);
int variantExonOffset = (int) (variantStart - exon.getStart());

if (Boolean.TRUE.equals(transcript.getPosition().getReverseStrand())) {
if (Boolean.TRUE.equals(transcript.getReverseStrand())) {
// Normalize the offset and bases to 5' -> 3'.
exonBases = SequenceUtil.reverseComplement(exonBases);
variantExonOffset = (int) (exon.getEnd() - variantEnd);
Expand All @@ -122,7 +122,7 @@ public static VariantEffect determineVariantTranscriptEffect(
LOG.fine("exon lacks frame data, cannot determine effect");
return null;
}
int offsetWithinCodon = (variantExonOffset + exon.getFrame().getValue()) % 3;
int offsetWithinCodon = (variantExonOffset + exon.getFrame()) % 3;
int codonExonOffset = variantExonOffset - offsetWithinCodon;
if (codonExonOffset < 0 || exonBases.length() <= codonExonOffset+3) {
LOG.fine("variant codon spans multiple exons, this case is not yet handled");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

import com.google.api.services.genomics.model.Annotation;
import com.google.api.services.genomics.model.Position;
import com.google.api.services.genomics.model.RangePosition;
import com.google.cloud.dataflow.sdk.Pipeline;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
Expand Down Expand Up @@ -244,29 +243,27 @@ public void testCalculateCoverage() throws Exception {
List<Annotation> expectedOutput = Lists.newArrayList();
Annotation a1 = new Annotation()
.setAnnotationSetId("123")
.setPosition(new RangePosition()
.setStart(0L)
.setEnd(2L)
.setReferenceName("1"))
.setStart(0L)
.setEnd(2L)
.setReferenceName("1")
.setType("GENERIC")
.setInfo(new HashMap<String, List<String>>());
a1.getInfo().put("L", Lists.newArrayList("1.0", "1.0", "3.5"));
a1.getInfo().put("M", Lists.newArrayList("1.5", "1.5", "2.0"));
a1.getInfo().put("H", Lists.newArrayList("0.5", "0.5", "0.5"));
a1.getInfo().put("A", Lists.newArrayList("2.5", "3.0", "3.5"));
.setInfo(new HashMap<String, List<Object>>());
a1.getInfo().put("L", Lists.newArrayList((Object) 1.0, 1.0, 3.5));
a1.getInfo().put("M", Lists.newArrayList((Object) 1.5, 1.5, 2.0));
a1.getInfo().put("H", Lists.newArrayList((Object) 0.5, 0.5, 0.5));
a1.getInfo().put("A", Lists.newArrayList((Object) 2.5, 3.0, 3.5));
expectedOutput.add(a1);
Annotation a2 = new Annotation()
.setAnnotationSetId("123")
.setPosition(new RangePosition()
.setStart(2L)
.setEnd(4L)
.setReferenceName("1"))
.setStart(2L)
.setEnd(4L)
.setReferenceName("1")
.setType("GENERIC")
.setInfo(new HashMap<String, List<String>>());
a2.getInfo().put("L", Lists.newArrayList("1.0", "1.0", "3.0"));
a2.getInfo().put("M", Lists.newArrayList("0.5", "1.5", "1.5"));
a2.getInfo().put("H", Lists.newArrayList("0.5", "1.0", "1.0"));
a2.getInfo().put("A", Lists.newArrayList("2.0", "3.0", "3.0"));
.setInfo(new HashMap<String, List<Object>>());
a2.getInfo().put("L", Lists.newArrayList((Object) 1.0, 1.0, 3.0));
a2.getInfo().put("M", Lists.newArrayList((Object) 0.5, 1.5, 1.5));
a2.getInfo().put("H", Lists.newArrayList((Object) 0.5, 1.0, 1.0));
a2.getInfo().put("A", Lists.newArrayList((Object) 2.0, 3.0, 3.0));
expectedOutput.add(a2);
CalculateCoverage.Options popts = PipelineOptionsFactory.create().as(
CalculateCoverage.Options.class);
Expand Down
Loading