Skip to content

Commit

Permalink
removing all google genomics API dependencies
Browse files Browse the repository at this point in the history
The Google Genomics API has deprecated all the features we were using;
this includes the reference lookup API and the Google Read data types.

removing all google genomics related dependencies
* replacing com.google.cloud.genomics:gatk-tools-java:1.1 with gov.nist.math.jama:gov.nist.math.jama:1.1.1
	we rely on this library, which was previously only a transitive
	dependency, so it is now declared as a direct dependency
* remove com.google.apis:google-api-services-genomics:v1-rev527-1.22.0
* remove com.google.cloud.genomics:google-genomics-utils:v1-0.10

* delete ReferenceAPISource and tests
* delete GoogleGenomicsReadToGATKReadAdapter and tests
* delete CigarConversionUtils and tests

* update other classes to remove references to these types
* improve an error message
  • Loading branch information
lbergelson committed Jan 25, 2018
1 parent 25755c2 commit ca175e4
Show file tree
Hide file tree
Showing 26 changed files with 72 additions and 2,190 deletions.
6 changes: 2 additions & 4 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,8 @@ dependencies {
// provided by dataproc).
compile 'org.broadinstitute:google-cloud-nio-GATK4-custom-patch:0.20.4-alpha-GCS-RETRY-FIX:shaded'

compile 'com.google.cloud.genomics:gatk-tools-java:1.1'
compile "gov.nist.math.jama:gov.nist.math.jama:1.1.1"

// this comes built-in when running on Google Dataproc, but the library
// allows us to read from GCS also when testing locally (or on non-Dataproc clusters,
// should we want to)
Expand All @@ -179,9 +180,6 @@ dependencies {

compile 'it.unimi.dsi:fastutil:7.0.6'

compile 'com.google.apis:google-api-services-genomics:v1-rev527-1.22.0'
compile 'com.google.cloud.genomics:google-genomics-utils:v1-0.10'

compile 'com.github.wendykierp:JTransforms:3.1'
compile 'org.broadinstitute:hdf5-java-bindings:1.1.0-hdf5_2.11.0'
compile 'org.broadinstitute:gatk-native-bindings:1.0.0'
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ public ReferenceFileSource(final Path referencePath) {
this.referencePath = referencePath;
this.referenceUri = referencePath.toUri();
if (!Files.exists(this.referencePath)) {
throw new UserException.MissingReference("The specified fasta file (" + referencePath + ") does not exist.");
throw new UserException.MissingReference("The specified fasta file (" + referencePath.toAbsolutePath().toUri().toString() + ") does not exist.");
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,9 @@ public ReferenceMultiSource(final String referenceURL,
} else {
referenceSource = new ReferenceFileSource(referenceURL);
}
} else { // use the Google Genomics API
referenceSource = new ReferenceAPISource(referenceURL);
} else {
throw new UserException.CouldNotReadInputFile("Couldn't read the given reference, reference must be a .fasta or .2bit file.\n" +
" Reference provided was: " + referenceURL);
}
this.referenceWindowFunction = referenceWindowFunction;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,8 @@

import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.serializers.FieldSerializer;
import com.esotericsoftware.kryo.serializers.JavaSerializer;
import com.google.api.services.genomics.model.Read;
import de.javakaffee.kryoserializers.UnmodifiableCollectionsSerializer;
import htsjdk.samtools.SAMRecord;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.FastGenotype;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
import org.apache.spark.serializer.KryoRegistrator;
import org.bdgenomics.adam.serialization.ADAMKryoRegistrator;
import org.broadinstitute.hellbender.utils.read.SAMRecordToGATKReadAdapter;
Expand All @@ -33,9 +27,6 @@ public GATKRegistrator() {
@Override
public void registerClasses(Kryo kryo) {

// JsonSerializer is needed for the Google Genomics classes like Read and Reference.
kryo.register(Read.class, new JsonSerializer<Read>());

//relatively inefficient serialization of Collections created with Collections.nCopies(), without this
//any Collection created with Collections.nCopies fails to serialize at run time
kryo.register(Collections.nCopies(2, "").getClass(), new FieldSerializer<>(kryo, Collections.nCopies(2, "").getClass()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -339,29 +339,6 @@ public LexicographicallySortedSequenceDictionary(String name, SAMSequenceDiction
}
}

/**
 * Reports that a reference query to the genomics service returned a number of bases
 * that differs from the size of the requested interval.
 */
public static final class ReferenceAPIReturnedUnexpectedNumberOfBytes extends UserException {
    private static final long serialVersionUID = 0L;

    /**
     * @param interval the reference interval that was requested
     * @param bases the bases that were actually returned for that interval
     */
    public ReferenceAPIReturnedUnexpectedNumberOfBytes(final SimpleInterval interval, final byte[] bases) {
        super(describeMismatch(interval, bases));
    }

    // Builds the user-facing message: the interval, the requested size and the size received.
    private static String describeMismatch(final SimpleInterval requested, final byte[] returned) {
        final StringBuilder message = new StringBuilder("Query to genomics service failed for reference interval ");
        message.append(requested);
        message.append(". Requested ").append(requested.size());
        message.append(" bytes but got ").append(returned.length);
        message.append(". Perhaps you're querying outside the edge of the contig.");
        return message.toString();
    }
}

/**
 * Reports that an assembly ID matched more than one reference set, so the
 * reference to use is ambiguous.
 */
public static final class MultipleReferenceSets extends UserException {
    private static final long serialVersionUID = 0L;

    /**
     * @param referenceSetAssemblyID the assembly ID that was looked up
     * @param referenceSetIds the IDs of all reference sets found for that assembly ID
     */
    public MultipleReferenceSets(final String referenceSetAssemblyID, final Set<String> referenceSetIds) {
        super(describeAmbiguity(referenceSetAssemblyID, referenceSetIds));
    }

    // Builds the user-facing message listing the assembly ID and every matching reference set ID.
    private static String describeAmbiguity(final String assemblyId, final Set<String> matchingIds) {
        return String.join("",
                "Multiple reference sets found for ",
                String.valueOf(assemblyId),
                " : ",
                String.valueOf(matchingIds),
                ". Please use a reference set ID that uniquely identifies a reference set.");
    }
}

/**
 * Reports that no reference set is known for the given assembly ID.
 */
public static final class UnknownReferenceSet extends UserException {
    private static final long serialVersionUID = 0L;

    /**
     * @param referenceSetAssemblyID the assembly ID for which no reference set was found
     */
    public UnknownReferenceSet(final String referenceSetAssemblyID) {
        super(describeUnknown(referenceSetAssemblyID));
    }

    // Builds the user-facing message naming the ID that could not be resolved.
    private static String describeUnknown(final String assemblyId) {
        final String prefix = "There are no known reference set for ID ";
        return prefix.concat(String.valueOf(assemblyId));
    }
}

public static final class Require2BitReferenceForBroadcast extends BadInput {
private static final long serialVersionUID = 0L;
public Require2BitReferenceForBroadcast() {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,19 +1,15 @@
package org.broadinstitute.hellbender.utils.pileup;

import com.google.api.services.genomics.model.Read;
import com.google.common.annotations.VisibleForTesting;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.util.Locatable;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.walkers.qc.Pileup;
import org.broadinstitute.hellbender.utils.BaseUtils;
import org.broadinstitute.hellbender.utils.QualityUtils;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.fragments.FragmentCollection;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.read.ReadUtils;
import org.broadinstitute.hellbender.engine.AlignmentContext;

import java.util.*;
import java.util.function.Predicate;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
package org.broadinstitute.hellbender.utils.read;

import com.google.api.services.genomics.model.LinearAlignment;
import com.google.api.services.genomics.model.Position;
import com.google.api.services.genomics.model.Read;
import htsjdk.samtools.*;
import htsjdk.samtools.util.StringUtil;
import htsjdk.variant.variantcontext.Allele;
import org.apache.commons.lang3.ArrayUtils;
import org.broadinstitute.hellbender.exceptions.GATKException;
Expand Down Expand Up @@ -301,26 +297,6 @@ public static GATKRead createHeaderlessSamBackedRead( final String name, final S
return read;
}

/**
 * Builds an artificial GATKRead whose backing record is a Google Genomics read.
 *
 * Every base of the read is 'A' with a quality of 30, and the cigar is a single
 * match block covering the whole read ("&lt;length&gt;M").
 *
 * @param name name of the new read
 * @param contig contig the new read is mapped to
 * @param start start position of the new read
 * @param length number of bases in the new read
 * @return a GoogleGenomicsReadToGATKReadAdapter wrapping the newly created Google Genomics read
 */
public static GATKRead createGoogleBackedRead( final String name, final String contig, final int start, final int length ) {
    // Arguments are evaluated left to right: bases, then qualities, then the all-match cigar.
    final Read backingRead = createArtificialGoogleGenomicsRead(
            name,
            contig,
            start,
            Utils.dupBytes((byte) 'A', length),
            Utils.dupBytes((byte) 30, length),
            length + "M");

    return new GoogleGenomicsReadToGATKReadAdapter(backingRead);
}

/**
* Create an artificial SAMRecord based on the parameters. The cigar string will be *M, where * is the length of the read
*
Expand Down Expand Up @@ -455,37 +431,6 @@ public static SAMRecord createUniqueArtificialSAMRecord(final Cigar cigar) {
return createArtificialSAMRecord(header, cigar, UUID.randomUUID().toString());
}

/**
 * Builds a Google Genomics Read populated from the given name, position, bases,
 * qualities and cigar.
 *
 * @param name fragment name to set on the read
 * @param contig reference name for the alignment position
 * @param start start position; stored on the read as {@code start - 1}
 * @param bases bases of the read, converted to the aligned sequence string
 * @param quals base qualities, stored as a list of integers
 * @param cigar textual cigar, decoded and converted to Google cigar units
 * @return the populated Google Genomics read
 */
public static Read createArtificialGoogleGenomicsRead( final String name, final String contig, final int start, final byte[] bases, final byte[] quals, final String cigar ) {
    final Read read = new Read();
    read.setFragmentName(name);

    // Alignment position: the value stored is start - 1, computed in long arithmetic.
    final Position alignmentStart = new Position();
    alignmentStart.setReferenceName(contig);
    alignmentStart.setPosition((long) start - 1);

    final LinearAlignment alignment = new LinearAlignment();
    alignment.setPosition(alignmentStart);
    read.setAlignment(alignment);

    read.setAlignedSequence(StringUtil.bytesToString(bases));
    alignment.setCigar(CigarConversionUtils.convertSAMCigarToCigarUnitList(TextCigarCodec.decode(cigar)));

    // The read stores qualities as Integers, so widen each byte individually.
    final List<Integer> qualsAsInts = new ArrayList<>(quals.length);
    for ( int i = 0; i < quals.length; i++ ) {
        qualsAsInts.add((int) quals[i]);
    }
    read.setAlignedQuality(qualsAsInts);

    // Explicitly populate the flags and the mate position so the read is fully formed:
    // it can then be wrapped by a GATKRead and produce a valid SAMString without
    // GATKRead throwing missing field exceptions.
    read.setFailedVendorQualityChecks(false);
    read.setSecondaryAlignment(false);
    read.setSupplementaryAlignment(false);
    read.setDuplicateFragment(false);

    final Position mateStart = new Position();
    mateStart.setReverseStrand(false);
    read.setNextMatePosition(mateStart);

    return read;
}

public static List<GATKRead> createPair(SAMFileHeader header, String name, int readLen, int leftStart, int rightStart, boolean leftIsFirst, boolean leftIsNegative) {
return createPair(header, name, readLen, 0, leftStart, rightStart, leftIsFirst, leftIsNegative);
}
Expand Down

This file was deleted.

0 comments on commit ca175e4

Please sign in to comment.