Permalink
Browse files

HTSJDK SamReader implementation using the API and example of its usage

Added ant build and updated HTSJDK
  • Loading branch information...
1 parent f6b5e21 commit 4784f9096f57f0348886f79b2b32c8edea8b202d @iliat iliat committed Sep 27, 2014
View
@@ -1,4 +1,7 @@
bin
.classpath
.project
-
+.jar_tmp
+classes
+dist
+gatk-tools-java.version.properties
View
145 build.xml
@@ -0,0 +1,145 @@
+<?xml version="1.0"?>
+<project name="gatk-tools-java" basedir="." default="all">
+
+ <property name="src" value="src/main/java"/>
+ <property name="lib" value="lib"/>
+ <property name="dist" value="dist"/>
+ <property name="classes" value="classes"/>
+ <property name="jar_tmp" value=".jar_tmp"/>
+
+ <property name="javac.target" value="1.6"/>
+ <property name="javac.debug" value="true"/>
+
+ <!-- Get the Git commit hash and author timestamp (%H_%at), if available; otherwise leave it blank.
+      failifexecutionfails is false so that a machine without a git executable can still build:
+      in that case the exec sets nothing and the empty default declared below applies. -->
+ <exec executable="git" outputproperty="repository.revision" failifexecutionfails="false" errorproperty="">
+   <arg value="log"/>
+   <arg value="-1"/>
+   <arg value="--pretty=format:%H_%at"/>
+ </exec>
+ <!-- Ant properties are immutable, so this default only takes effect when the exec above did not set the value. -->
+ <property name="repository.revision" value=""/>
+ <property name="gatk-tools-java-version" value="1.0"/>
+ <property name="gatk-tools-java-version-file" value="gatk-tools-java.version.properties"/>
+
+ <!-- VERSION PROPERTY -->
+ <target name="write-version-property">
+ <!-- Writes the gatk-tools-java-version key, set to ${gatk-tools-java-version}, into the
+ properties file named by ${gatk-tools-java-version-file}. The "clean" target deletes this file. -->
+ <propertyfile
+ file="${gatk-tools-java-version-file}"
+ comment="gatk-tools-java version">
+ <entry key="gatk-tools-java-version" value="${gatk-tools-java-version}"/>
+ </propertyfile>
+ </target>
+
+ <!-- INIT -->
+ <target name="init" depends="write-version-property">
+ <!-- Defines the path with id "classpath": every jar found anywhere under ${lib}.
+ This id is the default value of the compile.classpath attribute of the compile-src macro. -->
+ <path id="classpath">
+ <fileset dir="${lib}">
+ <include name="**/*.jar"/>
+ </fileset>
+ </path>
+ </target>
+
+ <!-- CLEAN -->
+ <target name="clean">
+ <!-- Removes all build outputs: compiled classes, the dist directory, the javadoc directory,
+ the temporary jar staging directory and the generated version properties file. -->
+ <delete dir="${classes}"/>
+ <delete dir="${dist}"/>
+ <delete dir="javadoc"/>
+ <delete dir="${jar_tmp}"/>
+ <delete file="${gatk-tools-java-version-file}"/>
+ </target>
+
+ <!-- COMPILE -->
+ <target name="compile" depends="init"
+ description="Compile files without cleaning">
+ <!-- Delegates to the compile-src macro defined later in this file, using its default
+ destination directory and classpath; "init" must run first because it defines that classpath. -->
+ <compile-src includes="**/*.*"/>
+ </target>
+
+
+ <!-- Builds a single self-contained jar: the compiled classes plus the unpacked contents of
+      every jar directly under ${lib}, staged in ${jar_tmp} and written to ${dist}. -->
+ <target name="gatk-tools-java-jar" depends="compile"
+         description="Builds gatk-tools-java-${gatk-tools-java-version}.jar for inclusion in other projects">
+   <mkdir dir="${dist}"/>
+   <!-- Start from an empty staging directory so stale files from a previous build are not packaged. -->
+   <delete dir="${jar_tmp}"/>
+   <mkdir dir="${jar_tmp}"/>
+   <copy todir="${jar_tmp}">
+     <fileset dir="${classes}" includes="**/*"/>
+   </copy>
+   <unzip dest="${jar_tmp}">
+     <fileset dir="${lib}">
+       <include name="*.jar"/>
+     </fileset>
+   </unzip>
+   <jar destfile="${dist}/gatk-tools-java-${gatk-tools-java-version}.jar" compress="no">
+     <fileset dir="${jar_tmp}" includes="**/*.*"/>
+     <manifest>
+       <attribute name="Implementation-Title" value="@gatk-tools-java"/>
+       <attribute name="Implementation-Version" value="${gatk-tools-java-version}(${repository.revision})"/>
+     </manifest>
+   </jar>
+   <!-- Also ship the original dependency jars next to the combined jar.
+        Uses ${lib} (rather than a hard-coded "lib") so the location stays configurable in one place. -->
+   <copy todir="${dist}">
+     <fileset dir="${lib}" includes="*.jar"/>
+   </copy>
+ </target>
+
+
+
+ <!-- ALL -->
+ <target name="all" depends="compile, gatk-tools-java-jar" description="Default build target">
+ <!-- Aggregate target with no tasks of its own; it is the project's default target and
+ only triggers the targets listed in "depends". -->
+ </target>
+
+ <!-- ************************************************************************************** -->
+ <!-- ************************************************************************************** -->
+ <!-- Beginning of taskdefs that are used elsewhere in the build file -->
+ <!-- ************************************************************************************** -->
+ <!-- ************************************************************************************** -->
+
+ <!-- Compile the source files under the source root that are selected by the
+ includes and excludes patterns. -->
+ <!-- compile-src: runs javac over ${src}.
+      Attributes:
+        includes / excludes  - patterns selecting the source files to compile
+        destdir              - output directory for class files (default ${classes})
+        compile.classpath    - id of the path to compile against (default "classpath")
+        compiler.args        - extra command-line arguments passed to the compiler
+      NOTE(review): javac.opt is not defined in this build file; presumably it is meant to be
+      supplied externally (e.g. -Djavac.opt=true) - confirm. -->
+ <macrodef name="compile-src">
+   <attribute name="includes" default=""/>
+   <attribute name="excludes" default=""/>
+   <attribute name="destdir" default="${classes}"/>
+   <attribute name="compile.classpath" default="classpath"/>
+   <attribute name="compiler.args" default=""/>
+   <sequential>
+     <!-- Create the directory javac actually writes to, so that callers overriding destdir
+          do not fail on a missing output directory. -->
+     <mkdir dir="@{destdir}"/>
+     <!-- unset the sourcepath attribute in order to compile only files explicitly specified and disable javac's default searching mechanism -->
+     <javac destdir="@{destdir}"
+            optimize="${javac.opt}"
+            debug="${javac.debug}"
+            sourcepath=""
+            srcdir="${src}"
+            includes="@{includes}"
+            excludes="@{excludes}"
+            source="${javac.target}"
+            target="${javac.target}"
+            includeantruntime="false">
+       <classpath refid="@{compile.classpath}"/>
+       <compilerarg line="@{compiler.args}" />
+     </javac>
+   </sequential>
+ </macrodef>
+
+ <!-- compile-tests: runs javac over ${src.test.java}, writing class files to ${classes.test}.
+ The compile classpath is the "classpath" path plus the main output directory ${classes}.
+ NOTE(review): neither classes.test nor src.test.java is defined in this build file, and no
+ target here invokes this macro - presumably reserved for a future test target; confirm. -->
+ <macrodef name="compile-tests">
+ <attribute name="includes" default=""/>
+ <attribute name="excludes" default=""/>
+ <attribute name="compiler.args" default=""/>
+
+ <sequential>
+ <mkdir dir="${classes.test}"/>
+ <javac destdir="${classes.test}"
+ optimize="${javac.opt}"
+ debug="${javac.debug}"
+ srcdir="${src.test.java}"
+ includes="@{includes}"
+ excludes="@{excludes}"
+ source="${javac.target}"
+ target="${javac.target}"
+ includeantruntime="false">
+ <classpath>
+ <path refid="classpath"/>
+ <pathelement location="${classes}"/>
+ </classpath>
+ <compilerarg line="@{compiler.args}"/>
+ </javac>
+ </sequential>
+ </macrodef>
+</project>
Binary file not shown.
@@ -16,11 +16,12 @@
package com.google.cloud.genomics.gatk.common;
import java.net.URISyntaxException;
+import java.net.URL;
/**
* Represents a GA4GH reads resource as a URL in the form of
* ga4gh://<base api path>/readsets/<readset>/<sequence>/[start-end],
- * e.g. ga4gh://www.googleapis.com/genomics/v1beta/reads/CLqN8Z3sDRDQldHJ_rTS9VE/1/
+ * e.g. ga4gh://www.googleapis.com/genomics/v1beta/readsets/CLqN8Z3sDRDQldHJ_rTS9VE/1/
*/
public class GA4GHUrl {
int rangeStart = 0;
@@ -53,6 +54,10 @@ public GA4GHUrl(String rootUrl,
this.rangeEnd = rangeEnd;
}
+ public GA4GHUrl(URL input) throws URISyntaxException {
+ this(input.toString().replace("https://", GA4GH_SCHEMA_PREFIX));
+ }
+
public GA4GHUrl(String input) throws URISyntaxException {
if (!isGA4GHUrl(input)) {
throw new URISyntaxException(input, "Schema is not ga4gh");
@@ -67,7 +72,7 @@ public GA4GHUrl(String input) throws URISyntaxException {
String[] pathComponents = readsPath.split("/");
if (pathComponents.length < 4) {
throw new URISyntaxException(input,
- "Expecting " + READS_PATH_COMPONENT +"readset/sequence/[range], got "
+ "Expecting " + READS_PATH_COMPONENT + "readset/sequence/[range], got "
+ readsPath);
}
@@ -15,10 +15,15 @@
*/
package com.google.cloud.genomics.gatk.common;
+import com.google.api.client.auth.oauth2.Credential;
import com.google.api.client.extensions.java6.auth.oauth2.VerificationCodeReceiver;
import com.google.api.client.extensions.jetty.auth.oauth2.LocalServerReceiver;
import com.google.api.client.googleapis.extensions.java6.auth.oauth2.GooglePromptReceiver;
+import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
import com.google.api.client.googleapis.json.GoogleJsonResponseException;
+import com.google.api.client.http.HttpRequest;
+import com.google.api.client.http.HttpRequestInitializer;
+import com.google.api.client.json.jackson2.JacksonFactory;
import com.google.api.services.genomics.Genomics;
import com.google.api.services.genomics.GenomicsScopes;
import com.google.api.services.genomics.model.Dataset;
@@ -70,39 +75,58 @@ private Genomics getApi() throws GeneralSecurityException, IOException {
}
private Genomics initGenomicsApi() throws GeneralSecurityException, IOException {
- File clientSecrets = new File(clientSecretsFilename);
- if (!clientSecrets.exists()) {
- throw new IOException(
- "Client secrets file " + clientSecretsFilename + " does not exist."
- + " Visit https://developers.google.com/genomics to learn how"
- + " to install a client_secrets.json file. If you have installed a client_secrets.json"
- + " in a specific location, use --client_secrets_filename <path>/client_secrets.json.");
+ LOG.info("Initializing Genomics API for " + rootUrl);
+ if (!clientSecretsFilename.isEmpty()) {
+ File clientSecrets = new File(clientSecretsFilename);
+ if (!clientSecrets.exists()) {
+ throw new IOException(
+ "Client secrets file " + clientSecretsFilename + " does not exist."
+ + " Visit https://developers.google.com/genomics to learn how"
+ + " to install a client_secrets.json file. If you have installed a client_secrets.json"
+ + " in a specific location, use --client_secrets_filename <path>/client_secrets.json.");
+ }
+ LOG.info("Using client secrets file " + clientSecretsFilename);
+
+ VerificationCodeReceiver receiver = noLocalServer ?
+ new GooglePromptReceiver() : new LocalServerReceiver();
+ GenomicsFactory genomicsFactory = GenomicsFactory
+ .builder("genomics_java_client")
+ .setScopes(SCOPES)
+ .setRootUrl(rootUrl)
+ .setServicePath("/")
+ .setVerificationCodeReceiver(Suppliers.ofInstance(receiver))
+ .build();
+ return genomicsFactory.fromClientSecretsFile(clientSecrets);
+ } else {
+ final Genomics.Builder builder = new Genomics
+ .Builder(
+ GoogleNetHttpTransport.newTrustedTransport(),
+ JacksonFactory.getDefaultInstance(),
+ new HttpRequestInitializer() {
+ @Override public void initialize(HttpRequest httpRequest) throws IOException {
+ httpRequest.setReadTimeout(20000);
+ httpRequest.setConnectTimeout(20000);
+ }
+ })
+ .setApplicationName("genomics_java_client")
+ .setRootUrl(rootUrl)
+ .setServicePath("/");
+ return builder.build();
}
-
- VerificationCodeReceiver receiver = noLocalServer ?
- new GooglePromptReceiver() : new LocalServerReceiver();
-
- GenomicsFactory genomicsFactory = GenomicsFactory
- .builder("genomics_java_client")
- .setScopes(SCOPES)
- .setUserName("user" + SCOPES.size())
- .setVerificationCodeReceiver(Suppliers.ofInstance(receiver))
- .setRootUrl(rootUrl)
- .setServicePath("/")
- .build();
-
- return genomicsFactory.fromClientSecretsFile(clientSecrets);
}
public ReadIteratorResource getReadsFromGenomicsApi(GA4GHUrl url)
throws IOException, GeneralSecurityException {
+ LOG.info("Getting reads from " + url);
return getReadsFromGenomicsApi(url.getReadset(),
url.getSequence(), url.getRangeStart(), url.getRangeEnd());
}
public ReadIteratorResource getReadsFromGenomicsApi(String readsetId,
String sequenceName, int sequenceStart, int sequenceEnd)
throws IOException, GeneralSecurityException {
+ LOG.info("Getting readset " + readsetId + ", sequence " + sequenceName +
+ ", start=" + sequenceStart + ", end=" + sequenceEnd);
final Genomics stub = getApi();
// TODO(iliat): implement API retries and using access key for public
// datasets
@@ -0,0 +1,82 @@
+package com.google.cloud.genomics.gatk.htsjdk;
+
+import htsjdk.samtools.util.CoordMath;
+import htsjdk.samtools.SAMRecord;
+
+/**
+ * Similar to HTSJDK's QueryInterval but allows specifying sequence name
+ * (as opposed to index in the header) and adds ability to check if a given read
+ * matches the interval.
+ */
+public class GA4GHQueryInterval {
+
+  /** How a read's aligned span must relate to the interval for the read to match. */
+  public enum ReadPositionConstraint {
+    OVERLAPPING,
+    CONTAINED,
+    START_AT
+  }
+
+  private String sequence;
+  private int start;
+  private int end;
+  private ReadPositionConstraint readPositionConstraint;
+
+  /**
+   * Creates an interval on the named sequence.
+   *
+   * @param sequence name of the sequence the interval refers to
+   * @param start interval start coordinate
+   * @param end interval end coordinate; 0 is treated by {@link #matches} as "no upper bound"
+   * @param readPositionConstraint rule applied by {@link #matches}
+   */
+  public GA4GHQueryInterval(String sequence, int start, int end,
+      ReadPositionConstraint readPositionConstraint) {
+    this.sequence = sequence;
+    this.start = start;
+    this.end = end;
+    this.readPositionConstraint = readPositionConstraint;
+  }
+
+  /** @return the sequence name of this interval */
+  public String getSequence() {
+    return this.sequence;
+  }
+
+  /** @param sequence the new sequence name */
+  public void setSequence(String sequence) {
+    this.sequence = sequence;
+  }
+
+  /** @return the interval start coordinate */
+  public int getStart() {
+    return this.start;
+  }
+
+  /** @param start the new interval start coordinate */
+  public void setStart(int start) {
+    this.start = start;
+  }
+
+  /** @return the interval end coordinate (0 means unbounded when matching) */
+  public int getEnd() {
+    return this.end;
+  }
+
+  /** @param end the new interval end coordinate */
+  public void setEnd(int end) {
+    this.end = end;
+  }
+
+  /** @return the constraint used when matching reads */
+  public ReadPositionConstraint getReadPositionConstraint() {
+    return this.readPositionConstraint;
+  }
+
+  /** @param readPositionConstraint the new constraint used when matching reads */
+  public void setReadPositionConstraint(ReadPositionConstraint readPositionConstraint) {
+    this.readPositionConstraint = readPositionConstraint;
+  }
+
+  /**
+   * Tests the record's alignment coordinates against this interval under the
+   * configured constraint. Only coordinates are compared; the sequence name
+   * is not consulted here.
+   *
+   * @param record the read to test
+   * @return true iff the read's position satisfies the interval's constraint
+   */
+  public boolean matches(SAMRecord record) {
+    // An end of 0 stands for an open-ended interval.
+    final int upperBound;
+    if (end == 0) {
+      upperBound = Integer.MAX_VALUE;
+    } else {
+      upperBound = end;
+    }
+
+    switch (readPositionConstraint) {
+      case START_AT:
+        return record.getAlignmentStart() == start;
+      case CONTAINED:
+        return CoordMath.encloses(start, upperBound,
+            record.getAlignmentStart(), record.getAlignmentEnd());
+      case OVERLAPPING:
+        return CoordMath.overlaps(start, upperBound,
+            record.getAlignmentStart(), record.getAlignmentEnd());
+      default:
+        return false;
+    }
+  }
+}
@@ -0,0 +1,25 @@
+package com.google.cloud.genomics.gatk.htsjdk;
+
+import htsjdk.samtools.CustomReaderFactory;
+import htsjdk.samtools.SamReader;
+
+import java.net.URL;
+import java.util.logging.Logger;
+/**
+ * HTSJDK CustomReaderFactory implementation.
+ * Returns a SamReader that reads data from GA4GH API.
+ */
+public class GA4GHReaderFactory implements CustomReaderFactory.ICustomReaderFactory {
+  private static final Logger LOG = Logger.getLogger(GA4GHReaderFactory.class.getName());
+
+  /**
+   * Creates a {@link GA4GHSamReader} for the given URL.
+   *
+   * @param url location of the reads resource to open
+   * @return the new reader, or {@code null} if the reader could not be created;
+   *     the failure is logged at WARNING level together with its stack trace
+   */
+  @Override
+  public SamReader open(URL url) {
+    try {
+      return new GA4GHSamReader(url);
+    } catch (Exception ex) {
+      // Hand the throwable itself to the logger so the stack trace and cause chain
+      // are preserved; the message text alone is rarely enough to diagnose a failure.
+      LOG.log(java.util.logging.Level.WARNING, "Error creating SamReader " + ex.toString(), ex);
+      return null;
+    }
+  }
+
+}
Oops, something went wrong.

0 comments on commit 4784f90

Please sign in to comment.