diff --git a/.gitignore b/.gitignore
index 3b1abaf..6cc0a52 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,7 @@
bin
.classpath
.project
-
+.jar_tmp
+classes
+dist
+gatk-tools-java.version.properties
diff --git a/build.xml b/build.xml
new file mode 100755
index 0000000..6735451
--- /dev/null
+++ b/build.xml
@@ -0,0 +1,145 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/lib/htsjdk-1.118.jar b/lib/htsjdk-1.121.jar
similarity index 79%
rename from lib/htsjdk-1.118.jar
rename to lib/htsjdk-1.121.jar
index a19810b..128d936 100644
Binary files a/lib/htsjdk-1.118.jar and b/lib/htsjdk-1.121.jar differ
diff --git a/src/main/java/com/google/cloud/genomics/gatk/common/GA4GHUrl.java b/src/main/java/com/google/cloud/genomics/gatk/common/GA4GHUrl.java
index 269e684..8e677f3 100644
--- a/src/main/java/com/google/cloud/genomics/gatk/common/GA4GHUrl.java
+++ b/src/main/java/com/google/cloud/genomics/gatk/common/GA4GHUrl.java
@@ -16,11 +16,12 @@
package com.google.cloud.genomics.gatk.common;
import java.net.URISyntaxException;
+import java.net.URL;
/**
* Represents a GA4GH reads resource as a URL in the form of
* ga4gh:///readsets///[start-end],
- * e.g. ga4gh://www.googleapis.com/genomics/v1beta/reads/CLqN8Z3sDRDQldHJ_rTS9VE/1/
+ * e.g. ga4gh://www.googleapis.com/genomics/v1beta/readsets/CLqN8Z3sDRDQldHJ_rTS9VE/1/
*/
public class GA4GHUrl {
int rangeStart = 0;
@@ -53,6 +54,10 @@ public GA4GHUrl(String rootUrl,
this.rangeEnd = rangeEnd;
}
+ public GA4GHUrl(URL input) throws URISyntaxException {
+ this(input.toString().replace("https://", GA4GH_SCHEMA_PREFIX));
+ }
+
public GA4GHUrl(String input) throws URISyntaxException {
if (!isGA4GHUrl(input)) {
throw new URISyntaxException(input, "Schema is not ga4gh");
@@ -67,7 +72,7 @@ public GA4GHUrl(String input) throws URISyntaxException {
String[] pathComponents = readsPath.split("/");
if (pathComponents.length < 4) {
throw new URISyntaxException(input,
- "Expecting " + READS_PATH_COMPONENT +"readset/sequence/[range], got "
+ "Expecting " + READS_PATH_COMPONENT + "readset/sequence/[range], got "
+ readsPath);
}
diff --git a/src/main/java/com/google/cloud/genomics/gatk/common/GenomicsApiDataSource.java b/src/main/java/com/google/cloud/genomics/gatk/common/GenomicsApiDataSource.java
index 2f845d8..df75159 100644
--- a/src/main/java/com/google/cloud/genomics/gatk/common/GenomicsApiDataSource.java
+++ b/src/main/java/com/google/cloud/genomics/gatk/common/GenomicsApiDataSource.java
@@ -15,10 +15,15 @@
*/
package com.google.cloud.genomics.gatk.common;
+import com.google.api.client.auth.oauth2.Credential;
import com.google.api.client.extensions.java6.auth.oauth2.VerificationCodeReceiver;
import com.google.api.client.extensions.jetty.auth.oauth2.LocalServerReceiver;
import com.google.api.client.googleapis.extensions.java6.auth.oauth2.GooglePromptReceiver;
+import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
import com.google.api.client.googleapis.json.GoogleJsonResponseException;
+import com.google.api.client.http.HttpRequest;
+import com.google.api.client.http.HttpRequestInitializer;
+import com.google.api.client.json.jackson2.JacksonFactory;
import com.google.api.services.genomics.Genomics;
import com.google.api.services.genomics.GenomicsScopes;
import com.google.api.services.genomics.model.Dataset;
@@ -70,32 +75,49 @@ private Genomics getApi() throws GeneralSecurityException, IOException {
}
private Genomics initGenomicsApi() throws GeneralSecurityException, IOException {
- File clientSecrets = new File(clientSecretsFilename);
- if (!clientSecrets.exists()) {
- throw new IOException(
- "Client secrets file " + clientSecretsFilename + " does not exist."
- + " Visit https://developers.google.com/genomics to learn how"
- + " to install a client_secrets.json file. If you have installed a client_secrets.json"
- + " in a specific location, use --client_secrets_filename /client_secrets.json.");
+ LOG.info("Initializing Genomics API for " + rootUrl);
+ if (!clientSecretsFilename.isEmpty()) {
+ File clientSecrets = new File(clientSecretsFilename);
+ if (!clientSecrets.exists()) {
+ throw new IOException(
+ "Client secrets file " + clientSecretsFilename + " does not exist."
+ + " Visit https://developers.google.com/genomics to learn how"
+ + " to install a client_secrets.json file. If you have installed a client_secrets.json"
+ + " in a specific location, use --client_secrets_filename /client_secrets.json.");
+ }
+ LOG.info("Using client secrets file " + clientSecretsFilename);
+
+ VerificationCodeReceiver receiver = noLocalServer ?
+ new GooglePromptReceiver() : new LocalServerReceiver();
+ GenomicsFactory genomicsFactory = GenomicsFactory
+ .builder("genomics_java_client")
+ .setScopes(SCOPES)
+ .setRootUrl(rootUrl)
+ .setServicePath("/")
+ .setVerificationCodeReceiver(Suppliers.ofInstance(receiver))
+ .build();
+ return genomicsFactory.fromClientSecretsFile(clientSecrets);
+ } else {
+ final Genomics.Builder builder = new Genomics
+ .Builder(
+ GoogleNetHttpTransport.newTrustedTransport(),
+ JacksonFactory.getDefaultInstance(),
+ new HttpRequestInitializer() {
+ @Override public void initialize(HttpRequest httpRequest) throws IOException {
+ httpRequest.setReadTimeout(20000);
+ httpRequest.setConnectTimeout(20000);
+ }
+ })
+ .setApplicationName("genomics_java_client")
+ .setRootUrl(rootUrl)
+ .setServicePath("/");
+ return builder.build();
}
-
- VerificationCodeReceiver receiver = noLocalServer ?
- new GooglePromptReceiver() : new LocalServerReceiver();
-
- GenomicsFactory genomicsFactory = GenomicsFactory
- .builder("genomics_java_client")
- .setScopes(SCOPES)
- .setUserName("user" + SCOPES.size())
- .setVerificationCodeReceiver(Suppliers.ofInstance(receiver))
- .setRootUrl(rootUrl)
- .setServicePath("/")
- .build();
-
- return genomicsFactory.fromClientSecretsFile(clientSecrets);
}
public ReadIteratorResource getReadsFromGenomicsApi(GA4GHUrl url)
throws IOException, GeneralSecurityException {
+ LOG.info("Getting reads from " + url);
return getReadsFromGenomicsApi(url.getReadset(),
url.getSequence(), url.getRangeStart(), url.getRangeEnd());
}
@@ -103,6 +125,8 @@ public ReadIteratorResource getReadsFromGenomicsApi(GA4GHUrl url)
public ReadIteratorResource getReadsFromGenomicsApi(String readsetId,
String sequenceName, int sequenceStart, int sequenceEnd)
throws IOException, GeneralSecurityException {
+ LOG.info("Getting readset " + readsetId + ", sequence " + sequenceName +
+ ", start=" + sequenceStart + ", end=" + sequenceEnd);
final Genomics stub = getApi();
// TODO(iliat): implement API retries and using access key for public
// datasets
diff --git a/src/main/java/com/google/cloud/genomics/gatk/htsjdk/GA4GHQueryInterval.java b/src/main/java/com/google/cloud/genomics/gatk/htsjdk/GA4GHQueryInterval.java
new file mode 100644
index 0000000..b3617e5
--- /dev/null
+++ b/src/main/java/com/google/cloud/genomics/gatk/htsjdk/GA4GHQueryInterval.java
@@ -0,0 +1,82 @@
+package com.google.cloud.genomics.gatk.htsjdk;
+
+import htsjdk.samtools.util.CoordMath;
+import htsjdk.samtools.SAMRecord;
+
+/**
+ * Similar to HTSJDK's QueryInterval but allows specifying sequence name
+ * (as opposed to index in the header) and adds ability to check if a given read
+ * matches the interval.
+ */
+public class GA4GHQueryInterval {
+ private String sequence;
+ private int start;
+ private int end;
+
+ public enum ReadPositionConstraint {
+ OVERLAPPING,
+ CONTAINED,
+ START_AT
+ }
+ private ReadPositionConstraint readPositionConstraint;
+
+ public GA4GHQueryInterval(String sequence, int start, int end,
+ ReadPositionConstraint readPositionConstraint) {
+ super();
+ this.sequence = sequence;
+ this.start = start;
+ this.end = end;
+ this.readPositionConstraint = readPositionConstraint;
+ }
+
+ public String getSequence() {
+ return sequence;
+ }
+
+ public void setSequence(String sequence) {
+ this.sequence = sequence;
+ }
+
+ public int getStart() {
+ return start;
+ }
+
+ public void setStart(int start) {
+ this.start = start;
+ }
+
+ public int getEnd() {
+ return end;
+ }
+
+ public void setEnd(int end) {
+ this.end = end;
+ }
+
+ public ReadPositionConstraint getReadPositionConstraint() {
+ return readPositionConstraint;
+ }
+
+ public void setReadPositionConstraint(ReadPositionConstraint readPositionConstraint) {
+ this.readPositionConstraint = readPositionConstraint;
+ }
+
+ /**
+ * Returns true iff the read specified by the record matches the interval
+ * given the interval's constraints and the read position.
+ */
+ public boolean matches(SAMRecord record) {
+ int myEnd = end == 0 ? Integer.MAX_VALUE : end;
+ switch (readPositionConstraint) {
+ case OVERLAPPING:
+ return CoordMath.overlaps(start, myEnd,
+ record.getAlignmentStart(), record.getAlignmentEnd());
+ case CONTAINED:
+ return CoordMath.encloses(start, myEnd,
+ record.getAlignmentStart(), record.getAlignmentEnd());
+ case START_AT:
+ return start == record.getAlignmentStart();
+ }
+ return false;
+ }
+}
diff --git a/src/main/java/com/google/cloud/genomics/gatk/htsjdk/GA4GHReaderFactory.java b/src/main/java/com/google/cloud/genomics/gatk/htsjdk/GA4GHReaderFactory.java
new file mode 100644
index 0000000..507a5f6
--- /dev/null
+++ b/src/main/java/com/google/cloud/genomics/gatk/htsjdk/GA4GHReaderFactory.java
@@ -0,0 +1,25 @@
+package com.google.cloud.genomics.gatk.htsjdk;
+
+import htsjdk.samtools.CustomReaderFactory;
+import htsjdk.samtools.SamReader;
+
+import java.net.URL;
+import java.util.logging.Logger;
+/**
+ * HTSJDK CustomReaderFactory implementation.
+ * Returns a SamReader that reads data from GA4GH API.
+ */
+public class GA4GHReaderFactory implements CustomReaderFactory.ICustomReaderFactory {
+ private static final Logger LOG = Logger.getLogger(GA4GHReaderFactory.class.getName());
+
+ @Override
+ public SamReader open(URL url) {
+ try {
+ return new GA4GHSamReader(url);
+ } catch (Exception ex) {
+ LOG.warning("Error creating SamReader " + ex.toString());
+ return null;
+ }
+ }
+
+}
diff --git a/src/main/java/com/google/cloud/genomics/gatk/htsjdk/GA4GHSamReader.java b/src/main/java/com/google/cloud/genomics/gatk/htsjdk/GA4GHSamReader.java
new file mode 100644
index 0000000..21c06d6
--- /dev/null
+++ b/src/main/java/com/google/cloud/genomics/gatk/htsjdk/GA4GHSamReader.java
@@ -0,0 +1,182 @@
+package com.google.cloud.genomics.gatk.htsjdk;
+
+import com.google.cloud.genomics.gatk.common.GA4GHUrl;
+import com.google.cloud.genomics.gatk.common.GenomicsApiDataSource;
+import com.google.cloud.genomics.gatk.common.GenomicsApiDataSourceFactory;
+import com.google.cloud.genomics.gatk.common.GenomicsApiDataSourceFactory.Settings;
+
+import htsjdk.samtools.QueryInterval;
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMFormatException;
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SAMRecordIterator;
+import htsjdk.samtools.SamReader;
+import htsjdk.samtools.util.CloseableIterator;
+
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.security.GeneralSecurityException;
+
+/**
+ * SamReader implementation that reads data from GA4GH API.
+ * For client_secrets file, specify the path in the ga4gh.client_secrets system property.
+ */
+public class GA4GHSamReader implements SamReader {
+ private GA4GHUrl url;
+ private GenomicsApiDataSourceFactory factory;
+ GenomicsApiDataSource dataSource;
+ GA4GHSamRecordIterator iterator;
+
+ public GA4GHSamReader(URL url) throws URISyntaxException, IOException, GeneralSecurityException {
+ this.url = new GA4GHUrl(url);
+ this.factory = new GenomicsApiDataSourceFactory();
+ factory.configure(this.url.getRootUrl(),
+ new Settings(
+ System.getProperty("ga4gh.client_secrets", "client_secrets.json"),
+ System.getProperty("ga4gh.no_local_server","")
+ .toLowerCase().equals("true")));
+ dataSource = factory.get(this.url.getRootUrl());
+ queryOverlapping(this.url.getSequence(), this.url.getRangeStart(),
+ this.url.getRangeEnd());
+ }
+
+ @Override
+ public void close() throws IOException {
+ this.dataSource = null;
+ this.factory = null;
+ }
+
+ @Override
+ public SAMFileHeader getFileHeader() {
+ return iterator.getFileHeader();
+ }
+
+ @Override
+ public Type type() {
+ // TODO(iliat): figure out bearing of this on indexing
+ return Type.SAM_TYPE;
+ }
+
+ @Override
+ public boolean hasIndex() {
+ return true;
+ }
+
+ @Override
+ public Indexing indexing() {
+ return null;
+ }
+
+ @Override
+ public SAMRecordIterator queryContained(String sequence, int start, int end) {
+ return query(sequence, start, end, true);
+ }
+
+ @Override
+ public SAMRecordIterator queryOverlapping(String sequence, int start, int end) {
+ return query(sequence, start, end, false);
+ }
+
+ @Override
+ public SAMRecordIterator queryContained(QueryInterval[] intervals) {
+ return query(intervals, true);
+ }
+
+ @Override
+ public SAMRecordIterator queryOverlapping(QueryInterval[] intervals) {
+ return query(intervals, false);
+ }
+
+ @Override
+ public SAMRecordIterator queryUnmapped() {
+ return queryOverlapping("*", 0, 0);
+ }
+
+ @Override
+ public SAMRecordIterator query(QueryInterval[] intervals, boolean contained) {
+ final GA4GHQueryInterval[] myIntervals = new GA4GHQueryInterval[intervals.length];
+ final GA4GHQueryInterval.ReadPositionConstraint constraint = contained ?
+ GA4GHQueryInterval.ReadPositionConstraint.CONTAINED :
+ GA4GHQueryInterval.ReadPositionConstraint.OVERLAPPING;
+ final SAMFileHeader header = getFileHeader();
+ for (int i = 0; i < intervals.length; i++) {
+ final QueryInterval interval = intervals[i];
+ final String sequence = header.getSequence(
+ interval.referenceIndex).getSequenceName();
+ myIntervals[i] = new GA4GHQueryInterval(sequence, interval.start,
+ interval.end, constraint);
+ }
+ return query(myIntervals);
+ }
+
+ @Override
+ public SAMRecordIterator query(String sequence, int start, int end, boolean contained) {
+ return query(new GA4GHQueryInterval[] {
+ new GA4GHQueryInterval(sequence, start, end, contained ?
+ GA4GHQueryInterval.ReadPositionConstraint.CONTAINED :
+ GA4GHQueryInterval.ReadPositionConstraint.OVERLAPPING)});
+ }
+
+ @Override
+ public SAMRecordIterator queryAlignmentStart(String sequence, int start) {
+ return query(new GA4GHQueryInterval[] {
+ new GA4GHQueryInterval(sequence, start, 0,
+ GA4GHQueryInterval.ReadPositionConstraint.START_AT)});
+ }
+
+ @Override
+ public SAMRecord queryMate(SAMRecord rec) {
+ if (!rec.getReadPairedFlag()) {
+ throw new IllegalArgumentException("queryMate called for unpaired read.");
+ }
+ if (rec.getFirstOfPairFlag() == rec.getSecondOfPairFlag()) {
+ throw new IllegalArgumentException("SAMRecord must be either first and second of pair, but not both.");
+ }
+ final boolean firstOfPair = rec.getFirstOfPairFlag();
+ final CloseableIterator it;
+ if (rec.getMateReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
+ it = queryUnmapped();
+ } else {
+ it = queryAlignmentStart(rec.getMateReferenceName(), rec.getMateAlignmentStart());
+ }
+ try {
+ SAMRecord mateRec = null;
+ while (it.hasNext()) {
+ final SAMRecord next = it.next();
+ if (!next.getReadPairedFlag()) {
+ if (rec.getReadName().equals(next.getReadName())) {
+ throw new SAMFormatException("Paired and unpaired reads with same name: " + rec.getReadName());
+ }
+ continue;
+ }
+ if (firstOfPair) {
+ if (next.getFirstOfPairFlag()) continue;
+ } else {
+ if (next.getSecondOfPairFlag()) continue;
+ }
+ if (rec.getReadName().equals(next.getReadName())) {
+ if (mateRec != null) {
+ throw new SAMFormatException("Multiple SAMRecord with read name " + rec.getReadName() +
+ " for " + (firstOfPair ? "second" : "first") + " end.");
+ }
+ mateRec = next;
+ }
+ }
+ return mateRec;
+ } finally {
+ it.close();
+ }
+ }
+
+ public SAMRecordIterator query(GA4GHQueryInterval[] intervals) {
+ iterator = new GA4GHSamRecordIterator(dataSource, url.getReadset(),
+ intervals);
+ return iterator();
+ }
+
+ @Override
+ public SAMRecordIterator iterator() {
+ return iterator;
+ }
+}
diff --git a/src/main/java/com/google/cloud/genomics/gatk/htsjdk/GA4GHSamRecordIterator.java b/src/main/java/com/google/cloud/genomics/gatk/htsjdk/GA4GHSamRecordIterator.java
new file mode 100644
index 0000000..2814b40
--- /dev/null
+++ b/src/main/java/com/google/cloud/genomics/gatk/htsjdk/GA4GHSamRecordIterator.java
@@ -0,0 +1,148 @@
+package com.google.cloud.genomics.gatk.htsjdk;
+
+import com.google.cloud.genomics.gatk.common.GenomicsApiDataSource;
+import com.google.cloud.genomics.gatk.common.ReadIteratorResource;
+
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMFileHeader.SortOrder;
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SAMRecordIterator;
+
+import java.util.Iterator;
+import java.util.logging.Logger;
+
+/**
+ * Wraps iterators provided from Genomics API and implements
+ * HTSJDK's SAMRecordIterator.
+ * Iterates over data returned from the API and when needed
+ * re-queries the API for more data.
+ * Since the API always return *overalpping* reads and SAMRecordIterator
+ * supports contained and start-at queries, this class filters reads
+ * returned from the API to make sure they conform to the requested intervals.
+ */
+public class GA4GHSamRecordIterator implements SAMRecordIterator{
+ private static final Logger LOG = Logger.getLogger(GA4GHSamRecordIterator.class.getName());
+
+ Iterator iterator;
+ GenomicsApiDataSource dataSource;
+ GA4GHQueryInterval[] intervals;
+ String readSetId;
+ int intervalIndex = -1;
+ boolean hasNext;
+ SAMRecord nextRead;
+ SAMFileHeader header;
+
+ public GA4GHSamRecordIterator(GenomicsApiDataSource dataSource,
+ String readSetId,
+ GA4GHQueryInterval[] intervals) {
+ this.dataSource = dataSource;
+ this.readSetId = readSetId;
+ this.intervals = intervals;
+ seekMatchingRead();
+ }
+
+ /** Returns true when we truly reached the end of all requested data */
+ boolean isAtEnd() {
+ return intervals == null || intervals.length == 0 ||
+ intervalIndex >= intervals.length;
+ }
+
+ /** Returns the current interval being processed or null if we have reached the end */
+ GA4GHQueryInterval currentInterval() {
+ if (isAtEnd()) {
+ return null;
+ }
+ return intervals[intervalIndex];
+ }
+
+ /** Re-queries the API for the next interval */
+ ReadIteratorResource queryNextInterval() {
+ if (!isAtEnd()) {
+ intervalIndex++;
+ }
+ if (isAtEnd()) {
+ return null;
+ }
+ return queryForInterval(currentInterval());
+ }
+
+ /** Queries the API for an interval and returns the iterator resource, or null if failed */
+ ReadIteratorResource queryForInterval(GA4GHQueryInterval interval) {
+ try {
+ return dataSource.getReadsFromGenomicsApi(readSetId, interval.getSequence(),
+ interval.getStart(), interval.getEnd());
+ } catch (Exception ex) {
+ LOG.warning("Error getting data for interval " + ex.toString());
+ }
+ return null;
+ }
+
+ /**
+ * Ensures next returned read will match the currently requested interval.
+ * Since the API always returns overlapping reads we might need to skip some
+ * reads if the interval asks for "included" or "starts at" types.
+ * Also deals with the case of iterator being at an end and needing to query
+ * for the next interval.
+ */
+ void seekMatchingRead() {
+ while (!isAtEnd()) {
+ if (iterator == null || !iterator.hasNext()) {
+ LOG.info("Getting next interval from the API");
+ // We have hit an end (or this is first time) so we need to go fish
+ // to the API.
+ ReadIteratorResource resource = queryNextInterval();
+ if (resource != null) {
+ LOG.info("Got next interval from the API");
+ header = resource.getSAMFileHeader();
+ iterator = resource.getSAMRecordIterable().iterator();
+ } else {
+ LOG.info("Failed to get next interval from the API");
+ header = null;
+ iterator = null;
+ }
+ } else {
+ nextRead = iterator.next();
+ if (currentInterval().matches(nextRead)) {
+ return; // Happy case, otherwise we keep spinning in the loop.
+ } else {
+ LOG.info("Skipping non matching read");
+ }
+ }
+ }
+ }
+
+
+ @Override
+ public void close() {
+ this.iterator = null;
+ this.dataSource = null;
+ this.intervalIndex = intervals.length;
+ }
+
+ @Override
+ public boolean hasNext() {
+ return !isAtEnd();
+ }
+
+ @Override
+ public SAMRecord next() {
+ SAMRecord retVal = nextRead;
+ seekMatchingRead();
+ return retVal;
+ }
+
+ @Override
+ public void remove() {
+ // Not implemented
+ }
+
+ @Override
+ public SAMRecordIterator assertSorted(SortOrder sortOrder) {
+ // TODO(iliat): implement this properly. This code never checks anything.
+ return this;
+ }
+
+ public SAMFileHeader getFileHeader() {
+ return header;
+ }
+}
diff --git a/src/main/java/com/google/cloud/genomics/gatk/htsjdk/SamReaderExample.java b/src/main/java/com/google/cloud/genomics/gatk/htsjdk/SamReaderExample.java
new file mode 100644
index 0000000..579c059
--- /dev/null
+++ b/src/main/java/com/google/cloud/genomics/gatk/htsjdk/SamReaderExample.java
@@ -0,0 +1,42 @@
+package com.google.cloud.genomics.gatk.htsjdk;
+
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SamInputResource;
+import htsjdk.samtools.SamReader;
+import htsjdk.samtools.SamReaderFactory;
+
+import java.net.MalformedURLException;
+import java.net.URL;
+
+/**
+ * Example of HTSJDK SamReader and SamReaderFactory class usage.
+ * Illustrates how to plug in a custom SamReaderFactory in order to consume
+ * data from ga4gh urls.
+ *
+ * To run this we need to specify the custom reader factory for HTSJDK and set
+ * client_secrets file path for GenomGenomics API:
+ * -Dsamjdk.custom_reader=https://www.googleapis.com/genomics,com.google.cloud.genomics.gatk.htsjdk.GA4GHReaderFactory
+ * -Dga4gh.client_secrets=
+ */
+public class SamReaderExample {
+ static String GA4GH_URL =
+ "https://www.googleapis.com/genomics/v1beta/readsets/CLqN8Z3sDRCwgrmdkOXjn_sB/*/";
+
+ public static void main(String[] args) {
+ try {
+ SamReaderFactory factory = SamReaderFactory.makeDefault();
+
+ // If it was a file, we would open like so:
+ // factory.open(new File("~/testdata/htsjdk/samtools/uncompressed.sam"));
+ // For API access we use SamInputResource constructed from a URL:
+ SamReader reader = factory.open(SamInputResource.of(new URL(GA4GH_URL)));
+
+ for (final SAMRecord samRecord : reader) {
+ System.err.println(samRecord);
+ }
+
+ } catch (MalformedURLException e) {
+ e.printStackTrace();
+ }
+ }
+}