From c4acecdcb824e67534744294adbf1f34ff1f7439 Mon Sep 17 00:00:00 2001 From: Nicole Deflaux Date: Thu, 12 Mar 2015 20:09:04 -0700 Subject: [PATCH 1/3] Increase default number of bases per shard. This yields a reasonable number of tasks for sharded jobs running over the whole genome. --- src/main/java/com/google/cloud/genomics/utils/Contig.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/google/cloud/genomics/utils/Contig.java b/src/main/java/com/google/cloud/genomics/utils/Contig.java index aa20f62..4891e90 100644 --- a/src/main/java/com/google/cloud/genomics/utils/Contig.java +++ b/src/main/java/com/google/cloud/genomics/utils/Contig.java @@ -40,7 +40,7 @@ public class Contig implements Serializable { private static final long serialVersionUID = -1730387112193404207L; - public static final long DEFAULT_NUMBER_OF_BASES_PER_SHARD = 100000; + public static final long DEFAULT_NUMBER_OF_BASES_PER_SHARD = 1000000; public enum SexChromosomeFilter { INCLUDE_XY, EXCLUDE_XY } From a5f96c263f2e09d7f7ac31f12f24a1c39cf05038 Mon Sep 17 00:00:00 2001 From: Nicole Deflaux Date: Thu, 12 Mar 2015 20:39:31 -0700 Subject: [PATCH 2/3] Provide a better error for client-side shard filtering. Now that we are doing client-side filtering for strict shard boundaries, we need to check that we are requesting the field that the filter will require and return a useful error if not. --- .../cloud/genomics/utils/Paginator.java | 77 ++++++++++++++++++- .../cloud/genomics/utils/PaginatorTest.java | 34 +++++++- 2 files changed, 106 insertions(+), 5 deletions(-) diff --git a/src/main/java/com/google/cloud/genomics/utils/Paginator.java b/src/main/java/com/google/cloud/genomics/utils/Paginator.java index 20b6d07..ebd6e00 100644 --- a/src/main/java/com/google/cloud/genomics/utils/Paginator.java +++ b/src/main/java/com/google/cloud/genomics/utils/Paginator.java @@ -52,9 +52,11 @@ import com.google.api.services.genomics.model.VariantSet; import com.google.common.base.Function; import com.google.common.base.Optional; +import com.google.common.base.Preconditions; import com.google.common.base.Predicate; import com.google.common.collect.AbstractSequentialIterator; import com.google.common.collect.FluentIterable; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; @@ -319,6 +321,11 @@ public static class Reads extends Paginator< SearchReadsResponse, Read> { + private static ImmutableMap REQUIRED_STRICT_SHARD_FIELDS = + ImmutableMap.builder() + .put("alignment", ".*\\p{Punct}alignment\\p{Punct}.*") + .put("position", ".*\\p{Punct}position\\p{Punct}.*") + .build(); private final ShardBoundary shardBoundary; private Predicate shardPredicate = null; @@ -369,6 +376,26 @@ public boolean apply(Read read) { .or(request)); } + @Override + public GenomicsRequestInitializer> setFieldsInitializer(final String fields) { + return new GenomicsRequestInitializer>() { + @Override + public void initialize(GenomicsRequest search) { + if (null != fields) { + for (String requiredField : REQUIRED_STRICT_SHARD_FIELDS.keySet()) { + Preconditions + .checkArgument( + shardBoundary != ShardBoundary.STRICT + || fields.matches(REQUIRED_STRICT_SHARD_FIELDS.get(requiredField)), + "Required field missing: '%s' Add this field to the list of Read fields to be returned in the partial response.", + requiredField); + } + search.setFields(fields); + } + } + }; + } + @Override Genomics.Reads getApi(Genomics genomics) { return genomics.reads(); } @@ -396,6 +423,11 @@ public static class Annotations extends Paginator< SearchAnnotationsResponse, Annotation> { + private static ImmutableMap REQUIRED_STRICT_SHARD_FIELDS = + ImmutableMap.builder() + .put("position", ".*\\p{Punct}position\\p{Punct}.*") + .put("start", ".*\\p{Punct}start\\p{Punct}.*") + .build(); private final ShardBoundary shardBoundary; private Predicate shardPredicate = null; @@ -445,6 +477,26 @@ public boolean apply(Annotation anno) { .or(request)); } + @Override + public GenomicsRequestInitializer> setFieldsInitializer(final String fields) { + return new GenomicsRequestInitializer>() { + @Override + public void initialize(GenomicsRequest search) { + if (null != fields) { + for(String requiredField : REQUIRED_STRICT_SHARD_FIELDS.keySet()) { + Preconditions + .checkArgument( + shardBoundary != ShardBoundary.STRICT + || fields.matches(REQUIRED_STRICT_SHARD_FIELDS.get(requiredField)), + "Required field missing: '%s' Add this field to the list of Annotation fields to be returned in the partial response.", + requiredField); + } + search.setFields(fields); + } + } + }; + } + @Override Genomics.Annotations getApi(Genomics genomics) { return genomics.annotations(); } @@ -784,6 +836,9 @@ public static class Variants extends Paginator< SearchVariantsResponse, Variant> { + private static final String REQUIRED_STRICT_SHARD_FIELD = "start"; + private static final String REQUIRED_STRICT_SHARD_PATTERN = + ".*\\p{Punct}" + REQUIRED_STRICT_SHARD_FIELD +"\\p{Punct}.*"; private final ShardBoundary shardBoundary; private Predicate shardPredicate = null; @@ -834,6 +889,24 @@ public boolean apply(Variant variant) { .or(request)); } + @Override + public GenomicsRequestInitializer> setFieldsInitializer(final String fields) { + return new GenomicsRequestInitializer>() { + @Override + public void initialize(GenomicsRequest search) { + if (null != fields) { + Preconditions + .checkArgument( + shardBoundary != ShardBoundary.STRICT + || fields.matches(REQUIRED_STRICT_SHARD_PATTERN), + "Required field missing: '%s' Add this field to the list of Variant fields to be returned in the partial response.", + REQUIRED_STRICT_SHARD_FIELD); + search.setFields(fields); + } + } + }; + } + @Override Genomics.Variants getApi(Genomics genomics) { return genomics.variants(); } @@ -905,7 +978,7 @@ private Variantsets(Genomics genomics) { @Override public void initialize(GenomicsRequest search) {} }; - public static GenomicsRequestInitializer> setFieldsInitializer( + public GenomicsRequestInitializer> setFieldsInitializer( final String fields) { return new GenomicsRequestInitializer>() { @Override public void initialize(GenomicsRequest search) { @@ -915,7 +988,7 @@ public static GenomicsRequestInitializer> setFieldsInitialize } }; } - + private final Genomics genomics; public Paginator(Genomics genomics) { diff --git a/src/test/java/com/google/cloud/genomics/utils/PaginatorTest.java b/src/test/java/com/google/cloud/genomics/utils/PaginatorTest.java index 51b799e..62e5327 100644 --- a/src/test/java/com/google/cloud/genomics/utils/PaginatorTest.java +++ b/src/test/java/com/google/cloud/genomics/utils/PaginatorTest.java @@ -32,7 +32,9 @@ import org.hamcrest.CoreMatchers; import org.junit.Before; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.ExpectedException; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; import org.mockito.Mock; @@ -55,6 +57,9 @@ public class PaginatorTest { @Mock Genomics.Reads reads; @Mock Genomics.Reads.Search readsSearch; + @Rule + public ExpectedException thrown = ExpectedException.none(); + @Before public void initMocks() { MockitoAnnotations.initMocks(this); @@ -157,6 +162,15 @@ public void testVariantPagination() throws Exception { assertThat(filteredVariants, CoreMatchers.hasItems(atStartWithinExtent, atStartOverlapExtent, beyondStartWithinExtent, beyondOverlapExtent)); + // Ensure searches with fields verify the preconditions for strict shards. + final String nullFields = null; + assertNotNull(filteredPaginator.search(request, nullFields).iterator().next()); + assertNotNull(filteredPaginator.search(request, "nextPageToken,variants(start,id,calls(genotype,callSetName))").iterator().next()); + try { + filteredPaginator.search(request, "nextPageToken,variants(id,calls(genotype,callSetName))").iterator().next(); + fail("should have thrown an IllegalArgumentxception"); + } catch (IllegalArgumentException e) {} + Paginator.Variants overlappingPaginator = Paginator.Variants.create(genomics, ShardBoundary.OVERLAPS); List overlappingVariants = Lists.newArrayList(); for (Variant variant : overlappingPaginator.search(request)) { @@ -164,8 +178,13 @@ public void testVariantPagination() throws Exception { } assertEquals(6, overlappingVariants.size()); assertThat(overlappingVariants, CoreMatchers.hasItems(input)); + + // There are no preconditions on fields for overlapping shards. + assertNotNull(overlappingPaginator.search(request, nullFields).iterator().next()); + assertNotNull(overlappingPaginator.search(request, "nextPageToken,variants(start,id,calls(genotype,callSetName))").iterator().next()); + assertNotNull(overlappingPaginator.search(request, "nextPageToken,variants(id,calls(genotype,callSetName))").iterator().next()); } - + @Test public void testVariantPaginationEmptyShard() throws Exception { @@ -178,8 +197,8 @@ public void testVariantPaginationEmptyShard() throws Exception { Paginator.Variants filteredPaginator = Paginator.Variants.create(genomics, ShardBoundary.STRICT); assertNotNull(filteredPaginator.search(request)); } - - Read readHelper(int start, int end) { + + static Read readHelper(int start, int end) { Position position = new Position().setPosition((long) start); LinearAlignment alignment = new LinearAlignment().setPosition(position); return new Read().setAlignment(alignment).setFragmentLength(end-start); @@ -221,4 +240,13 @@ public void testReadPagination() throws Exception { assertThat(overlappingReads, CoreMatchers.hasItems(input)); } + @Test + public void testReadPaginationStrictShardPrecondition() throws Exception { + SearchReadsRequest request = new SearchReadsRequest().setStart(1000L).setEnd(2000L); + Mockito.when(reads.search(request)).thenReturn(readsSearch); + + Paginator.Reads filteredPaginator = Paginator.Reads.create(genomics, ShardBoundary.STRICT); + thrown.expect(IllegalArgumentException.class); + filteredPaginator.search(request, "nextPageToken,reads(id,alignment(cigar))").iterator().next(); + } } From 00c633ec6b38c559797d2b6cc618de2758517e9b Mon Sep 17 00:00:00 2001 From: Nicole Deflaux Date: Fri, 20 Mar 2015 17:33:26 -0700 Subject: [PATCH 3/3] Refactored field enforcement. --- .../cloud/genomics/utils/Paginator.java | 131 +++++++++--------- .../cloud/genomics/utils/PaginatorTest.java | 42 +++++- 2 files changed, 107 insertions(+), 66 deletions(-) diff --git a/src/main/java/com/google/cloud/genomics/utils/Paginator.java b/src/main/java/com/google/cloud/genomics/utils/Paginator.java index ebd6e00..11ea559 100644 --- a/src/main/java/com/google/cloud/genomics/utils/Paginator.java +++ b/src/main/java/com/google/cloud/genomics/utils/Paginator.java @@ -13,6 +13,12 @@ */ package com.google.cloud.genomics.utils; +import java.io.IOException; +import java.io.Serializable; +import java.util.Collections; +import java.util.Iterator; +import java.util.Map; + import com.google.api.services.genomics.Genomics; import com.google.api.services.genomics.GenomicsRequest; import com.google.api.services.genomics.model.Annotation; @@ -51,6 +57,7 @@ import com.google.api.services.genomics.model.Variant; import com.google.api.services.genomics.model.VariantSet; import com.google.common.base.Function; +import com.google.common.base.Joiner; import com.google.common.base.Optional; import com.google.common.base.Preconditions; import com.google.common.base.Predicate; @@ -59,11 +66,7 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; - -import java.io.IOException; -import java.io.Serializable; -import java.util.Collections; -import java.util.Iterator; +import com.google.common.collect.Maps; /** * An abstraction that understands the {@code pageToken} / {@code nextPageToken} protocol for paging @@ -321,8 +324,10 @@ public static class Reads extends Paginator< SearchReadsResponse, Read> { + // TODO: When this is supported server-side, these additional fields are no longer required private static ImmutableMap REQUIRED_STRICT_SHARD_FIELDS = ImmutableMap.builder() + .putAll(REQUIRED_FIELDS) .put("alignment", ".*\\p{Punct}alignment\\p{Punct}.*") .put("position", ".*\\p{Punct}position\\p{Punct}.*") .build(); @@ -378,22 +383,10 @@ public boolean apply(Read read) { @Override public GenomicsRequestInitializer> setFieldsInitializer(final String fields) { - return new GenomicsRequestInitializer>() { - @Override - public void initialize(GenomicsRequest search) { - if (null != fields) { - for (String requiredField : REQUIRED_STRICT_SHARD_FIELDS.keySet()) { - Preconditions - .checkArgument( - shardBoundary != ShardBoundary.STRICT - || fields.matches(REQUIRED_STRICT_SHARD_FIELDS.get(requiredField)), - "Required field missing: '%s' Add this field to the list of Read fields to be returned in the partial response.", - requiredField); - } - search.setFields(fields); - } - } - }; + if(shardBoundary == ShardBoundary.STRICT) { + return new GenomicsSearchFieldRequestInitializer(fields, REQUIRED_STRICT_SHARD_FIELDS); + } + return new GenomicsSearchFieldRequestInitializer(fields, REQUIRED_FIELDS); } @Override Genomics.Reads getApi(Genomics genomics) { @@ -423,8 +416,10 @@ public static class Annotations extends Paginator< SearchAnnotationsResponse, Annotation> { + // TODO: When this is supported server-side, these additional fields are no longer required private static ImmutableMap REQUIRED_STRICT_SHARD_FIELDS = ImmutableMap.builder() + .putAll(REQUIRED_FIELDS) .put("position", ".*\\p{Punct}position\\p{Punct}.*") .put("start", ".*\\p{Punct}start\\p{Punct}.*") .build(); @@ -479,22 +474,10 @@ public boolean apply(Annotation anno) { @Override public GenomicsRequestInitializer> setFieldsInitializer(final String fields) { - return new GenomicsRequestInitializer>() { - @Override - public void initialize(GenomicsRequest search) { - if (null != fields) { - for(String requiredField : REQUIRED_STRICT_SHARD_FIELDS.keySet()) { - Preconditions - .checkArgument( - shardBoundary != ShardBoundary.STRICT - || fields.matches(REQUIRED_STRICT_SHARD_FIELDS.get(requiredField)), - "Required field missing: '%s' Add this field to the list of Annotation fields to be returned in the partial response.", - requiredField); - } - search.setFields(fields); - } - } - }; + if(shardBoundary == ShardBoundary.STRICT) { + return new GenomicsSearchFieldRequestInitializer(fields, REQUIRED_STRICT_SHARD_FIELDS); + } + return new GenomicsSearchFieldRequestInitializer(fields, REQUIRED_FIELDS); } @Override Genomics.Annotations getApi(Genomics genomics) { @@ -836,9 +819,12 @@ public static class Variants extends Paginator< SearchVariantsResponse, Variant> { - private static final String REQUIRED_STRICT_SHARD_FIELD = "start"; - private static final String REQUIRED_STRICT_SHARD_PATTERN = - ".*\\p{Punct}" + REQUIRED_STRICT_SHARD_FIELD +"\\p{Punct}.*"; + // TODO: When this is supported server-side, these additional fields are no longer required + private static ImmutableMap REQUIRED_STRICT_SHARD_FIELDS = + ImmutableMap.builder() + .putAll(REQUIRED_FIELDS) + .put("start", ".*\\p{Punct}start\\p{Punct}.*") + .build(); private final ShardBoundary shardBoundary; private Predicate shardPredicate = null; @@ -891,20 +877,10 @@ public boolean apply(Variant variant) { @Override public GenomicsRequestInitializer> setFieldsInitializer(final String fields) { - return new GenomicsRequestInitializer>() { - @Override - public void initialize(GenomicsRequest search) { - if (null != fields) { - Preconditions - .checkArgument( - shardBoundary != ShardBoundary.STRICT - || fields.matches(REQUIRED_STRICT_SHARD_PATTERN), - "Required field missing: '%s' Add this field to the list of Variant fields to be returned in the partial response.", - REQUIRED_STRICT_SHARD_FIELD); - search.setFields(fields); - } - } - }; + if(shardBoundary == ShardBoundary.STRICT) { + return new GenomicsSearchFieldRequestInitializer(fields, REQUIRED_STRICT_SHARD_FIELDS); + } + return new GenomicsSearchFieldRequestInitializer(fields, REQUIRED_FIELDS); } @Override Genomics.Variants getApi(Genomics genomics) { @@ -978,15 +954,44 @@ private Variantsets(Genomics genomics) { @Override public void initialize(GenomicsRequest search) {} }; - public GenomicsRequestInitializer> setFieldsInitializer( - final String fields) { - return new GenomicsRequestInitializer>() { - @Override public void initialize(GenomicsRequest search) { - if (null != fields) { - search.setFields(fields); - } - } - }; + protected static final ImmutableMap REQUIRED_FIELDS = ImmutableMap + .builder() + .put("nextPageToken", "(.+\\p{Punct})?nextPageToken(\\p{Punct}.+)?"). + build(); + + public GenomicsRequestInitializer> setFieldsInitializer(final String fields) { + return new GenomicsSearchFieldRequestInitializer(fields, REQUIRED_FIELDS); + } + + public static class GenomicsSearchFieldRequestInitializer implements + GenomicsRequestInitializer> { + private final String fields; + private final ImmutableMap requiredFields; + + public GenomicsSearchFieldRequestInitializer(String fields, + ImmutableMap requiredFields) { + this.fields = fields; + this.requiredFields = requiredFields; + } + + @Override + public void initialize(GenomicsRequest search) { + if (null != fields) { + Map missingFields = + Maps.filterValues(requiredFields, new Predicate() { + @Override + public boolean apply(String input) { + return !fields.matches(input); + } + }); + Preconditions + .checkArgument( + missingFields.isEmpty(), + "Required field(s) missing: '%s' Add this field to the list of fields to be returned in the partial response.", + Joiner.on(",").join(missingFields.keySet())); + search.setFields(fields); + } + } } private final Genomics genomics; diff --git a/src/test/java/com/google/cloud/genomics/utils/PaginatorTest.java b/src/test/java/com/google/cloud/genomics/utils/PaginatorTest.java index 62e5327..bc6a2fd 100644 --- a/src/test/java/com/google/cloud/genomics/utils/PaginatorTest.java +++ b/src/test/java/com/google/cloud/genomics/utils/PaginatorTest.java @@ -32,6 +32,7 @@ import org.hamcrest.CoreMatchers; import org.junit.Before; +import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; @@ -125,16 +126,26 @@ public void testFields() throws Exception { Paginator.ReadGroupSets paginator = Paginator.ReadGroupSets.create(genomics); List ids = Lists.newArrayList(); for (ReadGroupSet set : paginator.search( - new SearchReadGroupSetsRequest().setName("HG"), "readGroupSets(id,name)")) { + new SearchReadGroupSetsRequest().setName("HG"), "nextPageToken,readGroupSets(id,name)")) { ids.add(set.getId()); } assertEquals(Lists.newArrayList("r1"), ids); // Make sure the fields parameter actually gets passed along - Mockito.verify(readGroupSetSearch, Mockito.atLeastOnce()).setFields("readGroupSets(id,name)"); + Mockito.verify(readGroupSetSearch, Mockito.atLeastOnce()).setFields("nextPageToken,readGroupSets(id,name)"); } + @Test + public void testFieldsMissingNextPageToken() throws Exception { + Mockito.when(readGroupSets.search(new SearchReadGroupSetsRequest().setName("HG"))) + .thenReturn(readGroupSetSearch); + + Paginator.ReadGroupSets paginator = Paginator.ReadGroupSets.create(genomics); + thrown.expect(IllegalArgumentException.class); + paginator.search(new SearchReadGroupSetsRequest().setName("HG"), "readGroupSets(id,name)").iterator().next(); + } + @Test public void testVariantPagination() throws Exception { @@ -166,6 +177,8 @@ public void testVariantPagination() throws Exception { final String nullFields = null; assertNotNull(filteredPaginator.search(request, nullFields).iterator().next()); assertNotNull(filteredPaginator.search(request, "nextPageToken,variants(start,id,calls(genotype,callSetName))").iterator().next()); + assertNotNull(filteredPaginator.search(request, "id,nextPageToken,variants(start,id,calls(genotype,callSetName))").iterator().next()); + assertNotNull(filteredPaginator.search(request, "variants(start,id,calls(genotype,callSetName)),nextPageToken").iterator().next()); try { filteredPaginator.search(request, "nextPageToken,variants(id,calls(genotype,callSetName))").iterator().next(); fail("should have thrown an IllegalArgumentxception"); @@ -179,10 +192,12 @@ public void testVariantPagination() throws Exception { assertEquals(6, overlappingVariants.size()); assertThat(overlappingVariants, CoreMatchers.hasItems(input)); - // There are no preconditions on fields for overlapping shards. + // Ensure searches with fields verify the preconditions for overlapping shards. assertNotNull(overlappingPaginator.search(request, nullFields).iterator().next()); assertNotNull(overlappingPaginator.search(request, "nextPageToken,variants(start,id,calls(genotype,callSetName))").iterator().next()); assertNotNull(overlappingPaginator.search(request, "nextPageToken,variants(id,calls(genotype,callSetName))").iterator().next()); + assertNotNull(overlappingPaginator.search(request, "id,nextPageToken,variants(start,id,calls(genotype,callSetName))").iterator().next()); + assertNotNull(overlappingPaginator.search(request, "variants(id,calls(genotype,callSetName)),nextPageToken").iterator().next()); } @Test @@ -249,4 +264,25 @@ public void testReadPaginationStrictShardPrecondition() throws Exception { thrown.expect(IllegalArgumentException.class); filteredPaginator.search(request, "nextPageToken,reads(id,alignment(cigar))").iterator().next(); } + + @Test + public void testStrictReadPaginationNextPageTokenPrecondition() throws Exception { + SearchReadsRequest request = new SearchReadsRequest().setStart(1000L).setEnd(2000L); + Mockito.when(reads.search(request)).thenReturn(readsSearch); + + Paginator.Reads filteredPaginator = Paginator.Reads.create(genomics, ShardBoundary.STRICT); + thrown.expect(IllegalArgumentException.class); + filteredPaginator.search(request, "reads(id,alignment(cigar,position))").iterator().next(); + } + + @Test + public void testOverlappingReadPaginationNextPageTokenPrecondition() throws Exception { + SearchReadsRequest request = new SearchReadsRequest().setStart(1000L).setEnd(2000L); + Mockito.when(reads.search(request)).thenReturn(readsSearch); + + Paginator.Reads overlappingPaginator = Paginator.Reads.create(genomics, ShardBoundary.OVERLAPS); + thrown.expect(IllegalArgumentException.class); + overlappingPaginator.search(request, "reads(id,alignment(cigar,position))").iterator().next(); + } + }