From 2c0bb9199ad81ff6817703409d035e40fa1432aa Mon Sep 17 00:00:00 2001 From: kimchy Date: Thu, 28 Apr 2011 17:20:31 +0300 Subject: [PATCH] Search: Optimize (perf) execution of global facets, closes #889. --- .../QueryFilterFacetSearchBenchmark.java | 153 ++++++++++++++++++ .../search/facet/AbstractFacetCollector.java | 4 + .../search/facet/FacetPhase.java | 61 +++++-- .../facet/OptimizeGlobalFacetCollector.java | 29 ++++ .../facet/filter/FilterFacetCollector.java | 19 ++- .../facet/query/QueryFacetCollector.java | 20 ++- .../search/internal/ContextIndexSearcher.java | 4 + .../search/facet/SimpleFacetsTests.java | 54 +++++++ 8 files changed, 325 insertions(+), 19 deletions(-) create mode 100644 modules/benchmark/micro/src/main/java/org/elasticsearch/benchmark/search/facet/QueryFilterFacetSearchBenchmark.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/OptimizeGlobalFacetCollector.java diff --git a/modules/benchmark/micro/src/main/java/org/elasticsearch/benchmark/search/facet/QueryFilterFacetSearchBenchmark.java b/modules/benchmark/micro/src/main/java/org/elasticsearch/benchmark/search/facet/QueryFilterFacetSearchBenchmark.java new file mode 100644 index 0000000000000..892f28bd6ab64 --- /dev/null +++ b/modules/benchmark/micro/src/main/java/org/elasticsearch/benchmark/search/facet/QueryFilterFacetSearchBenchmark.java @@ -0,0 +1,153 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.benchmark.search.facet; + +import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse; +import org.elasticsearch.action.bulk.BulkResponse; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.action.search.SearchType; +import org.elasticsearch.client.Client; +import org.elasticsearch.client.Requests; +import org.elasticsearch.client.action.bulk.BulkRequestBuilder; +import org.elasticsearch.common.StopWatch; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.SizeValue; +import org.elasticsearch.common.util.concurrent.jsr166y.ThreadLocalRandom; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.node.Node; +import org.elasticsearch.search.facet.FacetBuilders; + +import static org.elasticsearch.client.Requests.*; +import static org.elasticsearch.cluster.metadata.IndexMetaData.*; +import static org.elasticsearch.common.settings.ImmutableSettings.*; +import static org.elasticsearch.common.xcontent.XContentFactory.*; +import static org.elasticsearch.index.query.xcontent.QueryBuilders.*; +import static org.elasticsearch.node.NodeBuilder.*; + +public class QueryFilterFacetSearchBenchmark { + + static long COUNT = SizeValue.parseSizeValue("1m").singles(); + static int BATCH = 100; + static int QUERY_COUNT = 200; + static int NUMBER_OF_TERMS = 200; + + static Client client; + + public static void main(String[] args) throws Exception { + Settings settings = settingsBuilder() + .put("index.refresh_interval", 
"-1") + .put("gateway.type", "local") + .put(SETTING_NUMBER_OF_SHARDS, 2) + .put(SETTING_NUMBER_OF_REPLICAS, 0) + .build(); + + Node node1 = nodeBuilder().settings(settingsBuilder().put(settings).put("name", "node1")).node(); + Node node2 = nodeBuilder().settings(settingsBuilder().put(settings).put("name", "node2")).node(); + + Node clientNode = nodeBuilder().settings(settingsBuilder().put(settings).put("name", "client")).client(true).node(); + + client = clientNode.client(); + + long[] lValues = new long[NUMBER_OF_TERMS]; + for (int i = 0; i < NUMBER_OF_TERMS; i++) { + lValues[i] = ThreadLocalRandom.current().nextLong(); + } + + Thread.sleep(10000); + try { + client.admin().indices().create(createIndexRequest("test")).actionGet(); + + StopWatch stopWatch = new StopWatch().start(); + + System.out.println("--> Indexing [" + COUNT + "] ..."); + long ITERS = COUNT / BATCH; + long i = 1; + int counter = 0; + for (; i <= ITERS; i++) { + BulkRequestBuilder request = client.prepareBulk(); + for (int j = 0; j < BATCH; j++) { + counter++; + + XContentBuilder builder = jsonBuilder().startObject(); + builder.field("id", Integer.toString(counter)); + builder.field("l_value", lValues[counter % lValues.length]); + + builder.endObject(); + + request.add(Requests.indexRequest("test").type("type1").id(Integer.toString(counter)) + .source(builder)); + } + BulkResponse response = request.execute().actionGet(); + if (response.hasFailures()) { + System.err.println("--> failures..."); + } + if (((i * BATCH) % 10000) == 0) { + System.out.println("--> Indexed " + (i * BATCH) + " took " + stopWatch.stop().lastTaskTime()); + stopWatch.start(); + } + } + System.out.println("--> Indexing took " + stopWatch.totalTime() + ", TPS " + (((double) (COUNT)) / stopWatch.totalTime().secondsFrac())); + } catch (Exception e) { + System.out.println("--> Index already exists, ignoring indexing phase, waiting for green"); + ClusterHealthResponse clusterHealthResponse = 
client.admin().cluster().prepareHealth().setWaitForGreenStatus().setTimeout("10m").execute().actionGet(); + if (clusterHealthResponse.timedOut()) { + System.err.println("--> Timed out waiting for cluster health"); + } + } + client.admin().indices().prepareRefresh().execute().actionGet(); + COUNT = client.prepareCount().setQuery(matchAllQuery()).execute().actionGet().count(); + System.out.println("--> Number of docs in index: " + COUNT); + + + long totalQueryTime = 0; + + totalQueryTime = 0; + for (int j = 0; j < QUERY_COUNT; j++) { + SearchResponse searchResponse = client.prepareSearch() + .setSearchType(SearchType.COUNT) + .setQuery(termQuery("l_value", lValues[0])) + .execute().actionGet(); + totalQueryTime += searchResponse.tookInMillis(); + } + System.out.println("--> Simple Query on first l_value " + (totalQueryTime / QUERY_COUNT) + "ms"); + + totalQueryTime = 0; + for (int j = 0; j < QUERY_COUNT; j++) { + SearchResponse searchResponse = client.prepareSearch() + .setSearchType(SearchType.COUNT) + .setQuery(termQuery("l_value", lValues[0])) + .addFacet(FacetBuilders.queryFacet("query").query(termQuery("l_value", lValues[0]))) + .execute().actionGet(); + totalQueryTime += searchResponse.tookInMillis(); + } + System.out.println("--> Query facet first l_value " + (totalQueryTime / QUERY_COUNT) + "ms"); + + totalQueryTime = 0; + for (int j = 0; j < QUERY_COUNT; j++) { + SearchResponse searchResponse = client.prepareSearch() + .setSearchType(SearchType.COUNT) + .setQuery(termQuery("l_value", lValues[0])) + .addFacet(FacetBuilders.queryFacet("query").query(termQuery("l_value", lValues[0])).global(true)) + .execute().actionGet(); + totalQueryTime += searchResponse.tookInMillis(); + } + System.out.println("--> Query facet first l_value (global) " + (totalQueryTime / QUERY_COUNT) + "ms"); + } +} \ No newline at end of file diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/AbstractFacetCollector.java 
b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/AbstractFacetCollector.java index 5431bd70c6ed9..271f493ccaba4 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/AbstractFacetCollector.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/AbstractFacetCollector.java @@ -44,6 +44,10 @@ public AbstractFacetCollector(String facetName) { this.facetName = facetName; } + public Filter getFilter() { + return this.filter; + } + @Override public void setFilter(Filter filter) { if (this.filter == null) { this.filter = filter; diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/FacetPhase.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/FacetPhase.java index 9ca4424b788fb..0380c520aa7ac 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/FacetPhase.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/FacetPhase.java @@ -19,13 +19,13 @@ package org.elasticsearch.search.facet; -import org.apache.lucene.search.FilteredQuery; -import org.apache.lucene.search.Query; +import org.apache.lucene.search.*; import org.elasticsearch.ElasticSearchException; +import org.elasticsearch.common.collect.ImmutableList; import org.elasticsearch.common.collect.ImmutableMap; import org.elasticsearch.common.collect.Lists; +import org.elasticsearch.common.collect.Maps; import org.elasticsearch.common.inject.Inject; -import org.elasticsearch.common.lucene.search.NoopCollector; import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.search.SearchParseElement; import org.elasticsearch.search.SearchPhase; @@ -67,22 +67,51 @@ public class FacetPhase implements SearchPhase { return; } - // run global facets ... 
-        if (context.searcher().hasCollectors(ContextIndexSearcher.Scopes.GLOBAL)) {
-            Query query = Queries.MATCH_ALL_QUERY;
-            if (context.types().length > 0) {
-                query = new FilteredQuery(query, context.filterCache().cache(context.mapperService().typesFilter(context.types())));
-            }
+        // optimize global facet execution: collectors that implement OptimizeGlobalFacetCollector run their
+        // own dedicated search; the rest are grouped by facet filter so a search only visits docs matching it
+        List<Collector> collectors = context.searcher().removeCollectors(ContextIndexSearcher.Scopes.GLOBAL);
 
-            context.searcher().processingScope(ContextIndexSearcher.Scopes.GLOBAL);
-            try {
-                context.searcher().search(query, NoopCollector.NOOP_COLLECTOR);
-            } catch (IOException e) {
-                throw new QueryPhaseExecutionException(context, "Failed to execute global facets", e);
-            } finally {
-                context.searcher().processedScope();
+        if (collectors != null && !collectors.isEmpty()) {
+            Map<Filter, List<Collector>> collectorsByFilter = Maps.newHashMap();
+            for (Collector collector : collectors) {
+                if (collector instanceof OptimizeGlobalFacetCollector) {
+                    try {
+                        ((OptimizeGlobalFacetCollector) collector).optimizedGlobalExecution(context);
+                    } catch (IOException e) {
+                        throw new QueryPhaseExecutionException(context, "Failed to execute global facets", e);
+                    }
+                } else {
+                    Filter filter = Queries.MATCH_ALL_FILTER; // no facet filter: collect over all docs
+                    if (collector instanceof AbstractFacetCollector) {
+                        AbstractFacetCollector facetCollector = (AbstractFacetCollector) collector;
+                        if (facetCollector.getFilter() != null) {
+                            filter = facetCollector.getFilter();
+                        }
+                    }
+                    List<Collector> list = collectorsByFilter.get(filter);
+                    if (list == null) {
+                        list = Lists.newArrayList(collector); // must be mutable: collectors sharing this filter are appended below
+                        collectorsByFilter.put(filter, list);
+                    } else {
+                        list.add(collector);
+                    }
+                }
+            }
+            // run one search per distinct filter, feeding every collector that shares it
+            for (Map.Entry<Filter, List<Collector>> entry : collectorsByFilter.entrySet()) {
+                Filter filter = entry.getKey();
+                Query query = new DeletionAwareConstantScoreQuery(filter);
+                if (context.types().length > 0) {
+                    query = new
FilteredQuery(query, context.filterCache().cache(context.mapperService().typesFilter(context.types()))); + } + try { + context.searcher().search(query, MultiCollector.wrap(entry.getValue().toArray(new Collector[entry.getValue().size()]))); + } catch (IOException e) { + throw new QueryPhaseExecutionException(context, "Failed to execute global facets", e); + } } } + SearchContextFacets contextFacets = context.facets(); List facets = Lists.newArrayListWithCapacity(2); diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/OptimizeGlobalFacetCollector.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/OptimizeGlobalFacetCollector.java new file mode 100644 index 0000000000000..ff95fa3a98f5d --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/OptimizeGlobalFacetCollector.java @@ -0,0 +1,29 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.search.facet; + +import org.elasticsearch.search.internal.SearchContext; + +import java.io.IOException; + +public interface OptimizeGlobalFacetCollector { + + void optimizedGlobalExecution(SearchContext searchContext) throws IOException; +} \ No newline at end of file diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/filter/FilterFacetCollector.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/filter/FilterFacetCollector.java index 4e5a738cc0527..c4b257658bccc 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/filter/FilterFacetCollector.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/filter/FilterFacetCollector.java @@ -20,19 +20,21 @@ package org.elasticsearch.search.facet.filter; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.search.Filter; +import org.apache.lucene.search.*; import org.elasticsearch.common.lucene.docset.DocSet; import org.elasticsearch.common.lucene.docset.DocSets; import org.elasticsearch.index.cache.filter.FilterCache; import org.elasticsearch.search.facet.AbstractFacetCollector; import org.elasticsearch.search.facet.Facet; +import org.elasticsearch.search.facet.OptimizeGlobalFacetCollector; +import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; /** * @author kimchy (shay.banon) */ -public class FilterFacetCollector extends AbstractFacetCollector { +public class FilterFacetCollector extends AbstractFacetCollector implements OptimizeGlobalFacetCollector { private final Filter filter; @@ -45,6 +47,19 @@ public FilterFacetCollector(String facetName, Filter filter, FilterCache filterC this.filter = filter; } + @Override public void optimizedGlobalExecution(SearchContext searchContext) throws IOException { + Query query = new DeletionAwareConstantScoreQuery(filter); + if (super.filter != null) { + query = new FilteredQuery(query, super.filter); + } + 
if (searchContext.types().length > 0) { + query = new FilteredQuery(query, searchContext.filterCache().cache(searchContext.mapperService().typesFilter(searchContext.types()))); + } + TotalHitCountCollector collector = new TotalHitCountCollector(); + searchContext.searcher().search(query, collector); + count = collector.getTotalHits(); + } + @Override protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { docSet = DocSets.convert(reader, filter.getDocIdSet(reader)); } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/query/QueryFacetCollector.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/query/QueryFacetCollector.java index 2407595264da0..e71aa89c2348b 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/query/QueryFacetCollector.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/query/QueryFacetCollector.java @@ -27,13 +27,17 @@ import org.elasticsearch.index.cache.filter.FilterCache; import org.elasticsearch.search.facet.AbstractFacetCollector; import org.elasticsearch.search.facet.Facet; +import org.elasticsearch.search.facet.OptimizeGlobalFacetCollector; +import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; /** * @author kimchy (shay.banon) */ -public class QueryFacetCollector extends AbstractFacetCollector { +public class QueryFacetCollector extends AbstractFacetCollector implements OptimizeGlobalFacetCollector { + + private final Query query; private final Filter filter; @@ -43,6 +47,7 @@ public class QueryFacetCollector extends AbstractFacetCollector { public QueryFacetCollector(String facetName, Query query, FilterCache filterCache) { super(facetName); + this.query = query; Filter possibleFilter = extractFilterIfApplicable(query); if (possibleFilter != null) { this.filter = possibleFilter; @@ -61,6 +66,19 @@ public QueryFacetCollector(String facetName, Query query, FilterCache filterCach 
} } + @Override public void optimizedGlobalExecution(SearchContext searchContext) throws IOException { + Query query = this.query; + if (super.filter != null) { + query = new FilteredQuery(query, super.filter); + } + if (searchContext.types().length > 0) { + query = new FilteredQuery(query, searchContext.filterCache().cache(searchContext.mapperService().typesFilter(searchContext.types()))); + } + TotalHitCountCollector collector = new TotalHitCountCollector(); + searchContext.searcher().search(query, collector); + count = collector.getTotalHits(); + } + @Override public Facet facet() { return new InternalQueryFacet(facetName, count); } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java index d153fec6fab00..92c68b785497b 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java @@ -77,6 +77,10 @@ public void addCollector(String scope, Collector collector) { collectors.add(collector); } + public List removeCollectors(String scope) { + return scopeCollectors.remove(scope); + } + public boolean hasCollectors(String scope) { if (scopeCollectors == null) { return false; diff --git a/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/facet/SimpleFacetsTests.java b/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/facet/SimpleFacetsTests.java index db5e10708d462..c2bdc179bb7cb 100644 --- a/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/facet/SimpleFacetsTests.java +++ b/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/facet/SimpleFacetsTests.java @@ -32,6 +32,7 @@ import org.elasticsearch.search.facet.datehistogram.DateHistogramFacet; import 
org.elasticsearch.search.facet.filter.FilterFacet; import org.elasticsearch.search.facet.histogram.HistogramFacet; +import org.elasticsearch.search.facet.query.QueryFacet; import org.elasticsearch.search.facet.range.RangeFacet; import org.elasticsearch.search.facet.statistical.StatisticalFacet; import org.elasticsearch.search.facet.terms.TermsFacet; @@ -566,6 +567,18 @@ private void testTermsFacets(String executionHint) throws Exception { assertThat(facet.entries().get(0).term(), equalTo("111")); assertThat(facet.entries().get(0).count(), equalTo(1)); + // now with global + searchResponse = client.prepareSearch() + .setQuery(matchAllQuery()) + .addFacet(termsFacet("facet1").field("stag").size(10).facetFilter(termFilter("tag", "xxx")).global(true).executionHint(executionHint)) + .execute().actionGet(); + + facet = searchResponse.facets().facet("facet1"); + assertThat(facet.name(), equalTo("facet1")); + assertThat(facet.entries().size(), equalTo(1)); + assertThat(facet.entries().get(0).term(), equalTo("111")); + assertThat(facet.entries().get(0).count(), equalTo(1)); + // Test Facet Filter (with a type) searchResponse = client.prepareSearch() @@ -1722,6 +1735,47 @@ private void testTermsFacets(String executionHint) throws Exception { } } + @Test public void testQueryFacet() throws Exception { + try { + client.admin().indices().prepareDelete("test").execute().actionGet(); + } catch (Exception e) { + // ignore + } + client.admin().indices().prepareCreate("test").execute().actionGet(); + client.admin().cluster().prepareHealth().setWaitForGreenStatus().execute().actionGet(); + + for (int i = 0; i < 20; i++) { + client.prepareIndex("test", "type1", Integer.toString(i)).setSource("num", i % 10).execute().actionGet(); + } + client.admin().indices().prepareRefresh().execute().actionGet(); + + for (int i = 0; i < numberOfRuns(); i++) { + SearchResponse searchResponse = client.prepareSearch() + .setQuery(matchAllQuery()) + .addFacet(queryFacet("query").query(termQuery("num", 
1))) + .execute().actionGet(); + + QueryFacet facet = searchResponse.facets().facet("query"); + assertThat(facet.count(), equalTo(2l)); + + searchResponse = client.prepareSearch() + .setQuery(matchAllQuery()) + .addFacet(queryFacet("query").query(termQuery("num", 1)).global(true)) + .execute().actionGet(); + + facet = searchResponse.facets().facet("query"); + assertThat(facet.count(), equalTo(2l)); + + searchResponse = client.prepareSearch() + .setQuery(matchAllQuery()) + .addFacet(queryFacet("query").query(termsQuery("num", new long[]{1, 2})).facetFilter(termFilter("num", 1)).global(true)) + .execute().actionGet(); + + facet = searchResponse.facets().facet("query"); + assertThat(facet.count(), equalTo(2l)); + } + } + private long utcTimeInMillis(String time) { return timeInMillis(time, DateTimeZone.UTC); }