From b972db53f7700c76553013d4bfa88a3d492115c3 Mon Sep 17 00:00:00 2001 From: Matthias Wahl Date: Mon, 17 Nov 2014 16:46:45 +0100 Subject: [PATCH] correctly handle empty clustered by values or values containing commas as elasticsearch misinteprets empty string or strings containing commas do not use these as routing for requests --- CHANGES.txt | 3 ++ .../io/crate/analyze/PrimaryKeyVisitor.java | 13 +++++- .../main/java/io/crate/planner/Planner.java | 2 +- .../action/bulk/BulkShardProcessor.java | 2 +- .../io/crate/analyze/BaseAnalyzerTest.java | 7 ++++ .../io/crate/analyze/SelectAnalyzerTest.java | 16 ++++++++ .../WherePKIntegrationTest.java | 41 ++++++++++++++++++- 7 files changed, 80 insertions(+), 4 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 3a371c22e4cb..dd4b2c99cc62 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -5,6 +5,9 @@ Changes for Crate Unreleased ========== + - Fix: correctly handle empty values and values containing commas + for clustered by columns in the where clause. e.g: ``where _id=''`` + - Fix: Forbid using ``alter table add column`` on a single partition. - Fix: do not consider rootfs in ``sys.nodes.fs['data']['dev']`` diff --git a/sql/src/main/java/io/crate/analyze/PrimaryKeyVisitor.java b/sql/src/main/java/io/crate/analyze/PrimaryKeyVisitor.java index 94e3afa55c99..565074979961 100644 --- a/sql/src/main/java/io/crate/analyze/PrimaryKeyVisitor.java +++ b/sql/src/main/java/io/crate/analyze/PrimaryKeyVisitor.java @@ -330,7 +330,18 @@ private Set generateIntersection(Literal setLiteral, Literal right) { } private void setClusterBy(Context context, Literal right) { - if (context.currentBucket.clusteredBy == null) { + // the current ES implementation does not support empty string + // or comma as routing column value + // it even misinterprets values which contain a comma + // make sure we have no clustered by value in this case + // see: https://github.com/elasticsearch/elasticsearch/issues/6736 + Object rightValue = right.value(); + if (rightValue != null && rightValue instanceof BytesRef) { + String val = ((BytesRef) rightValue).utf8ToString(); + if ("".equals(val) || val.contains(",")) { + invalidate(context); + } + } else if (context.currentBucket.clusteredBy == null) { context.currentBucket.clusteredBy = right; } else if (!context.currentBucket.clusteredBy.equals(right)) { invalidate(context); diff --git a/sql/src/main/java/io/crate/planner/Planner.java b/sql/src/main/java/io/crate/planner/Planner.java index 54188ecbd34b..6f39d8f2b4cd 100644 --- a/sql/src/main/java/io/crate/planner/Planner.java +++ b/sql/src/main/java/io/crate/planner/Planner.java @@ -545,7 +545,7 @@ private void normalSelect(SelectAnalysis analysis, Plan plan, Context context) { CollectNode collectNode = PlanNodeBuilder.collect(analysis, toCollect, projections); plan.add(collectNode); - ImmutableList.Builder projectionBuilder = ImmutableList.builder(); + ImmutableList.Builder projectionBuilder = ImmutableList.builder(); if (!context.indexWriterProjection.isPresent() || analysis.isLimited()) { // limit set, apply topN projection diff --git a/sql/src/main/java/org/elasticsearch/action/bulk/BulkShardProcessor.java b/sql/src/main/java/org/elasticsearch/action/bulk/BulkShardProcessor.java index 02c9bb3de597..8e64e52e090a 100644 --- a/sql/src/main/java/org/elasticsearch/action/bulk/BulkShardProcessor.java +++ b/sql/src/main/java/org/elasticsearch/action/bulk/BulkShardProcessor.java @@ -131,7 +131,7 @@ public boolean add(String indexName, BytesReference source, String id, @Nullable return true; } - private void partitionRequestByShard(String indexName, BytesReference source, String id, String routing) { + private void partitionRequestByShard(String indexName, BytesReference source, String id, @Nullable String routing) { ShardId shardId = clusterService.operationRouting().indexShards( clusterService.state(), indexName, diff --git a/sql/src/test/java/io/crate/analyze/BaseAnalyzerTest.java b/sql/src/test/java/io/crate/analyze/BaseAnalyzerTest.java index a47f8f2a4046..10ab5ba43da2 100644 --- a/sql/src/test/java/io/crate/analyze/BaseAnalyzerTest.java +++ b/sql/src/test/java/io/crate/analyze/BaseAnalyzerTest.java @@ -196,6 +196,13 @@ DEEPLY_NESTED_TABLE_IDENT, RowGranularity.DOC, new Routing()) .add("loc", DataTypes.GEO_POINT, null) .build(); + static final TableInfo TEST_CLUSTER_BY_STRING_TABLE_INFO = TestingTableInfo.builder(new TableIdent(null, "bystring"), RowGranularity.DOC, shardRouting) + .add("name", DataTypes.STRING, null) + .add("score", DataTypes.DOUBLE, null) + .addPrimaryKey("name") + .clusteredBy("name") + .build(); + static final TableIdent TEST_BLOB_TABLE_IDENT = new TableIdent(null, "myblobs"); static final TableInfo TEST_BLOB_TABLE_TABLE_INFO = TestingTableInfo.builder(TEST_BLOB_TABLE_IDENT, RowGranularity.DOC, shardRouting) .build(); diff --git a/sql/src/test/java/io/crate/analyze/SelectAnalyzerTest.java b/sql/src/test/java/io/crate/analyze/SelectAnalyzerTest.java index 3adb0b61baad..d2a47fe85388 100644 --- a/sql/src/test/java/io/crate/analyze/SelectAnalyzerTest.java +++ b/sql/src/test/java/io/crate/analyze/SelectAnalyzerTest.java @@ -114,6 +114,8 @@ protected void bindSchemas() { .thenReturn(TEST_DOC_TRANSACTIONS_TABLE_INFO); when(schemaInfo.getTableInfo(TEST_DOC_LOCATIONS_TABLE_IDENT.name())) .thenReturn(TEST_DOC_LOCATIONS_TABLE_INFO); + when(schemaInfo.getTableInfo(TEST_CLUSTER_BY_STRING_TABLE_INFO.ident().name())) + .thenReturn(TEST_CLUSTER_BY_STRING_TABLE_INFO); schemaBinder.addBinding(DocSchemaInfo.NAME).toInstance(schemaInfo); } @@ -1999,4 +2001,18 @@ public void testOrderByOnAliasWithSameColumnNameInSchema() throws Exception { assert sortSymbols != null; assertThat(sortSymbols.get(0), isReference("other_id")); } + + @Test + public void testEmptyClusteredByValue() throws Exception { + SelectAnalysis analysis = analyze("select * from bystring where name = ''"); + assertThat(analysis.whereClause().clusteredBy().isPresent(), is(false)); + assertThat(analysis.ids().isEmpty(), is(true)); + } + + @Test + public void testClusteredByValueContainsComma() throws Exception { + SelectAnalysis analysis = analyze("select * from bystring where name = 'a,b,c'"); + assertThat(analysis.whereClause().clusteredBy().isPresent(), is(false)); + assertThat(analysis.ids().isEmpty(), is(true)); + } } diff --git a/sql/src/test/java/io/crate/integrationtests/WherePKIntegrationTest.java b/sql/src/test/java/io/crate/integrationtests/WherePKIntegrationTest.java index 0a77b0d773ac..d4431431b0b3 100644 --- a/sql/src/test/java/io/crate/integrationtests/WherePKIntegrationTest.java +++ b/sql/src/test/java/io/crate/integrationtests/WherePKIntegrationTest.java @@ -22,6 +22,7 @@ package io.crate.integrationtests; import io.crate.test.integration.CrateIntegrationTest; +import io.crate.testing.TestingHelpers; import org.elasticsearch.common.collect.MapBuilder; import org.hamcrest.Matchers; import org.junit.Test; @@ -30,7 +31,6 @@ import java.util.List; import static org.hamcrest.core.Is.is; -import static org.junit.Assert.assertThat; @CrateIntegrationTest.ClusterScope(scope = CrateIntegrationTest.Scope.GLOBAL) public class WherePKIntegrationTest extends SQLTransportIntegrationTest { @@ -118,6 +118,7 @@ public void testSelectNestedObjectWherePk() throws Exception { execute("insert into items (id, details) values (?, ?)", new Object[]{ "123", MapBuilder.newMapBuilder().put("tags", Arrays.asList("small", "blue")).map() }); + execute("refresh table items"); execute("select id, details['tags'] from items where id = '123'"); assertThat(response.rowCount(), is(1L)); @@ -125,5 +126,43 @@ public void testSelectNestedObjectWherePk() throws Exception { //noinspection unchecked assertThat((List) response.rows()[0][1], Matchers.contains("small", "blue")); } + + @Test + public void testEmptyClusteredByUnderId() throws Exception { + // regression test that empty routing executes correctly + execute("create table auto_id (" + + " name string," + + " location geo_point" + + ") with (number_of_replicas=0)"); + ensureGreen(); + + execute("insert into auto_id (name, location) values (',', [36.567, 52.998]), ('Dornbirn', [54.45, 4.567])"); + execute("refresh table auto_id"); + + + execute("select * from auto_id where _id=''"); + assertThat(response.cols(), is(Matchers.arrayContaining("location", "name"))); + assertThat(response.rowCount(), is(0L)); + } + + @Test + public void testEmptyClusteredByExplicit() throws Exception { + // regression test that empty routing executes correctly + execute("create table explicit_routing (" + + " name string," + + " location geo_point" + + ") clustered by (name) with (number_of_replicas=0)"); + ensureGreen(); + execute("insert into explicit_routing (name, location) values (',', [36.567, 52.998]), ('Dornbirn', [54.45, 4.567])"); + execute("refresh table explicit_routing"); + execute("select * from explicit_routing where name=''"); + assertThat(response.cols(), is(Matchers.arrayContaining("location", "name"))); + assertThat(response.rowCount(), is(0L)); + + execute("select * from explicit_routing where name=','"); + assertThat(response.cols(), is(Matchers.arrayContaining("location", "name"))); + assertThat(response.rowCount(), is(1L)); + assertThat(TestingHelpers.printedTable(response.rows()), is("[36.567, 52.998]| ,\n")); + } }