Skip to content

Commit

Permalink
correctly handle empty clustered by values or values containing commas
Browse files Browse the repository at this point in the history
as elasticsearch misinteprets empty string or strings containing commas
do not use these as routing for requests
  • Loading branch information
msbt committed Nov 17, 2014
1 parent 31e48f7 commit b972db5
Show file tree
Hide file tree
Showing 7 changed files with 80 additions and 4 deletions.
3 changes: 3 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ Changes for Crate
Unreleased
==========

- Fix: correctly handle empty values and values containing commas
for clustered by columns in the where clause. e.g: ``where _id=''``

- Fix: Forbid using ``alter table add column`` on a single partition.

- Fix: do not consider rootfs in ``sys.nodes.fs['data']['dev']``
Expand Down
13 changes: 12 additions & 1 deletion sql/src/main/java/io/crate/analyze/PrimaryKeyVisitor.java
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,18 @@ private Set generateIntersection(Literal setLiteral, Literal right) {
}

private void setClusterBy(Context context, Literal right) {
if (context.currentBucket.clusteredBy == null) {
// the current ES implementation does not support empty string
// or comma as routing column value
// it even misinterprets values which contain a comma
// make sure we have no clustered by value in this case
// see: https://github.com/elasticsearch/elasticsearch/issues/6736
Object rightValue = right.value();
if (rightValue != null && rightValue instanceof BytesRef) {
String val = ((BytesRef) rightValue).utf8ToString();
if ("".equals(val) || val.contains(",")) {
invalidate(context);
}
} else if (context.currentBucket.clusteredBy == null) {
context.currentBucket.clusteredBy = right;
} else if (!context.currentBucket.clusteredBy.equals(right)) {
invalidate(context);
Expand Down
2 changes: 1 addition & 1 deletion sql/src/main/java/io/crate/planner/Planner.java
Original file line number Diff line number Diff line change
Expand Up @@ -545,7 +545,7 @@ private void normalSelect(SelectAnalysis analysis, Plan plan, Context context) {

CollectNode collectNode = PlanNodeBuilder.collect(analysis, toCollect, projections);
plan.add(collectNode);
ImmutableList.Builder<Projection> projectionBuilder = ImmutableList.<Projection>builder();
ImmutableList.Builder<Projection> projectionBuilder = ImmutableList.builder();

if (!context.indexWriterProjection.isPresent() || analysis.isLimited()) {
// limit set, apply topN projection
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ public boolean add(String indexName, BytesReference source, String id, @Nullable
return true;
}

private void partitionRequestByShard(String indexName, BytesReference source, String id, String routing) {
private void partitionRequestByShard(String indexName, BytesReference source, String id, @Nullable String routing) {
ShardId shardId = clusterService.operationRouting().indexShards(
clusterService.state(),
indexName,
Expand Down
7 changes: 7 additions & 0 deletions sql/src/test/java/io/crate/analyze/BaseAnalyzerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,13 @@ DEEPLY_NESTED_TABLE_IDENT, RowGranularity.DOC, new Routing())
.add("loc", DataTypes.GEO_POINT, null)
.build();

static final TableInfo TEST_CLUSTER_BY_STRING_TABLE_INFO = TestingTableInfo.builder(new TableIdent(null, "bystring"), RowGranularity.DOC, shardRouting)
.add("name", DataTypes.STRING, null)
.add("score", DataTypes.DOUBLE, null)
.addPrimaryKey("name")
.clusteredBy("name")
.build();

static final TableIdent TEST_BLOB_TABLE_IDENT = new TableIdent(null, "myblobs");
static final TableInfo TEST_BLOB_TABLE_TABLE_INFO = TestingTableInfo.builder(TEST_BLOB_TABLE_IDENT, RowGranularity.DOC, shardRouting)
.build();
Expand Down
16 changes: 16 additions & 0 deletions sql/src/test/java/io/crate/analyze/SelectAnalyzerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ protected void bindSchemas() {
.thenReturn(TEST_DOC_TRANSACTIONS_TABLE_INFO);
when(schemaInfo.getTableInfo(TEST_DOC_LOCATIONS_TABLE_IDENT.name()))
.thenReturn(TEST_DOC_LOCATIONS_TABLE_INFO);
when(schemaInfo.getTableInfo(TEST_CLUSTER_BY_STRING_TABLE_INFO.ident().name()))
.thenReturn(TEST_CLUSTER_BY_STRING_TABLE_INFO);
schemaBinder.addBinding(DocSchemaInfo.NAME).toInstance(schemaInfo);
}

Expand Down Expand Up @@ -1999,4 +2001,18 @@ public void testOrderByOnAliasWithSameColumnNameInSchema() throws Exception {
assert sortSymbols != null;
assertThat(sortSymbols.get(0), isReference("other_id"));
}

@Test
public void testEmptyClusteredByValue() throws Exception {
SelectAnalysis analysis = analyze("select * from bystring where name = ''");
assertThat(analysis.whereClause().clusteredBy().isPresent(), is(false));
assertThat(analysis.ids().isEmpty(), is(true));
}

@Test
public void testClusteredByValueContainsComma() throws Exception {
SelectAnalysis analysis = analyze("select * from bystring where name = 'a,b,c'");
assertThat(analysis.whereClause().clusteredBy().isPresent(), is(false));
assertThat(analysis.ids().isEmpty(), is(true));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
package io.crate.integrationtests;

import io.crate.test.integration.CrateIntegrationTest;
import io.crate.testing.TestingHelpers;
import org.elasticsearch.common.collect.MapBuilder;
import org.hamcrest.Matchers;
import org.junit.Test;
Expand All @@ -30,7 +31,6 @@
import java.util.List;

import static org.hamcrest.core.Is.is;
import static org.junit.Assert.assertThat;

@CrateIntegrationTest.ClusterScope(scope = CrateIntegrationTest.Scope.GLOBAL)
public class WherePKIntegrationTest extends SQLTransportIntegrationTest {
Expand Down Expand Up @@ -118,12 +118,51 @@ public void testSelectNestedObjectWherePk() throws Exception {
execute("insert into items (id, details) values (?, ?)", new Object[]{
"123", MapBuilder.newMapBuilder().put("tags", Arrays.asList("small", "blue")).map()
});
execute("refresh table items");

execute("select id, details['tags'] from items where id = '123'");
assertThat(response.rowCount(), is(1L));
assertThat((String) response.rows()[0][0], is("123"));
//noinspection unchecked
assertThat((List<String>) response.rows()[0][1], Matchers.contains("small", "blue"));
}

@Test
public void testEmptyClusteredByUnderId() throws Exception {
// regression test that empty routing executes correctly
execute("create table auto_id (" +
" name string," +
" location geo_point" +
") with (number_of_replicas=0)");
ensureGreen();

execute("insert into auto_id (name, location) values (',', [36.567, 52.998]), ('Dornbirn', [54.45, 4.567])");
execute("refresh table auto_id");


execute("select * from auto_id where _id=''");
assertThat(response.cols(), is(Matchers.arrayContaining("location", "name")));
assertThat(response.rowCount(), is(0L));
}

@Test
public void testEmptyClusteredByExplicit() throws Exception {
// regression test that empty routing executes correctly
execute("create table explicit_routing (" +
" name string," +
" location geo_point" +
") clustered by (name) with (number_of_replicas=0)");
ensureGreen();
execute("insert into explicit_routing (name, location) values (',', [36.567, 52.998]), ('Dornbirn', [54.45, 4.567])");
execute("refresh table explicit_routing");
execute("select * from explicit_routing where name=''");
assertThat(response.cols(), is(Matchers.arrayContaining("location", "name")));
assertThat(response.rowCount(), is(0L));

execute("select * from explicit_routing where name=','");
assertThat(response.cols(), is(Matchers.arrayContaining("location", "name")));
assertThat(response.rowCount(), is(1L));
assertThat(TestingHelpers.printedTable(response.rows()), is("[36.567, 52.998]| ,\n"));
}
}

0 comments on commit b972db5

Please sign in to comment.