From b972db53f7700c76553013d4bfa88a3d492115c3 Mon Sep 17 00:00:00 2001
From: Matthias Wahl <matthias@crate.io>
Date: Mon, 17 Nov 2014 16:46:45 +0100
Subject: [PATCH] correctly handle empty clustered by values or values
 containing commas

as elasticsearch misinteprets empty string or strings containing commas
do not use these as routing for requests
---
 CHANGES.txt                                   |  3 ++
 .../io/crate/analyze/PrimaryKeyVisitor.java   | 13 +++++-
 .../main/java/io/crate/planner/Planner.java   |  2 +-
 .../action/bulk/BulkShardProcessor.java       |  2 +-
 .../io/crate/analyze/BaseAnalyzerTest.java    |  7 ++++
 .../io/crate/analyze/SelectAnalyzerTest.java  | 16 ++++++++
 .../WherePKIntegrationTest.java               | 41 ++++++++++++++++++-
 7 files changed, 80 insertions(+), 4 deletions(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index 3a371c22e4cb..dd4b2c99cc62 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -5,6 +5,9 @@ Changes for Crate
 Unreleased
 ==========
 
+ - Fix: correctly handle empty values and values containing commas
+   for clustered by columns in the where clause. e.g: ``where _id=''``
+
  - Fix: Forbid using ``alter table add column`` on a single partition.
 
  - Fix: do not consider rootfs in ``sys.nodes.fs['data']['dev']``
diff --git a/sql/src/main/java/io/crate/analyze/PrimaryKeyVisitor.java b/sql/src/main/java/io/crate/analyze/PrimaryKeyVisitor.java
index 94e3afa55c99..565074979961 100644
--- a/sql/src/main/java/io/crate/analyze/PrimaryKeyVisitor.java
+++ b/sql/src/main/java/io/crate/analyze/PrimaryKeyVisitor.java
@@ -330,7 +330,18 @@ private Set generateIntersection(Literal setLiteral, Literal right) {
     }
 
     private void setClusterBy(Context context, Literal right) {
-        if (context.currentBucket.clusteredBy == null) {
+        // the current ES implementation does not support empty string
+        // or comma as routing column value
+        // it even misinterprets values which contain a comma
+        // make sure we have no clustered by value in this case
+        // see: https://github.com/elasticsearch/elasticsearch/issues/6736
+        Object rightValue = right.value();
+        if (rightValue != null && rightValue instanceof BytesRef) {
+            String val = ((BytesRef) rightValue).utf8ToString();
+            if ("".equals(val) || val.contains(",")) {
+                invalidate(context);
+            }
+        } else if (context.currentBucket.clusteredBy == null) {
             context.currentBucket.clusteredBy = right;
         } else if (!context.currentBucket.clusteredBy.equals(right)) {
             invalidate(context);
diff --git a/sql/src/main/java/io/crate/planner/Planner.java b/sql/src/main/java/io/crate/planner/Planner.java
index 54188ecbd34b..6f39d8f2b4cd 100644
--- a/sql/src/main/java/io/crate/planner/Planner.java
+++ b/sql/src/main/java/io/crate/planner/Planner.java
@@ -545,7 +545,7 @@ private void normalSelect(SelectAnalysis analysis, Plan plan, Context context) {
 
         CollectNode collectNode = PlanNodeBuilder.collect(analysis, toCollect, projections);
         plan.add(collectNode);
-        ImmutableList.Builder<Projection> projectionBuilder = ImmutableList.<Projection>builder();
+        ImmutableList.Builder<Projection> projectionBuilder = ImmutableList.builder();
 
         if (!context.indexWriterProjection.isPresent() || analysis.isLimited()) {
             // limit set, apply topN projection
diff --git a/sql/src/main/java/org/elasticsearch/action/bulk/BulkShardProcessor.java b/sql/src/main/java/org/elasticsearch/action/bulk/BulkShardProcessor.java
index 02c9bb3de597..8e64e52e090a 100644
--- a/sql/src/main/java/org/elasticsearch/action/bulk/BulkShardProcessor.java
+++ b/sql/src/main/java/org/elasticsearch/action/bulk/BulkShardProcessor.java
@@ -131,7 +131,7 @@ public boolean add(String indexName, BytesReference source, String id, @Nullable
         return true;
     }
 
-    private void partitionRequestByShard(String indexName, BytesReference source, String id, String routing) {
+    private void partitionRequestByShard(String indexName, BytesReference source, String id, @Nullable String routing) {
         ShardId shardId = clusterService.operationRouting().indexShards(
                 clusterService.state(),
                 indexName,
diff --git a/sql/src/test/java/io/crate/analyze/BaseAnalyzerTest.java b/sql/src/test/java/io/crate/analyze/BaseAnalyzerTest.java
index a47f8f2a4046..10ab5ba43da2 100644
--- a/sql/src/test/java/io/crate/analyze/BaseAnalyzerTest.java
+++ b/sql/src/test/java/io/crate/analyze/BaseAnalyzerTest.java
@@ -196,6 +196,13 @@ DEEPLY_NESTED_TABLE_IDENT, RowGranularity.DOC, new Routing())
             .add("loc", DataTypes.GEO_POINT, null)
             .build();
 
+    static final TableInfo TEST_CLUSTER_BY_STRING_TABLE_INFO = TestingTableInfo.builder(new TableIdent(null, "bystring"), RowGranularity.DOC, shardRouting)
+            .add("name", DataTypes.STRING, null)
+            .add("score", DataTypes.DOUBLE, null)
+            .addPrimaryKey("name")
+            .clusteredBy("name")
+            .build();
+
     static final TableIdent TEST_BLOB_TABLE_IDENT = new TableIdent(null, "myblobs");
     static final TableInfo TEST_BLOB_TABLE_TABLE_INFO = TestingTableInfo.builder(TEST_BLOB_TABLE_IDENT, RowGranularity.DOC, shardRouting)
             .build();
diff --git a/sql/src/test/java/io/crate/analyze/SelectAnalyzerTest.java b/sql/src/test/java/io/crate/analyze/SelectAnalyzerTest.java
index 3adb0b61baad..d2a47fe85388 100644
--- a/sql/src/test/java/io/crate/analyze/SelectAnalyzerTest.java
+++ b/sql/src/test/java/io/crate/analyze/SelectAnalyzerTest.java
@@ -114,6 +114,8 @@ protected void bindSchemas() {
                     .thenReturn(TEST_DOC_TRANSACTIONS_TABLE_INFO);
             when(schemaInfo.getTableInfo(TEST_DOC_LOCATIONS_TABLE_IDENT.name()))
                     .thenReturn(TEST_DOC_LOCATIONS_TABLE_INFO);
+            when(schemaInfo.getTableInfo(TEST_CLUSTER_BY_STRING_TABLE_INFO.ident().name()))
+                    .thenReturn(TEST_CLUSTER_BY_STRING_TABLE_INFO);
             schemaBinder.addBinding(DocSchemaInfo.NAME).toInstance(schemaInfo);
         }
 
@@ -1999,4 +2001,18 @@ public void testOrderByOnAliasWithSameColumnNameInSchema() throws Exception {
         assert sortSymbols != null;
         assertThat(sortSymbols.get(0), isReference("other_id"));
     }
+
+    @Test
+    public void testEmptyClusteredByValue() throws Exception {
+        SelectAnalysis analysis = analyze("select * from bystring where name = ''");
+        assertThat(analysis.whereClause().clusteredBy().isPresent(), is(false));
+        assertThat(analysis.ids().isEmpty(), is(true));
+    }
+
+    @Test
+    public void testClusteredByValueContainsComma() throws Exception {
+        SelectAnalysis analysis = analyze("select * from bystring where name = 'a,b,c'");
+        assertThat(analysis.whereClause().clusteredBy().isPresent(), is(false));
+        assertThat(analysis.ids().isEmpty(), is(true));
+    }
 }
diff --git a/sql/src/test/java/io/crate/integrationtests/WherePKIntegrationTest.java b/sql/src/test/java/io/crate/integrationtests/WherePKIntegrationTest.java
index 0a77b0d773ac..d4431431b0b3 100644
--- a/sql/src/test/java/io/crate/integrationtests/WherePKIntegrationTest.java
+++ b/sql/src/test/java/io/crate/integrationtests/WherePKIntegrationTest.java
@@ -22,6 +22,7 @@
 package io.crate.integrationtests;
 
 import io.crate.test.integration.CrateIntegrationTest;
+import io.crate.testing.TestingHelpers;
 import org.elasticsearch.common.collect.MapBuilder;
 import org.hamcrest.Matchers;
 import org.junit.Test;
@@ -30,7 +31,6 @@
 import java.util.List;
 
 import static org.hamcrest.core.Is.is;
-import static org.junit.Assert.assertThat;
 
 @CrateIntegrationTest.ClusterScope(scope = CrateIntegrationTest.Scope.GLOBAL)
 public class WherePKIntegrationTest extends SQLTransportIntegrationTest {
@@ -118,6 +118,7 @@ public void testSelectNestedObjectWherePk() throws Exception {
         execute("insert into items (id, details) values (?, ?)", new Object[]{
             "123", MapBuilder.newMapBuilder().put("tags", Arrays.asList("small", "blue")).map()
         });
+        execute("refresh table items");
 
         execute("select id, details['tags'] from items where id = '123'");
         assertThat(response.rowCount(), is(1L));
@@ -125,5 +126,43 @@ public void testSelectNestedObjectWherePk() throws Exception {
         //noinspection unchecked
         assertThat((List<String>) response.rows()[0][1], Matchers.contains("small", "blue"));
     }
+
+    @Test
+    public void testEmptyClusteredByUnderId() throws Exception {
+        // regression test that empty routing executes correctly
+        execute("create table auto_id (" +
+                "  name string," +
+                "  location geo_point" +
+                ") with (number_of_replicas=0)");
+        ensureGreen();
+
+        execute("insert into auto_id (name, location) values (',', [36.567, 52.998]), ('Dornbirn', [54.45, 4.567])");
+        execute("refresh table auto_id");
+
+
+        execute("select * from auto_id where _id=''");
+        assertThat(response.cols(), is(Matchers.arrayContaining("location", "name")));
+        assertThat(response.rowCount(), is(0L));
+    }
+
+    @Test
+    public void testEmptyClusteredByExplicit() throws Exception {
+        // regression test that empty routing executes correctly
+        execute("create table explicit_routing (" +
+                "  name string," +
+                "  location geo_point" +
+                ") clustered by (name) with (number_of_replicas=0)");
+        ensureGreen();
+        execute("insert into explicit_routing (name, location) values (',', [36.567, 52.998]), ('Dornbirn', [54.45, 4.567])");
+        execute("refresh table explicit_routing");
+        execute("select * from explicit_routing where name=''");
+        assertThat(response.cols(), is(Matchers.arrayContaining("location", "name")));
+        assertThat(response.rowCount(), is(0L));
+
+        execute("select * from explicit_routing where name=','");
+        assertThat(response.cols(), is(Matchers.arrayContaining("location", "name")));
+        assertThat(response.rowCount(), is(1L));
+        assertThat(TestingHelpers.printedTable(response.rows()), is("[36.567, 52.998]| ,\n"));
+    }
 }