Fix whitespace as a separator in CSV processor (#67045) (#67050)

This change fixes problem when using space or tab as a separator in CSV processor - we check if current character is separator before we check if it is whitespace. This also improves tests to always check all combinations of separators and quotes. Closes #67013
elastic · Jan 5, 2021 · fdd2a9c · fdd2a9c
1 parent 6175e18
commit fdd2a9c
Show file tree

Hide file tree

Showing 2 changed files with 23 additions and 20 deletions.
diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CsvParser.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CsvParser.java
@@ -127,15 +127,15 @@ private boolean processUnquoted() {
             char c = currentChar();
             if (c == LF || c == CR || c == quote) {
                 throw new IllegalArgumentException("Illegal character inside unquoted field at " + currentIndex);
-            } else if (trim && isWhitespace(c)) {
-                spaceCount++;
             } else if (c == separator) {
                 state = State.START;
                 if (setField(currentIndex - spaceCount)) {
                     return true;
                 }
                 startIndex = currentIndex + 1;
                 return false;
+            } else if (trim && isWhitespace(c)) {
+                spaceCount++;
             } else {
                 spaceCount = 0;
             }
@@ -163,20 +163,20 @@ private boolean processQuotedEnd() {
         boolean shouldSetField = true;
         for (; currentIndex < length; currentIndex++) {
             c = currentChar();
-            if (isWhitespace(c)) {
-                if (shouldSetField) {
-                    if (setField(currentIndex - 1)) {
-                        return true;
-                    }
-                    shouldSetField = false;
-                }
-            } else if (c == separator) {
+            if (c == separator) {
                 if (shouldSetField && setField(currentIndex - 1)) {
                     return true;
                 }
                 startIndex = currentIndex + 1;
                 state = State.START;
                 return false;
+            } else if (isWhitespace(c)) {
+                if (shouldSetField) {
+                    if (setField(currentIndex - 1)) {
+                        return true;
+                    }
+                    shouldSetField = false;
+                }
             } else {
                 throw new IllegalArgumentException("character '" + c + "' after quoted field at " + currentIndex);
             }

diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/CsvProcessorTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/CsvProcessorTests.java
@@ -24,33 +24,36 @@
 import org.elasticsearch.ingest.IngestDocument;
 import org.elasticsearch.ingest.RandomDocumentPicks;
 import org.elasticsearch.test.ESTestCase;
-import org.junit.Before;
 
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
+import java.util.LinkedList;
 import java.util.Map;
 import java.util.stream.Collectors;
 
 public class CsvProcessorTests extends ESTestCase {
 
-    private static final Character[] SEPARATORS = new Character[]{',', ';', '|', '.'};
+    private static final Character[] SEPARATORS = new Character[]{',', ';', '|', '.', '\t'};
+    private static final String[] QUOTES = new String[]{"'", "\"", ""};
     private final String quote;
-    private char separator;
+    private final char separator;
 
 
-    public CsvProcessorTests(@Name("quote") String quote) {
+    public CsvProcessorTests(@Name("quote") String quote, @Name("separator") char separator) {
         this.quote = quote;
+        this.separator = separator;
     }
 
     @ParametersFactory
     public static Iterable<Object[]> parameters() {
-        return Arrays.asList(new Object[]{"'"}, new Object[]{"\""}, new Object[]{""});
-    }
-
-    @Before
-    public void setup() {
-        separator = randomFrom(SEPARATORS);
+        LinkedList<Object[]> list = new LinkedList<>();
+        for (Character separator : SEPARATORS) {
+            for (String quote : QUOTES) {
+                list.add(new Object[]{quote, separator});
+            }
+        }
+        return list;
     }
 
     public void testExactNumberOfFields() {