From 8d3f309bcc052765754ec81f30420894f8d3d397 Mon Sep 17 00:00:00 2001
From: Wesley-Lawrence
Date: Tue, 15 Aug 2017 15:28:09 -0400
Subject: [PATCH 1/2] NIFI-4242 Allow quote and escape chars for CSV to be 'undefined'.

---
Review notes (this section is ignored by `git am`):
 * Line breaks and indentation were reconstructed from a whitespace-collapsed
   copy of this series; apply with `git apply --ignore-whitespace` if context
   lines do not match exactly.
 * Generic type arguments on `fields` / `configMap` were restored, @Test was
   added to testRoughParseIgnoreEscapes (it was never executed without it),
   and testRoughParseNoQuote now asserts on QUOTE_CHAR instead of ESCAPE_CHAR.
 * The `index` blob ids below are those of the original series and were not
   recomputed after these changes.

 .../nifi/util/MockConfigurationContext.java   |  8 ++-
 .../java/org/apache/nifi/csv/CSVUtils.java    | 14 ++--
 .../apache/nifi/csv/TestCSVRecordReader.java  | 68 +++++++++++++++++++
 3 files changed, 84 insertions(+), 6 deletions(-)

diff --git a/nifi-mock/src/main/java/org/apache/nifi/util/MockConfigurationContext.java b/nifi-mock/src/main/java/org/apache/nifi/util/MockConfigurationContext.java
index 91d805e2e88b..f727a35754be 100644
--- a/nifi-mock/src/main/java/org/apache/nifi/util/MockConfigurationContext.java
+++ b/nifi-mock/src/main/java/org/apache/nifi/util/MockConfigurationContext.java
@@ -58,8 +58,12 @@ public MockConfigurationContext(final ControllerService service,
 
     @Override
     public PropertyValue getProperty(final PropertyDescriptor property) {
-        String value = properties.get(property);
-        if (value == null) {
+        String value;
+        // If 'properties' contains the 'property', use what ever value is under that key, 'null' or otherwise.
+        if (properties.keySet().contains(property)) {
+            value = properties.get(property);
+        // If 'properties' doesn't contain the 'property', use the default.
+        } else {
             value = getActualDescriptor(property).getDefaultValue();
         }
         return new MockPropertyValue(value, serviceLookup, variableRegistry);
diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVUtils.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVUtils.java
index 17152aa5fc6c..4053f6f44a32 100644
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVUtils.java
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVUtils.java
@@ -60,7 +60,7 @@ public class CSVUtils {
         .addValidator(new CSVValidators.SingleCharacterValidator())
         .expressionLanguageSupported(false)
         .defaultValue("\"")
-        .required(true)
+        .required(false)
         .build();
     static final PropertyDescriptor FIRST_LINE_IS_HEADER = new PropertyDescriptor.Builder()
         .name("Skip Header Line")
@@ -100,7 +100,7 @@ public class CSVUtils {
         .addValidator(new CSVValidators.SingleCharacterValidator())
         .expressionLanguageSupported(false)
         .defaultValue("\\")
-        .required(true)
+        .required(false)
         .build();
     static final PropertyDescriptor NULL_STRING = new PropertyDescriptor.Builder()
         .name("Null String")
@@ -199,8 +199,14 @@ private static CSVFormat buildCustomFormat(final ConfigurationContext context) {
             format = format.withFirstRecordAsHeader();
         }
 
-        format = format.withQuote(getChar(context, QUOTE_CHAR));
-        format = format.withEscape(getChar(context, ESCAPE_CHAR));
+        if (context.getProperty(QUOTE_CHAR).isSet()) {
+            format = format.withQuote(getChar(context, QUOTE_CHAR));
+        }
+
+        if (context.getProperty(ESCAPE_CHAR).isSet()) {
+            format = format.withEscape(getChar(context, ESCAPE_CHAR));
+        }
+
         format = format.withTrim(context.getProperty(TRIM_FIELDS).asBoolean());
 
         if (context.getProperty(COMMENT_MARKER).isSet()) {
diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestCSVRecordReader.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestCSVRecordReader.java
index 576132fb9f76..ed83fa8ff278 100644
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestCSVRecordReader.java
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestCSVRecordReader.java
@@ -29,11 +29,14 @@
 import java.sql.Date;
 import java.util.ArrayList;
 import java.util.Calendar;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.TimeZone;
 
 import org.apache.commons.csv.CSVFormat;
 import org.apache.commons.lang3.StringEscapeUtils;
+import org.apache.nifi.components.PropertyDescriptor;
 import org.apache.nifi.logging.ComponentLog;
 import org.apache.nifi.serialization.MalformedRecordException;
 import org.apache.nifi.serialization.SimpleRecordSchema;
@@ -42,6 +45,7 @@
 import org.apache.nifi.serialization.record.RecordField;
 import org.apache.nifi.serialization.record.RecordFieldType;
 import org.apache.nifi.serialization.record.RecordSchema;
+import org.apache.nifi.util.MockConfigurationContext;
 import org.junit.Assert;
 import org.junit.Test;
 import org.mockito.Mockito;
@@ -349,4 +353,68 @@ public void testMultipleRecordsEscapedWithSpecialChar() throws IOException, Malf
             assertNull(reader.nextRecord());
         }
     }
+
+    @Test
+    public void testRoughParseIgnoreEscapes() throws IOException, MalformedRecordException {
+        final List<RecordField> fields = new ArrayList<>();
+        for (final String fieldName : new String[] {"1","2","3"}) {
+            fields.add(new RecordField(fieldName, RecordFieldType.STRING.getDataType()));
+        }
+        final RecordSchema schema = new SimpleRecordSchema(fields);
+
+        // For the following line to be accepted, we need to be able to have a custom format where no escape character is defined.
+        Assert.assertTrue("Must allow for escape character to be undefined!", !CSVUtils.ESCAPE_CHAR.isRequired());
+        final String inputRecord = "Hello,World,\"How \"\"are\"\" , &$,\\you?\\\"";
+        final byte[] inputData = inputRecord.getBytes();
+
+        Map<PropertyDescriptor, String> configMap = new HashMap<>();
+        configMap.put(CSVUtils.ESCAPE_CHAR, null);
+
+        MockConfigurationContext mcc = new MockConfigurationContext(configMap, null);
+
+        CSVFormat formatFromConfig = CSVUtils.createCSVFormat(mcc);
+
+        try (final InputStream baos = new ByteArrayInputStream(inputData)) {
+            final CSVRecordReader reader = new CSVRecordReader(baos, Mockito.mock(ComponentLog.class), schema, formatFromConfig, false, false,
+                RecordFieldType.DATE.getDefaultFormat(), RecordFieldType.TIME.getDefaultFormat(), RecordFieldType.TIMESTAMP.getDefaultFormat());
+
+            final Object[] firstRecord = reader.nextRecord().getValues();
+            final Object[] firstExpectedValues = new Object[] {"Hello", "World", "How \"are\" , &$,\\you?\\"};
+            Assert.assertArrayEquals(firstExpectedValues, firstRecord);
+
+            assertNull(reader.nextRecord());
+        }
+    }
+
+    @Test
+    public void testRoughParseNoQuote() throws IOException, MalformedRecordException {
+        final List<RecordField> fields = new ArrayList<>();
+        for (final String fieldName : new String[] {"1","2","3"}) {
+            fields.add(new RecordField(fieldName, RecordFieldType.STRING.getDataType()));
+        }
+        final RecordSchema schema = new SimpleRecordSchema(fields);
+
+        // For the following line to be accepted, we need to be able to have a custom format where no quote character is defined.
+        Assert.assertTrue("Must allow for quote character to be undefined!", !CSVUtils.QUOTE_CHAR.isRequired());
+        final String inputRecord = "Hello,World,How are , &$\\,you?";
+        final byte[] inputData = inputRecord.getBytes();
+
+        Map<PropertyDescriptor, String> configMap = new HashMap<>();
+        configMap.put(CSVUtils.QUOTE_CHAR, null);
+
+        MockConfigurationContext mcc = new MockConfigurationContext(configMap, null);
+
+        CSVFormat formatFromConfig = CSVUtils.createCSVFormat(mcc);
+
+        try (final InputStream baos = new ByteArrayInputStream(inputData)) {
+            final CSVRecordReader reader = new CSVRecordReader(baos, Mockito.mock(ComponentLog.class), schema, formatFromConfig, false, false,
+                RecordFieldType.DATE.getDefaultFormat(), RecordFieldType.TIME.getDefaultFormat(), RecordFieldType.TIMESTAMP.getDefaultFormat());
+
+            final Object[] firstRecord = reader.nextRecord().getValues();
+            final Object[] firstExpectedValues = new Object[] {"Hello", "World", "How are , &$,you?"};
+            Assert.assertArrayEquals(firstExpectedValues, firstRecord);
+
+            assertNull(reader.nextRecord());
+        }
+    }
 }

From c1a5aa37d427b80036171590c2d6c1d2c9c38447 Mon Sep 17 00:00:00 2001
From: Wesley-Lawrence
Date: Mon, 11 Sep 2017 12:01:40 -0400
Subject: [PATCH 2/2] NIFI-4242 Addressed feedback

---
 .../org/apache/nifi/util/MockConfigurationContext.java | 8 ++------
 .../src/main/java/org/apache/nifi/csv/CSVUtils.java    | 6 +++---
 .../java/org/apache/nifi/csv/TestCSVRecordReader.java  | 4 ++--
 3 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/nifi-mock/src/main/java/org/apache/nifi/util/MockConfigurationContext.java b/nifi-mock/src/main/java/org/apache/nifi/util/MockConfigurationContext.java
index f727a35754be..91d805e2e88b 100644
--- a/nifi-mock/src/main/java/org/apache/nifi/util/MockConfigurationContext.java
+++ b/nifi-mock/src/main/java/org/apache/nifi/util/MockConfigurationContext.java
@@ -58,12 +58,8 @@ public MockConfigurationContext(final ControllerService service,
 
     @Override
     public PropertyValue getProperty(final PropertyDescriptor property) {
-        String value;
-        // If 'properties' contains the 'property', use what ever value is under that key, 'null' or otherwise.
-        if (properties.keySet().contains(property)) {
-            value = properties.get(property);
-        // If 'properties' doesn't contain the 'property', use the default.
-        } else {
+        String value = properties.get(property);
+        if (value == null) {
             value = getActualDescriptor(property).getDefaultValue();
         }
         return new MockPropertyValue(value, serviceLookup, variableRegistry);
diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVUtils.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVUtils.java
index 4053f6f44a32..5a17b6f84d93 100644
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVUtils.java
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVUtils.java
@@ -199,17 +199,17 @@ private static CSVFormat buildCustomFormat(final ConfigurationContext context) {
             format = format.withFirstRecordAsHeader();
         }
 
-        if (context.getProperty(QUOTE_CHAR).isSet()) {
+        if (context.getProperty(QUOTE_CHAR).isSet() && !context.getProperty(QUOTE_CHAR).toString().isEmpty()) {
             format = format.withQuote(getChar(context, QUOTE_CHAR));
         }
 
-        if (context.getProperty(ESCAPE_CHAR).isSet()) {
+        if (context.getProperty(ESCAPE_CHAR).isSet() && !context.getProperty(ESCAPE_CHAR).toString().isEmpty()) {
             format = format.withEscape(getChar(context, ESCAPE_CHAR));
         }
 
         format = format.withTrim(context.getProperty(TRIM_FIELDS).asBoolean());
 
-        if (context.getProperty(COMMENT_MARKER).isSet()) {
+        if (context.getProperty(COMMENT_MARKER).isSet() && !context.getProperty(COMMENT_MARKER).toString().isEmpty()) {
             format = format.withCommentMarker(getChar(context, COMMENT_MARKER));
         }
         if (context.getProperty(NULL_STRING).isSet()) {
diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestCSVRecordReader.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestCSVRecordReader.java
index ed83fa8ff278..3ef218532b95 100644
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestCSVRecordReader.java
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestCSVRecordReader.java
@@ -368,7 +368,7 @@ public void testRoughParseIgnoreEscapes() throws IOException, MalformedRecordExc
         final byte[] inputData = inputRecord.getBytes();
 
         Map<PropertyDescriptor, String> configMap = new HashMap<>();
-        configMap.put(CSVUtils.ESCAPE_CHAR, null);
+        configMap.put(CSVUtils.ESCAPE_CHAR, "");
 
         MockConfigurationContext mcc = new MockConfigurationContext(configMap, null);
 
@@ -400,7 +400,7 @@ public void testRoughParseNoQuote() throws IOException, MalformedRecordException
         final byte[] inputData = inputRecord.getBytes();
 
         Map<PropertyDescriptor, String> configMap = new HashMap<>();
-        configMap.put(CSVUtils.QUOTE_CHAR, null);
+        configMap.put(CSVUtils.QUOTE_CHAR, "");
 
         MockConfigurationContext mcc = new MockConfigurationContext(configMap, null);
 