Skip to content

Commit

Permalink
csv-264: added duplicateheadermode for flexibility with header strict…
Browse files Browse the repository at this point in the history
…ness
  • Loading branch information
SethFalco committed Jul 9, 2021
1 parent ca5eb7c commit 1be64a6
Show file tree
Hide file tree
Showing 6 changed files with 224 additions and 37 deletions.
91 changes: 60 additions & 31 deletions src/main/java/org/apache/commons/csv/CSVFormat.java
Expand Up @@ -17,18 +17,6 @@

package org.apache.commons.csv;

import static org.apache.commons.csv.Constants.BACKSLASH;
import static org.apache.commons.csv.Constants.COMMA;
import static org.apache.commons.csv.Constants.COMMENT;
import static org.apache.commons.csv.Constants.CR;
import static org.apache.commons.csv.Constants.CRLF;
import static org.apache.commons.csv.Constants.DOUBLE_QUOTE_CHAR;
import static org.apache.commons.csv.Constants.EMPTY;
import static org.apache.commons.csv.Constants.LF;
import static org.apache.commons.csv.Constants.PIPE;
import static org.apache.commons.csv.Constants.SP;
import static org.apache.commons.csv.Constants.TAB;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
Expand All @@ -48,6 +36,18 @@
import java.util.Objects;
import java.util.Set;

import static org.apache.commons.csv.Constants.BACKSLASH;
import static org.apache.commons.csv.Constants.COMMA;
import static org.apache.commons.csv.Constants.COMMENT;
import static org.apache.commons.csv.Constants.CR;
import static org.apache.commons.csv.Constants.CRLF;
import static org.apache.commons.csv.Constants.DOUBLE_QUOTE_CHAR;
import static org.apache.commons.csv.Constants.EMPTY;
import static org.apache.commons.csv.Constants.LF;
import static org.apache.commons.csv.Constants.PIPE;
import static org.apache.commons.csv.Constants.SP;
import static org.apache.commons.csv.Constants.TAB;

/**
* Specifies the format of a CSV file and parses input.
*
Expand Down Expand Up @@ -188,8 +188,6 @@ public static Builder create(final CSVFormat csvFormat) {
return new Builder(csvFormat);
}

private boolean allowDuplicateHeaderNames;

private boolean allowMissingColumnNames;

private boolean autoFlush;
Expand All @@ -198,6 +196,8 @@ public static Builder create(final CSVFormat csvFormat) {

private String delimiter;

private DuplicateHeaderMode duplicateHeaderMode;

private Character escapeCharacter;

private String[] headerComments;
Expand Down Expand Up @@ -245,7 +245,7 @@ private Builder(final CSVFormat csvFormat) {
this.trim = csvFormat.trim;
this.autoFlush = csvFormat.autoFlush;
this.quotedNullString = csvFormat.quotedNullString;
this.allowDuplicateHeaderNames = csvFormat.allowDuplicateHeaderNames;
this.duplicateHeaderMode = csvFormat.duplicateHeaderMode;
}

/**
Expand All @@ -262,12 +262,26 @@ public CSVFormat build() {
*
* @param allowDuplicateHeaderNames the duplicate header names behavior, true to allow, false to disallow.
* @return This instance.
* @deprecated Use {@link #setDuplicateHeaderMode(DuplicateHeaderMode)}.
*/
@Deprecated
public Builder setAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) {
this.allowDuplicateHeaderNames = allowDuplicateHeaderNames;
final DuplicateHeaderMode mode = allowDuplicateHeaderNames ? DuplicateHeaderMode.ALLOW_ALL : DuplicateHeaderMode.ALLOW_EMPTY;
setDuplicateHeaderMode(mode);
return this;
}

/**
 * Sets the duplicate header names behavior.
 *
 * <p>
 * The mode must not be {@code null}: the header checks compare the stored mode against the
 * {@link DuplicateHeaderMode} constants by identity, so a {@code null} value would match none of
 * them and would silently be treated as if duplicates were disallowed.
 * </p>
 *
 * @param duplicateHeaderMode the duplicate header names behavior, never {@code null}.
 * @return This instance.
 * @throws NullPointerException if {@code duplicateHeaderMode} is {@code null}.
 * @since 1.9
 */
public Builder setDuplicateHeaderMode(final DuplicateHeaderMode duplicateHeaderMode) {
    // Fail fast here instead of letting a null mode flow into the built CSVFormat.
    this.duplicateHeaderMode = Objects.requireNonNull(duplicateHeaderMode, "duplicateHeaderMode");
    return this;
}

/**
* Sets the missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to cause an
* {@link IllegalArgumentException} to be thrown.
Expand Down Expand Up @@ -760,7 +774,8 @@ public CSVFormat getFormat() {
}

/**
* Standard Comma Separated Value format, as for {@link #RFC4180} but allowing empty lines.
* Standard Comma Separated Value format, as for {@link #RFC4180} but allowing
* empty lines.
*
* <p>
* The {@link Builder} settings are:
Expand All @@ -770,13 +785,13 @@ public CSVFormat getFormat() {
* <li>{@code setQuote('"')}</li>
* <li>{@code setRecordSeparator("\r\n")}</li>
* <li>{@code setIgnoreEmptyLines(true)}</li>
* <li>{@code setAllowDuplicateHeaderNames(true)}</li>
* <li>{@code setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}</li>
* </ul>
*
* @see Predefined#Default
*/
public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF, null, null, null, false, false, false,
false, false, false, true);
false, false, false, DuplicateHeaderMode.ALLOW_ALL);

/**
* Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary
Expand All @@ -799,7 +814,7 @@ public CSVFormat getFormat() {
* <li>{@code setRecordSeparator("\r\n")}</li>
* <li>{@code setIgnoreEmptyLines(false)}</li>
* <li>{@code setAllowMissingColumnNames(true)}</li>
* <li>{@code setAllowDuplicateHeaderNames(true)}</li>
* <li>{@code setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}</li>
* </ul>
* <p>
* Note: This is currently like {@link #RFC4180} plus {@link Builder#setAllowMissingColumnNames(boolean) Builder#setAllowMissingColumnNames(true)} and
Expand Down Expand Up @@ -1219,7 +1234,7 @@ private static boolean isLineBreak(final Character c) {
*/
public static CSVFormat newFormat(final char delimiter) {
return new CSVFormat(String.valueOf(delimiter), null, null, null, null, false, false, null, null, null, null, false, false, false, false, false, false,
true);
DuplicateHeaderMode.ALLOW_ALL);
}

static String[] toStringArray(final Object[] values) {
Expand Down Expand Up @@ -1261,7 +1276,7 @@ public static CSVFormat valueOf(final String format) {
return CSVFormat.Predefined.valueOf(format).getFormat();
}

private final boolean allowDuplicateHeaderNames;
private final DuplicateHeaderMode duplicateHeaderMode;

private final boolean allowMissingColumnNames;

Expand Down Expand Up @@ -1318,7 +1333,7 @@ private CSVFormat(final Builder builder) {
this.trim = builder.trim;
this.autoFlush = builder.autoFlush;
this.quotedNullString = builder.quotedNullString;
this.allowDuplicateHeaderNames = builder.allowDuplicateHeaderNames;
this.duplicateHeaderMode = builder.duplicateHeaderMode;
validate();
}

Expand All @@ -1342,13 +1357,14 @@ private CSVFormat(final Builder builder) {
* @param trim TODO Doc me.
* @param trailingDelimiter TODO Doc me.
* @param autoFlush TODO Doc me.
* @param duplicateHeaderMode the behavior when handling duplicate headers
* @throws IllegalArgumentException if the delimiter is a line break character.
*/
private CSVFormat(final String delimiter, final Character quoteChar, final QuoteMode quoteMode, final Character commentStart, final Character escape,
final boolean ignoreSurroundingSpaces, final boolean ignoreEmptyLines, final String recordSeparator, final String nullString,
final Object[] headerComments, final String[] header, final boolean skipHeaderRecord, final boolean allowMissingColumnNames,
final boolean ignoreHeaderCase, final boolean trim, final boolean trailingDelimiter, final boolean autoFlush,
final boolean allowDuplicateHeaderNames) {
final DuplicateHeaderMode duplicateHeaderMode) {
this.delimiter = delimiter;
this.quoteCharacter = quoteChar;
this.quoteMode = quoteMode;
Expand All @@ -1367,7 +1383,7 @@ private CSVFormat(final String delimiter, final Character quoteChar, final Quote
this.trim = trim;
this.autoFlush = autoFlush;
this.quotedNullString = quoteCharacter + nullString + quoteCharacter;
this.allowDuplicateHeaderNames = allowDuplicateHeaderNames;
this.duplicateHeaderMode = duplicateHeaderMode;
validate();
}

Expand Down Expand Up @@ -1405,7 +1421,7 @@ public boolean equals(final Object obj) {
return false;
}
final CSVFormat other = (CSVFormat) obj;
return allowDuplicateHeaderNames == other.allowDuplicateHeaderNames && allowMissingColumnNames == other.allowMissingColumnNames &&
return duplicateHeaderMode == other.duplicateHeaderMode && allowMissingColumnNames == other.allowMissingColumnNames &&
autoFlush == other.autoFlush && Objects.equals(commentMarker, other.commentMarker) && Objects.equals(delimiter, other.delimiter) &&
Objects.equals(escapeCharacter, other.escapeCharacter) && Arrays.equals(header, other.header) &&
Arrays.equals(headerComments, other.headerComments) && ignoreEmptyLines == other.ignoreEmptyLines &&
Expand Down Expand Up @@ -1439,9 +1455,21 @@ public String format(final Object... values) {
*
* @return whether duplicate header names are allowed
* @since 1.7
* @deprecated Use {@link #getDuplicateHeaderMode()}.
*/
@Deprecated
public boolean getAllowDuplicateHeaderNames() {
return allowDuplicateHeaderNames;
return duplicateHeaderMode == DuplicateHeaderMode.ALLOW_ALL;
}

/**
* Gets the policy this format applies to duplicate header names.
*
* <p>
* The deprecated {@link #getAllowDuplicateHeaderNames()} is a boolean view of this value: it reports
* {@code true} only when the mode is {@link DuplicateHeaderMode#ALLOW_ALL}.
* </p>
*
* @return the configured mode: {@link DuplicateHeaderMode#ALLOW_ALL} (duplicates allowed),
*         {@link DuplicateHeaderMode#ALLOW_EMPTY} (allowed conditionally) or
*         {@link DuplicateHeaderMode#DISALLOW} (disallowed).
* @since 1.9
*/
public DuplicateHeaderMode getDuplicateHeaderMode() {
return duplicateHeaderMode;
}

/**
Expand Down Expand Up @@ -1622,7 +1650,7 @@ public int hashCode() {
int result = 1;
result = prime * result + Arrays.hashCode(header);
result = prime * result + Arrays.hashCode(headerComments);
return prime * result + Objects.hash(allowDuplicateHeaderNames, allowMissingColumnNames, autoFlush, commentMarker, delimiter, escapeCharacter,
return prime * result + Objects.hash(duplicateHeaderMode, allowMissingColumnNames, autoFlush, commentMarker, delimiter, escapeCharacter,
ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces, nullString, quoteCharacter, quoteMode, quotedNullString, recordSeparator,
skipHeaderRecord, trailingDelimiter, trim);
}
Expand Down Expand Up @@ -2222,7 +2250,7 @@ private void validate() throws IllegalArgumentException {
}

// validate header
if (header != null && !allowDuplicateHeaderNames) {
if (header != null && duplicateHeaderMode != DuplicateHeaderMode.ALLOW_ALL) {
final Set<String> dupCheck = new HashSet<>();
for (final String hdr : header) {
if (!dupCheck.add(hdr)) {
Expand All @@ -2241,7 +2269,7 @@ private void validate() throws IllegalArgumentException {
*/
@Deprecated
public CSVFormat withAllowDuplicateHeaderNames() {
return builder().setAllowDuplicateHeaderNames(true).build();
return builder().setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL).build();
}

/**
Expand All @@ -2254,7 +2282,8 @@ public CSVFormat withAllowDuplicateHeaderNames() {
*/
@Deprecated
public CSVFormat withAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) {
return builder().setAllowDuplicateHeaderNames(allowDuplicateHeaderNames).build();
final DuplicateHeaderMode mode = allowDuplicateHeaderNames ? DuplicateHeaderMode.ALLOW_ALL : DuplicateHeaderMode.ALLOW_EMPTY;
return builder().setDuplicateHeaderMode(mode).build();
}

/**
Expand Down
14 changes: 9 additions & 5 deletions src/main/java/org/apache/commons/csv/CSVParser.java
Expand Up @@ -17,8 +17,6 @@

package org.apache.commons.csv;

import static org.apache.commons.csv.Token.Type.TOKEN;

import java.io.Closeable;
import java.io.File;
import java.io.IOException;
Expand All @@ -45,6 +43,8 @@
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

import static org.apache.commons.csv.Token.Type.TOKEN;

/**
* Parses CSV files according to the specified format.
*
Expand Down Expand Up @@ -503,12 +503,16 @@ private Headers createHeaders() throws IOException {
throw new IllegalArgumentException(
"A header name is missing in " + Arrays.toString(headerRecord));
}
// Note: This will always allow a duplicate header if the header is empty

final boolean containsHeader = header != null && hdrMap.containsKey(header);
if (containsHeader && !emptyHeader && !this.format.getAllowDuplicateHeaderNames()) {
final DuplicateHeaderMode headerMode = this.format.getDuplicateHeaderMode();
final boolean duplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_ALL;
final boolean emptyDuplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_EMPTY;

if (containsHeader && !duplicatesAllowed && !(emptyHeader && emptyDuplicatesAllowed)) {
throw new IllegalArgumentException(
String.format(
"The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.withAllowDuplicateHeaderNames().",
"The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().",
header, Arrays.toString(headerRecord)));
}
if (header != null) {
Expand Down
43 changes: 43 additions & 0 deletions src/main/java/org/apache/commons/csv/DuplicateHeaderMode.java
@@ -0,0 +1,43 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.commons.csv;

/**
* Determines how duplicate header names are handled when a header is in use,
* for example one configured through {@link CSVFormat#withHeader(String...)}.
*
* <p>
* This replaces the boolean "allow duplicate header names" setting: the deprecated
* {@link CSVFormat.Builder#setAllowDuplicateHeaderNames(boolean)} maps {@code true} to
* {@link #ALLOW_ALL} and {@code false} to {@link #ALLOW_EMPTY}.
* </p>
*
* @since 1.9
*/
public enum DuplicateHeaderMode {

/**
* Allows all duplicate headings.
*/
ALLOW_ALL,

// NOTE(review): the header check in CSVFormat.validate() is gated only on "mode != ALLOW_ALL", so a
// configured header array with repeated empty names may still be rejected in this mode - confirm.
/**
* Allows duplicate headings only if they're empty
* strings or null.
*/
ALLOW_EMPTY,

/**
* Disallows duplicate headings entirely.
*/
DISALLOW
}
2 changes: 1 addition & 1 deletion src/site/resources/checkstyle/checkstyle-suppressions.xml
Expand Up @@ -19,5 +19,5 @@
"-//Checkstyle//DTD SuppressionFilter Configuration 1.2//EN"
"https://checkstyle.org/dtds/suppressions_1_2.dtd">
<suppressions>
<suppress checks="LineLength" files="[\\/]CSVParser\.java$" lines="511"/>
<suppress checks="LineLength" files="[\\/]CSVParser\.java$" lines="515"/>
</suppressions>
13 changes: 13 additions & 0 deletions src/test/java/org/apache/commons/csv/CSVFormatTest.java
Expand Up @@ -260,6 +260,10 @@ public void testEqualsHash() throws Exception {
final Object a = method.invoke(CSVFormat.DEFAULT, QuoteMode.MINIMAL);
final Object b = method.invoke(CSVFormat.DEFAULT, QuoteMode.ALL);
assertNotEquals(name, type, a, b);
} else if ("org.apache.commons.csv.DuplicateHeaderMode".equals(type)) {
final Object a = method.invoke(CSVFormat.DEFAULT, new Object[] {DuplicateHeaderMode.ALLOW_ALL});
final Object b = method.invoke(CSVFormat.DEFAULT, new Object[] {DuplicateHeaderMode.DISALLOW});
assertNotEquals(name, type, a, b);
} else if ("java.lang.Object[]".equals(type)){
final Object a = method.invoke(CSVFormat.DEFAULT, new Object[] {new Object[] {null, null}});
final Object b = method.invoke(CSVFormat.DEFAULT, new Object[] {new Object[] {new Object(), new Object()}});
Expand Down Expand Up @@ -1295,6 +1299,15 @@ public void testWithEscape() {
}


@Test
public void testWithEmptyDuplicates() {
    // Configure the format through the builder with the "empty duplicates only" mode.
    final DuplicateHeaderMode expectedMode = DuplicateHeaderMode.ALLOW_EMPTY;
    final CSVFormat format = CSVFormat.DEFAULT.builder().setDuplicateHeaderMode(expectedMode).build();

    // The built format must expose exactly the mode it was configured with.
    assertEquals(expectedMode, format.getDuplicateHeaderMode());
    // The deprecated boolean accessor is true only for ALLOW_ALL, so it must be false here.
    assertFalse(format.getAllowDuplicateHeaderNames());
}

@Test
public void testWithEscapeCRThrowsExceptions() {
assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withEscape(CR));
Expand Down

0 comments on commit 1be64a6

Please sign in to comment.