Skip to content

Commit

Permalink
Merge 9412e00 into b1e5d93
Browse files Browse the repository at this point in the history
  • Loading branch information
t-suwa committed Feb 1, 2017
2 parents b1e5d93 + 9412e00 commit bae4999
Show file tree
Hide file tree
Showing 9 changed files with 187 additions and 21 deletions.
94 changes: 75 additions & 19 deletions src/main/java/org/apache/commons/csv/CSVFormat.java
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ public CSVFormat getFormat() {
* @see Predefined#Default
*/
public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF,
null, null, null, false, false, false, false, false);
null, null, null, false, false, false, false, false, false);

/**
* Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is
Expand Down Expand Up @@ -427,7 +427,7 @@ private static boolean isLineBreak(final Character c) {
*/
public static CSVFormat newFormat(final char delimiter) {
return new CSVFormat(delimiter, null, null, null, null, false, false, null, null, null, null, false, false,
false, false, false);
false, false, false, false);
}

/**
Expand Down Expand Up @@ -474,6 +474,8 @@ public static CSVFormat valueOf(final String format) {

private final boolean trim;

private final boolean missingColumnValuesAreNull; // whether missing column values are read as null

/**
* Creates a customized CSV format.
*
Expand Down Expand Up @@ -509,6 +511,8 @@ public static CSVFormat valueOf(final String format) {
* TODO
* @param trailingDelimiter
* TODO
* @param missingColumnValuesAreNull
* whether a missing column value is read as {@code null}
* @throws IllegalArgumentException
* if the delimiter is a line break character
*/
Expand All @@ -517,7 +521,7 @@ private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMo
final boolean ignoreEmptyLines, final String recordSeparator, final String nullString,
final Object[] headerComments, final String[] header, final boolean skipHeaderRecord,
final boolean allowMissingColumnNames, final boolean ignoreHeaderCase, final boolean trim,
final boolean trailingDelimiter) {
final boolean trailingDelimiter, final boolean missingColumnValuesAreNull) {
this.delimiter = delimiter;
this.quoteCharacter = quoteChar;
this.quoteMode = quoteMode;
Expand All @@ -534,6 +538,7 @@ private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMo
this.ignoreHeaderCase = ignoreHeaderCase;
this.trailingDelimiter = trailingDelimiter;
this.trim = trim;
this.missingColumnValuesAreNull = missingColumnValuesAreNull;
validate();
}

Expand Down Expand Up @@ -777,6 +782,16 @@ public boolean getTrim() {
return trim;
}

/**
* Returns whether missing column values are read as {@code null}.
*
* @return {@code true} if missing column values are read as {@code null}, {@code false} otherwise.
* @since 1.5
*/
public boolean getMissingColumnValuesAreNull() {
return missingColumnValuesAreNull;
}

@Override
public int hashCode() {
final int prime = 31;
Expand Down Expand Up @@ -1311,7 +1326,8 @@ public CSVFormat withAllowMissingColumnNames() {
public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNames) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,
missingColumnValuesAreNull);
}

/**
Expand Down Expand Up @@ -1346,7 +1362,8 @@ public CSVFormat withCommentMarker(final Character commentMarker) {
}
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,
missingColumnValuesAreNull);
}

/**
Expand All @@ -1364,7 +1381,8 @@ public CSVFormat withDelimiter(final char delimiter) {
}
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,
missingColumnValuesAreNull);
}

/**
Expand Down Expand Up @@ -1395,7 +1413,7 @@ public CSVFormat withEscape(final Character escape) {
}
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escape, ignoreSurroundingSpaces,
ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord,
allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, missingColumnValuesAreNull);
}

/**
Expand Down Expand Up @@ -1550,7 +1568,8 @@ public CSVFormat withHeader(final ResultSetMetaData metaData) throws SQLExceptio
public CSVFormat withHeader(final String... header) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,
missingColumnValuesAreNull);
}

/**
Expand All @@ -1571,7 +1590,8 @@ public CSVFormat withHeader(final String... header) {
public CSVFormat withHeaderComments(final Object... headerComments) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,
missingColumnValuesAreNull);
}

/**
Expand All @@ -1596,7 +1616,8 @@ public CSVFormat withIgnoreEmptyLines() {
public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,
missingColumnValuesAreNull);
}

/**
Expand All @@ -1622,7 +1643,8 @@ public CSVFormat withIgnoreHeaderCase() {
public CSVFormat withIgnoreHeaderCase(final boolean ignoreHeaderCase) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,
missingColumnValuesAreNull);
}

/**
Expand All @@ -1647,7 +1669,8 @@ public CSVFormat withIgnoreSurroundingSpaces() {
public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,
missingColumnValuesAreNull);
}

/**
Expand All @@ -1666,7 +1689,8 @@ public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpac
public CSVFormat withNullString(final String nullString) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,
missingColumnValuesAreNull);
}

/**
Expand Down Expand Up @@ -1697,7 +1721,7 @@ public CSVFormat withQuote(final Character quoteChar) {
}
return new CSVFormat(delimiter, quoteChar, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces,
ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord,
allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, missingColumnValuesAreNull);
}

/**
Expand All @@ -1711,7 +1735,8 @@ public CSVFormat withQuote(final Character quoteChar) {
public CSVFormat withQuoteMode(final QuoteMode quoteModePolicy) {
return new CSVFormat(delimiter, quoteCharacter, quoteModePolicy, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,
missingColumnValuesAreNull);
}

/**
Expand Down Expand Up @@ -1749,7 +1774,8 @@ public CSVFormat withRecordSeparator(final char recordSeparator) {
public CSVFormat withRecordSeparator(final String recordSeparator) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,
missingColumnValuesAreNull);
}

/**
Expand All @@ -1776,7 +1802,8 @@ public CSVFormat withSkipHeaderRecord() {
public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,
missingColumnValuesAreNull);
}

/**
Expand All @@ -1801,7 +1828,8 @@ public CSVFormat withTrailingDelimiter() {
public CSVFormat withTrailingDelimiter(final boolean trailingDelimiter) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,
missingColumnValuesAreNull);
}

/**
Expand All @@ -1826,6 +1854,34 @@ public CSVFormat withTrim() {
public CSVFormat withTrim(final boolean trim) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,
missingColumnValuesAreNull);
}

/**
* Returns a new {@code CSVFormat} that uses {@code null} for missing column values.
* Equivalent to calling {@code withMissingColumnValuesAreNull(true)}.
*
* @return A new CSVFormat that is equal to this but with {@code null} used for missing column values.
* @since 1.5
*/
public CSVFormat withMissingColumnValuesAreNull() {
return withMissingColumnValuesAreNull(true);
}

/**
 * Returns a new {@code CSVFormat} in which the use of {@code null} for missing column values is switched
 * on or off. Every other setting is copied from this format unchanged.
 *
 * @param missingColumnValuesAreNull
 *            {@code true} to use {@code null} for missing column values, {@code false} otherwise.
 *
 * @return A new CSVFormat that is equal to this but with the specified setting.
 * @since 1.5
 */
public CSVFormat withMissingColumnValuesAreNull(final boolean missingColumnValuesAreNull) {
    // Only the last argument comes from the parameter; all others are this format's current values.
    return new CSVFormat(
            this.delimiter,
            this.quoteCharacter,
            this.quoteMode,
            this.commentMarker,
            this.escapeCharacter,
            this.ignoreSurroundingSpaces,
            this.ignoreEmptyLines,
            this.recordSeparator,
            this.nullString,
            this.headerComments,
            this.header,
            this.skipHeaderRecord,
            this.allowMissingColumnNames,
            this.ignoreHeaderCase,
            this.trim,
            this.trailingDelimiter,
            missingColumnValuesAreNull);
}
}
21 changes: 19 additions & 2 deletions src/main/java/org/apache/commons/csv/CSVParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -367,8 +367,25 @@ private void addRecordValue(final boolean lastRecord) {
if (lastRecord && inputClean.isEmpty() && this.format.getTrailingDelimiter()) {
return;
}
final String nullString = this.format.getNullString();
this.record.add(inputClean.equals(nullString) ? null : inputClean);
this.record.add(isNull(inputClean) ? null : inputClean);
}

/**
 * Decides whether a parsed column value is stored in the record as {@code null}.
 *
 * A value equal to the format's null string is always null. Otherwise a value is null only when the
 * format has "missing column values are null" enabled, the current token was not quoted, and the
 * value is empty.
 *
 * @param input
 *            the column value to test; must not be {@code null}
 * @return {@code true} if the value is to be recorded as {@code null}
 */
private boolean isNull(final String input) {
    Assertions.notNull(input, "input");

    // An exact match with the configured null string wins, whether or not the value was quoted.
    if (input.equals(this.format.getNullString())) {
        return true;
    }

    // With the option off nothing else is null, which keeps the pre-existing behaviour.
    // With it on, a quoted empty string stays distinct from a missing (unquoted, empty) value.
    return this.format.getMissingColumnValuesAreNull()
            && !this.reusableToken.isQuoted
            && input.isEmpty();
}

/**
Expand Down
1 change: 1 addition & 0 deletions src/main/java/org/apache/commons/csv/Lexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ Token nextToken(final Token token) throws IOException {
} else if (isQuoteChar(c)) {
// consume encapsulated token
parseEncapsulatedToken(token);
token.isQuoted = true;
} else if (isEndOfFile(c)) {
// end of file return EOF()
// noop: token.content.append("");
Expand Down
4 changes: 4 additions & 0 deletions src/main/java/org/apache/commons/csv/Token.java
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,14 @@ enum Type {
/** Token ready flag: indicates a valid token with content (ready for the parser). */
boolean isReady;

/** Token quoted flag: set by the lexer when the token was read as an encapsulated (quoted) value. */
boolean isQuoted;

void reset() {
    // Clear both flags, mark the type INVALID and empty the content buffer.
    this.isQuoted = false;
    this.isReady = false;
    this.type = INVALID;
    this.content.setLength(0);
}

/**
Expand Down
6 changes: 6 additions & 0 deletions src/test/java/org/apache/commons/csv/CSVFormatTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,12 @@ public void testWithFirstRecordAsHeader() throws Exception {
assertTrue(formatWithFirstRecordAsHeader.getHeader().length == 0);
}

@Test
public void testWithMissingColumnValuesAreNull() {
    // Switching the option on for a copy of DEFAULT must be visible through the getter.
    assertTrue(CSVFormat.DEFAULT.withMissingColumnValuesAreNull().getMissingColumnValuesAreNull());
}

public enum Header {
Name, Email, Phone
}
Expand Down
41 changes: 41 additions & 0 deletions src/test/java/org/apache/commons/csv/CSVParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -961,6 +961,47 @@ public void testTrim() throws Exception {
Assert.assertEquals(3, record.size());
}

@Test
public void testMissingColumnValuesAreNull() throws Exception {
    // One record with four columns: empty, empty, quoted-empty, empty.
    final String input = ",,\"\",";
    final int expectedColumns = 4;

    // Option off (DEFAULT): backward compatible, no column is turned into null.
    try (final CSVParser csv = CSVParser.parse(input, CSVFormat.DEFAULT)) {
        final CSVRecord row = csv.iterator().next();

        for (int column = 0; column < expectedColumns; column++) {
            assertNotNull(row.get(column));
        }
        Assert.assertEquals(expectedColumns, row.size());
    }

    // Option on: unquoted empty columns become null, the quoted empty column does not.
    final CSVFormat nullForMissing = CSVFormat.DEFAULT.withMissingColumnValuesAreNull();
    try (final CSVParser csv = CSVParser.parse(input, nullForMissing)) {
        final CSVRecord row = csv.iterator().next();

        assertNull(row.get(0));
        assertNull(row.get(1));
        assertNotNull(row.get(2));
        assertNull(row.get(3));
        Assert.assertEquals(expectedColumns, row.size());
    }

    // Option on together with an empty null string: every column, quoted or not, becomes null.
    final CSVFormat emptyNullString = CSVFormat.DEFAULT
            .withNullString("")
            .withMissingColumnValuesAreNull();
    try (final CSVParser csv = CSVParser.parse(input, emptyNullString)) {
        final CSVRecord row = csv.iterator().next();

        for (int column = 0; column < expectedColumns; column++) {
            assertNull(row.get(column));
        }
        Assert.assertEquals(expectedColumns, row.size());
    }
}

private void validateLineNumbers(final String lineSeparator) throws IOException {
try (final CSVParser parser = CSVParser.parse("a" + lineSeparator + "b" + lineSeparator + "c",
CSVFormat.DEFAULT.withRecordSeparator(lineSeparator))) {
Expand Down
15 changes: 15 additions & 0 deletions src/test/java/org/apache/commons/csv/LexerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@
import static org.apache.commons.csv.Token.Type.EORECORD;
import static org.apache.commons.csv.Token.Type.TOKEN;
import static org.apache.commons.csv.TokenMatchers.hasContent;
import static org.apache.commons.csv.TokenMatchers.isQuoted;
import static org.apache.commons.csv.TokenMatchers.matches;
import static org.hamcrest.core.IsNot.not;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
Expand Down Expand Up @@ -389,4 +391,17 @@ public void testEscapingAtEOF() throws Exception {
lexer.nextToken(new Token());
}
}

@Test
public void testQuotedToken() throws Exception {
    final String input = ",,\"\",";
    // Expected quoting state of the four tokens, in the order the lexer returns them.
    final boolean[] expectedQuoted = {false, false, true, false};

    try (final Lexer lexer = createLexer(input, CSVFormat.DEFAULT)) {
        for (final boolean quoted : expectedQuoted) {
            final Token token = lexer.nextToken(new Token());
            if (quoted) {
                assertThat(token, isQuoted());
            } else {
                assertThat(token, not(isQuoted()));
            }
        }
        // After the four column tokens the lexer reports end of file with empty content.
        assertThat(lexer.nextToken(new Token()), matches(EOF, ""));
    }
}
}

0 comments on commit bae4999

Please sign in to comment.