Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
321 changes: 141 additions & 180 deletions src/main/java/org/apache/commons/csv/CSVFormat.java

Large diffs are not rendered by default.

185 changes: 109 additions & 76 deletions src/main/java/org/apache/commons/csv/CSVParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -618,57 +618,78 @@ private Headers createHeaders() throws IOException {
final String[] formatHeader = format.getHeader();
if (formatHeader != null) {
headerMap = createEmptyHeaderMap();
String[] headerRecord = null;
if (formatHeader.length == 0) {
// read the header from the first line of the file
final CSVRecord nextRecord = nextRecord();
if (nextRecord != null) {
headerRecord = nextRecord.values();
headerComment = nextRecord.getComment();
}
} else {
if (format.getSkipHeaderRecord()) {
final CSVRecord nextRecord = nextRecord();
if (nextRecord != null) {
headerComment = nextRecord.getComment();
}
}
headerRecord = formatHeader;
}
// build the name to index mappings
if (headerRecord != null) {
// Track an occurrence of a null, empty or blank header.
boolean observedMissing = false;
for (int i = 0; i < headerRecord.length; i++) {
final String header = headerRecord[i];
final boolean blankHeader = CSVFormat.isBlank(header);
if (blankHeader && !format.getAllowMissingColumnNames()) {
throw new IllegalArgumentException("A header name is missing in " + Arrays.toString(headerRecord));
}
final boolean containsHeader = blankHeader ? observedMissing : headerMap.containsKey(header);
final DuplicateHeaderMode headerMode = format.getDuplicateHeaderMode();
final boolean duplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_ALL;
final boolean emptyDuplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_EMPTY;
if (containsHeader && !duplicatesAllowed && !(blankHeader && emptyDuplicatesAllowed)) {
throw new IllegalArgumentException(String.format(
"The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().",
header, Arrays.toString(headerRecord)));
}
observedMissing |= blankHeader;
if (header != null) {
headerMap.put(header, Integer.valueOf(i)); // Explicit boxing is intentional
if (headerNames == null) {
headerNames = new ArrayList<>(headerRecord.length);
}
headerNames.add(header);
}
}
}
final String[] headerRecord = resolveHeaderRecord(formatHeader);
headerNames = buildHeaderNames(headerMap, headerRecord);
}
// Make header names Collection immutable
return new Headers(headerMap, headerNames == null ? Collections.emptyList() : Collections.unmodifiableList(headerNames));
}

/**
 * Chooses the header record used to build the name-to-index mapping.
 * <p>
 * An empty configured header means the header is taken from the input via {@code readHeaderRecord()}.
 * Otherwise the configured header is used as-is; when the format asks to skip the header record,
 * one record is consumed from the input first.
 * </p>
 *
 * @param formatHeader the header configured on the format; callers pass a non-null array.
 * @return the header record to map, or {@code null} when it had to be read from the input and no record was available.
 * @throws IOException if reading a record from the input fails.
 */
private String[] resolveHeaderRecord(final String[] formatHeader) throws IOException {
    final boolean headerComesFromInput = formatHeader.length == 0;
    if (headerComesFromInput) {
        return readHeaderRecord();
    }
    final boolean skipFirstRecord = format.getSkipHeaderRecord();
    if (skipFirstRecord) {
        skipHeaderRecord();
    }
    return formatHeader;
}

/**
 * Reads the next record and uses it as the header record.
 * <p>
 * When a record is available, its comment is stored in {@code headerComment}.
 * </p>
 *
 * @return the values of the record that was read, or {@code null} if no record was available.
 * @throws IOException if reading the record fails.
 */
private String[] readHeaderRecord() throws IOException {
    // read the header from the first line of the file
    final CSVRecord candidate = nextRecord();
    if (candidate != null) {
        headerComment = candidate.getComment();
        return candidate.values();
    }
    return null;
}

/**
 * Consumes the next record without using its values.
 * <p>
 * When a record is available, its comment is still stored in {@code headerComment}.
 * </p>
 *
 * @throws IOException if reading the record fails.
 */
private void skipHeaderRecord() throws IOException {
    final CSVRecord skipped = nextRecord();
    if (skipped == null) {
        // Nothing to skip, leave headerComment untouched.
        return;
    }
    headerComment = skipped.getComment();
}

/**
 * Validates each header name and records it in the given map and in an ordered list of names.
 * <p>
 * Every entry is passed to {@code validateHeader} before being recorded. Non-null names are put into
 * {@code headerMap} with their column index (a later occurrence of the same name overwrites the earlier
 * index) and appended to the returned list. {@code null} names are validated but not recorded.
 * </p>
 *
 * @param headerMap    the name-to-index map to populate; mutated by this method.
 * @param headerRecord the header names in column order, may be {@code null}.
 * @return the non-null header names in column order, or {@code null} when {@code headerRecord} is {@code null}
 *         or contains no non-null name.
 * @throws IllegalArgumentException propagated from {@code validateHeader} for a rejected header name.
 */
private List<String> buildHeaderNames(final Map<String, Integer> headerMap, final String[] headerRecord) {
    if (headerRecord == null) {
        return null;
    }
    // Allocated lazily so that a record without any non-null name yields null.
    List<String> names = null;
    // Track an occurrence of a null, empty or blank header.
    boolean sawBlank = false;
    int column = 0;
    for (final String header : headerRecord) {
        final boolean blank = CSVFormat.isBlank(header);
        validateHeader(headerRecord, header, blank, sawBlank, headerMap);
        sawBlank = sawBlank || blank;
        if (header != null) {
            headerMap.put(header, Integer.valueOf(column)); // Explicit boxing is intentional
            if (names == null) {
                names = new ArrayList<>(headerRecord.length);
            }
            names.add(header);
        }
        column++;
    }
    return names;
}

/**
 * Checks one header name against the format's missing-name and duplicate-name rules.
 *
 * @param headerRecord    the complete header record, used only in error messages.
 * @param header          the header name being checked.
 * @param blankHeader     whether {@code header} was judged blank by the caller.
 * @param observedMissing whether a blank header was already seen earlier in the record.
 * @param headerMap       the names recorded so far; only queried, not modified.
 * @throws IllegalArgumentException if the name is blank and the format does not allow missing column names,
 *                                  or if the name is a duplicate that the format's duplicate header mode does not permit.
 */
private void validateHeader(final String[] headerRecord, final String header, final boolean blankHeader, final boolean observedMissing,
        final Map<String, Integer> headerMap) {
    if (blankHeader && !format.getAllowMissingColumnNames()) {
        throw new IllegalArgumentException("A header name is missing in " + Arrays.toString(headerRecord));
    }
    // A blank header counts as a duplicate when another blank was seen before; other names are looked up in the map.
    final boolean alreadySeen = blankHeader ? observedMissing : headerMap.containsKey(header);
    final DuplicateHeaderMode mode = format.getDuplicateHeaderMode();
    if (!alreadySeen || mode == DuplicateHeaderMode.ALLOW_ALL) {
        return;
    }
    if (blankHeader && mode == DuplicateHeaderMode.ALLOW_EMPTY) {
        return;
    }
    throw new IllegalArgumentException(String.format(
            "The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().", header,
            Arrays.toString(headerRecord)));
}

/**
* Gets the current line number in the input stream.
*
Expand Down Expand Up @@ -905,43 +926,55 @@ CSVRecord nextRecord() throws IOException {
do {
reusableToken.reset();
lexer.nextToken(reusableToken);
switch (reusableToken.type) {
case TOKEN:
addRecordValue(false);
break;
case EORECORD:
addRecordValue(true);
break;
case EOF:
if (reusableToken.isReady) {
addRecordValue(true);
} else if (sb != null) {
trailerComment = sb.toString();
}
break;
case INVALID:
throw new CSVException("(line %,d) invalid parse sequence", getCurrentLineNumber());
case COMMENT: // Ignored currently
if (sb == null) { // first comment for this record
sb = new StringBuilder();
} else {
sb.append(Constants.LF);
}
sb.append(reusableToken.content);
reusableToken.type = TOKEN; // Read another token
break;
default:
throw new CSVException("Unexpected Token type: %s", reusableToken.type);
}
sb = handleToken(sb);
} while (reusableToken.type == TOKEN);
if (!recordList.isEmpty()) {
recordNumber++;
result = new CSVRecord(this, recordList.toArray(Constants.EMPTY_STRING_ARRAY), Objects.toString(sb, null), recordNumber, startCharPosition,
startBytePosition);
result = createRecord(sb, startCharPosition, startBytePosition);
}
return result;
}

/**
 * Dispatches on the type of {@code reusableToken} and applies the matching action.
 * <ul>
 * <li>{@code TOKEN}: adds a record value (not the last one).</li>
 * <li>{@code EORECORD}: adds the last record value.</li>
 * <li>{@code EOF}: delegates to {@code handleEndOfFile}.</li>
 * <li>{@code COMMENT}: delegates to {@code appendComment} and returns its buffer.</li>
 * <li>{@code INVALID} or any other type: throws a {@code CSVException}.</li>
 * </ul>
 *
 * @param comments the comment text accumulated so far for the current record, may be {@code null}.
 * @return the comment buffer to carry forward; the same reference as {@code comments} except for {@code COMMENT} tokens.
 * @throws IOException declared by this method; the visible body throws {@code CSVException} for invalid or unexpected tokens.
 */
private StringBuilder handleToken(final StringBuilder comments) throws IOException {
    StringBuilder carried = comments;
    switch (reusableToken.type) {
    case TOKEN:
        addRecordValue(false);
        break;
    case EORECORD:
        addRecordValue(true);
        break;
    case EOF:
        handleEndOfFile(comments);
        break;
    case COMMENT:
        carried = appendComment(comments);
        break;
    case INVALID:
        throw new CSVException("(line %,d) invalid parse sequence", getCurrentLineNumber());
    default:
        throw new CSVException("Unexpected Token type: %s", reusableToken.type);
    }
    return carried;
}

/**
 * Handles an end-of-file token.
 * <p>
 * If the token is flagged ready, it is added as the last value of the record. Otherwise any accumulated
 * comment text is stored in {@code trailerComment}.
 * </p>
 *
 * @param comments the comment text accumulated so far, may be {@code null}.
 */
private void handleEndOfFile(final StringBuilder comments) {
    if (reusableToken.isReady) {
        addRecordValue(true);
        return;
    }
    if (comments != null) {
        trailerComment = comments.toString();
    }
}

/**
 * Appends the content of the current comment token to the comment buffer.
 * <p>
 * A new buffer is created for the first comment; for later comments a line feed is appended before the content.
 * The token type is then reset to {@code TOKEN} so that the caller's loop reads another token.
 * </p>
 *
 * @param comments the comment text accumulated so far, or {@code null} if this is the first comment.
 * @return the buffer holding the accumulated comment text, never {@code null}.
 */
private StringBuilder appendComment(final StringBuilder comments) {
    final StringBuilder buffer;
    if (comments == null) {
        // first comment for this record
        buffer = new StringBuilder();
    } else {
        comments.append(Constants.LF);
        buffer = comments;
    }
    buffer.append(reusableToken.content);
    reusableToken.type = TOKEN; // Read another token
    return buffer;
}

/**
 * Builds a record from the values currently held in {@code recordList}.
 * <p>
 * The record is created with the current value of {@code recordNumber}; this method does not change it.
 * </p>
 *
 * @param comments          the comment text collected for the record, may be {@code null} (passed on as a {@code null} comment).
 * @param startCharPosition the character position passed through to the record.
 * @param startBytePosition the byte position passed through to the record.
 * @return the new record.
 */
private CSVRecord createRecord(final StringBuilder comments, final long startCharPosition, final long startBytePosition) {
    final String[] values = recordList.toArray(Constants.EMPTY_STRING_ARRAY);
    final String comment = Objects.toString(comments, null);
    return new CSVRecord(this, values, comment, recordNumber, startCharPosition, startBytePosition);
}

/**
* Returns a sequential {@code Stream} with this collection as its source.
* <p>
Expand Down
54 changes: 36 additions & 18 deletions src/main/java/org/apache/commons/csv/CSVPrinter.java
Original file line number Diff line number Diff line change
Expand Up @@ -111,12 +111,20 @@ public CSVPrinter(final Appendable appendable, final CSVFormat format) throws IO
this.format = format.copy();
// TODO: Is it a good idea to do this here instead of on the first call to a print method?
// It seems a pain to have to track whether the header has already been printed or not.
printHeaderComments(format);
printHeaderRecord(format);
}

/**
 * Prints each header comment of the given format through {@code printComment}, in array order.
 * Does nothing when the format has no header comments.
 *
 * @param format the format supplying the header comments.
 * @throws IOException if printing a comment fails.
 */
private void printHeaderComments(final CSVFormat format) throws IOException {
    final String[] commentLines = format.getHeaderComments();
    if (commentLines == null) {
        return;
    }
    for (int i = 0; i < commentLines.length; i++) {
        printComment(commentLines[i]);
    }
}

private void printHeaderRecord(final CSVFormat format) throws IOException {
if (format.getHeader() != null && !format.getSkipHeaderRecord()) {
this.printRecord((Object[]) format.getHeader());
}
Expand Down Expand Up @@ -235,8 +243,7 @@ public void printComment(final String comment) throws IOException {
if (!newRecord) {
println();
}
appendable.append(format.getCommentMarker().charValue()); // Explicit unboxing is intentional
appendable.append(SP);
appendCommentStart();
for (int i = 0; i < comment.length(); i++) {
final char c = comment.charAt(i);
switch (c) {
Expand All @@ -247,8 +254,7 @@ public void printComment(final String comment) throws IOException {
// falls-through: break intentionally excluded.
case LF:
println();
appendable.append(format.getCommentMarker().charValue()); // Explicit unboxing is intentional
appendable.append(SP);
appendCommentStart();
break;
default:
appendable.append(c);
Expand All @@ -261,6 +267,11 @@ public void printComment(final String comment) throws IOException {
}
}

/**
 * Writes the start of a comment line: the format's comment marker followed by {@code SP}.
 *
 * @throws IOException if appending to the underlying {@code Appendable} fails.
 */
private void appendCommentStart() throws IOException {
    final char marker = format.getCommentMarker().charValue(); // Explicit unboxing is intentional
    appendable.append(marker);
    appendable.append(SP);
}

/**
* Prints headers for a result set based on its metadata.
*
Expand Down Expand Up @@ -491,27 +502,34 @@ public void printRecords(final ResultSet resultSet) throws SQLException, IOExcep
while (resultSet.next() && format.useRow(resultSet.getRow())) {
lock.lock();
try {
for (int i = 1; i <= columnCount; i++) {
final Object object = resultSet.getObject(i);
if (object instanceof Clob) {
try (Reader reader = ((Clob) object).getCharacterStream()) {
print(reader);
}
} else if (object instanceof Blob) {
try (InputStream inputStream = ((Blob) object).getBinaryStream()) {
print(inputStream);
}
} else {
print(object);
}
}
printResultSetRow(resultSet, columnCount);
endOfRecord();
} finally {
lock.unlock();
}
}
}

/**
 * Prints every column value of the result set's current row, from column 1 through {@code columnCount}.
 * <p>
 * Only the values are printed; this method does not end the record.
 * </p>
 *
 * @param resultSet   the result set to read from.
 * @param columnCount the number of columns to print.
 * @throws SQLException if reading a column value fails.
 * @throws IOException  if printing a value fails.
 */
private void printResultSetRow(final ResultSet resultSet, final int columnCount) throws SQLException, IOException {
    // JDBC column indexes are 1-based.
    int column = 1;
    while (column <= columnCount) {
        final Object value = resultSet.getObject(column);
        printResultSetValue(value);
        column++;
    }
}

/**
 * Prints a single column value.
 * <p>
 * A {@code Clob} is printed from its character stream and a {@code Blob} from its binary stream; each stream
 * is closed once printed. Any other value, including {@code null}, is handed to {@code print(Object)}.
 * </p>
 *
 * @param object the column value, may be {@code null}.
 * @throws SQLException if obtaining a LOB stream fails.
 * @throws IOException  if printing or closing a stream fails.
 */
private void printResultSetValue(final Object object) throws SQLException, IOException {
    if (object instanceof Clob) {
        final Clob clob = (Clob) object;
        try (Reader characters = clob.getCharacterStream()) {
            print(characters);
        }
        return;
    }
    if (object instanceof Blob) {
        final Blob blob = (Blob) object;
        try (InputStream bytes = blob.getBinaryStream()) {
            print(bytes);
        }
        return;
    }
    print(object);
}

/**
* Prints all the objects with metadata in the given JDBC result set based on the header boolean.
* <p>
Expand Down
Loading