Skip to content

Commit

Permalink
[CSV-265] Update buffer position when reading line comment (#120)
Browse files Browse the repository at this point in the history
* [CSV-265] Add JiraCsv265Test

* [CSV-265] Update buffer position when reading line comment

* Update JiraCsv265Test.java

File should end in a new line.

Co-authored-by: Tyler King <tylerking001@hotmail.com>
Co-authored-by: Gary Gregory <garydgregory@users.noreply.github.com>
  • Loading branch information
3 people committed Jul 21, 2021
1 parent 399204c commit a4c6037
Show file tree
Hide file tree
Showing 2 changed files with 109 additions and 13 deletions.
34 changes: 21 additions & 13 deletions src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
Expand Up @@ -198,29 +198,37 @@ public int read(final char[] buf, final int offset, final int length) throws IOE
}

/**
* Calls {@link BufferedReader#readLine()} which drops the line terminator(s). This method should only be called
* when processing a comment, otherwise information can be lost.
* Gets the next line, dropping the line terminator(s). This method should only be called when processing a
* comment, otherwise information can be lost.
* <p>
* Increments {@link #eolCounter}.
* Increments {@link #eolCounter} and updates {@link #position}.
* </p>
* <p>
* Sets {@link #lastChar} to {@link Constants#END_OF_STREAM} at EOF, otherwise to LF.
* Sets {@link #lastChar} to {@link Constants#END_OF_STREAM} at EOF, otherwise the last EOL character.
* </p>
*
* @return the line that was read, or null if reached EOF.
*/
@Override
public String readLine() throws IOException {
final String line = super.readLine();

if (line != null) {
lastChar = LF; // needed for detecting start of line
eolCounter++;
} else {
lastChar = END_OF_STREAM;
if (lookAhead() == END_OF_STREAM) {
return null;
}

return line;
final StringBuilder buffer = new StringBuilder();
while (true) {
final int current = read();
if (current == CR) {
final int next = lookAhead();
if (next == LF) {
read();
}
}
if (current == END_OF_STREAM || current == LF || current == CR) {
break;
}
buffer.append((char) current);
}
return buffer.toString();
}

}
88 changes: 88 additions & 0 deletions src/test/java/org/apache/commons/csv/issues/JiraCsv265Test.java
@@ -0,0 +1,88 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.commons.csv.issues;

import static org.junit.jupiter.api.Assertions.assertEquals;

import java.io.IOException;
import java.io.StringReader;
import java.util.Iterator;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.junit.jupiter.api.Test;

/**
 * Tests [CSV-265] {@link CSVRecord#getCharacterPosition()} returns the correct position after encountering a comment.
 * <p>
 * Both scenarios parse input in which every data line is preceded by one or more {@code #} comment lines, and
 * expect each record's character position to equal the offset of the first comment line preceding it.
 * </p>
 */
public class JiraCsv265Test {

    /**
     * Builds the format shared by all scenarios: {@code #} as comment marker, header taken from the first record,
     * and the header record itself skipped.
     *
     * @return a newly built format.
     */
    private static CSVFormat commentAwareFormat() {
        return CSVFormat.DEFAULT.builder()
            .setCommentMarker('#')
            .setHeader()
            .setSkipHeaderRecord(true)
            .build();
    }

    /**
     * Parses {@code csv} and checks, record by record, that the reported character position equals the index of
     * the corresponding marker within {@code csv}.
     *
     * @param csv the input to parse.
     * @param expectedStartMarkers one marker per record to check, in record order.
     * @throws IOException if parsing fails.
     */
    private static void assertRecordPositions(final String csv, final String... expectedStartMarkers)
            throws IOException {
        try (CSVParser parser = commentAwareFormat().parse(new StringReader(csv))) {
            final Iterator<CSVRecord> records = parser.iterator();
            for (final String marker : expectedStartMarkers) {
                final CSVRecord record = records.next();
                assertEquals(csv.indexOf(marker), record.getCharacterPosition());
            }
        }
    }

    @Test
    public void testCharacterPositionWithComments() throws IOException {
        // @formatter:off
        final String input = "# Comment1\n"
            + "Header1,Header2\n"
            + "# Comment2\n"
            + "Value1,Value2\n"
            + "# Comment3\n"
            + "Value3,Value4\n"
            + "# Comment4\n";
        // @formatter:on
        assertRecordPositions(input, "# Comment2", "# Comment3");
    }

    @Test
    public void testCharacterPositionWithCommentsSpanningMultipleLines() throws IOException {
        // @formatter:off
        final String input = "# Comment1\n"
            + "# Comment2\n"
            + "Header1,Header2\n"
            + "# Comment3\n"
            + "# Comment4\n"
            + "Value1,Value2\n"
            + "# Comment5\n"
            + "# Comment6\n"
            + "Value3,Value4";
        // @formatter:on
        assertRecordPositions(input, "# Comment3", "# Comment5");
    }

}

0 comments on commit a4c6037

Please sign in to comment.