Skip to content
Permalink
Browse files
CSV-278: Reuse Buffers in Lexer for Delimiter Detection (#162)
* CSV-278: Reuse Buffers in Lexer for Delimiter Detection

* Remove erroneous tab character

* Reduce change set with fewer formatting changes

* Reduce change set with fewer formatting changes
  • Loading branch information
belugabehr committed Jul 15, 2021
1 parent a4e005f commit 3ac702b190fd04c56118cb03aa87577f0a6a86f7
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 12 deletions.
@@ -132,6 +132,21 @@ int lookAhead() throws IOException {
*/
char[] lookAhead(final int n) throws IOException {
    // Allocate a fresh n-character buffer and delegate to the buffer-filling overload.
    return lookAhead(new char[n]);
}

/**
* Populates the buffer with the next {@code buf.length} characters in the
* current reader without consuming them. The next call to {@link #read()} will
* still return the next value. This doesn't affect line number or last
* character.
*
* @param buf the buffer to fill for the look ahead.
* @return the buffer itself
* @throws IOException If an I/O error occurs
*/
char[] lookAhead(final char[] buf) throws IOException {
final int n = buf.length;
super.mark(n);
super.read(buf, 0, n);
super.reset();
@@ -49,6 +49,8 @@ final class Lexer implements Closeable {
private static final char DISABLED = '\ufffe';

private final char[] delimiter;
private final char[] delimiterBuf;
private final char[] escapeDelimiterBuf;
private final char escape;
private final char quoteChar;
private final char commentStart;
@@ -68,6 +70,8 @@ final class Lexer implements Closeable {
this.commentStart = mapNullToDisabled(format.getCommentMarker());
this.ignoreSurroundingSpaces = format.getIgnoreSurroundingSpaces();
this.ignoreEmptyLines = format.getIgnoreEmptyLines();
this.delimiterBuf = new char[delimiter.length - 1];
this.escapeDelimiterBuf = new char[2 * delimiter.length - 1];
}

/**
@@ -112,7 +116,7 @@ boolean isCommentStart(final int ch) {
}

/**
* Determine whether the next characters constitute a delimiter through {@link ExtendedBufferedReader#lookAhead(int)}
* Determine whether the next characters constitute a delimiter through {@link ExtendedBufferedReader#lookAhead(char[])}.
*
* @param ch
* the current character.
@@ -126,14 +130,13 @@ boolean isDelimiter(final int ch) throws IOException {
if (delimiter.length == 1) {
return true;
}
final int len = delimiter.length - 1;
final char[] buf = reader.lookAhead(len);
for (int i = 0; i < len; i++) {
if (buf[i] != delimiter[i+1]) {
reader.lookAhead(delimiterBuf);
for (int i = 0; i < delimiterBuf.length; i++) {
if (delimiterBuf[i] != delimiter[i+1]) {
return false;
}
}
final int count = reader.read(buf, 0, len);
final int count = reader.read(delimiterBuf, 0, delimiterBuf.length);
return count != END_OF_STREAM;
}

@@ -156,25 +159,24 @@ boolean isEscape(final int ch) {
}

/**
* Tests if the next characters constitute a escape delimiter through {@link ExtendedBufferedReader#lookAhead(int)}.
* Tests if the next characters constitute an escape delimiter through {@link ExtendedBufferedReader#lookAhead(char[])}.
*
* For example, for delimiter "[|]" and escape '!', return true if the next characters constitute "![!|!]".
*
* @return true if the next characters constitute an escape delimiter.
* @throws IOException If an I/O error occurs.
*/
boolean isEscapeDelimiter() throws IOException {
final int len = 2 * delimiter.length - 1;
final char[] buf = reader.lookAhead(len);
if (buf[0] != delimiter[0]) {
reader.lookAhead(escapeDelimiterBuf);
if (escapeDelimiterBuf[0] != delimiter[0]) {
return false;
}
for (int i = 1; i < delimiter.length; i++) {
if (buf[2 * i] != delimiter[i] || buf[2 * i - 1] != escape) {
if (escapeDelimiterBuf[2 * i] != delimiter[i] || escapeDelimiterBuf[2 * i - 1] != escape) {
return false;
}
}
final int count = reader.read(buf, 0, len);
final int count = reader.read(escapeDelimiterBuf, 0, escapeDelimiterBuf.length);
return count != END_OF_STREAM;
}

0 comments on commit 3ac702b

Please sign in to comment.