Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Loading…

Fix newline right after quote #6

Open
wants to merge 3 commits into from

1 participant

@tivv

PIG-2556 PiggyBank CSVExcelStorage treats "\nData" field incorrectly in multiline mode

@tivv

Actually, there is one more problem: "Test ""quoted""\ndata" is also handled incorrectly. Fixed in the second commit.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Aug 17, 2012
  1. @tivv

    Fix newline right after quote

    tivv authored
  2. @tivv
Commits on Aug 20, 2012
  1. @tivv

    Fix indentation

    tivv authored
This page is out of date. Refresh to see the latest.
View
32 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java
@@ -478,7 +478,6 @@ public Tuple getNext() throws IOException {
mProtoTuple.clear();
getNextInQuotedField = false;
evenQuotesSeen = true;
- sawEmbeddedRecordDelimiter = false;
getNextFieldID = 0;
recordLen = prevLineAndContinuation.length;
@@ -494,16 +493,14 @@ public Tuple getNext() throws IOException {
recordLen = value.getLength();
}
- sawEmbeddedRecordDelimiter = false;
-
nextTupleSkipChar = false;
ByteBuffer fieldBuffer = ByteBuffer.allocate(recordLen);
- sawEmbeddedRecordDelimiter = processOneInRecord(evenQuotesSeen,
- sawEmbeddedRecordDelimiter, buf, recordLen, fieldBuffer);
-
- // The last field is never delimited by a FIELD_DEL, but by
+ sawEmbeddedRecordDelimiter = processOneInRecord(evenQuotesSeen,
+ buf, recordLen, fieldBuffer);
+
+ // The last field is never delimited by a FIELD_DEL, but by
// the end of the record. So we need to add that last field.
// The '!sawEmbeddedRecordDelimiter' handles the case of
// embedded newlines; we are amidst a field, not at
@@ -567,9 +564,9 @@ public Tuple getNext() throws IOException {
* @param fieldBuffer
* @return
*/
- private boolean processOneInRecord(boolean evenQuotesSeen,
- boolean sawEmbeddedRecordDelimiter, byte[] buf, int recordLen,
- ByteBuffer fieldBuffer) {
+ private boolean processOneInRecord(boolean evenQuotesSeen,
+ byte[] buf, int recordLen,
+ ByteBuffer fieldBuffer) {
for (int i = 0; i < recordLen; i++) {
if (nextTupleSkipChar) {
nextTupleSkipChar = false;
@@ -588,17 +585,6 @@ private boolean processOneInRecord(boolean evenQuotesSeen,
if (evenQuotesSeen) {
fieldBuffer.put(DOUBLE_QUOTE);
}
- } else if (i == recordLen - 1) {
- // This is the last char we read from the input stream,
- // but we have an open double quote.
- // We either have a run-away quoted field (i.e. a missing
- // closing field in the record), or we have a field with
- // a record delimiter in it. We assume the latter,
- // and cause the outer while loop to run again, reading
- // more from the stream. Write out the delimiter:
- fieldBuffer.put(b);
- sawEmbeddedRecordDelimiter = true;
- continue;
} else
if (!evenQuotesSeen &&
(b == FIELD_DEL || b == RECORD_DEL)) {
@@ -624,8 +610,8 @@ private boolean processOneInRecord(boolean evenQuotesSeen,
evenQuotesSeen = true;
fieldBuffer.put(b);
}
- } // end for
- return sawEmbeddedRecordDelimiter && (multilineTreatment == Multiline.YES);
+ } // end for
+ return getNextInQuotedField && (multilineTreatment == Multiline.YES);
}
private void readField(ByteBuffer buf, int fieldID) {
View
8 contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java
@@ -69,6 +69,8 @@
",,",
"\"Mac \"\"the knife\"\"\",Cohen,30",
"\"Conrad\nEmil\",Dinger,40",
+ "Emil,\"\nDinger\",40",
+ "Quote problem,\"My \"\"famous\"\"\nsong\",60",
"1st Field,\"A poem that continues\nfor several lines\ndo we\nhandle that?\",Good,Fairy",
};
@@ -81,6 +83,8 @@
add(Util.createTuple(new String[] {"", "", ""}));
add(Util.createTuple(new String[] {"Mac \"the knife\"", "Cohen", "30"}));
add(Util.createTuple(new String[] {"Conrad\nEmil", "Dinger", "40"}));
+ add(Util.createTuple(new String[] {"Emil", "\nDinger", "40"}));
+ add(Util.createTuple(new String[] {"Quote problem", "My \"famous\"\nsong", "60"}));
add(Util.createTuple(new String[] {"1st Field", "A poem that continues\nfor several lines\ndo we\nhandle that?", "Good", "Fairy"}));
}
};
@@ -95,6 +99,10 @@
add(Util.createTuple(new String[] {"Mac \"the knife\"", "Cohen", "30"}));
add(Util.createTuple(new String[] {"Conrad"}));
add(Util.createTuple(new String[] {"Emil,Dinger,40"})); // Trailing double quote after Emil eats rest of line
+ add(Util.createTuple(new String[] {"Emil"}));
+ add(Util.createTuple(new String[] {"Dinger,40"})); // Trailing double quote after Dinger eats rest of line
+ add(Util.createTuple(new String[] {"Quote problem", "My \"famous\""}));
+ add(Util.createTuple(new String[] {"song,60"}));
add(Util.createTuple(new String[] {"1st Field", "A poem that continues"}));
add(Util.createTuple(new String[] {"for several lines"}));
add(Util.createTuple(new String[] {"do we"}));
Something went wrong with that request. Please try again.