Skip to content

Commit

Permalink
Merge pull request #3842 from ctabin/csv-quoted-nulls
Browse files Browse the repository at this point in the history
Adds support of quotedNulls in CSV handling
  • Loading branch information
katzyn committed Jul 18, 2023
2 parents 2a921a7 + 9a0d1bb commit e493084
Show file tree
Hide file tree
Showing 3 changed files with 128 additions and 12 deletions.
4 changes: 3 additions & 1 deletion h2/src/main/org/h2/res/help.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3382,9 +3382,11 @@ The following options are supported:

""lineSeparator"" (the line separator used for writing; ignored for reading),

""null"", Support reading existing CSV files that contain explicit ""null"" delimiters.
""null"" Support reading existing CSV files that contain explicit ""null"" delimiters.
Note that empty, unquoted values are also treated as null.

""quotedNulls"" (quotes the nullString; true or false; disabled by default),

""preserveWhitespace"" (true or false; disabled by default),

""writeColumnHeader"" (true or false; enabled by default).
Expand Down
43 changes: 38 additions & 5 deletions h2/src/main/org/h2/tools/Csv.java
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ public class Csv implements SimpleRowSource {
private char lineComment;
private String lineSeparator = System.lineSeparator();
private String nullString = "";
private boolean quotedNulls = false;

private String fileName;
private BufferedReader input;
Expand Down Expand Up @@ -270,8 +271,14 @@ private void writeRow(String[] values) throws IOException {
} else {
output.write(s);
}
} else if (nullString != null && nullString.length() > 0) {
output.write(nullString);
} else if (nullString != null) {
if (quotedNulls && fieldDelimiter != 0) {
output.write(fieldDelimiter);
output.write(nullString);
output.write(fieldDelimiter);
} else {
output.write(nullString);
}
}
}
output.write(lineSeparator);
Expand Down Expand Up @@ -537,6 +544,7 @@ public Object[] readRow() throws SQLException {
if (input == null) {
return null;
}

String[] row = new String[columnNames.length];
try {
int i = 0;
Expand All @@ -558,9 +566,13 @@ public Object[] readRow() throws SQLException {
// Empty Strings should be NULL
// in order to prevent conversion of zero-length String
// to Number
row[i++] = v!=null && v.length() > 0
? v
: null;
if (quotedNulls) {
row[i++] = v != null && !v.equals(nullString)
? v
: null;
} else {
row[i++] = v;
}
}
if (endOfLine) {
break;
Expand Down Expand Up @@ -748,6 +760,25 @@ public String getLineSeparator() {
return lineSeparator;
}

/**
* Defines whether the {@link #setNullString(java.lang.String) null string}
* is handled in quoted form. This option is disabled by default.
* <p>
* When enabled, the writer surrounds the null string with the field
* delimiter (provided a field delimiter is set), and the reader maps a
* value equal to the null string to {@code null}.
*
* @param quotedNulls true if the null values must be quoted
*/
public void setQuotedNulls(boolean quotedNulls) {
this.quotedNulls = quotedNulls;
}

/**
* Returns whether the {@link #getNullString() null string} is handled in
* quoted form, as configured through {@link #setQuotedNulls(boolean)}.
*
* @return true if the null values are quoted; false unless it was changed
*/
public boolean isQuotedNulls() {
return quotedNulls;
}

/**
* Set the value that represents NULL. It is only used for non-delimited
* values.
Expand Down Expand Up @@ -834,6 +865,8 @@ public String setOptions(String options) {
setLineSeparator(value);
} else if (isParam(key, "null", "nullString")) {
setNullString(value);
} else if (isParam(key, "quotedNulls")) {
setQuotedNulls(Utils.parseBoolean(value, false, false));
} else if (isParam(key, "charset", "characterSet")) {
charset = value;
} else if (isParam(key, "preserveWhitespace")) {
Expand Down
93 changes: 87 additions & 6 deletions h2/src/test/org/h2/test/db/TestCsv.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.Reader;
Expand All @@ -21,6 +22,7 @@
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.time.LocalDate;
import java.util.ArrayList;
import java.util.Random;
import java.util.concurrent.TimeUnit;
Expand Down Expand Up @@ -73,6 +75,12 @@ public void test() throws Exception {
testPipe();
testReadEmptyNumbers1();
testReadEmptyNumbers2();
testCsvQuotedString1();
testCsvQuotedString2();
testCsvQuotedString3();
testCsvQuotedString4();
testCsvQuotedString5();
testCsvQuotedString6();
deleteDb("csv");
}

Expand Down Expand Up @@ -319,7 +327,7 @@ private void testNull() throws Exception {
assertEquals("D", meta.getColumnLabel(4));
assertTrue(rs.next());
assertEquals(null, rs.getString(1));
assertEquals(null, rs.getString(2));
assertEquals("", rs.getString(2));
// null is never quoted
assertEquals("\\N", rs.getString(3));
// an empty string is always parsed as null
Expand Down Expand Up @@ -369,8 +377,8 @@ private void testRandomData() throws SQLException {
for (int i = 0; i < len; i++) {
assertTrue(rs.next());
String[] pair = list.get(i);
assertEquals(pair[0]!=null && pair[0].isEmpty() ? null : pair[0], rs.getString(1));
assertEquals(pair[1]!=null && pair[1].isEmpty() ? null : pair[1], rs.getString(2));
assertEquals(pair[0], rs.getString(1));
assertEquals(pair[1], rs.getString(2));
}
assertFalse(rs.next());
conn.close();
Expand Down Expand Up @@ -523,7 +531,7 @@ private void testRead() throws Exception {
assertEquals(null, rs.getString(1));
assertEquals("abc\"", rs.getString(2));
assertEquals(null, rs.getString(3));
assertEquals(null, rs.getString(4));
assertEquals("", rs.getString(4));
assertTrue(rs.next());
assertEquals("1", rs.getString(1));
assertEquals("2", rs.getString(2));
Expand Down Expand Up @@ -603,7 +611,9 @@ private void testReadEmptyNumbers1() throws Exception {
out.write(b, 0, b.length);
out.close();

ResultSet rs = new Csv().read(fileName, null, "UTF8");
Csv csv = new Csv();
csv.setQuotedNulls(true);
ResultSet rs = csv.read(fileName, null, "UTF8");
assertTrue(rs.next());
assertNotNull(rs.getString(1));

Expand Down Expand Up @@ -634,9 +644,80 @@ private void testReadEmptyNumbers2() throws Exception {
Connection conn = DriverManager.getConnection("jdbc:h2:mem:test");
Statement stat = conn.createStatement();
stat.execute("CREATE TABLE TEST(TEST DECIMAL(12,2) NULL)");
stat.execute("INSERT INTO TEST SELECT * FROM CsvRead('" + fileName + "')");
stat.execute("INSERT INTO TEST SELECT * FROM CsvRead('" + fileName + "', NULL, 'quotedNulls=true')");

FileUtils.delete(fileName);
}

/**
 * Round trip with quotedNulls disabled and the null string "NULL".
 */
private void testCsvQuotedString1() throws Exception {
    testCsvQuotedNullStrings(false, "NULL");
}
/**
 * Round trip with quotedNulls enabled and the null string "NULL".
 */
private void testCsvQuotedString2() throws Exception {
    testCsvQuotedNullStrings(true, "NULL");
}
/**
 * Round trip with quotedNulls disabled and an empty null string.
 */
private void testCsvQuotedString3() throws Exception {
    testCsvQuotedNullStrings(false, "");
}
/**
 * Round trip with quotedNulls enabled and an empty null string.
 */
private void testCsvQuotedString4() throws Exception {
    testCsvQuotedNullStrings(true, "");
}
/**
 * Round trip with quotedNulls disabled and the null string "$empty".
 */
private void testCsvQuotedString5() throws Exception {
    testCsvQuotedNullStrings(false, "$empty");
}
/**
 * Round trip with quotedNulls enabled and the null string "$empty".
 */
private void testCsvQuotedString6() throws Exception {
    testCsvQuotedNullStrings(true, "$empty");
}

/**
 * Writes a table containing NULL values with CSVWRITE, checks how the null
 * string appears in the written file, then re-imports the file with CSVREAD
 * using the same options and verifies that the NULL values survive the
 * round trip.
 *
 * @param quotedStrings value passed as the quotedNulls option
 * @param nullString value passed as the nullString option
 * @throws Exception on any database, I/O or assertion failure
 */
private void testCsvQuotedNullStrings(boolean quotedStrings, String nullString) throws Exception {
    String fileName = getBaseDir() + "/test.csv";
    FileUtils.delete(fileName);

    deleteDb("csv");
    // The same option string is used for writing and for reading back.
    String options = "quotedNulls=" + quotedStrings + " nullString=" + nullString;

    // try-with-resources: the connection and statement were previously
    // never closed, and the file was left behind on failure.
    try (Connection conn = DriverManager.getConnection("jdbc:h2:mem:test");
            Statement stat = conn.createStatement()) {
        stat.execute("DROP TABLE IF EXISTS TEST");
        stat.execute("CREATE TABLE TEST(ID char(2) NOT NULL, NAME varchar(255), HEIGHT integer, "
                + "BIRTHDATE date, PRIMARY KEY (ID))");
        stat.execute("INSERT INTO TEST VALUES('01', 'Penrosed Roberto', 511, '1958-03-29')");
        stat.execute("INSERT INTO TEST VALUES('02', NULL, 512, '1975-07-12')");
        stat.execute("INSERT INTO TEST VALUES('03', 'Smith John', NULL, '1971-11-03')");
        stat.execute("INSERT INTO TEST VALUES('04', 'Hatchet Eve', 500, NULL)");
        stat.execute("INSERT INTO TEST VALUES('05', NULL, NULL, NULL)");
        stat.execute("CALL CSVWRITE('" + fileName + "', 'SELECT * FROM TEST ORDER BY ID','" + options + "')");

        // Load the raw file content to inspect how NULL was written.
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        try (InputStream fis = FileUtils.newInputStream(fileName)) {
            byte[] buffer = new byte[1024];
            for (int read = fis.read(buffer); read >= 0; read = fis.read(buffer)) {
                baos.write(buffer, 0, read);
            }
        }

        // All data written by this test is ASCII, so decoding with an
        // explicit charset avoids any dependency on the platform default.
        String csvWrittenContent = new String(baos.toByteArray(), java.nio.charset.StandardCharsets.UTF_8);
        String quotedNullString = "\"" + nullString + "\"";
        if (quotedStrings) {
            assertTrue(csvWrittenContent.contains(quotedNullString));
        } else {
            assertTrue(csvWrittenContent.contains(nullString));
            assertFalse(csvWrittenContent.contains(quotedNullString));
        }

        stat.execute("DELETE FROM TEST");
        stat.execute("INSERT INTO TEST SELECT * FROM CSVREAD('" + fileName + "', NULL, '" + options + "')");

        // check imported results
        try (ResultSet rs = stat.executeQuery("SELECT * FROM TEST ORDER BY ID")) {
            for (int i = 1; i <= 5; ++i) {
                assertTrue("Missing record " + i, rs.next());

                if (i == 1) {
                    assertEquals("Penrosed Roberto", rs.getString("NAME"));
                    assertEquals(511, rs.getInt("HEIGHT"));
                    assertEquals(LocalDate.of(1958, 3, 29), rs.getObject("BIRTHDATE", LocalDate.class));
                } else if (i == 2) {
                    assertNull(rs.getString("NAME"));
                } else if (i == 3) {
                    assertNull(rs.getObject("HEIGHT"));
                } else if (i == 4) {
                    assertNull(rs.getDate("BIRTHDATE"));
                } else {
                    assertNull(rs.getString("NAME"));
                    assertNull(rs.getObject("HEIGHT"));
                    assertNull(rs.getDate("BIRTHDATE"));
                }
            }
            // Exactly the five written rows must have been imported.
            assertFalse(rs.next());
        }
    } finally {
        // Remove the temporary file even if an assertion failed.
        FileUtils.delete(fileName);
    }
}
}

0 comments on commit e493084

Please sign in to comment.