Skip to content

Commit

Permalink
Test to fail how quote/escape configurations are applied to CsvTokenizer
Browse files Browse the repository at this point in the history
  • Loading branch information
dmikurube committed Apr 28, 2023
1 parent 543d5d1 commit cdd9ce3
Show file tree
Hide file tree
Showing 2 changed files with 139 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,10 @@ static class CsvRecordValidateException extends DataException {
}
}

/**
 * Test-only entry point that delegates to {@code buildCsvTokenizerBuilder(PluginTask)}.
 *
 * <p>It is package-private so that tests in the same package can inspect the builder
 * produced from a task without widening the visibility of the private method.
 *
 * @param task the parser task whose settings are applied to the builder
 * @return whatever {@code buildCsvTokenizerBuilder(task)} returns
 */
static CsvTokenizer.Builder buildCsvTokenizerBuilderForTesting(final PluginTask task) {
    final CsvTokenizer.Builder tokenizerBuilder = buildCsvTokenizerBuilder(task);
    return tokenizerBuilder;
}

private static CsvTokenizer.Builder buildCsvTokenizerBuilder(final PluginTask task) {
try {
final CsvTokenizer.Builder builder = CsvTokenizer.builder(task.getDelimiter());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.embulk.config.ConfigException;
import org.embulk.config.ConfigSource;
import org.embulk.util.config.ConfigMapperFactory;
import org.embulk.util.csv.CsvTokenizer;
import org.embulk.util.text.Newline;
import org.junit.Rule;
import org.junit.Test;
Expand Down Expand Up @@ -86,4 +87,138 @@ public void checkLoadConfig() {
assertEquals(Optional.of(new CsvParserPlugin.QuoteCharacter('\\')), task.getQuoteChar());
assertEquals(true, task.getAllowOptionalColumns());
}

/**
 * Checks that {@code quote: "\\"} in the configuration yields a tokenizer builder
 * whose quote character is a backslash.
 */
@SuppressWarnings("deprecation")  // presumably for Builder#peekQuote -- confirm against CsvTokenizer
@Test
public void testCsvTokenizerQuoteBackslash() {
    // A single string column named "id" is all the schema this check needs.
    final ImmutableList<ImmutableMap<String, String>> columns =
            ImmutableList.of(ImmutableMap.of("name", "id", "type", "string"));

    final ConfigSource source = CONFIG_MAPPER_FACTORY.newConfigSource()
            .set("charset", "utf-16")
            .set("newline", "LF")
            .set("header_line", true)
            .set("delimiter", "\t")
            .set("quote", "\\")
            .set("columns", columns);

    final CsvParserPlugin.PluginTask mappedTask =
            CONFIG_MAPPER_FACTORY.createConfigMapper().map(source, CsvParserPlugin.PluginTask.class);

    assertEquals('\\', CsvParserPlugin.buildCsvTokenizerBuilderForTesting(mappedTask).peekQuote());
}

/**
 * Checks that {@code quote: "\""} in the configuration yields a tokenizer builder
 * whose quote character is a double quotation mark.
 */
@SuppressWarnings("deprecation")  // presumably for Builder#peekQuote -- confirm against CsvTokenizer
@Test
public void testCsvTokenizerQuoteQuotation() {
    // A single string column named "id" is all the schema this check needs.
    final ImmutableList<ImmutableMap<String, String>> columns =
            ImmutableList.of(ImmutableMap.of("name", "id", "type", "string"));

    final ConfigSource source = CONFIG_MAPPER_FACTORY.newConfigSource()
            .set("charset", "utf-16")
            .set("newline", "LF")
            .set("header_line", true)
            .set("delimiter", "\t")
            .set("quote", "\"")
            .set("columns", columns);

    final CsvParserPlugin.PluginTask mappedTask =
            CONFIG_MAPPER_FACTORY.createConfigMapper().map(source, CsvParserPlugin.PluginTask.class);

    assertEquals('\"', CsvParserPlugin.buildCsvTokenizerBuilderForTesting(mappedTask).peekQuote());
}

/**
 * Checks that, when the configuration has no {@code quote} entry at all, the tokenizer
 * builder is expected to report a double quotation mark as its quote character.
 */
@SuppressWarnings("deprecation")  // presumably for Builder#peekQuote -- confirm against CsvTokenizer
@Test
public void testCsvTokenizerQuoteUnspecified() {
    // A single string column named "id" is all the schema this check needs.
    final ImmutableList<ImmutableMap<String, String>> columns =
            ImmutableList.of(ImmutableMap.of("name", "id", "type", "string"));

    // Note: "quote" is intentionally absent from this configuration.
    final ConfigSource source = CONFIG_MAPPER_FACTORY.newConfigSource()
            .set("charset", "utf-16")
            .set("newline", "LF")
            .set("header_line", true)
            .set("delimiter", "\t")
            .set("columns", columns);

    final CsvParserPlugin.PluginTask mappedTask =
            CONFIG_MAPPER_FACTORY.createConfigMapper().map(source, CsvParserPlugin.PluginTask.class);

    assertEquals('\"', CsvParserPlugin.buildCsvTokenizerBuilderForTesting(mappedTask).peekQuote());
}

/**
 * Checks that an explicit {@code quote: null} in the configuration yields a tokenizer
 * builder whose quote character equals {@code CsvTokenizer.NO_QUOTE}.
 */
@SuppressWarnings("deprecation")  // presumably for Builder#peekQuote -- confirm against CsvTokenizer
@Test
public void testCsvTokenizerQuoteNull() {
    // A single string column named "id" is all the schema this check needs.
    final ImmutableList<ImmutableMap<String, String>> columns =
            ImmutableList.of(ImmutableMap.of("name", "id", "type", "string"));

    final ConfigSource source = CONFIG_MAPPER_FACTORY.newConfigSource()
            .set("charset", "utf-16")
            .set("newline", "LF")
            .set("header_line", true)
            .set("delimiter", "\t")
            .setNested("quote", null)  // #setNested is needed to set null
            .set("columns", columns);

    final CsvParserPlugin.PluginTask mappedTask =
            CONFIG_MAPPER_FACTORY.createConfigMapper().map(source, CsvParserPlugin.PluginTask.class);

    assertEquals(
            CsvTokenizer.NO_QUOTE,
            CsvParserPlugin.buildCsvTokenizerBuilderForTesting(mappedTask).peekQuote());
}

/**
 * Checks that {@code escape: "\\"} in the configuration yields a tokenizer builder
 * whose escape character is a backslash.
 */
@SuppressWarnings("deprecation")  // presumably for Builder#peekEscape -- confirm against CsvTokenizer
@Test
public void testCsvTokenizerEscapeBackslash() {
    // A single string column named "id" is all the schema this check needs.
    final ImmutableList<ImmutableMap<String, String>> columns =
            ImmutableList.of(ImmutableMap.of("name", "id", "type", "string"));

    final ConfigSource source = CONFIG_MAPPER_FACTORY.newConfigSource()
            .set("charset", "utf-16")
            .set("newline", "LF")
            .set("header_line", true)
            .set("delimiter", "\t")
            .set("escape", "\\")
            .set("columns", columns);

    final CsvParserPlugin.PluginTask mappedTask =
            CONFIG_MAPPER_FACTORY.createConfigMapper().map(source, CsvParserPlugin.PluginTask.class);

    assertEquals('\\', CsvParserPlugin.buildCsvTokenizerBuilderForTesting(mappedTask).peekEscape());
}

/**
 * Checks that {@code escape: "/"} in the configuration yields a tokenizer builder
 * whose escape character is a forward slash.
 */
@SuppressWarnings("deprecation")  // presumably for Builder#peekEscape -- confirm against CsvTokenizer
@Test
public void testCsvTokenizerEscapeSlash() {
    // A single string column named "id" is all the schema this check needs.
    final ImmutableList<ImmutableMap<String, String>> columns =
            ImmutableList.of(ImmutableMap.of("name", "id", "type", "string"));

    final ConfigSource source = CONFIG_MAPPER_FACTORY.newConfigSource()
            .set("charset", "utf-16")
            .set("newline", "LF")
            .set("header_line", true)
            .set("delimiter", "\t")
            .set("escape", "/")
            .set("columns", columns);

    final CsvParserPlugin.PluginTask mappedTask =
            CONFIG_MAPPER_FACTORY.createConfigMapper().map(source, CsvParserPlugin.PluginTask.class);

    assertEquals('/', CsvParserPlugin.buildCsvTokenizerBuilderForTesting(mappedTask).peekEscape());
}

/**
 * Checks that, when the configuration has no {@code escape} entry at all, the tokenizer
 * builder is expected to report a backslash as its escape character.
 */
@SuppressWarnings("deprecation")  // presumably for Builder#peekEscape -- confirm against CsvTokenizer
@Test
public void testCsvTokenizerEscapeUnspecified() {
    // A single string column named "id" is all the schema this check needs.
    final ImmutableList<ImmutableMap<String, String>> columns =
            ImmutableList.of(ImmutableMap.of("name", "id", "type", "string"));

    // Note: "escape" is intentionally absent from this configuration.
    final ConfigSource source = CONFIG_MAPPER_FACTORY.newConfigSource()
            .set("charset", "utf-16")
            .set("newline", "LF")
            .set("header_line", true)
            .set("delimiter", "\t")
            .set("columns", columns);

    final CsvParserPlugin.PluginTask mappedTask =
            CONFIG_MAPPER_FACTORY.createConfigMapper().map(source, CsvParserPlugin.PluginTask.class);

    assertEquals('\\', CsvParserPlugin.buildCsvTokenizerBuilderForTesting(mappedTask).peekEscape());
}

/**
 * Checks that an explicit {@code escape: null} in the configuration yields a tokenizer
 * builder whose escape character equals {@code CsvTokenizer.NO_ESCAPE}.
 */
@SuppressWarnings("deprecation")  // presumably for Builder#peekEscape -- confirm against CsvTokenizer
@Test
public void testCsvTokenizerEscapeNull() {
    // A single string column named "id" is all the schema this check needs.
    final ImmutableList<ImmutableMap<String, String>> columns =
            ImmutableList.of(ImmutableMap.of("name", "id", "type", "string"));

    final ConfigSource source = CONFIG_MAPPER_FACTORY.newConfigSource()
            .set("charset", "utf-16")
            .set("newline", "LF")
            .set("header_line", true)
            .set("delimiter", "\t")
            .setNested("escape", null)  // #setNested is needed to set null
            .set("columns", columns);

    final CsvParserPlugin.PluginTask mappedTask =
            CONFIG_MAPPER_FACTORY.createConfigMapper().map(source, CsvParserPlugin.PluginTask.class);

    assertEquals(
            CsvTokenizer.NO_ESCAPE,
            CsvParserPlugin.buildCsvTokenizerBuilderForTesting(mappedTask).peekEscape());
}
}

0 comments on commit cdd9ce3

Please sign in to comment.