From f0fffbb846370c27b7f707538fc59b84b3b99867 Mon Sep 17 00:00:00 2001 From: neo4j-oss-build Date: Tue, 5 Jul 2022 02:46:29 -0700 Subject: [PATCH] Fixes #2932: The apoc.import.csv skipLines config doesn't work correctly (#2984) (#3012) Co-authored-by: Giuseppe Villani --- .../java/apoc/export/csv/CsvEntityLoader.java | 11 ++++++-- .../java/apoc/export/csv/CsvLoaderConfig.java | 3 ++- .../java/apoc/export/csv/ImportCsvTest.java | 25 +++++++++++++++++++ 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/core/src/main/java/apoc/export/csv/CsvEntityLoader.java b/core/src/main/java/apoc/export/csv/CsvEntityLoader.java index 4d7d25fb03..7283c86e92 100644 --- a/core/src/main/java/apoc/export/csv/CsvEntityLoader.java +++ b/core/src/main/java/apoc/export/csv/CsvEntityLoader.java @@ -7,7 +7,9 @@ import apoc.load.Mapping; import apoc.load.util.Results; import apoc.util.FileUtils; +import com.opencsv.CSVParserBuilder; import com.opencsv.CSVReader; +import com.opencsv.CSVReaderBuilder; import org.neo4j.graphdb.*; import org.neo4j.logging.Log; @@ -48,7 +50,6 @@ public void loadNodes(final Object fileName, final List labels, final Gr try (final CountingReader reader = FileUtils.readerFor(fileName, clc.getCompressionAlgo())) { final String header = readFirstLine(reader); - reader.skip(clc.getSkipLines() - 1); final List fields = CsvHeaderFields.processHeader(header, clc.getDelimiter(), clc.getQuotationCharacter()); final Optional idField = fields.stream() @@ -67,7 +68,13 @@ public void loadNodes(final Object fileName, final List labels, final Gr final Map mapping = getMapping(fields); - final CSVReader csv = new CSVReader(reader, clc.getDelimiter(), clc.getQuotationCharacter()); + final CSVReader csv = new CSVReaderBuilder(reader) + .withCSVParser(new CSVParserBuilder() + .withSeparator(clc.getDelimiter()) + .withQuoteChar(clc.getQuotationCharacter()) + .build()) + .withSkipLines(clc.getSkipLines() - 1) + .build(); final String[] loadCsvCompatibleHeader = fields.stream().map(f -> f.getName()).toArray(String[]::new); int lineNo = 0; diff --git a/core/src/main/java/apoc/export/csv/CsvLoaderConfig.java b/core/src/main/java/apoc/export/csv/CsvLoaderConfig.java index f5034c083e..2b375c9d08 100644 --- a/core/src/main/java/apoc/export/csv/CsvLoaderConfig.java +++ b/core/src/main/java/apoc/export/csv/CsvLoaderConfig.java @@ -2,6 +2,7 @@ import apoc.util.CompressionAlgo; import apoc.util.CompressionConfig; +import apoc.util.Util; import java.util.Map; @@ -121,7 +122,7 @@ public static CsvLoaderConfig from(Map config) { if (config.get(ARRAY_DELIMITER) != null) builder.arrayDelimiter(getCharacterOrString(config, ARRAY_DELIMITER)); if (config.get(QUOTATION_CHARACTER) != null) builder.quotationCharacter(getCharacterOrString(config, QUOTATION_CHARACTER)); if (config.get(STRING_IDS) != null) builder.stringIds((boolean) config.get(STRING_IDS)); - if (config.get(SKIP_LINES) != null) builder.skipLines((int) config.get(SKIP_LINES)); + if (config.get(SKIP_LINES) != null) builder.skipLines(Util.toInteger(config.get(SKIP_LINES))); if (config.get(BATCH_SIZE) != null) builder.batchSize((int) config.get(BATCH_SIZE)); if (config.get(IGNORE_DUPLICATE_NODES) != null) builder.ignoreDuplicateNodes((boolean) config.get(IGNORE_DUPLICATE_NODES)); if (config.get(IGNORE_BLANK_STRING) != null) builder.ignoreBlankString((boolean) config.get(IGNORE_BLANK_STRING)); diff --git a/core/src/test/java/apoc/export/csv/ImportCsvTest.java b/core/src/test/java/apoc/export/csv/ImportCsvTest.java index ded643921d..f657ecafad 100644 --- a/core/src/test/java/apoc/export/csv/ImportCsvTest.java +++ b/core/src/test/java/apoc/export/csv/ImportCsvTest.java @@ -189,6 +189,31 @@ public void testNodesWithIds() { List ids = TestUtil.firstColumn(db, "MATCH (n:Person) RETURN n.id AS id ORDER BY id"); assertThat(ids, Matchers.contains(1L, 2L)); } + + @Test + public void testImportCsvWithSkipLines() { + // skip only-header (default config) + testSkipLine(1L, 2); + + // skip header and another one + testSkipLine(2L, 1); + + // skip header and another two (no result because the file has 3 lines) + testSkipLine(3L, 0); + } + + private void testSkipLine(long skipLine, int nodes) { + TestUtil.testCall(db, + "call apoc.import.csv([{fileName: 'id-idspaces.csv', labels: ['SkipLine']}], [], $config)", + map("config", + map("delimiter", '|', "skipLines", skipLine)), + (r) -> assertEquals((long) nodes, r.get("nodes")) + ); + + TestUtil.testCallCount(db, "MATCH (n:SkipLine) RETURN n", nodes); + + db.executeTransactionally("MATCH (n:SkipLine) DETACH DELETE n"); + } @Test public void testNodesAndRelsWithMultiTypes() {