From 39c9255cc8baae1bf36390073f9cd31248533cab Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Tue, 17 Oct 2023 17:33:32 +0200 Subject: [PATCH] NIFI-12238 Fix SplitText endline trimming with max fragment size --- .../nifi/processors/standard/SplitText.java | 26 +++++++++---------- .../processors/standard/TestSplitText.java | 21 +++++++++++++++ 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/SplitText.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/SplitText.java index ca33b4046baa..5797a205af91 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/SplitText.java +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/SplitText.java @@ -459,13 +459,6 @@ private SplitInfo nextSplit(TextLineDemarcator demarcator, long startOffset, lon while ((offsetInfo = demarcator.nextOffsetInfo()) != null) { lastCrlfLength = offsetInfo.getCrlfLength(); - if (offsetInfo.getLength() == offsetInfo.getCrlfLength()) { - trailingCrlfLength += offsetInfo.getCrlfLength(); - trailingLineCount++; - } else if (offsetInfo.getLength() > offsetInfo.getCrlfLength()) { - trailingCrlfLength = 0; // non-empty line came in, thus resetting counter - } - if (length + offsetInfo.getLength() + startingLength > this.maxSplitSize) { if (length == 0) { // single line per split length += offsetInfo.getLength(); @@ -474,12 +467,19 @@ private SplitInfo nextSplit(TextLineDemarcator demarcator, long startOffset, lon remaningOffsetInfo = offsetInfo; } break; - } else { - length += offsetInfo.getLength(); - actualLineCount++; - if (splitMaxLineCount > 0 && actualLineCount >= splitMaxLineCount) { - break; - } + } + + if (offsetInfo.getLength() == offsetInfo.getCrlfLength()) { + trailingCrlfLength += offsetInfo.getCrlfLength(); + trailingLineCount++; + } else if (offsetInfo.getLength() > offsetInfo.getCrlfLength()) { + trailingCrlfLength = 0; // non-empty line came in, thus resetting counter + } + + length += offsetInfo.getLength(); + actualLineCount++; + if (splitMaxLineCount > 0 && actualLineCount >= splitMaxLineCount) { + break; } } diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestSplitText.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestSplitText.java index c76a49fc9708..0f1ec059e7a9 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestSplitText.java +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestSplitText.java @@ -890,4 +890,25 @@ public void testWithEmptyHeaderLines() { splits.get(1).assertContentEquals("\n"); } + @Test + public void testMaxFragmentSizeWithTrimmedEndlines() { + final TestRunner splitRunner = TestRunners.newTestRunner(new SplitText()); + splitRunner.setProperty(SplitText.HEADER_LINE_COUNT, "2"); + splitRunner.setProperty(SplitText.LINE_SPLIT_COUNT, "0"); + splitRunner.setProperty(SplitText.FRAGMENT_MAX_SIZE, "30 B"); + splitRunner.setProperty(SplitText.REMOVE_TRAILING_NEWLINES, "true"); + + splitRunner.enqueue("header1\nheader2\nline1 longer than limit\nline2\nline3\n\n\n\n\n"); + + splitRunner.run(); + splitRunner.assertTransferCount(SplitText.REL_SPLITS, 3); + splitRunner.assertTransferCount(SplitText.REL_ORIGINAL, 1); + splitRunner.assertTransferCount(SplitText.REL_FAILURE, 0); + + final List splits = splitRunner.getFlowFilesForRelationship(SplitText.REL_SPLITS); + splits.get(0).assertContentEquals("header1\nheader2\nline1 longer than limit"); + splits.get(1).assertContentEquals("header1\nheader2\nline2\nline3"); + splits.get(2).assertContentEquals("header1\nheader2"); + } + }