From a960047e47f09531979c47aafef63b2f3c4321a6 Mon Sep 17 00:00:00 2001
From: Jacob Smith <jacob.smith@gov.bc.ca>
Date: Fri, 27 Feb 2026 10:55:54 -0800
Subject: [PATCH 1/5] AB#32008 Add Office document text extraction support
 (Word/Excel)

# Conflicts:
#	applications/Unity.GrantManager/src/Unity.GrantManager.Application/AI/TextExtractionService.cs
#	applications/Unity.GrantManager/src/Unity.GrantManager.Application/Unity.GrantManager.Application.csproj
---
 .../AI/TextExtractionService.cs               | 167 ++++++++++++++++--
 .../Unity.GrantManager.Application.csproj     |   1 +
 2 files changed, 152 insertions(+), 16 deletions(-)
diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Application/AI/TextExtractionService.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Application/AI/TextExtractionService.cs
index 3c2b3f2b3..8e7f3d41b 100644
--- a/applications/Unity.GrantManager/src/Unity.GrantManager.Application/AI/TextExtractionService.cs
+++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Application/AI/TextExtractionService.cs
@@ -1,6 +1,10 @@
 using Microsoft.Extensions.Logging;
+using NPOI.SS.UserModel;
+using NPOI.XWPF.UserModel;
 using System;
+using System.Collections.Generic;
 using System.IO;
+using System.Linq;
 using System.Text;
 using System.Text.RegularExpressions;
 using System.Threading.Tasks;
@@ -12,6 +16,12 @@ namespace Unity.GrantManager.AI
     public class TextExtractionService : ITextExtractionService, ITransientDependency
     {
         private const int MaxExtractedTextLength = 50000;
+        private const int MaxExcelSheets = 10;
+        private const int MaxExcelRowsPerSheet = 2000;
+        private const int MaxExcelCellsPerRow = 50;
+        private const int MaxDocxParagraphs = 2000;
+        private const int MaxDocxTableRows = 2000;
+        private const int MaxDocxTableCellsPerRow = 50;
         private readonly ILogger<TextExtractionService> _logger;
 
         public TextExtractionService(ILogger<TextExtractionService> logger)
@@ -29,13 +39,11 @@ public async Task<string> ExtractTextAsync(string fileName, byte[] fileContent,
 
             try
             {
-                // Normalize content type
                 var normalizedContentType = contentType?.ToLowerInvariant() ?? string.Empty;
                 var extension = Path.GetExtension(fileName)?.ToLowerInvariant() ?? string.Empty;
 
                 string rawText;
 
-                // Handle text-based files
                 if (normalizedContentType.Contains("text/") ||
                     extension == ".txt" ||
                     extension == ".csv" ||
@@ -46,37 +54,37 @@ public async Task<string> ExtractTextAsync(string fileName, byte[] fileContent,
                     return NormalizeAndLimitText(rawText, fileName);
                 }
 
-                // Handle PDF files
                 if (normalizedContentType.Contains("pdf") || extension == ".pdf")
                 {
-                    rawText = await Task.FromResult(ExtractTextFromPdfFile(fileName, fileContent));
+                    rawText = ExtractTextFromPdfFile(fileName, fileContent);
                     return NormalizeAndLimitText(rawText, fileName);
                 }
 
-                // Handle Word documents
                 if (normalizedContentType.Contains("word") ||
                     normalizedContentType.Contains("msword") ||
                     normalizedContentType.Contains("officedocument.wordprocessingml") ||
                     extension == ".doc" ||
                     extension == ".docx")
                 {
-                    // For now, return empty string - can be enhanced with Word parsing library
-                    _logger.LogDebug("Word document text extraction not yet implemented for {FileName}", fileName);
+                    if (extension == ".docx" || normalizedContentType.Contains("officedocument.wordprocessingml"))
+                    {
+                        rawText = ExtractTextFromWordDocx(fileContent);
+                        return NormalizeAndLimitText(rawText, fileName);
+                    }
+
+                    _logger.LogDebug("Legacy .doc extraction is not supported for {FileName}", fileName);
                     return string.Empty;
                 }
 
-                // Handle Excel files
                 if (normalizedContentType.Contains("excel") ||
                     normalizedContentType.Contains("spreadsheet") ||
                     extension == ".xls" ||
                     extension == ".xlsx")
                 {
-                    // For now, return empty string - can be enhanced with Excel parsing library
-                    _logger.LogDebug("Excel text extraction not yet implemented for {FileName}", fileName);
-                    return string.Empty;
+                    rawText = ExtractTextFromExcelFile(fileName, fileContent);
+                    return NormalizeAndLimitText(rawText, fileName);
                 }
 
-                // For other file types, return empty string
                 _logger.LogDebug("No text extraction available for content type {ContentType} with extension {Extension}",
                     contentType, extension);
                 return string.Empty;
@@ -92,17 +100,13 @@ private async Task<string> ExtractTextFromTextFileAsync(byte[] fileContent)
         {
             try
             {
-                // Try UTF-8 first
                 var text = Encoding.UTF8.GetString(fileContent);
 
-                // Check if the decoded text contains replacement characters (indicates encoding issue)
                 if (text.Contains('\uFFFD'))
                 {
-                    // Try other encodings
                     text = Encoding.ASCII.GetString(fileContent);
                 }
 
-                // Limit the extracted text to a reasonable size.
                 if (text.Length > MaxExtractedTextLength)
                 {
                     text = text.Substring(0, MaxExtractedTextLength);
@@ -154,6 +158,137 @@ private string ExtractTextFromPdfFile(string fileName, byte[] fileContent)
             }
         }
 
+        private string ExtractTextFromWordDocx(byte[] fileContent)
+        {
+            try
+            {
+                using var stream = new MemoryStream(fileContent, writable: false);
+                using var document = new XWPFDocument(stream);
+                var parts = new List<string>();
+
+                foreach (var paragraph in document.Paragraphs.Take(MaxDocxParagraphs))
+                {
+                    if (!string.IsNullOrWhiteSpace(paragraph.ParagraphText))
+                    {
+                        parts.Add(paragraph.ParagraphText);
+                    }
+                }
+
+                foreach (var table in document.Tables)
+                {
+                    foreach (var row in table.Rows.Take(MaxDocxTableRows))
+                    {
+                        foreach (var cell in row.GetTableCells().Take(MaxDocxTableCellsPerRow))
+                        {
+                            var text = cell.GetText();
+                            if (!string.IsNullOrWhiteSpace(text))
+                            {
+                                parts.Add(text);
+                            }
+                        }
+                    }
+                }
+
+                var combined = string.Join(Environment.NewLine, parts);
+                if (combined.Length > MaxExtractedTextLength)
+                {
+                    combined = combined.Substring(0, MaxExtractedTextLength);
+                }
+
+                return combined;
+            }
+            catch (Exception ex)
+            {
+                _logger.LogWarning(ex, "Word (.docx) text extraction failed");
+                return string.Empty;
+            }
+        }
+
+        private string ExtractTextFromExcelFile(string fileName, byte[] fileContent)
+        {
+            try
+            {
+                using var stream = new MemoryStream(fileContent, writable: false);
+                using var workbook = WorkbookFactory.Create(stream);
+                var rows = new List<string>();
+                var totalLength = 0;
+                var sheetCount = Math.Min(workbook.NumberOfSheets, MaxExcelSheets);
+
+                for (var sheetIndex = 0; sheetIndex < sheetCount; sheetIndex++)
+                {
+                    var sheet = workbook.GetSheetAt(sheetIndex);
+                    if (sheet == null)
+                    {
+                        continue;
+                    }
+
+                    var processedRows = 0;
+                    foreach (IRow row in sheet)
+                    {
+                        if (processedRows >= MaxExcelRowsPerSheet || totalLength >= MaxExtractedTextLength)
+                        {
+                            break;
+                        }
+
+                        var cellTexts = row.Cells
+                            .Take(MaxExcelCellsPerRow)
+                            .Select(GetCellText)
+                            .Where(value => !string.IsNullOrWhiteSpace(value))
+                            .ToList();
+
+                        processedRows++;
+
+                        if (cellTexts.Count == 0)
+                        {
+                            continue;
+                        }
+
+                        var rowText = string.Join(" | ", cellTexts);
+                        rows.Add(rowText);
+                        totalLength += rowText.Length;
+                    }
+
+                    if (totalLength >= MaxExtractedTextLength)
+                    {
+                        break;
+                    }
+                }
+
+                var combined = string.Join(Environment.NewLine, rows);
+                if (combined.Length > MaxExtractedTextLength)
+                {
+                    combined = combined.Substring(0, MaxExtractedTextLength);
+                }
+
+                return combined;
+            }
+            catch (Exception ex)
+            {
+                _logger.LogWarning(ex, "Excel text extraction failed for {FileName}", fileName);
+                return string.Empty;
+            }
+        }
+
+        private static string GetCellText(ICell cell)
+        {
+            if (cell == null)
+            {
+                return string.Empty;
+            }
+
+            // Format with the invariant culture so extracted numbers and dates do not vary with the host's regional settings.
+            var invariant = System.Globalization.CultureInfo.InvariantCulture;
+            return (cell.CellType switch
+            {
+                CellType.String => cell.StringCellValue,
+                CellType.Numeric when DateUtil.IsCellDateFormatted(cell) => Convert.ToString(cell.DateCellValue, invariant),
+                CellType.Numeric => cell.NumericCellValue.ToString(invariant),
+                CellType.Boolean => cell.BooleanCellValue ? "true" : "false",
+                CellType.Blank => string.Empty,
+                _ => cell.ToString() // formulas, errors and unknown cell types fall back to NPOI's own rendering
+            }) ?? string.Empty;
+        }
+
         private string NormalizeAndLimitText(string text, string fileName)
         {
             var normalized = NormalizeExtractedText(text);
diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Application/Unity.GrantManager.Application.csproj b/applications/Unity.GrantManager/src/Unity.GrantManager.Application/Unity.GrantManager.Application.csproj
index 8ec3e53bc..ff57bfd94 100644
--- a/applications/Unity.GrantManager/src/Unity.GrantManager.Application/Unity.GrantManager.Application.csproj
+++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Application/Unity.GrantManager.Application.csproj
@@ -33,6 +33,7 @@
     <PackageReference Include="Quartz.Serialization.Json" Version="3.14.0" />
     <PackageReference Include="RestSharp" Version="112.1.0" />
     <PackageReference Include="PdfPig" Version="0.1.13" />
+    <PackageReference Include="NPOI" Version="2.7.5" />
     <PackageReference Include="Newtonsoft.Json" Version="13.0.3" />
     <PackageReference Include="Volo.Abp.BackgroundWorkers.Quartz" Version="9.1.3" />
     <PackageReference Include="Volo.Abp.BlobStoring" Version="9.1.3" />

From 7ed4d3f3a94e2700c9df5892b134203912b8b4cd Mon Sep 17 00:00:00 2001
From: Jacob Smith <jacob.smith@gov.bc.ca>
Date: Fri, 27 Feb 2026 12:01:21 -0800
Subject: [PATCH 2/5] AB#32008 Resolve ICell specificity error

---
 .../Unity.GrantManager.Application/AI/TextExtractionService.cs  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Application/AI/TextExtractionService.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Application/AI/TextExtractionService.cs
index 8e7f3d41b..8de6b180e 100644
--- a/applications/Unity.GrantManager/src/Unity.GrantManager.Application/AI/TextExtractionService.cs
+++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Application/AI/TextExtractionService.cs
@@ -269,7 +269,7 @@ private string ExtractTextFromExcelFile(string fileName, byte[] fileContent)
             }
         }
 
-        private static string GetCellText(ICell cell)
+        private static string GetCellText(NPOI.SS.UserModel.ICell cell)
         {
             if (cell == null)
             {

From ce760cb67827dc4a9693dc0499af77ad5d1aa0d6 Mon Sep 17 00:00:00 2001
From: Jacob Smith <jacob.smith@gov.bc.ca>
Date: Fri, 27 Feb 2026 16:25:11 -0800
Subject: [PATCH 3/5] AB#32008 Sonar fix simplify docx paragraph extraction
 loop

---
 .../AI/TextExtractionService.cs                             | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Application/AI/TextExtractionService.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Application/AI/TextExtractionService.cs
index 8de6b180e..0f55a62e3 100644
--- a/applications/Unity.GrantManager/src/Unity.GrantManager.Application/AI/TextExtractionService.cs
+++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Application/AI/TextExtractionService.cs
@@ -166,11 +166,11 @@ private string ExtractTextFromWordDocx(byte[] fileContent)
                 using var document = new XWPFDocument(stream);
                 var parts = new List<string>();
 
-                foreach (var paragraph in document.Paragraphs.Take(MaxDocxParagraphs))
+                foreach (var paragraphText in document.Paragraphs.Take(MaxDocxParagraphs).Select(paragraph => paragraph.ParagraphText))
                 {
-                    if (!string.IsNullOrWhiteSpace(paragraph.ParagraphText))
+                    if (!string.IsNullOrWhiteSpace(paragraphText))
                     {
-                        parts.Add(paragraph.ParagraphText);
+                        parts.Add(paragraphText);
                     }
                 }
 

From 5d02ba95714fd89f72a7473688051633bc06f2b8 Mon Sep 17 00:00:00 2001
From: Jacob Smith <jacob.smith@gov.bc.ca>
Date: Mon, 2 Mar 2026 10:05:41 -0800
Subject: [PATCH 4/5] AB#32008 Simplify text extraction async flow and stale
 comments

---
 .../AI/TextExtractionService.cs               | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Application/AI/TextExtractionService.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Application/AI/TextExtractionService.cs
index 0f55a62e3..88080d6a3 100644
--- a/applications/Unity.GrantManager/src/Unity.GrantManager.Application/AI/TextExtractionService.cs
+++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Application/AI/TextExtractionService.cs
@@ -29,12 +29,12 @@ public TextExtractionService(ILogger<TextExtractionService> logger)
             _logger = logger;
         }
 
-        public async Task<string> ExtractTextAsync(string fileName, byte[] fileContent, string contentType)
+        public Task<string> ExtractTextAsync(string fileName, byte[] fileContent, string contentType)
         {
             if (fileContent == null || fileContent.Length == 0)
             {
                 _logger.LogDebug("File content is empty for {FileName}", fileName);
-                return string.Empty;
+                return Task.FromResult(string.Empty);
             }
 
             try
@@ -50,14 +50,14 @@ public async Task<string> ExtractTextAsync(string fileName, byte[] fileContent,
                     extension == ".json" ||
                     extension == ".xml")
                 {
-                    rawText = await ExtractTextFromTextFileAsync(fileContent);
-                    return NormalizeAndLimitText(rawText, fileName);
+                    rawText = ExtractTextFromTextFile(fileContent);
+                    return Task.FromResult(NormalizeAndLimitText(rawText, fileName));
                 }
 
                 if (normalizedContentType.Contains("pdf") || extension == ".pdf")
                 {
                     rawText = ExtractTextFromPdfFile(fileName, fileContent);
-                    return NormalizeAndLimitText(rawText, fileName);
+                    return Task.FromResult(NormalizeAndLimitText(rawText, fileName));
                 }
 
                 if (normalizedContentType.Contains("word") ||
@@ -69,11 +69,11 @@ public async Task<string> ExtractTextAsync(string fileName, byte[] fileContent,
                     if (extension == ".docx" || normalizedContentType.Contains("officedocument.wordprocessingml"))
                     {
                         rawText = ExtractTextFromWordDocx(fileContent);
-                        return NormalizeAndLimitText(rawText, fileName);
+                        return Task.FromResult(NormalizeAndLimitText(rawText, fileName));
                     }
 
                     _logger.LogDebug("Legacy .doc extraction is not supported for {FileName}", fileName);
-                    return string.Empty;
+                    return Task.FromResult(string.Empty);
                 }
 
                 if (normalizedContentType.Contains("excel") ||
@@ -82,21 +82,21 @@ public async Task<string> ExtractTextAsync(string fileName, byte[] fileContent,
                     extension == ".xlsx")
                 {
                     rawText = ExtractTextFromExcelFile(fileName, fileContent);
-                    return NormalizeAndLimitText(rawText, fileName);
+                    return Task.FromResult(NormalizeAndLimitText(rawText, fileName));
                 }
 
                 _logger.LogDebug("No text extraction available for content type {ContentType} with extension {Extension}",
                     contentType, extension);
-                return string.Empty;
+                return Task.FromResult(string.Empty);
             }
             catch (Exception ex)
             {
                 _logger.LogError(ex, "Error extracting text from {FileName}", fileName);
-                return string.Empty;
+                return Task.FromResult(string.Empty);
             }
         }
 
-        private async Task<string> ExtractTextFromTextFileAsync(byte[] fileContent)
+        private string ExtractTextFromTextFile(byte[] fileContent)
         {
             try
             {
@@ -113,7 +113,7 @@ private async Task<string> ExtractTextFromTextFileAsync(byte[] fileContent)
                     _logger.LogDebug("Truncated text content to {MaxLength} characters", MaxExtractedTextLength);
                 }
 
-                return await Task.FromResult(text);
+                return text;
             }
             catch (Exception ex)
             {

From b16d19db495edeb16c17c9f540aed39f7caa9e0e Mon Sep 17 00:00:00 2001
From: Jacob Smith <jacob.smith@gov.bc.ca>
Date: Wed, 4 Mar 2026 11:47:31 -0800
Subject: [PATCH 5/5] AB#32008 Optimize Office text extraction memory usage and
 limits

---
 .../AI/TextExtractionService.cs               | 141 ++++++++++++------
 1 file changed, 95 insertions(+), 46 deletions(-)

diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Application/AI/TextExtractionService.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Application/AI/TextExtractionService.cs
index 88080d6a3..3b6f81b42 100644
--- a/applications/Unity.GrantManager/src/Unity.GrantManager.Application/AI/TextExtractionService.cs
+++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Application/AI/TextExtractionService.cs
@@ -68,7 +68,7 @@ public Task<string> ExtractTextAsync(string fileName, byte[] fileContent, string
                 {
                     if (extension == ".docx" || normalizedContentType.Contains("officedocument.wordprocessingml"))
                     {
-                        rawText = ExtractTextFromWordDocx(fileContent);
+                        rawText = ExtractTextFromWordDocx(fileName, fileContent);
                         return Task.FromResult(NormalizeAndLimitText(rawText, fileName));
                     }
 
@@ -158,48 +158,56 @@ private string ExtractTextFromPdfFile(string fileName, byte[] fileContent)
             }
         }
 
-        private string ExtractTextFromWordDocx(byte[] fileContent)
+        private string ExtractTextFromWordDocx(string fileName, byte[] fileContent)
         {
             try
             {
                 using var stream = new MemoryStream(fileContent, writable: false);
                 using var document = new XWPFDocument(stream);
-                var parts = new List<string>();
+                var builder = new StringBuilder();
 
                 foreach (var paragraphText in document.Paragraphs.Take(MaxDocxParagraphs).Select(paragraph => paragraph.ParagraphText))
                 {
-                    if (!string.IsNullOrWhiteSpace(paragraphText))
+                    var limitReached = AppendWithLimit(builder, paragraphText, MaxExtractedTextLength, Environment.NewLine);
+                    if (limitReached)
                     {
-                        parts.Add(paragraphText);
+                        break;
                     }
                 }
 
-                foreach (var table in document.Tables)
+                if (builder.Length < MaxExtractedTextLength)
                 {
-                    foreach (var row in table.Rows.Take(MaxDocxTableRows))
+                    foreach (var table in document.Tables)
                     {
-                        foreach (var cell in row.GetTableCells().Take(MaxDocxTableCellsPerRow))
+                        foreach (var row in table.Rows.Take(MaxDocxTableRows))
                         {
-                            var text = cell.GetText();
-                            if (!string.IsNullOrWhiteSpace(text))
+                            foreach (var cell in row.GetTableCells().Take(MaxDocxTableCellsPerRow))
                             {
-                                parts.Add(text);
+                                var limitReached = AppendWithLimit(builder, cell.GetText(), MaxExtractedTextLength, Environment.NewLine);
+                                if (limitReached)
+                                {
+                                    break;
+                                }
+                            }
+
+                            if (builder.Length >= MaxExtractedTextLength)
+                            {
+                                break;
                             }
                         }
-                    }
-                }
 
-                var combined = string.Join(Environment.NewLine, parts);
-                if (combined.Length > MaxExtractedTextLength)
-                {
-                    combined = combined.Substring(0, MaxExtractedTextLength);
+                        if (builder.Length >= MaxExtractedTextLength)
+                        {
+                            break;
+                        }
+                    }
                 }
 
-                return combined;
+                return builder.ToString();
             }
             catch (Exception ex)
             {
-                _logger.LogWarning(ex, "Word (.docx) text extraction failed");
+                _logger.LogWarning(ex, "Word (.docx) text extraction failed for {FileName}", fileName);
                 return string.Empty;
             }
         }
@@ -210,12 +218,17 @@ private string ExtractTextFromExcelFile(string fileName, byte[] fileContent)
             {
                 using var stream = new MemoryStream(fileContent, writable: false);
                 using var workbook = WorkbookFactory.Create(stream);
-                var rows = new List<string>();
-                var totalLength = 0;
+                var builder = new StringBuilder();
                 var sheetCount = Math.Min(workbook.NumberOfSheets, MaxExcelSheets);
+                var limitReached = false;
 
                 for (var sheetIndex = 0; sheetIndex < sheetCount; sheetIndex++)
                 {
+                    if (limitReached || builder.Length >= MaxExtractedTextLength)
+                    {
+                        break;
+                    }
+
                     var sheet = workbook.GetSheetAt(sheetIndex);
                     if (sheet == null)
                     {
@@ -225,42 +238,38 @@ private string ExtractTextFromExcelFile(string fileName, byte[] fileContent)
                     var processedRows = 0;
                     foreach (IRow row in sheet)
                     {
-                        if (processedRows >= MaxExcelRowsPerSheet || totalLength >= MaxExtractedTextLength)
+                        if (processedRows >= MaxExcelRowsPerSheet || builder.Length >= MaxExtractedTextLength)
                         {
                             break;
                         }
 
-                        var cellTexts = row.Cells
-                            .Take(MaxExcelCellsPerRow)
-                            .Select(GetCellText)
-                            .Where(value => !string.IsNullOrWhiteSpace(value))
-                            .ToList();
+                        var rowHasValue = false;
+                        foreach (var cell in row.Cells.Take(MaxExcelCellsPerRow))
+                        {
+                            var value = GetCellText(cell);
+                            if (string.IsNullOrWhiteSpace(value))
+                            {
+                                continue;
+                            }
 
-                        processedRows++;
+                            var separator = rowHasValue ? " | " : (builder.Length > 0 ? Environment.NewLine : null);
+                            limitReached = AppendWithLimit(builder, value, MaxExtractedTextLength, separator);
+                            rowHasValue = true;
+                            if (limitReached)
+                            {
+                                break;
+                            }
+                        }
 
-                        if (cellTexts.Count == 0)
+                        processedRows++;
+                        if (limitReached)
                         {
-                            continue;
+                            break;
                         }
-
-                        var rowText = string.Join(" | ", cellTexts);
-                        rows.Add(rowText);
-                        totalLength += rowText.Length;
-                    }
-
-                    if (totalLength >= MaxExtractedTextLength)
-                    {
-                        break;
                     }
                 }
 
-                var combined = string.Join(Environment.NewLine, rows);
-                if (combined.Length > MaxExtractedTextLength)
-                {
-                    combined = combined.Substring(0, MaxExtractedTextLength);
-                }
-
-                return combined;
+                return builder.ToString();
             }
             catch (Exception ex)
             {
@@ -269,6 +278,46 @@ private string ExtractTextFromExcelFile(string fileName, byte[] fileContent)
             }
         }
 
+        /// <summary>Appends a non-blank <paramref name="value"/>, preceded by <paramref name="separator"/> when the builder already has content.</summary>
+        /// <remarks>Output is capped at <paramref name="maxLength"/>; truncating the value never splits a UTF-16 surrogate pair.</remarks>
+        /// <returns><c>true</c> once the limit has been reached and callers should stop appending.</returns>
+        private static bool AppendWithLimit(StringBuilder builder, string? value, int maxLength, string? separator = null)
+        {
+            var remaining = maxLength - builder.Length;
+            if (remaining <= 0)
+            {
+                return true;
+            }
+
+            // Blank values are skipped entirely, so they never leave a dangling separator behind.
+            if (string.IsNullOrWhiteSpace(value))
+            {
+                return false;
+            }
+
+            // The separator goes between entries only, never before the first one.
+            if (!string.IsNullOrEmpty(separator) && builder.Length > 0)
+            {
+                var separatorLength = Math.Min(separator.Length, remaining);
+                builder.Append(separator.AsSpan(0, separatorLength));
+                remaining -= separatorLength;
+                if (remaining == 0)
+                {
+                    return true;
+                }
+            }
+
+            var take = Math.Min(value.Length, remaining);
+            // A lone high surrogate is invalid UTF-16, so back off one char rather than cut a pair in half.
+            if (take < value.Length && char.IsHighSurrogate(value[take - 1]))
+            {
+                take--;
+            }
+
+            builder.Append(value.AsSpan(0, take));
+            return value.Length >= remaining;
+        }
+
         private static string GetCellText(NPOI.SS.UserModel.ICell cell)
         {
             if (cell == null)