From fd9a5f67c4b8c1741dc7bc95b12bbcad2379b92c Mon Sep 17 00:00:00 2001
From: David Pilato <david@pilato.fr>
Date: Thu, 16 Oct 2025 14:30:45 +0200
Subject: [PATCH 1/3] Support multiple delimiters for path tokenization

This adds a new constructor to `PathHierarchyTokenizer` that accepts an array of delimiters instead of a single one: `PathHierarchyTokenizer(AttributeFactory, int, char[], char, int)`. The existing single-delimiter constructor is kept and delegates to the new one.

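This allows splitting a path on several delimiters at once (for example both `/` and `\`, so that Windows and Linux paths are handled by the same tokenizer) instead of a single one. Every matched delimiter is emitted as the single replacement character, e.g. `c:\a\b` yields `c:`, `c:/a`, `c:/a/b` when the replacement is `/`.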

Closes #15196.
---
 .../analysis/path/PathHierarchyTokenizer.java | 22 ++++++++++++++----
 .../path/TestPathHierarchyTokenizer.java      | 23 +++++++++++++++++++
 2 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java
index dfd727570342..7bf541fc3397 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java
@@ -75,6 +75,11 @@ public PathHierarchyTokenizer(int bufferSize, char delimiter, char replacement,
 
   public PathHierarchyTokenizer(
       AttributeFactory factory, int bufferSize, char delimiter, char replacement, int skip) {
+    this(factory, bufferSize, new char[] {delimiter}, replacement, skip);
+  }
+
+  public PathHierarchyTokenizer(
+      AttributeFactory factory, int bufferSize, char[] delimiters, char replacement, int skip) {
     super(factory);
     if (bufferSize < 0) {
       throw new IllegalArgumentException("bufferSize cannot be negative");
@@ -84,7 +89,7 @@ public PathHierarchyTokenizer(
     }
     termAtt.resizeBuffer(bufferSize);
 
-    this.delimiter = delimiter;
+    this.delimiters = delimiters;
     this.replacement = replacement;
     this.skip = skip;
     resultToken = new StringBuilder(bufferSize);
@@ -94,7 +99,7 @@ public PathHierarchyTokenizer(
   public static final char DEFAULT_DELIMITER = '/';
   public static final int DEFAULT_SKIP = 0;
 
-  private final char delimiter;
+  private final char[] delimiters;
   private final char replacement;
   private final int skip;
 
@@ -145,13 +150,13 @@ public final boolean incrementToken() throws IOException {
         added = true;
         skipped++;
         if (skipped > skip) {
-          termAtt.append(c == delimiter ? replacement : (char) c);
+          termAtt.append(isDelimiterFound((char) c) ? replacement : (char) c);
           length++;
         } else {
           startPosition++;
         }
       } else {
-        if (c == delimiter) {
+        if (isDelimiterFound((char) c)) {
           if (skipped > skip) {
             endDelimiter = true;
             break;
@@ -181,6 +186,15 @@ public final boolean incrementToken() throws IOException {
     return true;
   }
 
+  /** Returns true if {@code c} equals any of the configured delimiters. */
+  private boolean isDelimiterFound(char c) {
+    int i = 0;
+    while (i < delimiters.length && delimiters[i] != c) {
+      i++;
+    }
+    return i < delimiters.length;
+  }
+
   @Override
   public final void end() throws IOException {
     super.end();
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java
index 1662171c9bac..37bac1e79a43 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java
@@ -237,6 +237,29 @@ public void testOnlyDelimitersSkip() throws Exception {
         t, new String[] {"/"}, new int[] {1}, new int[] {2}, new int[] {1}, path.length());
   }
 
+  public void testWindowsAndLinuxPaths() throws Exception {
+    String path1 = "c:\\a\\b\\c";
+    String path2 = "/a/b/c";
+    PathHierarchyTokenizer t =
+        new PathHierarchyTokenizer(newAttributeFactory(), 1024, new char[] {'/', '\\'}, '/', DEFAULT_SKIP);
+    t.setReader(new StringReader(path1));
+    assertTokenStreamContents(
+        t,
+        new String[] {"c:", "c:/a", "c:/a/b", "c:/a/b/c"},
+        new int[] {0, 0, 0, 0},
+        new int[] {2, 4, 6, 8},
+        new int[] {1, 1, 1, 1},
+        path1.length());
+    t.setReader(new StringReader(path2));
+    assertTokenStreamContents(
+        t,
+        new String[] {"/a", "/a/b", "/a/b/c"},
+        new int[] {0, 0, 0},
+        new int[] {2, 4, 6},
+        new int[] {1, 1, 1},
+        path2.length());
+  }
+
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
     Analyzer a =

From 7f63f5d7c51c29afedc5701692712d03392e50bd Mon Sep 17 00:00:00 2001
From: David Pilato <david@pilato.fr>
Date: Thu, 16 Oct 2025 14:57:06 +0200
Subject: [PATCH 2/3] Format code

---
 .../lucene/analysis/path/TestPathHierarchyTokenizer.java       | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java
index 37bac1e79a43..3b75deef1fa3 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java
@@ -241,7 +241,8 @@ public void testWindowsAndLinuxPaths() throws Exception {
     String path1 = "c:\\a\\b\\c";
     String path2 = "/a/b/c";
     PathHierarchyTokenizer t =
-        new PathHierarchyTokenizer(newAttributeFactory(), 1024, new char[] {'/', '\\'}, '/', DEFAULT_SKIP);
+        new PathHierarchyTokenizer(
+            newAttributeFactory(), 1024, new char[] {'/', '\\'}, '/', DEFAULT_SKIP);
     t.setReader(new StringReader(path1));
     assertTokenStreamContents(
         t,

From 30daf9bdec8c43116e06ed9de3e88623501e4258 Mon Sep 17 00:00:00 2001
From: David Pilato <david@pilato.fr>
Date: Thu, 16 Oct 2025 14:58:50 +0200
Subject: [PATCH 3/3] Add PR to CHANGES.txt

---
 lucene/CHANGES.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 8ba40ef88d06..b4a88a8fd10a 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -7,6 +7,8 @@ http://s.apache.org/luceneversions
 
 API Changes
 ---------------------
+* GITHUB#15340: PathHierarchyTokenizer now supports multiple delimiters. (David Pilato)
+
 * GITHUB#15215: Switch to Java 25 as the minimum required platform. Upgrade to gradle 9.1.0.
   (Robert Muir, Kaival Parikh, Dawid Weiss)