Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ http://s.apache.org/luceneversions

API Changes
---------------------
* GITHUB#15340: PathHierarchyTokenizer now accepts multiple delimiter characters for path tokenization. (David Pilato)

* GITHUB#15215: Switch to Java 25 as the minimum required platform. Upgrade to gradle 9.1.0.
(Robert Muir, Kaival Parikh, Dawid Weiss)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@ public PathHierarchyTokenizer(int bufferSize, char delimiter, char replacement,

/**
 * Single-delimiter convenience constructor. Wraps {@code delimiter} in a one-element array and
 * delegates to the constructor that accepts a {@code char[]} of delimiters; all other arguments
 * are forwarded unchanged.
 *
 * @param factory attribute factory forwarded to the delegate constructor
 * @param bufferSize buffer size forwarded to the delegate constructor
 * @param delimiter the only character treated as a path delimiter
 * @param replacement character forwarded to the delegate constructor as the delimiter replacement
 * @param skip skip count forwarded to the delegate constructor
 */
public PathHierarchyTokenizer(
AttributeFactory factory, int bufferSize, char delimiter, char replacement, int skip) {
this(factory, bufferSize, new char[] {delimiter}, replacement, skip);
}

public PathHierarchyTokenizer(
AttributeFactory factory, int bufferSize, char[] delimiters, char replacement, int skip) {
super(factory);
if (bufferSize < 0) {
throw new IllegalArgumentException("bufferSize cannot be negative");
Expand All @@ -84,7 +89,7 @@ public PathHierarchyTokenizer(
}
termAtt.resizeBuffer(bufferSize);

this.delimiter = delimiter;
this.delimiters = delimiters;
this.replacement = replacement;
this.skip = skip;
resultToken = new StringBuilder(bufferSize);
Expand All @@ -94,7 +99,7 @@ public PathHierarchyTokenizer(
public static final char DEFAULT_DELIMITER = '/';
public static final int DEFAULT_SKIP = 0;

private final char delimiter;
private final char[] delimiters;
private final char replacement;
private final int skip;

Expand Down Expand Up @@ -145,13 +150,13 @@ public final boolean incrementToken() throws IOException {
added = true;
skipped++;
if (skipped > skip) {
termAtt.append(c == delimiter ? replacement : (char) c);
termAtt.append(isDelimiterFound((char) c) ? replacement : (char) c);
length++;
} else {
startPosition++;
}
} else {
if (c == delimiter) {
if (isDelimiterFound((char) c)) {
if (skipped > skip) {
endDelimiter = true;
break;
Expand Down Expand Up @@ -181,6 +186,15 @@ public final boolean incrementToken() throws IOException {
return true;
}

/**
 * Reports whether the given character equals any entry of the configured {@code delimiters}
 * array.
 *
 * @param c the character to look up
 * @return {@code true} if {@code c} matches one of the delimiters, {@code false} otherwise
 */
private boolean isDelimiterFound(char c) {
  // Plain linear scan over the delimiter array; stop at the first match.
  for (int i = 0; i < delimiters.length; i++) {
    if (delimiters[i] == c) {
      return true;
    }
  }
  return false;
}

@Override
public final void end() throws IOException {
super.end();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,30 @@ public void testOnlyDelimitersSkip() throws Exception {
t, new String[] {"/"}, new int[] {1}, new int[] {2}, new int[] {1}, path.length());
}

/**
 * Checks that one tokenizer configured with both the forward slash and the backslash as
 * delimiters tokenizes a Windows-style path and a Unix-style path, emitting the forward slash as
 * the replacement character in both cases.
 */
public void testWindowsAndLinuxPaths() throws Exception {
  final String windowsPath = "c:\\a\\b\\c";
  final String linuxPath = "/a/b/c";
  final char[] separators = {'/', '\\'};
  PathHierarchyTokenizer tokenizer =
      new PathHierarchyTokenizer(newAttributeFactory(), 1024, separators, '/', DEFAULT_SKIP);

  // Windows path: every backslash shows up as '/' in the emitted terms.
  String[] windowsTerms = {"c:", "c:/a", "c:/a/b", "c:/a/b/c"};
  int[] windowsStarts = {0, 0, 0, 0};
  int[] windowsEnds = {2, 4, 6, 8};
  int[] windowsIncrements = {1, 1, 1, 1};
  tokenizer.setReader(new StringReader(windowsPath));
  assertTokenStreamContents(
      tokenizer,
      windowsTerms,
      windowsStarts,
      windowsEnds,
      windowsIncrements,
      windowsPath.length());

  // Same tokenizer instance, reused for a Unix-style path.
  String[] linuxTerms = {"/a", "/a/b", "/a/b/c"};
  int[] linuxStarts = {0, 0, 0};
  int[] linuxEnds = {2, 4, 6};
  int[] linuxIncrements = {1, 1, 1};
  tokenizer.setReader(new StringReader(linuxPath));
  assertTokenStreamContents(
      tokenizer, linuxTerms, linuxStarts, linuxEnds, linuxIncrements, linuxPath.length());
}

/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
Analyzer a =
Expand Down
Loading