Skip to content

Commit

Permalink
LUCENE-10335: IOUtils.getDecodingReader(Class<?>, String) is broken w…
Browse files Browse the repository at this point in the history
…ith modules/
  • Loading branch information
dweiss committed Dec 23, 2021
1 parent 9fe2855 commit 7743921
Show file tree
Hide file tree
Showing 21 changed files with 66 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ private static class DefaultSetHolder {
DEFAULT_STOP_SET =
WordlistLoader.getWordSet(
IOUtils.getDecodingReader(
BrazilianAnalyzer.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8),
() -> BrazilianAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
StandardCharsets.UTF_8),
"#");
} catch (IOException ex) {
// default set should always be present as it is part of the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ private static class DefaultSetHolder {
DEFAULT_STOP_SET =
WordlistLoader.getWordSet(
IOUtils.getDecodingReader(
SoraniAnalyzer.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
() -> SoraniAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ private static class DefaultSetHolder {
DEFAULT_SET =
WordlistLoader.getWordSet(
IOUtils.getDecodingReader(
CzechAnalyzer.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8),
() -> CzechAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
StandardCharsets.UTF_8),
"#");
} catch (IOException ex) {
// default set should always be present as it is part of the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ private static class DefaultSetHolder {
DEFAULT_STOP_SET =
WordlistLoader.getSnowballWordSet(
IOUtils.getDecodingReader(
SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
() -> SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ private static class DefaultSetHolder {
DEFAULT_SET =
WordlistLoader.getSnowballWordSet(
IOUtils.getDecodingReader(
SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
() -> SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ private static class DefaultSetHolder {
DEFAULT_STOP_SET =
WordlistLoader.getSnowballWordSet(
IOUtils.getDecodingReader(
SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
() -> SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ private static class DefaultSetHolder {
DEFAULT_STOP_SET =
WordlistLoader.getSnowballWordSet(
IOUtils.getDecodingReader(
SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
() -> SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ private static class DefaultSetHolder {
DEFAULT_STOP_SET =
WordlistLoader.getSnowballWordSet(
IOUtils.getDecodingReader(
SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
() -> SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ private static class DefaultSetHolder {
DEFAULT_STOP_SET =
WordlistLoader.getWordSet(
IOUtils.getDecodingReader(
GalicianAnalyzer.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
() -> GalicianAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ private static class DefaultSetHolder {
DEFAULT_STOP_SET =
WordlistLoader.getSnowballWordSet(
IOUtils.getDecodingReader(
SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
() -> SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ private static class DefaultSetHolder {
DEFAULT_STOP_SET =
WordlistLoader.getSnowballWordSet(
IOUtils.getDecodingReader(
SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
() -> SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ private static class DefaultSetHolder {
DEFAULT_STOP_SET =
WordlistLoader.getWordSet(
IOUtils.getDecodingReader(
LatvianAnalyzer.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
() -> LatvianAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ private static class DefaultSetHolder {
DEFAULT_STOP_SET =
WordlistLoader.getSnowballWordSet(
IOUtils.getDecodingReader(
SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
() -> SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ private static class DefaultSetHolder {
DEFAULT_STOP_SET =
WordlistLoader.getSnowballWordSet(
IOUtils.getDecodingReader(
SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
() -> SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ private static class DefaultSetHolder {
DEFAULT_STOP_SET =
WordlistLoader.getSnowballWordSet(
IOUtils.getDecodingReader(
SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
() -> SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ private static class DefaultSetHolder {
DEFAULT_STOP_SET =
WordlistLoader.getSnowballWordSet(
IOUtils.getDecodingReader(
SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
() -> SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ private static class DefaultSetHolder {
DEFAULT_STOP_SET =
WordlistLoader.getSnowballWordSet(
IOUtils.getDecodingReader(
SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
() -> SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ static CharArraySet loadDefaultStopWordSet() throws IOException {
return CharArraySet.unmodifiableSet(
WordlistLoader.getWordSet(
IOUtils.getDecodingReader(
SmartChineseAnalyzer.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8),
() -> SmartChineseAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
StandardCharsets.UTF_8),
STOPWORD_FILE_COMMENT));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ private static class DefaultsHolder {
DEFAULT_STOP_SET =
WordlistLoader.getWordSet(
IOUtils.getDecodingReader(
PolishAnalyzer.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8),
() -> PolishAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE),
StandardCharsets.UTF_8),
"#");
} catch (IOException ex) {
// default set should always be present as it is part of the
Expand Down
25 changes: 25 additions & 0 deletions lucene/core/src/java/org/apache/lucene/util/IOUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Objects;
import java.util.function.Supplier;
import org.apache.lucene.store.Directory;

/**
Expand Down Expand Up @@ -156,6 +157,27 @@ public static Reader getDecodingReader(InputStream stream, Charset charSet) {
return new BufferedReader(new InputStreamReader(stream, charSetDecoder));
}

/**
 * Obtains an input stream from the given {@link Supplier} and wraps it in a Reader that decodes
 * using a {@link CharsetDecoder}. Unlike Java's defaults, this reader will throw an exception if
 * it detects that the data read doesn't match the expected {@link Charset}.
 *
 * <p>Decoding readers are useful for loading configuration files, stopword lists or synonym files
 * so that character set problems are detected. They are not recommended as general-purpose
 * readers.
 *
 * @param streamSupplier a supplier of the input stream to decode
 * @param charSet the expected charset
 * @return a reader over the stream returned by the supplier
 * @throws IOException if the supplier returns {@code null}
 */
public static Reader getDecodingReader(Supplier<InputStream> streamSupplier, Charset charSet)
    throws IOException {
  final InputStream suppliedStream = streamSupplier.get();
  if (suppliedStream != null) {
    // Hand off to the InputStream-based overload, which builds the actual reader.
    return getDecodingReader(suppliedStream, charSet);
  }
  throw new IOException("The input stream for decoding must not be null.");
}

/**
* Opens a Reader for the given resource using a {@link CharsetDecoder}. Unlike Java's defaults
* this reader will throw an exception if it detects the read charset doesn't match the
Expand All @@ -168,7 +190,10 @@ public static Reader getDecodingReader(InputStream stream, Charset charSet) {
* @param resource the resource name to load
* @param charSet the expected charset
* @return a reader to read the given file
* @deprecated This method is caller sensitive and may not work with the module system. Please use
* {@link #getDecodingReader(Supplier, Charset)} instead.
*/
@Deprecated
public static Reader getDecodingReader(Class<?> clazz, String resource, Charset charSet)
throws IOException {
InputStream stream = null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -732,8 +732,9 @@ static int findSingleQuoteStringEnd(String text, int start) {
final Properties props = new Properties();
try (Reader in =
IOUtils.getDecodingReader(
JavascriptCompiler.class,
JavascriptCompiler.class.getSimpleName() + ".properties",
() ->
JavascriptCompiler.class.getResourceAsStream(
JavascriptCompiler.class.getSimpleName() + ".properties"),
StandardCharsets.UTF_8)) {
props.load(in);
}
Expand Down

0 comments on commit 7743921

Please sign in to comment.