From 77439216a70bb1296f844ea370e81e3b167cd36f Mon Sep 17 00:00:00 2001 From: Dawid Weiss Date: Thu, 23 Dec 2021 08:10:52 +0100 Subject: [PATCH] LUCENE-10335: IOUtils.getDecodingReader(Class, String) is broken with modules/ --- .../lucene/analysis/br/BrazilianAnalyzer.java | 3 ++- .../lucene/analysis/ckb/SoraniAnalyzer.java | 3 ++- .../lucene/analysis/cz/CzechAnalyzer.java | 3 ++- .../lucene/analysis/da/DanishAnalyzer.java | 3 ++- .../lucene/analysis/de/GermanAnalyzer.java | 3 ++- .../lucene/analysis/es/SpanishAnalyzer.java | 3 ++- .../lucene/analysis/fi/FinnishAnalyzer.java | 3 ++- .../lucene/analysis/fr/FrenchAnalyzer.java | 3 ++- .../lucene/analysis/gl/GalicianAnalyzer.java | 3 ++- .../lucene/analysis/hu/HungarianAnalyzer.java | 3 ++- .../lucene/analysis/it/ItalianAnalyzer.java | 3 ++- .../lucene/analysis/lv/LatvianAnalyzer.java | 3 ++- .../lucene/analysis/nl/DutchAnalyzer.java | 3 ++- .../lucene/analysis/no/NorwegianAnalyzer.java | 3 ++- .../analysis/pt/PortugueseAnalyzer.java | 3 ++- .../lucene/analysis/ru/RussianAnalyzer.java | 3 ++- .../lucene/analysis/sv/SwedishAnalyzer.java | 3 ++- .../cn/smart/SmartChineseAnalyzer.java | 3 ++- .../lucene/analysis/pl/PolishAnalyzer.java | 3 ++- .../java/org/apache/lucene/util/IOUtils.java | 25 +++++++++++++++++++ .../expressions/js/JavascriptCompiler.java | 5 ++-- 21 files changed, 66 insertions(+), 21 deletions(-) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java index 082a658ecc3..9b4f9e3f969 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java @@ -65,7 +65,8 @@ private static class DefaultSetHolder { DEFAULT_STOP_SET = WordlistLoader.getWordSet( IOUtils.getDecodingReader( - BrazilianAnalyzer.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), + () -> 
BrazilianAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE), + StandardCharsets.UTF_8), "#"); } catch (IOException ex) { // default set should always be present as it is part of the diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java index bf52479ac1e..78d3f9239b7 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java @@ -65,7 +65,8 @@ private static class DefaultSetHolder { DEFAULT_STOP_SET = WordlistLoader.getWordSet( IOUtils.getDecodingReader( - SoraniAnalyzer.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); + () -> SoraniAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE), + StandardCharsets.UTF_8)); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java index 8496b3bd758..84adbd17f18 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java @@ -61,7 +61,8 @@ private static class DefaultSetHolder { DEFAULT_SET = WordlistLoader.getWordSet( IOUtils.getDecodingReader( - CzechAnalyzer.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), + () -> CzechAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE), + StandardCharsets.UTF_8), "#"); } catch (IOException ex) { // default set should always be present as it is part of the diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java index 45b48444ff2..1091303186e 100644 
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java @@ -66,7 +66,8 @@ private static class DefaultSetHolder { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet( IOUtils.getDecodingReader( - SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); + () -> SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE), + StandardCharsets.UTF_8)); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java index 9e2829ac0a2..d60c4aff57d 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java @@ -76,7 +76,8 @@ private static class DefaultSetHolder { DEFAULT_SET = WordlistLoader.getSnowballWordSet( IOUtils.getDecodingReader( - SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); + () -> SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE), + StandardCharsets.UTF_8)); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java index cc6025624e2..633608a733e 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java @@ -65,7 +65,8 @@ private static class DefaultSetHolder { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet( IOUtils.getDecodingReader( - SnowballFilter.class, DEFAULT_STOPWORD_FILE, 
StandardCharsets.UTF_8)); + () -> SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE), + StandardCharsets.UTF_8)); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java index 8119560c233..93f63a05dfe 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java @@ -66,7 +66,8 @@ private static class DefaultSetHolder { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet( IOUtils.getDecodingReader( - SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); + () -> SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE), + StandardCharsets.UTF_8)); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java index 29a4b6f3dd9..3ce87dcdbe6 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java @@ -82,7 +82,8 @@ private static class DefaultSetHolder { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet( IOUtils.getDecodingReader( - SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); + () -> SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE), + StandardCharsets.UTF_8)); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java 
b/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java index 5700dfff905..41b34a1337f 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java @@ -63,7 +63,8 @@ private static class DefaultSetHolder { DEFAULT_STOP_SET = WordlistLoader.getWordSet( IOUtils.getDecodingReader( - GalicianAnalyzer.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); + () -> GalicianAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE), + StandardCharsets.UTF_8)); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java index 01ff42e3d20..117dfee179f 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java @@ -66,7 +66,8 @@ private static class DefaultSetHolder { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet( IOUtils.getDecodingReader( - SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); + () -> SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE), + StandardCharsets.UTF_8)); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java index bd0e42b64f1..bcdda26cbdb 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java @@ -75,7 +75,8 @@ private static class 
DefaultSetHolder { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet( IOUtils.getDecodingReader( - SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); + () -> SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE), + StandardCharsets.UTF_8)); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java index ee545107019..9922d809513 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java @@ -64,7 +64,8 @@ private static class DefaultSetHolder { DEFAULT_STOP_SET = WordlistLoader.getWordSet( IOUtils.getDecodingReader( - LatvianAnalyzer.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); + () -> LatvianAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE), + StandardCharsets.UTF_8)); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java index a002429c205..f182e43c3a0 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java @@ -70,7 +70,8 @@ private static class DefaultSetHolder { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet( IOUtils.getDecodingReader( - SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); + () -> SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE), + StandardCharsets.UTF_8)); } catch (IOException ex) { // default set should always be present as it is part of the // 
distribution (JAR) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java index a949bae9364..64e837eccc7 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java @@ -66,7 +66,8 @@ private static class DefaultSetHolder { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet( IOUtils.getDecodingReader( - SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); + () -> SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE), + StandardCharsets.UTF_8)); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java index a68f89315a1..0c6223f2e22 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java @@ -65,7 +65,8 @@ private static class DefaultSetHolder { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet( IOUtils.getDecodingReader( - SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); + () -> SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE), + StandardCharsets.UTF_8)); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java index 475556f5fa2..a965ac01131 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java +++ 
b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java @@ -53,7 +53,8 @@ private static class DefaultSetHolder { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet( IOUtils.getDecodingReader( - SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); + () -> SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE), + StandardCharsets.UTF_8)); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java index ede41fcc63c..93736a1eaf3 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java @@ -66,7 +66,8 @@ private static class DefaultSetHolder { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet( IOUtils.getDecodingReader( - SnowballFilter.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); + () -> SnowballFilter.class.getResourceAsStream(DEFAULT_STOPWORD_FILE), + StandardCharsets.UTF_8)); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) diff --git a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java index f2e17dd940b..c60520c192d 100644 --- a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java +++ b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java @@ -87,7 +87,8 @@ static CharArraySet loadDefaultStopWordSet() throws IOException { return CharArraySet.unmodifiableSet( WordlistLoader.getWordSet( IOUtils.getDecodingReader( - SmartChineseAnalyzer.class, 
DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), + () -> SmartChineseAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE), + StandardCharsets.UTF_8), STOPWORD_FILE_COMMENT)); } } diff --git a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java index 89d5a426597..f03edc56b45 100644 --- a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java +++ b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java @@ -76,7 +76,8 @@ private static class DefaultsHolder { DEFAULT_STOP_SET = WordlistLoader.getWordSet( IOUtils.getDecodingReader( - PolishAnalyzer.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), + () -> PolishAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE), + StandardCharsets.UTF_8), "#"); } catch (IOException ex) { // default set should always be present as it is part of the diff --git a/lucene/core/src/java/org/apache/lucene/util/IOUtils.java b/lucene/core/src/java/org/apache/lucene/util/IOUtils.java index fc675d4d723..af17272b024 100644 --- a/lucene/core/src/java/org/apache/lucene/util/IOUtils.java +++ b/lucene/core/src/java/org/apache/lucene/util/IOUtils.java @@ -39,6 +39,7 @@ import java.util.LinkedHashMap; import java.util.Map; import java.util.Objects; +import java.util.function.Supplier; import org.apache.lucene.store.Directory; /** @@ -156,6 +157,27 @@ public static Reader getDecodingReader(InputStream stream, Charset charSet) { return new BufferedReader(new InputStreamReader(stream, charSetDecoder)); } + /** + * Opens a Reader for the stream supplied by the provided {@link Supplier} using a {@link + * CharsetDecoder}. Unlike Java's defaults this reader will throw an exception if it detects + * the read charset doesn't match the expected {@link Charset}. + * + *

Decoding readers are useful to load configuration files, stopword lists or synonym files to + * detect character set problems. However, it's not recommended to use as a common purpose reader. + * + * @param streamSupplier A supplier of the input stream for decoding. + * @param charSet the expected charset + * @return a reader to read the given file + */ + public static Reader getDecodingReader(Supplier<InputStream> streamSupplier, Charset charSet) + throws IOException { + var is = streamSupplier.get(); + if (is == null) { + throw new IOException("The input stream for decoding must not be null."); + } + return getDecodingReader(is, charSet); + } + /** * Opens a Reader for the given resource using a {@link CharsetDecoder}. Unlike Java's defaults * this reader will throw an exception if your it detects the read charset doesn't match the @@ -168,7 +190,10 @@ public static Reader getDecodingReader(InputStream stream, Charset charSet) { * @param resource the resource name to load * @param charSet the expected charset * @return a reader to read the given file + * @deprecated This method is caller sensitive and may not work with the module system. Please use + * {@link #getDecodingReader(Supplier, Charset)} instead. 
*/ + @Deprecated public static Reader getDecodingReader(Class clazz, String resource, Charset charSet) throws IOException { InputStream stream = null; diff --git a/lucene/expressions/src/java/org/apache/lucene/expressions/js/JavascriptCompiler.java b/lucene/expressions/src/java/org/apache/lucene/expressions/js/JavascriptCompiler.java index 11744ca6a68..90d10ed356c 100644 --- a/lucene/expressions/src/java/org/apache/lucene/expressions/js/JavascriptCompiler.java +++ b/lucene/expressions/src/java/org/apache/lucene/expressions/js/JavascriptCompiler.java @@ -732,8 +732,9 @@ static int findSingleQuoteStringEnd(String text, int start) { final Properties props = new Properties(); try (Reader in = IOUtils.getDecodingReader( - JavascriptCompiler.class, - JavascriptCompiler.class.getSimpleName() + ".properties", + () -> + JavascriptCompiler.class.getResourceAsStream( + JavascriptCompiler.class.getSimpleName() + ".properties"), StandardCharsets.UTF_8)) { props.load(in); }