diff --git a/LICENSE b/LICENSE
index 27da2a08f..58a20c820 100644
--- a/LICENSE
+++ b/LICENSE
@@ -230,6 +230,41 @@ The following license applies to the Snowball stemmers:
 	OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 	OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+The following license applies to the bundled stopword lists in
+opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword.
+These lists are derived from Apache Lucene, which redistributes them from
+the Snowball project; the Bulgarian list (bg.txt) was created by Jacques
+Savoy (http://members.unine.ch/jacques.savoy/clef/index.html). They are
+distributed under the BSD license:
+
+	Copyright (c) 2001, Dr Martin Porter
+	Copyright (c) 2002, Richard Boulton
+	Copyright (c) Jacques Savoy
+	All rights reserved.
+
+	Redistribution and use in source and binary forms, with or without
+	modification, are permitted provided that the following conditions are met:
+
+	    * Redistributions of source code must retain the above copyright notice,
+	    * this list of conditions and the following disclaimer.
+	    * Redistributions in binary form must reproduce the above copyright
+	    * notice, this list of conditions and the following disclaimer in the
+	    * documentation and/or other materials provided with the distribution.
+	    * Neither the name of the copyright holders nor the names of its contributors
+	    * may be used to endorse or promote products derived from this software
+	    * without specific prior written permission.
+
+	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+	AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+	IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+	DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+	FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+	DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+	SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+	CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+	OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+	OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 The following license applies to the Wordpiece tokenizer implementation:
 
     The MIT License (MIT)
diff --git a/NOTICE b/NOTICE
index 0a9b44508..bef1f6f2a 100644
--- a/NOTICE
+++ b/NOTICE
@@ -14,6 +14,19 @@ http://snowball.tartarus.org/
 
 ============================================================================
 
+The bundled stopword lists in
+opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword
+are derived from Apache Lucene
+(https://github.com/apache/lucene/tree/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis),
+which in turn distributes them under the BSD license from the Snowball project
+(https://snowballstem.org/license.html). The Bulgarian list (bg.txt) is the
+Lucene per-language Bulgarian stopwords file originally created by Jacques
+Savoy (http://members.unine.ch/jacques.savoy/clef/index.html) and also
+distributed under the BSD license. The original upstream license and
+attribution headers are preserved verbatim at the top of each bundled file.
+
+============================================================================
+
 The Wordpiece tokenizer in opennlp-tools/main/java/opennlp/tools/tokenize
 is taken from https://github.com/robrua/easy-bert licensed under
 
diff --git a/README.md b/README.md
index 10a5ad074..d9c326434 100644
--- a/README.md
+++ b/README.md
@@ -30,7 +30,7 @@ The Apache OpenNLP library is a machine learning based toolkit for the processin
 
 This toolkit is written completely in Java and provides support for common NLP tasks, such as tokenization,
  sentence segmentation, part-of-speech tagging, named entity extraction, chunking, parsing,
-  coreference resolution, language detection and more!
+  coreference resolution, language detection, stopword filtering (with bundled lists for 11 languages) and more!
 
 These tasks are usually required to build more advanced text processing services.
 
diff --git a/opennlp-api/src/main/java/opennlp/tools/stopword/StopwordFilter.java b/opennlp-api/src/main/java/opennlp/tools/stopword/StopwordFilter.java
new file mode 100644
index 000000000..abdfe7b16
--- /dev/null
+++ b/opennlp-api/src/main/java/opennlp/tools/stopword/StopwordFilter.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.stopword;
+
+import java.util.Set;
+
+/**
+ * A pluggable filter that decides whether a token (or a sequence of tokens)
+ * is a stopword that should be removed during downstream text processing.
+ * <p>
+ * Implementations may be backed by a static bundled list, a user-supplied
+ * file, an in-memory data structure, or any other source.
+ * Both single-token and multi-token (n-gram) membership tests are supported.
+ *
+ * @see opennlp.tools.util.LanguageCodeValidator
+ */
+public interface StopwordFilter {
+
+  /**
+   * Checks whether the given token is a single-token stopword.
+   * Equivalent to {@code isStopword(new String[] { token.toString() })} when
+   * {@code token} is non-{@code null}.
+   *
+   * @param token The token to test. May be {@code null}, in which case
+   *     implementations should return {@code false}.
+   * @return {@code true} if {@code token} is registered as a single-token
+   *     stopword, {@code false} otherwise.
+   */
+  boolean isStopword(final CharSequence token);
+
+  /**
+   * Checks whether the given sequence of tokens is a multi-token stopword
+   * (n-gram). For a single token this is equivalent to
+   * {@link #isStopword(CharSequence)}.
+   *
+   * @param tokens The tokens to test as one entry. May be {@code null} or
+   *     empty, in which case implementations should return {@code false}.
+   * @return {@code true} if the sequence is registered as a stopword,
+   *     {@code false} otherwise.
+   */
+  boolean isStopword(final String... tokens);
+
+  /**
+   * Returns a copy of {@code tokens} with stopword matches removed,
+   * preserving the input order.
+   * <p>
+   * Implementations should honor both 1-gram and n-gram entries. A
+   * recommended strategy is a greedy left-to-right window scan: at each
+   * position try the longest registered window first; if it matches, skip
+   * those tokens; otherwise advance by one and keep the current token.
+   * Implementations that do not support n-gram entries may fall back to
+   * 1-gram filtering.
+   *
+   * @param tokens The input token array. Must not be {@code null}.
+   *     Individual array elements may be {@code null} and are kept as-is.
+   * @return A new array containing the surviving tokens. Never {@code null}.
+   * @throws IllegalArgumentException if {@code tokens} is {@code null}.
+   */
+  String[] filter(final String[] tokens);
+
+  /**
+   * @return {@code true} if this filter performs case-sensitive matching;
+   *     {@code false} if matching is case-insensitive.
+   */
+  boolean isCaseSensitive();
+
+  /**
+   * Returns an unmodifiable snapshot of the registered single-token
+   * stopwords. Multi-token (n-gram) entries are not included in this set
+   * and must be tested via {@link #isStopword(String...)}.
+   * <p>
+   * This method itself does not throw. Mutator methods of the returned
+   * {@link Set}, such as {@code add} or {@code remove}, are expected to
+   * fail with an {@link UnsupportedOperationException}.
+   *
+   * @return An unmodifiable {@link Set} of stopwords. Never {@code null}.
+   */
+  Set<String> stopwords();
+}
diff --git a/opennlp-core/opennlp-cli/src/main/java/opennlp/tools/cmdline/CLI.java b/opennlp-core/opennlp-cli/src/main/java/opennlp/tools/cmdline/CLI.java
index 589212189..e09be4a8f 100644
--- a/opennlp-core/opennlp-cli/src/main/java/opennlp/tools/cmdline/CLI.java
+++ b/opennlp-core/opennlp-cli/src/main/java/opennlp/tools/cmdline/CLI.java
@@ -77,6 +77,7 @@
 import opennlp.tools.cmdline.sentiment.SentimentCrossValidatorTool;
 import opennlp.tools.cmdline.sentiment.SentimentEvaluatorTool;
 import opennlp.tools.cmdline.sentiment.SentimentTrainerTool;
+import opennlp.tools.cmdline.stopword.StopwordFilterTool;
 import opennlp.tools.cmdline.tokenizer.DictionaryDetokenizerTool;
 import opennlp.tools.cmdline.tokenizer.SimpleTokenizerTool;
 import opennlp.tools.cmdline.tokenizer.TokenizerConverterTool;
@@ -130,6 +131,9 @@ public final class CLI {
     tools.add(new TokenizerConverterTool());
     tools.add(new DictionaryDetokenizerTool());
 
+    // Stopword filter
+    tools.add(new StopwordFilterTool());
+
     // Sentence detector
     tools.add(new SentenceDetectorTool());
     tools.add(new SentenceDetectorTrainerTool());
diff --git a/opennlp-core/opennlp-cli/src/main/java/opennlp/tools/cmdline/stopword/StopwordFilterTool.java b/opennlp-core/opennlp-cli/src/main/java/opennlp/tools/cmdline/stopword/StopwordFilterTool.java
new file mode 100644
index 000000000..39700dfc5
--- /dev/null
+++ b/opennlp-core/opennlp-cli/src/main/java/opennlp/tools/cmdline/stopword/StopwordFilterTool.java
@@ -0,0 +1,148 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.cmdline.stopword;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.PrintWriter;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.InvalidPathException;
+import java.nio.file.NoSuchFileException;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import opennlp.tools.cmdline.BasicCmdLineTool;
+import opennlp.tools.cmdline.CLI;
+import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.stopword.StopwordFilter;
+import opennlp.tools.stopword.StopwordLists;
+
+/**
+ * A command line tool that filters stop words from whitespace-separated
+ * tokens read on standard input and prints the kept tokens to standard
+ * output, one input line per output line.
+ *
+ * <p>Usage: {@code opennlp StopwordFilter <lang|file>}. The single argument is
+ * either an ISO 639 language code matching one of the bundled lists, or a path
+ * to a custom stopword list file (one entry per line, {@code #} comments and
+ * blank lines ignored, loaded case-insensitively). The tokens to filter are
+ * always read from standard input. A bundled language code takes precedence;
+ * to force loading a file whose name happens to be a language code, qualify it
+ * with a path (e.g. {@code ./en}).
+ */
+public final class StopwordFilterTool extends BasicCmdLineTool {
+
+  @Override
+  public String getShortDescription() {
+    return "filters stop words from tokens read on stdin";
+  }
+
+  @Override
+  public String getHelp() {
+    return "Usage: " + CLI.CMD + " " + getName() + " <lang|file>\n"
+        + "  <lang> ISO 639 code of a bundled list; supported: "
+        + StopwordLists.supportedLanguages() + "\n"
+        + "  <file> path to a custom stopword list (one entry per line; "
+        + "'#' comments and blank lines ignored)";
+  }
+
+  @Override
+  public boolean hasParams() {
+    return true;
+  }
+
+  /** Filters every line read from stdin with the filter selected by the single argument. */
+  @Override
+  public void run(final String[] args) {
+    if (args.length != 1) {
+      System.out.println(getHelp());
+      return;
+    }
+
+    final StopwordFilter filter = resolveFilter(args[0]);
+
+    try (BufferedReader reader = new BufferedReader(
+        new InputStreamReader(System.in, StandardCharsets.UTF_8));
+         PrintWriter writer = new PrintWriter(
+             new java.io.OutputStreamWriter(System.out, StandardCharsets.UTF_8))) {
+      String line;
+      while ((line = reader.readLine()) != null) {
+        // Trim before splitting: leading whitespace would otherwise yield an empty first token.
+        final String trimmed = line.trim();
+        if (trimmed.isEmpty()) {
+          writer.println();
+          continue;
+        }
+        final String[] kept = filter.filter(trimmed.split("\\s+"));
+        writer.println(String.join(" ", kept));
+      }
+      writer.flush();
+    } catch (final IOException e) {
+      throw new TerminateToolException(1, "Error reading from stdin: " + e.getMessage(), e);
+    }
+  }
+
+  /**
+   * Resolves the {@code <lang|file>} argument: a bundled language code wins, otherwise the
+   * argument is read as a UTF-8 stopword list file via
+   * {@link StopwordLists#load(InputStream, java.nio.charset.Charset, boolean)}.
+   * @throws TerminateToolException if it is neither a bundled code nor a readable file.
+   */
+  private static StopwordFilter resolveFilter(final String source) {
+    final StopwordFilter bundled = tryBundled(source);
+    if (bundled != null) {
+      return bundled;
+    }
+
+    final Path path;
+    try {
+      path = Paths.get(source);
+    } catch (final InvalidPathException e) {
+      throw new TerminateToolException(1, neitherMessage(source), e);
+    }
+
+    try (InputStream in = Files.newInputStream(path)) {
+      return StopwordLists.load(in, StandardCharsets.UTF_8, false);
+    } catch (final NoSuchFileException e) {
+      throw new TerminateToolException(1, neitherMessage(source), e);
+    } catch (final IOException e) {
+      throw new TerminateToolException(1,
+          "Error reading stopword list file '" + source + "': " + e.getMessage(), e);
+    }
+  }
+
+  /**
+   * Looks up the bundled list for {@code code}.
+   * @return The bundled {@link StopwordFilter}, or {@code null} when the lookup is rejected.
+   */
+  private static StopwordFilter tryBundled(final String code) {
+    try {
+      return StopwordLists.forLanguage(code);
+    } catch (final IllegalArgumentException ignored) {
+      return null; // rejected as a bundled code; resolveFilter then tries it as a file path
+    }
+  }
+
+  private static String neitherMessage(final String source) {
+    return "'" + source + "' is neither a supported language code "
+        + StopwordLists.supportedLanguages() + " nor an existing file.";
+  }
+}
diff --git a/opennlp-core/opennlp-cli/src/test/java/opennlp/tools/cmdline/stopword/StopwordFilterToolTest.java b/opennlp-core/opennlp-cli/src/test/java/opennlp/tools/cmdline/stopword/StopwordFilterToolTest.java
new file mode 100644
index 000000000..fe997f8d4
--- /dev/null
+++ b/opennlp-core/opennlp-cli/src/test/java/opennlp/tools/cmdline/stopword/StopwordFilterToolTest.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.cmdline.stopword;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+import opennlp.tools.cmdline.TerminateToolException;
+
+public class StopwordFilterToolTest {
+
+  private InputStream originalIn;
+  private PrintStream originalOut;
+  private ByteArrayOutputStream capturedOut;
+
+  @BeforeEach
+  void redirectStreams() {
+    originalIn = System.in;
+    originalOut = System.out;
+    capturedOut = new ByteArrayOutputStream();
+    System.setOut(new PrintStream(capturedOut, true, StandardCharsets.UTF_8));
+  }
+
+  @AfterEach
+  void restoreStreams() {
+    System.setIn(originalIn);
+    System.setOut(originalOut);
+  }
+
+  @Test
+  void filtersEnglishStopwordsFromStdin() {
+    final String in = "the quick brown fox\n";
+    System.setIn(new ByteArrayInputStream(in.getBytes(StandardCharsets.UTF_8)));
+
+    StopwordFilterTool tool = new StopwordFilterTool();
+    tool.run(new String[] {"en"});
+
+    String out = capturedOut.toString(StandardCharsets.UTF_8);
+    int idxQuick = out.indexOf("quick");
+    int idxBrown = out.indexOf("brown");
+    int idxFox = out.indexOf("fox");
+
+    Assertions.assertTrue(idxQuick >= 0, "Expected 'quick' in output, was: " + out);
+    Assertions.assertTrue(idxBrown > idxQuick, "Expected 'brown' after 'quick' in output, was: " + out);
+    Assertions.assertTrue(idxFox > idxBrown, "Expected 'fox' after 'brown' in output, was: " + out);
+    Assertions.assertFalse(out.contains("the"),
+        "Did not expect 'the' to appear in output, was: " + out);
+  }
+
+  @Test
+  void printsHelpWhenNoArgs() {
+    // empty stdin in case run() reads it
+    System.setIn(new ByteArrayInputStream(new byte[0]));
+
+    StopwordFilterTool tool = new StopwordFilterTool();
+    tool.run(new String[] {});
+
+    String out = capturedOut.toString(StandardCharsets.UTF_8);
+    Assertions.assertTrue(out.contains("Usage") || out.contains("lang"),
+        "Expected help message containing 'Usage' or 'lang', was: " + out);
+  }
+
+  @Test
+  void unknownLanguageOrFileThrows() {
+    System.setIn(new ByteArrayInputStream(new byte[0]));
+
+    // "xx" is neither a supported bundled code nor an existing file, so the
+    // tool reports a terminate error rather than silently doing nothing.
+    StopwordFilterTool tool = new StopwordFilterTool();
+    Assertions.assertThrows(TerminateToolException.class,
+        () -> tool.run(new String[] {"xx"}));
+  }
+
+  @Test
+  void filtersUsingCustomListFile(@TempDir Path tmp) throws IOException {
+    final Path list = tmp.resolve("custom-stopwords.txt");
+    Files.write(list, List.of("# custom list", "brown", "fox"), StandardCharsets.UTF_8);
+
+    System.setIn(new ByteArrayInputStream(
+        "the quick brown fox".getBytes(StandardCharsets.UTF_8)));
+
+    StopwordFilterTool tool = new StopwordFilterTool();
+    tool.run(new String[] {list.toString()});
+
+    final String out = capturedOut.toString(StandardCharsets.UTF_8).trim();
+    // The custom list drops "brown" and "fox"; "the"/"quick" are kept since
+    // they are not in this list.
+    Assertions.assertEquals("the quick", out);
+  }
+}
diff --git a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/stopword/DictionaryStopwordFilter.java b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/stopword/DictionaryStopwordFilter.java
new file mode 100644
index 000000000..13d857591
--- /dev/null
+++ b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/stopword/DictionaryStopwordFilter.java
@@ -0,0 +1,418 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.stopword;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.io.UncheckedIOException;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Set;
+
+import opennlp.tools.commons.ThreadSafe;
+import opennlp.tools.dictionary.Dictionary;
+import opennlp.tools.util.StringList;
+
+/**
+ * An immutable, thread-safe {@link StopwordFilter} backed by an OpenNLP
+ * {@link Dictionary}.
+ * <p>
+ * The backing store supports both 1-gram and n-gram entries. Multi-word
+ * entries are queried via {@link #isStopword(String...)}; the
+ * {@link #filter(String[])} method performs a greedy left-to-right window
+ * scan, preferring the longest registered match at each position.
+ * <p>
+ * Instances are constructed once and never modified afterwards. Use the
+ * {@link Builder} ({@link #builder()}) to assemble a filter from one or
+ * more sources (programmatic entries, an input stream, an existing
+ * {@link Dictionary}), or the public constructors for the common cases.
+ * <p>
+ * <strong>Thread-safety:</strong> instances are immutable after
+ * construction and may be shared freely across threads without external
+ * synchronization. All fields are {@code final}; the only mutation of the
+ * backing {@link Dictionary} happens inside the constructor / builder before
+ * the instance is published.
+ */
+@ThreadSafe
+public final class DictionaryStopwordFilter implements StopwordFilter {
+
+  private static final String COMMENT_PREFIX = "#";
+
+  private final Dictionary backing;
+
+  /**
+   * Loads a stopword list from the given input stream and freezes it into
+   * an immutable filter. The stream is closed after reading, even on failure.
+   * <p>
+   * Format: text in the supplied {@link Charset}, one entry per line.
+   * Whitespace-separated tokens on the same line form one multi-word entry.
+   * Blank lines and lines whose first non-blank character is {@code #} are skipped.
+   *
+   * @param in The input stream to read from. Must not be {@code null}.
+   * @param cs The {@link Charset} to decode with. Must not be {@code null}.
+   * @param caseSensitive Whether matching is case-sensitive.
+   * @throws IllegalArgumentException if {@code in} or {@code cs} is
+   *     {@code null}.
+   * @throws IOException Thrown if an IO error occurs while reading.
+   */
+  public DictionaryStopwordFilter(final InputStream in, final Charset cs,
+                                  final boolean caseSensitive) throws IOException {
+    if (in == null) {
+      throw new IllegalArgumentException("in must not be null");
+    }
+    if (cs == null) {
+      throw new IllegalArgumentException("cs must not be null");
+    }
+    this.backing = parseStream(in, cs, caseSensitive);
+  }
+
+  /**
+   * Creates an immutable filter from a defensive copy of {@code source}.
+   * Subsequent mutation of {@code source} does not affect this filter.
+   *
+   * @param source The dictionary whose contents seed the filter. Must not
+   *     be {@code null}.
+   * @throws IllegalArgumentException if {@code source} is {@code null}.
+   */
+  public DictionaryStopwordFilter(final Dictionary source) {
+    if (source == null) {
+      throw new IllegalArgumentException("source must not be null");
+    }
+    final Dictionary copy = new Dictionary(source.isCaseSensitive());
+    for (final StringList entry : source) {
+      copy.put(entry);
+    }
+    this.backing = copy;
+  }
+
+  /**
+   * @return A new {@link Builder} that assembles a {@link DictionaryStopwordFilter}.
+   */
+  public static Builder builder() {
+    return new Builder();
+  }
+
+  /**
+   * Convenience factory equivalent to
+   * {@link #DictionaryStopwordFilter(InputStream, Charset, boolean)} but
+   * wrapping any {@link IOException} thrown during reading in an
+   * {@link UncheckedIOException}. Useful in contexts where a checked
+   * exception is inconvenient (e.g. lambdas, static initializers).
+   *
+   * @param in The input stream. Must not be {@code null}.
+   * @param cs The charset. Must not be {@code null}.
+   * @param caseSensitive Whether matching is case-sensitive.
+   * @return A new filter loaded from {@code in}.
+   * @throws IllegalArgumentException if {@code in} or {@code cs} is
+   *     {@code null}.
+   * @throws UncheckedIOException if an IO error occurs while reading from
+   *     {@code in}.
+   */
+  public static DictionaryStopwordFilter loadUnchecked(final InputStream in,
+                                                       final Charset cs,
+                                                       final boolean caseSensitive) {
+    try {
+      return new DictionaryStopwordFilter(in, cs, caseSensitive);
+    } catch (final IOException e) {
+      throw new UncheckedIOException(e);
+    }
+  }
+
+  /**
+   * {@inheritDoc}
+   *
+   * @param token The token to test. May be {@code null}, in which case this
+   *     method returns {@code false}.
+   * @return {@code true} if {@code token} is registered as a single-token
+   *     stopword, {@code false} otherwise.
+   */
+  @Override
+  public boolean isStopword(final CharSequence token) {
+    if (token == null) {
+      return false;
+    }
+    return backing.contains(new StringList(token.toString()));
+  }
+
+  /**
+   * {@inheritDoc}
+   *
+   * @param tokens The candidate entry, one element per token. A {@code null}
+   *     or empty array, or any {@code null} element, yields {@code false}.
+   * @return {@code true} if the backing dictionary contains the sequence,
+   *     {@code false} otherwise.
+   */
+  @Override
+  public boolean isStopword(final String... tokens) {
+    if (tokens == null || tokens.length == 0) {
+      return false;
+    }
+    // Null elements never match; answer early, before a StringList is
+    // built from the sequence.
+    if (Arrays.asList(tokens).contains(null)) {
+      return false;
+    }
+    return backing.contains(new StringList(tokens));
+  }
+
+  /**
+   * {@inheritDoc}
+   *
+   * <p>Performs a greedy left-to-right window scan: at each position the
+   * longest registered window is tried first. If it matches, those tokens
+   * are dropped; otherwise the position advances by one and the current
+   * token is kept. {@code null} elements never participate in a window and
+   * are kept as-is.
+   *
+   * @throws IllegalArgumentException if {@code tokens} is {@code null}.
+   */
+  @Override
+  public String[] filter(final String[] tokens) {
+    if (tokens == null) {
+      throw new IllegalArgumentException("tokens must not be null");
+    }
+    final int maxWindow = backing.getMaxTokenCount();
+    final List<String> kept = new ArrayList<>(tokens.length);
+    int i = 0;
+    while (i < tokens.length) {
+      int matched = 0;
+      // Try the longest possible window first, decreasing down to 1.
+      for (int w = Math.min(maxWindow, tokens.length - i); w >= 1; w--) {
+        if (containsAnyNullInWindow(tokens, i, w)) {
+          continue;
+        }
+        final String[] window = Arrays.copyOfRange(tokens, i, i + w);
+        if (backing.contains(new StringList(window))) {
+          matched = w;
+          break;
+        }
+      }
+      if (matched > 0) {
+        i += matched;
+      } else {
+        kept.add(tokens[i]);
+        i++;
+      }
+    }
+    return kept.toArray(new String[0]);
+  }
+
+  private static boolean containsAnyNullInWindow(final String[] tokens,
+                                                 final int start, final int len) {
+    for (int k = 0; k < len; k++) {
+      if (tokens[start + k] == null) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  @Override
+  public boolean isCaseSensitive() {
+    return backing.isCaseSensitive();
+  }
+
+  /**
+   * {@inheritDoc}
+   * <p>The result wraps {@link Dictionary#asStringSet()} of the backing dictionary.
+   * NOTE(review): confirm that view omits n-gram entries, as the interface requires.
+   *
+   * @return An unmodifiable {@link Set}; its mutator methods throw
+   *     {@link UnsupportedOperationException}. Never {@code null}.
+   */
+  @Override
+  public Set<String> stopwords() {
+    return Collections.unmodifiableSet(backing.asStringSet());
+  }
+
+  private static Dictionary parseStream(final InputStream in, final Charset cs,
+                                        final boolean caseSensitive) throws IOException {
+    final Dictionary dict = new Dictionary(caseSensitive);
+    try (Reader reader = new InputStreamReader(in, cs);
+         BufferedReader lineReader = new BufferedReader(reader)) {
+      String line;
+      while ((line = lineReader.readLine()) != null) {
+        final String trimmed = line.trim();
+        if (trimmed.isEmpty() || trimmed.startsWith(COMMENT_PREFIX)) {
+          continue;
+        }
+        final String[] tokens = trimmed.split("\\s+");
+        if (tokens.length > 0) {
+          dict.put(new StringList(tokens));
+        }
+      }
+    }
+    return dict;
+  }
+
+  /**
+   * Fluent builder for {@link DictionaryStopwordFilter}. Accumulates
+   * {@code add} / {@code remove} operations together with a case-sensitivity
+   * setting; {@link #build()} produces an immutable filter that reflects the
+   * accumulated state.
+   * <p>
+   * Operations are applied at {@link #build()} time in the order
+   * "all adds, then all removes". Within each phase, insertion order is
+   * preserved but is not externally observable.
+   */
+  public static final class Builder {
+
+    private final List<String[]> addEntries = new ArrayList<>();
+    private final List<String[]> removeEntries = new ArrayList<>();
+    private boolean caseSensitive;
+
+    private Builder() {
+      // use DictionaryStopwordFilter.builder()
+    }
+
+    /**
+     * @param cs Whether the resulting filter performs case-sensitive matching.
+     *           Defaults to {@code false}.
+     * @return This builder.
+     */
+    public Builder caseSensitive(final boolean cs) {
+      this.caseSensitive = cs;
+      return this;
+    }
+
+    /**
+     * Adds one entry (1-gram or n-gram).
+     *
+     * @param tokens The tokens forming the entry. Must not be {@code null}
+     *               or empty.
+     * @return This builder.
+     * @throws IllegalArgumentException if {@code tokens} is {@code null} or
+     *     empty.
+     */
+    public Builder add(final String... tokens) {
+      if (tokens == null || tokens.length == 0) {
+        throw new IllegalArgumentException("tokens must not be null or empty");
+      }
+      addEntries.add(tokens.clone());
+      return this;
+    }
+
+    /**
+     * Adds a bulk of entries.
+     *
+     * @param entries The entries to add. Must not be {@code null}.
+     * @return This builder.
+     * @throws IllegalArgumentException if {@code entries} is {@code null}, or
+     *     if any element is {@code null} or empty.
+     */
+    public Builder addAll(final Collection<String[]> entries) {
+      if (entries == null) {
+        throw new IllegalArgumentException("entries must not be null");
+      }
+      for (final String[] entry : entries) {
+        add(entry);
+      }
+      return this;
+    }
+
+    /**
+     * Schedules removal of one entry (applied after all adds at
+     * {@link #build()} time).
+     *
+     * @param tokens The tokens forming the entry to remove.
+     * @return This builder.
+     * @throws IllegalArgumentException if {@code tokens} is {@code null} or
+     *     empty.
+     */
+    public Builder remove(final String... tokens) {
+      if (tokens == null || tokens.length == 0) {
+        throw new IllegalArgumentException("tokens must not be null or empty");
+      }
+      removeEntries.add(tokens.clone());
+      return this;
+    }
+
+    /**
+     * Schedules a bulk of removals.
+     *
+     * @param entries The entries to remove. Must not be {@code null}.
+     * @return This builder.
+     * @throws IllegalArgumentException if {@code entries} is {@code null}, or
+     *     if any element is {@code null} or empty.
+     */
+    public Builder removeAll(final Collection<String[]> entries) {
+      if (entries == null) {
+        throw new IllegalArgumentException("entries must not be null");
+      }
+      for (final String[] entry : entries) {
+        remove(entry);
+      }
+      return this;
+    }
+
+    /**
+     * Reads one-per-line stopword entries from {@code in} (whitespace
+     * separates tokens of a multi-word entry; blank and {@code #}-prefixed
+     * lines are skipped) and schedules them for addition.
+     *
+     * @param in The input stream to read from. Must not be {@code null}.
+     * @param cs The {@link Charset} to decode with. Must not be {@code null}.
+     * @return This builder.
+     * @throws IllegalArgumentException if {@code in} or {@code cs} is
+     *     {@code null}.
+     * @throws IOException If an IO error occurs while reading.
+     */
+    public Builder load(final InputStream in, final Charset cs) throws IOException {
+      if (in == null) {
+        throw new IllegalArgumentException("in must not be null");
+      }
+      if (cs == null) {
+        throw new IllegalArgumentException("cs must not be null");
+      }
+      try (Reader reader = new InputStreamReader(in, cs);
+           BufferedReader lineReader = new BufferedReader(reader)) {
+        String line;
+        while ((line = lineReader.readLine()) != null) {
+          final String trimmed = line.trim();
+          if (trimmed.isEmpty() || trimmed.startsWith(COMMENT_PREFIX)) {
+            continue;
+          }
+          addEntries.add(trimmed.split("\\s+"));
+        }
+      }
+      return this;
+    }
+
+    /**
+     * @return A new immutable {@link DictionaryStopwordFilter} reflecting
+     *     the accumulated state.
+     */
+    public DictionaryStopwordFilter build() {
+      final Dictionary stopwords = new Dictionary(caseSensitive);
+
+      // Phase 1: register every scheduled addition, in insertion order.
+      addEntries.forEach(tokens -> stopwords.put(new StringList(tokens)));
+
+      // Phase 2: apply the scheduled removals afterwards, so a removal always wins.
+      removeEntries.forEach(tokens -> stopwords.remove(new StringList(tokens)));
+      return new DictionaryStopwordFilter(stopwords);
+    }
+  }
+}
diff --git a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/stopword/StopwordFilterStream.java b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/stopword/StopwordFilterStream.java
new file mode 100644
index 000000000..70a2054d1
--- /dev/null
+++ b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/stopword/StopwordFilterStream.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.stopword;
+
+import java.io.IOException;
+
+import opennlp.tools.util.FilterObjectStream;
+import opennlp.tools.util.ObjectStream;
+
+/**
+ * A {@link FilterObjectStream} which removes stopwords from each
+ * {@code String[]} sample produced by an underlying
+ * {@link ObjectStream ObjectStream&lt;String[]&gt;}.
+ * <p>
+ * Stopword membership is decided by the supplied {@link StopwordFilter};
+ * filtering is delegated to {@link StopwordFilter#filter(String[])} so the
+ * relative order of surviving tokens within a sample is preserved.
+ * <p>
+ * {@link #reset()} and {@link #close()} are inherited from
+ * {@link FilterObjectStream} and simply forward to the wrapped stream.
+ */
+public final class StopwordFilterStream extends FilterObjectStream<String[], String[]> {
+
+  private final StopwordFilter filter;
+
+  /**
+   * Initializes a {@link StopwordFilterStream}.
+   *
+   * @param samples The {@link ObjectStream} of token arrays to filter.
+   *                Must not be {@code null}.
+   * @param filter  The {@link StopwordFilter} used to drop stopwords.
+   *                Must not be {@code null}.
+   * @throws IllegalArgumentException if {@code samples} or {@code filter} is
+   *                                  {@code null}.
+   */
+  public StopwordFilterStream(final ObjectStream<String[]> samples,
+                              final StopwordFilter filter) {
+    super(requireNonNullArg(samples, "samples"));
+    requireNonNullArg(filter, "filter");
+    this.filter = filter;
+  }
+
+  /**
+   * Reads the next sample from the wrapped stream and returns it with
+   * stopwords removed. Returns {@code null} once the underlying stream is
+   * exhausted.
+   *
+   * @return The filtered sample or {@code null} at the end of the stream.
+   * @throws IOException If the underlying stream throws an I/O error.
+   */
+  @Override
+  public String[] read() throws IOException {
+    final String[] sample = samples.read();
+    return sample != null ? filter.filter(sample) : null;
+  }
+
+  private static <T> T requireNonNullArg(final T value, final String name) {
+    if (value == null) {
+      throw new IllegalArgumentException(name + " must not be null");
+    }
+    return value;
+  }
+}
diff --git a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/stopword/StopwordFilteringTokenizer.java b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/stopword/StopwordFilteringTokenizer.java
new file mode 100644
index 000000000..a28a41ac6
--- /dev/null
+++ b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/stopword/StopwordFilteringTokenizer.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.stopword;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import opennlp.tools.commons.ThreadSafe;
+import opennlp.tools.tokenize.Tokenizer;
+import opennlp.tools.util.Span;
+
+/**
+ * A {@link Tokenizer} decorator which delegates tokenization to a wrapped
+ * {@link Tokenizer} and then removes any tokens identified as stopwords by
+ * the supplied {@link StopwordFilter}.
+ * <p>
+ * Both {@link #tokenize(String)} and {@link #tokenizePos(String)} apply the
+ * filter using the same greedy longest-match window scan, so single-token
+ * (1-gram) and multi-token (n-gram) stopword entries are dropped identically
+ * across {@link #tokenize(String)}, {@link #tokenizePos(String)} and
+ * {@link StopwordFilterStream}. For {@link #tokenizePos(String)} the
+ * {@link Span Spans} covering a matched entry are dropped while the offsets of
+ * the remaining spans are kept intact (they continue to refer to positions in
+ * the original input string).
+ * <p>
+ * Instances are immutable and therefore safe for concurrent use provided that
+ * both the wrapped {@link Tokenizer} and the {@link StopwordFilter} are
+ * thread-safe. {@link DictionaryStopwordFilter} is unconditionally
+ * thread-safe; combined with a thread-safe delegate tokenizer
+ * (e.g. {@code SimpleTokenizer.INSTANCE}) the resulting decorator is
+ * thread-safe with no further synchronization required.
+ */
+@ThreadSafe
+public final class StopwordFilteringTokenizer implements Tokenizer {
+
+  private final Tokenizer delegate;
+  private final StopwordFilter filter;
+
+  /**
+   * Initializes a {@link StopwordFilteringTokenizer}.
+   *
+   * @param delegate The underlying {@link Tokenizer} that produces the raw
+   *                 tokens. Must not be {@code null}.
+   * @param filter   The {@link StopwordFilter} which decides whether a token
+   *                 is a stopword. Must not be {@code null}.
+   * @throws IllegalArgumentException if {@code delegate} or {@code filter} is
+   *                                  {@code null}.
+   */
+  public StopwordFilteringTokenizer(final Tokenizer delegate, final StopwordFilter filter) {
+    if (delegate == null) {
+      throw new IllegalArgumentException("delegate must not be null");
+    }
+    if (filter == null) {
+      throw new IllegalArgumentException("filter must not be null");
+    }
+    this.delegate = delegate;
+    this.filter = filter;
+  }
+
+  /**
+   * Tokenizes the supplied string with the wrapped {@link Tokenizer} and then
+   * removes any tokens which the {@link StopwordFilter} considers a stopword.
+   *
+   * @param s The string to be tokenized.
+   * @return  The remaining tokens in their original order.
+   */
+  @Override
+  public String[] tokenize(final String s) {
+    return filter.filter(delegate.tokenize(s));
+  }
+
+  /**
+   * Computes token spans with the wrapped {@link Tokenizer} and then drops
+   * the spans covering any stopword entry according to the
+   * {@link StopwordFilter}. A greedy left-to-right window scan mirrors
+   * {@link StopwordFilter#filter(String[])}: at each position the longest
+   * window of consecutive spans whose covered texts form a registered entry is
+   * removed; otherwise the current span is kept and the scan advances by one.
+   * This way multi-word (n-gram) entries are dropped here exactly as they are
+   * by {@link #tokenize(String)}. The relative order and the offsets of the
+   * surviving spans are preserved.
+   *
+   * @param s The string to be tokenized.
+   * @return  The remaining {@link Span Spans} in their original order.
+   */
+  @Override
+  public Span[] tokenizePos(final String s) {
+    final Span[] spans = delegate.tokenizePos(s);
+    if (spans == null || spans.length == 0) {
+      return spans;
+    }
+    // Extract every covered text exactly once; the window scan only copies references.
+    final String[] texts = new String[spans.length];
+    for (int k = 0; k < spans.length; k++) {
+      texts[k] = spans[k].getCoveredText(s).toString();
+    }
+    final List<Span> kept = new ArrayList<>(spans.length);
+    int i = 0;
+    while (i < spans.length) {
+      int matched = 0;
+      // Try the longest possible window first, decreasing down to 1; stop at the first hit.
+      for (int w = spans.length - i; w >= 1 && matched == 0; w--) {
+        final String[] window = new String[w];
+        System.arraycopy(texts, i, window, 0, w);
+        if (filter.isStopword(window)) {
+          matched = w;
+        }
+      }
+      if (matched == 0) {
+        kept.add(spans[i]);
+      }
+      i += Math.max(matched, 1); // skip the matched window, or step past the kept span
+    }
+    return kept.toArray(new Span[0]);
+  }
+}
diff --git a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/stopword/StopwordLists.java b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/stopword/StopwordLists.java
new file mode 100644
index 000000000..59f1c1550
--- /dev/null
+++ b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/stopword/StopwordLists.java
@@ -0,0 +1,203 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.stopword;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.UncheckedIOException;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.LinkedHashSet;
+import java.util.Locale;
+import java.util.Map;
+import java.util.MissingResourceException;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+import opennlp.tools.util.LanguageCodeValidator;
+
+/**
+ * Static factory for {@link StopwordFilter} instances backed by bundled
+ * language-specific stopword resources or caller-supplied input streams.
+ * <p>
+ * Bundled lists ship for the eleven languages enumerated in
+ * <a href="https://issues.apache.org/jira/browse/OPENNLP-660">OPENNLP-660</a>:
+ * Bulgarian (bg), Danish (da), German (de), English (en), Spanish (es),
+ * Finnish (fi), French (fr), Italian (it), Dutch (nl), Portuguese (pt),
+ * Russian (ru). Each list is keyed by its ISO 639-1 two-letter code.
+ */
+public final class StopwordLists {
+
+  private static final String RESOURCE_PATH_PREFIX = "/opennlp/tools/stopword/";
+
+  private static final Set<String> SUPPORTED_LANGUAGES;
+
+  /**
+   * Maps three-letter ISO 639-2/3 codes to their ISO 639-1 two-letter
+   * equivalent. Built once at class-initialization time from the JVM's locale
+   * data (terminologic forms such as {@code nld}, {@code fra}, {@code deu})
+   * plus the ISO 639-2 bibliographic forms ({@code dut}, {@code fre},
+   * {@code ger}) that {@link Locale#getISO3Language()} does not produce, so
+   * that {@link #normalizeToIso6391(String)} resolves codes with a single map
+   * lookup instead of scanning {@link Locale#getAvailableLocales()} on every
+   * call.
+   */
+  private static final Map<String, String> ISO6393_TO_ISO6391;
+
+  /**
+   * Caches the immutable, thread-safe filters loaded from the bundled
+   * resources, keyed by normalized ISO 639-1 code, so that repeated
+   * {@link #forLanguage(String)} calls do not re-read and re-parse the same
+   * classpath resource.
+   */
+  private static final Map<String, StopwordFilter> BUNDLED_CACHE =
+      new ConcurrentHashMap<>();
+
+  static {
+    final Set<String> langs = new LinkedHashSet<>();
+    Collections.addAll(langs,
+        "bg", "da", "de", "en", "es", "fi", "fr", "it", "nl", "pt", "ru");
+    SUPPORTED_LANGUAGES = Collections.unmodifiableSet(langs);
+
+    final Map<String, String> iso3 = new HashMap<>();
+    // ISO 639-2 bibliographic codes that getISO3Language() never returns.
+    iso3.put("dut", "nl"); // Dutch
+    iso3.put("fre", "fr"); // French
+    iso3.put("ger", "de"); // German
+    // Resolve the terminologic three-letter forms once from the JVM locale data.
+    for (final Locale locale : Locale.getAvailableLocales()) {
+      final String lang = locale.getLanguage();
+      if (lang.length() != 2) {
+        continue;
+      }
+      try {
+        final String iso3Lang = locale.getISO3Language();
+        if (!iso3Lang.isEmpty()) {
+          iso3.putIfAbsent(iso3Lang, lang);
+        }
+      } catch (final MissingResourceException ignored) {
+        // locale has no three-letter form; skip it
+      }
+    }
+    ISO6393_TO_ISO6391 = Collections.unmodifiableMap(iso3);
+  }
+
+  private StopwordLists() {
+    // utility class
+  }
+
+  /**
+   * Returns a case-insensitive {@link StopwordFilter} for the given ISO 639
+   * language code. Three-letter codes are normalized to their two-letter
+   * equivalent when a bundled list exists for the latter.
+   *
+   * @param iso639Code The ISO 639-1 or ISO 639-2/3 language code.
+   *     Must not be {@code null}.
+   * @return A {@link StopwordFilter} backed by the bundled resource. The
+   *     returned instance is immutable, thread-safe and cached, so repeated
+   *     calls for the same language return the same shared filter.
+   * @throws IllegalArgumentException if {@code iso639Code} is {@code null},
+   *     is not a valid ISO 639 code, or has no bundled list for this language.
+   * @throws UncheckedIOException if reading the bundled resource fails.
+   */
+  public static StopwordFilter forLanguage(final String iso639Code) {
+    if (iso639Code == null) {
+      throw new IllegalArgumentException("iso639Code must not be null");
+    }
+    LanguageCodeValidator.validateLanguageCode(iso639Code);
+
+    final String normalized = normalizeToIso6391(iso639Code);
+
+    if (!SUPPORTED_LANGUAGES.contains(normalized)) {
+      throw new IllegalArgumentException(
+          "No bundled stopword list for language '" + iso639Code
+              + "'. Supported languages: " + SUPPORTED_LANGUAGES);
+    }
+
+    return BUNDLED_CACHE.computeIfAbsent(normalized, StopwordLists::loadBundled);
+  }
+
+  private static StopwordFilter loadBundled(final String normalized) {
+    final String resource = RESOURCE_PATH_PREFIX + normalized + ".txt";
+    final InputStream in = StopwordLists.class.getResourceAsStream(resource);
+    if (in == null) {
+      throw new IllegalArgumentException(
+          "Bundled stopword resource '" + resource + "' not found on the"
+              + " classpath. Supported languages: " + SUPPORTED_LANGUAGES);
+    }
+    try (InputStream stream = in) {
+      return new DictionaryStopwordFilter(stream, StandardCharsets.UTF_8, false);
+    } catch (final IOException e) {
+      throw new UncheckedIOException(
+          "Failed to load bundled stopword list for '" + normalized + "'", e);
+    }
+  }
+
+  /**
+   * @return An unmodifiable view of the bundled ISO 639-1 codes for which
+   *     stopword lists are shipped.
+   */
+  public static Set<String> supportedLanguages() {
+    return SUPPORTED_LANGUAGES;
+  }
+
+  /**
+   * Loads a stopword filter from a caller-supplied input stream.
+   *
+   * @param in The input stream. Must not be {@code null}.
+   * @param cs The {@link Charset} to decode with. Must not be {@code null}.
+   * @param caseSensitive Whether the resulting filter matches case-sensitively.
+   * @return A {@link StopwordFilter} populated from {@code in}.
+   * @throws IllegalArgumentException if {@code in} or {@code cs} is
+   *     {@code null}.
+   * @throws IOException Thrown if an IO error occurs while reading.
+   */
+  public static StopwordFilter load(final InputStream in, final Charset cs,
+                                    final boolean caseSensitive) throws IOException {
+    if (in == null) {
+      throw new IllegalArgumentException("in must not be null");
+    }
+    if (cs == null) {
+      throw new IllegalArgumentException("cs must not be null");
+    }
+    return new DictionaryStopwordFilter(in, cs, caseSensitive);
+  }
+
+  /**
+   * Normalizes an ISO 639-2/3 three-letter code to its ISO 639-1 two-letter
+   * equivalent when one is available, otherwise returns the (lower-cased)
+   * input unchanged. The caller is responsible for validating the code first
+   * via {@link LanguageCodeValidator#validateLanguageCode(String)}.
+   * <p>
+   * Two-letter inputs are simply lower-cased and returned. Three-letter inputs
+   * are resolved with a single lookup against {@link #ISO6393_TO_ISO6391},
+   * which is precomputed once at class-initialization time (covering both the
+   * terminologic forms produced by {@link Locale#getISO3Language()} and the
+   * ISO 639-2 bibliographic forms {@code dut}, {@code fre} and {@code ger}).
+   * Unresolved codes are returned lower-cased and unchanged.
+   */
+  private static String normalizeToIso6391(final String code) {
+    final String lower = code.toLowerCase(Locale.ROOT);
+    // Two-letter input is already ISO 639-1; anything else goes through the lookup table.
+    return lower.length() == 2
+        ? lower
+        : ISO6393_TO_ISO6391.getOrDefault(lower, lower);
+  }
+}
diff --git a/opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword/bg.txt b/opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword/bg.txt
new file mode 100644
index 000000000..dbf47d565
--- /dev/null
+++ b/opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword/bg.txt
@@ -0,0 +1,194 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+# Comments were changed from `|` to `#` so that this list can be parsed by OpenNLP's stopword loader.
+а
+аз
+ако
+ала
+бе
+без
+беше
+би
+бил
+била
+били
+било
+близо
+бъдат
+бъде
+бяха
+в
+вас
+ваш
+ваша
+вероятно
+вече
+взема
+ви
+вие
+винаги
+все
+всеки
+всички
+всичко
+всяка
+във
+въпреки
+върху
+г
+ги
+главно
+го
+д
+да
+дали
+до
+докато
+докога
+дори
+досега
+доста
+е
+едва
+един
+ето
+за
+зад
+заедно
+заради
+засега
+затова
+защо
+защото
+и
+из
+или
+им
+има
+имат
+иска
+й
+каза
+как
+каква
+какво
+както
+какъв
+като
+кога
+когато
+което
+които
+кой
+който
+колко
+която
+къде
+където
+към
+ли
+м
+ме
+между
+мен
+ми
+мнозина
+мога
+могат
+може
+моля
+момента
+му
+н
+на
+над
+назад
+най
+направи
+напред
+например
+нас
+не
+него
+нея
+ни
+ние
+никой
+нито
+но
+някои
+някой
+няма
+обаче
+около
+освен
+особено
+от
+отгоре
+отново
+още
+пак
+по
+повече
+повечето
+под
+поне
+поради
+после
+почти
+прави
+пред
+преди
+през
+при
+пък
+първо
+с
+са
+само
+се
+сега
+си
+скоро
+след
+сме
+според
+сред
+срещу
+сте
+съм
+със
+също
+т
+тази
+така
+такива
+такъв
+там
+твой
+те
+тези
+ти
+тн
+то
+това
+тогава
+този
+той
+толкова
+точно
+трябва
+тук
+тъй
+тя
+тях
+у
+харесва
+ч
+че
+често
+чрез
+ще
+щом
+я
diff --git a/opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword/da.txt b/opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword/da.txt
new file mode 100644
index 000000000..c3608fd52
--- /dev/null
+++ b/opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword/da.txt
@@ -0,0 +1,110 @@
+# From https://snowballstem.org/algorithms/danish/stop.txt
+# This file is distributed under the BSD License.
+# See https://snowballstem.org/license.html
+# Also see https://opensource.org/licenses/bsd-license.html
+#  - Encoding was converted to UTF-8.
+#  - This notice was added.
+#  - Comments were changed from `|` to `#` so that this list can be parsed by OpenNLP's stopword loader.
+#
+
+# A Danish stop word list. Comments begin with a hash sign (#). Each stop
+# word is at the start of a line.
+
+# This is a ranked list (commonest to rarest) of stopwords derived from
+# a large text sample.
+
+
+og
+i
+jeg
+det
+at
+en
+den
+til
+er
+som
+på
+de
+med
+han
+af
+for
+ikke
+der
+var
+mig
+sig
+men
+et
+har
+om
+vi
+min
+havde
+ham
+hun
+nu
+over
+da
+fra
+du
+ud
+sin
+dem
+os
+op
+man
+hans
+hvor
+eller
+hvad
+skal
+selv
+her
+alle
+vil
+blev
+kunne
+ind
+når
+være
+dog
+noget
+ville
+jo
+deres
+efter
+ned
+skulle
+denne
+end
+dette
+mit
+også
+under
+have
+dig
+anden
+hende
+mine
+alt
+meget
+sit
+sine
+vor
+mod
+disse
+hvis
+din
+nogle
+hos
+blive
+mange
+ad
+bliver
+hendes
+været
+thi
+jer
+sådan
diff --git a/opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword/de.txt b/opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword/de.txt
new file mode 100644
index 000000000..f297306ee
--- /dev/null
+++ b/opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword/de.txt
@@ -0,0 +1,294 @@
+# From https://snowballstem.org/algorithms/german/stop.txt
+# This file is distributed under the BSD License.
+# See https://snowballstem.org/license.html
+# Also see https://opensource.org/licenses/bsd-license.html
+#  - Encoding was converted to UTF-8.
+#  - This notice was added.
+#  - Comments were changed from `|` to `#` so that this list can be parsed by OpenNLP's stopword loader.
+#
+
+# A German stop word list. Comments begin with a hash sign (#). Each stop
+# word is at the start of a line.
+
+# The number of forms in this list is reduced significantly by passing it
+# through the German stemmer.
+
+
+aber
+
+alle
+allem
+allen
+aller
+alles
+
+als
+also
+am
+an
+
+ander
+andere
+anderem
+anderen
+anderer
+anderes
+anderm
+andern
+anderr
+anders
+
+auch
+auf
+aus
+bei
+bin
+bis
+bist
+da
+damit
+dann
+
+der
+den
+des
+dem
+die
+das
+
+daß
+
+derselbe
+derselben
+denselben
+desselben
+demselben
+dieselbe
+dieselben
+dasselbe
+
+dazu
+
+dein
+deine
+deinem
+deinen
+deiner
+deines
+
+denn
+
+derer
+dessen
+
+dich
+dir
+du
+
+dies
+diese
+diesem
+diesen
+dieser
+dieses
+
+
+doch
+dort
+
+
+durch
+
+ein
+eine
+einem
+einen
+einer
+eines
+
+einig
+einige
+einigem
+einigen
+einiger
+einiges
+
+einmal
+
+er
+ihn
+ihm
+
+es
+etwas
+
+euer
+eure
+eurem
+euren
+eurer
+eures
+
+für
+gegen
+gewesen
+hab
+habe
+haben
+hat
+hatte
+hatten
+hier
+hin
+hinter
+
+ich
+mich
+mir
+
+
+ihr
+ihre
+ihrem
+ihren
+ihrer
+ihres
+euch
+
+im
+in
+indem
+ins
+ist
+
+jede
+jedem
+jeden
+jeder
+jedes
+
+jene
+jenem
+jenen
+jener
+jenes
+
+jetzt
+kann
+
+kein
+keine
+keinem
+keinen
+keiner
+keines
+
+können
+könnte
+machen
+man
+
+manche
+manchem
+manchen
+mancher
+manches
+
+mein
+meine
+meinem
+meinen
+meiner
+meines
+
+mit
+muss
+musste
+nach
+nicht
+nichts
+noch
+nun
+nur
+ob
+oder
+ohne
+sehr
+
+sein
+seine
+seinem
+seinen
+seiner
+seines
+
+selbst
+sich
+
+sie
+ihnen
+
+sind
+so
+
+solche
+solchem
+solchen
+solcher
+solches
+
+soll
+sollte
+sondern
+sonst
+über
+um
+und
+
+uns
+unse
+unsem
+unsen
+unser
+unses
+
+unter
+viel
+vom
+von
+vor
+während
+war
+waren
+warst
+was
+weg
+weil
+weiter
+
+welche
+welchem
+welchen
+welcher
+welches
+
+wenn
+werde
+werden
+wie
+wieder
+will
+wir
+wird
+wirst
+wo
+wollen
+wollte
+würde
+würden
+zu
+zum
+zur
+zwar
+zwischen
+
diff --git a/opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword/en.txt b/opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword/en.txt
new file mode 100644
index 000000000..7ae6d01dd
--- /dev/null
+++ b/opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword/en.txt
@@ -0,0 +1,320 @@
+# From https://snowballstem.org/algorithms/english/stop.txt
+# This file is distributed under the BSD License.
+# See https://snowballstem.org/license.html
+# Also see https://opensource.org/licenses/bsd-license.html
+#  - Encoding was converted to UTF-8.
+#  - This notice was added.
+#  - Comments were changed from `|` to `#` so that this list can be parsed by OpenNLP's stopword loader.
+#
+
+# An English stop word list. Comments begin with a hash sign (#). Each stop
+# word is at the start of a line.
+
+# Many of the forms below are quite rare (e.g. "yourselves") but included for
+#  completeness.
+
+# PRONOUNS FORMS
+# 1st person sing
+
+i
+
+me
+my
+# the possessive pronoun `mine' is best suppressed, because of the
+# sense of coal-mine etc.
+myself
+# 1st person plural
+we
+
+# us           | object
+# care is required here because US = United States. It is usually
+# safe to remove it if it is in lower case.
+our
+ours
+ourselves
+# second person (archaic `thou' forms not included)
+you
+your
+yours
+yourself
+yourselves
+# third person singular
+he
+him
+his
+himself
+
+she
+her
+hers
+herself
+
+it
+its
+itself
+# third person plural
+they
+them
+their
+theirs
+themselves
+# other forms (demonstratives, interrogatives)
+what
+which
+who
+whom
+this
+that
+these
+those
+
+# VERB FORMS (using F.R. Palmer's nomenclature)
+# BE
+am
+is
+are
+was
+were
+be
+been
+being
+# HAVE
+have
+has
+had
+having
+# DO
+do
+does
+did
+doing
+
+# The forms below are, I believe, best omitted, because of the significant
+# homonym forms:
+
+#  He made a WILL
+#  old tin CAN
+#  merry month of MAY
+#  a smell of MUST
+#  fight the good fight with all thy MIGHT
+
+# would, could, should, ought might however be included
+
+#          | AUXILIARIES
+#            | WILL
+#will
+
+would
+
+#            | SHALL
+#shall
+
+should
+
+#            | CAN
+#can
+
+could
+
+#            | MAY
+#may
+#might
+#            | MUST
+#must
+#            | OUGHT
+
+ought
+
+# COMPOUND FORMS, increasingly encountered nowadays in 'formal' writing
+# pronoun + verb
+
+i'm
+you're
+he's
+she's
+it's
+we're
+they're
+i've
+you've
+we've
+they've
+i'd
+you'd
+he'd
+she'd
+we'd
+they'd
+i'll
+you'll
+he'll
+she'll
+we'll
+they'll
+
+# verb + negation
+
+isn't
+aren't
+wasn't
+weren't
+hasn't
+haven't
+hadn't
+doesn't
+don't
+didn't
+
+# auxiliary + negation
+
+won't
+wouldn't
+shan't
+shouldn't
+can't
+cannot
+couldn't
+mustn't
+
+# miscellaneous forms
+
+let's
+that's
+who's
+what's
+here's
+there's
+when's
+where's
+why's
+how's
+
+# rarer forms
+
+# daren't needn't
+
+# doubtful forms
+
+# oughtn't mightn't
+
+# ARTICLES
+a
+an
+the
+
+# THE REST (Overlap among prepositions, conjunctions, adverbs etc is so
+# high, that classification is pointless.)
+and
+but
+if
+or
+because
+as
+until
+while
+
+of
+at
+by
+for
+with
+about
+against
+between
+into
+through
+during
+before
+after
+above
+below
+to
+from
+up
+down
+in
+out
+on
+off
+over
+under
+
+again
+further
+then
+once
+
+here
+there
+when
+where
+why
+how
+
+all
+any
+both
+each
+few
+more
+most
+other
+some
+such
+
+no
+nor
+not
+only
+own
+same
+so
+than
+too
+very
+
+# Just for the record, the following words are among the commonest in English
+
+# one
+# every
+# least
+# less
+# many
+# now
+# ever
+# never
+# say
+# says
+# said
+# also
+# get
+# go
+# goes
+# just
+# made
+# make
+# put
+# see
+# seen
+# whether
+# like
+# well
+# back
+# even
+# still
+# way
+# take
+# since
+# another
+# however
+# two
+# three
+# four
+# five
+# first
+# second
+# new
+# old
+# high
+# long
+
diff --git a/opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword/es.txt b/opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword/es.txt
new file mode 100644
index 000000000..f1955d62e
--- /dev/null
+++ b/opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword/es.txt
@@ -0,0 +1,356 @@
+# From https://snowballstem.org/algorithms/spanish/stop.txt
+# This file is distributed under the BSD License.
+# See https://snowballstem.org/license.html
+# Also see https://opensource.org/licenses/bsd-license.html
+#  - Encoding was converted to UTF-8.
+#  - This notice was added.
+#  - Comments were changed from `|` to `#` so that this list can be parsed by OpenNLP's stopword loader.
+#
+
+# A Spanish stop word list. Comments begin with `#`. Each stop
+# word is at the start of a line.
+
+
+# The following is a ranked list (commonest to rarest) of stopwords
+# deriving from a large sample of text.
+
+# Extra words have been added at the end.
+
+de
+la
+que
+el
+en
+y
+a
+los
+del
+se
+las
+por
+un
+para
+con
+no
+una
+su
+al
+# es         from SER
+lo
+como
+más
+pero
+sus
+le
+ya
+o
+# fue        from SER
+este
+# ha         from HABER
+sí
+porque
+esta
+# son        from SER
+entre
+# está       from ESTAR
+cuando
+muy
+sin
+sobre
+# ser        from SER
+# tiene      from TENER
+también
+me
+hasta
+hay
+donde
+# han        from HABER
+quien
+# están      from ESTAR
+# estado     from ESTAR
+desde
+todo
+nos
+durante
+# estados    from ESTAR
+todos
+uno
+les
+ni
+contra
+otros
+# fueron     from SER
+ese
+eso
+# había      from HABER
+ante
+ellos
+e
+esto
+mí
+antes
+algunos
+qué
+unos
+yo
+otro
+otras
+otra
+él
+tanto
+esa
+estos
+mucho
+quienes
+nada
+muchos
+cual
+# sea        from SER
+poco
+ella
+estar
+# haber      from HABER
+estas
+# estaba     from ESTAR
+# estamos    from ESTAR
+algunas
+algo
+nosotros
+
+# other forms
+
+mi
+mis
+tú
+te
+ti
+tu
+tus
+ellas
+nosotras
+vosotros
+vosotras
+os
+mío
+mía
+míos
+mías
+tuyo
+tuya
+tuyos
+tuyas
+suyo
+suya
+suyos
+suyas
+nuestro
+nuestra
+nuestros
+nuestras
+vuestro
+vuestra
+vuestros
+vuestras
+esos
+esas
+
+# forms of estar, to be (not including the infinitive):
+estoy
+estás
+está
+estamos
+estáis
+están
+esté
+estés
+estemos
+estéis
+estén
+estaré
+estarás
+estará
+estaremos
+estaréis
+estarán
+estaría
+estarías
+estaríamos
+estaríais
+estarían
+estaba
+estabas
+estábamos
+estabais
+estaban
+estuve
+estuviste
+estuvo
+estuvimos
+estuvisteis
+estuvieron
+estuviera
+estuvieras
+estuviéramos
+estuvierais
+estuvieran
+estuviese
+estuvieses
+estuviésemos
+estuvieseis
+estuviesen
+estando
+estado
+estada
+estados
+estadas
+estad
+
+# forms of haber, to have (not including the infinitive):
+he
+has
+ha
+hemos
+habéis
+han
+haya
+hayas
+hayamos
+hayáis
+hayan
+habré
+habrás
+habrá
+habremos
+habréis
+habrán
+habría
+habrías
+habríamos
+habríais
+habrían
+había
+habías
+habíamos
+habíais
+habían
+hube
+hubiste
+hubo
+hubimos
+hubisteis
+hubieron
+hubiera
+hubieras
+hubiéramos
+hubierais
+hubieran
+hubiese
+hubieses
+hubiésemos
+hubieseis
+hubiesen
+habiendo
+habido
+habida
+habidos
+habidas
+
+# forms of ser, to be (not including the infinitive):
+soy
+eres
+es
+somos
+sois
+son
+sea
+seas
+seamos
+seáis
+sean
+seré
+serás
+será
+seremos
+seréis
+serán
+sería
+serías
+seríamos
+seríais
+serían
+era
+eras
+éramos
+erais
+eran
+fui
+fuiste
+fue
+fuimos
+fuisteis
+fueron
+fuera
+fueras
+fuéramos
+fuerais
+fueran
+fuese
+fueses
+fuésemos
+fueseis
+fuesen
+siendo
+sido
+#  sed also means 'thirst'
+
+# forms of tener, to have (not including the infinitive):
+tengo
+tienes
+tiene
+tenemos
+tenéis
+tienen
+tenga
+tengas
+tengamos
+tengáis
+tengan
+tendré
+tendrás
+tendrá
+tendremos
+tendréis
+tendrán
+tendría
+tendrías
+tendríamos
+tendríais
+tendrían
+tenía
+tenías
+teníamos
+teníais
+tenían
+tuve
+tuviste
+tuvo
+tuvimos
+tuvisteis
+tuvieron
+tuviera
+tuvieras
+tuviéramos
+tuvierais
+tuvieran
+tuviese
+tuvieses
+tuviésemos
+tuvieseis
+tuviesen
+teniendo
+tenido
+tenida
+tenidos
+tenidas
+tened
+
diff --git a/opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword/fi.txt b/opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword/fi.txt
new file mode 100644
index 000000000..667c57a3c
--- /dev/null
+++ b/opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword/fi.txt
@@ -0,0 +1,265 @@
+# From https://snowballstem.org/algorithms/finnish/stop.txt
+# This file is distributed under the BSD License.
+# See https://snowballstem.org/license.html
+# Also see https://opensource.org/licenses/bsd-license.html
+#  - Encoding was converted to UTF-8.
+#  - This notice was added.
+#  - Comments were changed from `|` to `#` so that this list can be parsed by OpenNLP's stopword loader.
+#  - The pronoun/determiner paradigm rows (originally whitespace-separated columns) were expanded to one token per line so that each form is registered as an individual stopword by OpenNLP's loader.
+#
+
+# forms of BE
+
+olla
+olen
+olet
+on
+olemme
+olette
+ovat
+ole
+
+oli
+olisi
+olisit
+olisin
+olisimme
+olisitte
+olisivat
+olit
+olin
+olimme
+olitte
+olivat
+ollut
+olleet
+
+en
+et
+ei
+emme
+ette
+eivät
+
+# Personal pronoun paradigms
+# Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat (Ess Trans where present)
+minä
+minun
+minut
+minua
+minussa
+minusta
+minuun
+minulla
+minulta
+minulle
+sinä
+sinun
+sinut
+sinua
+sinussa
+sinusta
+sinuun
+sinulla
+sinulta
+sinulle
+hän
+hänen
+hänet
+häntä
+hänessä
+hänestä
+häneen
+hänellä
+häneltä
+hänelle
+me
+meidän
+meidät
+meitä
+meissä
+meistä
+meihin
+meillä
+meiltä
+meille
+te
+teidän
+teidät
+teitä
+teissä
+teistä
+teihin
+teillä
+teiltä
+teille
+he
+heidän
+heidät
+heitä
+heissä
+heistä
+heihin
+heillä
+heiltä
+heille
+
+# Demonstrative pronoun paradigms
+tämä
+tämän
+tätä
+tässä
+tästä
+tähän
+tällä
+tältä
+tälle
+tänä
+täksi
+tuo
+tuon
+tuota
+tuossa
+tuosta
+tuohon
+tuolla
+tuolta
+tuolle
+tuona
+tuoksi
+se
+sen
+sitä
+siinä
+siitä
+siihen
+sillä
+siltä
+sille
+sinä
+siksi
+nämä
+näiden
+näitä
+näissä
+näistä
+näihin
+näillä
+näiltä
+näille
+näinä
+näiksi
+nuo
+noiden
+noita
+noissa
+noista
+noihin
+noilla
+noilta
+noille
+noina
+noiksi
+ne
+niiden
+niitä
+niissä
+niistä
+niihin
+niillä
+niiltä
+niille
+niinä
+niiksi
+
+# Interrogative pronoun paradigms
+kuka
+kenen
+kenet
+ketä
+kenessä
+kenestä
+keneen
+kenellä
+keneltä
+kenelle
+kenenä
+keneksi
+ketkä
+keiden
+keitä
+keissä
+keistä
+keihin
+keillä
+keiltä
+keille
+keinä
+keiksi
+mikä
+minkä
+mitä
+missä
+mistä
+mihin
+millä
+miltä
+mille
+minä
+miksi
+mitkä
+
+# Relative pronoun paradigms
+joka
+jonka
+jota
+jossa
+josta
+johon
+jolla
+jolta
+jolle
+jona
+joksi
+jotka
+joiden
+joita
+joissa
+joista
+joihin
+joilla
+joilta
+joille
+joina
+joiksi
+
+# conjunctions
+
+että
+ja
+jos
+koska
+kuin
+mutta
+niin
+sekä
+sillä
+tai
+vaan
+vai
+vaikka
+
+
+# prepositions
+
+kanssa
+mukaan
+noin
+poikki
+yli
+
+# other
+
+kun
+nyt
+itse
diff --git a/opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword/fr.txt b/opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword/fr.txt
new file mode 100644
index 000000000..e721a2a64
--- /dev/null
+++ b/opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword/fr.txt
@@ -0,0 +1,186 @@
+# From https://snowballstem.org/algorithms/french/stop.txt
+# This file is distributed under the BSD License.
+# See https://snowballstem.org/license.html
+# Also see https://opensource.org/licenses/bsd-license.html
+#  - Encoding was converted to UTF-8.
+#  - This notice was added.
+#  - Comments were changed from `|` to `#` so that this list can be parsed by OpenNLP's stopword loader.
+#
+
+# A French stop word list. Comments begin with `#`. Each stop
+# word is at the start of a line.
+
+au
+aux
+avec
+ce
+ces
+dans
+de
+des
+du
+elle
+en
+et
+eux
+il
+je
+la
+le
+leur
+lui
+ma
+mais
+me
+même
+mes
+moi
+mon
+ne
+nos
+notre
+nous
+on
+ou
+par
+pas
+pour
+qu
+que
+qui
+sa
+se
+ses
+# son - his, her (masc). Omitted because it is a homonym of "sound"
+sur
+ta
+te
+tes
+toi
+ton
+tu
+un
+une
+vos
+votre
+vous
+
+#  single letter forms
+
+c
+d
+j
+l
+à
+m
+n
+s
+t
+y
+
+# forms of être (not including the infinitive):
+# été - Omitted because it is homonym of "summer"
+étée
+étées
+# étés - Omitted because it is homonym of "summers"
+étant
+suis
+es
+# est - Omitted because it is homonym of "east"
+# sommes - Omitted because it is homonym of "sums"
+êtes
+sont
+serai
+seras
+sera
+serons
+serez
+seront
+serais
+serait
+serions
+seriez
+seraient
+étais
+était
+étions
+étiez
+étaient
+fus
+fut
+fûmes
+fûtes
+furent
+sois
+soit
+soyons
+soyez
+soient
+fusse
+fusses
+# fût - Omitted because it is homonym of "tap", like in "beer on tap"
+fussions
+fussiez
+fussent
+
+# forms of avoir (not including the infinitive):
+ayant
+eu
+eue
+eues
+eus
+ai
+# as - Omitted because it is homonym of "ace"
+avons
+avez
+ont
+aurai
+# auras - Omitted because it is also the name of a kind of wind
+# aura - Omitted because it is also the name of a kind of wind and homonym of "aura"
+aurons
+aurez
+auront
+aurais
+aurait
+aurions
+auriez
+auraient
+avais
+avait
+# avions - Omitted because it is homonym of "planes"
+aviez
+avaient
+eut
+eûmes
+eûtes
+eurent
+aie
+aies
+ait
+ayons
+ayez
+aient
+eusse
+eusses
+eût
+eussions
+eussiez
+eussent
+
+# Later additions (from Jean-Christophe Deschamps)
+ceci
+cela
+celà
+cet
+cette
+ici
+ils
+les
+leurs
+quel
+quels
+quelle
+quelles
+sans
+soi
+
diff --git a/opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword/it.txt b/opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword/it.txt
new file mode 100644
index 000000000..dbaf5e860
--- /dev/null
+++ b/opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword/it.txt
@@ -0,0 +1,303 @@
+# From https://snowballstem.org/algorithms/italian/stop.txt
+# This file is distributed under the BSD License.
+# See https://snowballstem.org/license.html
+# Also see https://opensource.org/licenses/bsd-license.html
+#  - Encoding was converted to UTF-8.
+#  - This notice was added.
+#  - Comments were changed from `|` to `#` so that this list can be parsed by OpenNLP's stopword loader.
+#
+
+# An Italian stop word list. Comments begin with `#`. Each stop
+# word is at the start of a line.
+
+ad
+al
+allo
+ai
+agli
+all
+agl
+alla
+alle
+con
+col
+coi
+da
+dal
+dallo
+dai
+dagli
+dall
+dagl
+dalla
+dalle
+di
+del
+dello
+dei
+degli
+dell
+degl
+della
+delle
+in
+nel
+nello
+nei
+negli
+nell
+negl
+nella
+nelle
+su
+sul
+sullo
+sui
+sugli
+sull
+sugl
+sulla
+sulle
+per
+tra
+contro
+io
+tu
+lui
+lei
+noi
+voi
+loro
+mio
+mia
+miei
+mie
+tuo
+tua
+tuoi
+tue
+suo
+sua
+suoi
+sue
+nostro
+nostra
+nostri
+nostre
+vostro
+vostra
+vostri
+vostre
+mi
+ti
+ci
+vi
+lo
+la
+li
+le
+gli
+ne
+il
+un
+uno
+una
+ma
+ed
+se
+perché
+anche
+come
+dov
+dove
+che
+chi
+cui
+non
+più
+quale
+quanto
+quanti
+quanta
+quante
+quello
+quelli
+quella
+quelle
+questo
+questi
+questa
+queste
+si
+tutto
+tutti
+
+#  single letter forms:
+
+a
+c
+e
+i
+l
+o
+
+# forms of avere, to have (not including the infinitive):
+
+ho
+hai
+ha
+abbiamo
+avete
+hanno
+abbia
+abbiate
+abbiano
+avrò
+avrai
+avrà
+avremo
+avrete
+avranno
+avrei
+avresti
+avrebbe
+avremmo
+avreste
+avrebbero
+avevo
+avevi
+aveva
+avevamo
+avevate
+avevano
+ebbi
+avesti
+ebbe
+avemmo
+aveste
+ebbero
+avessi
+avesse
+avessimo
+avessero
+avendo
+avuto
+avuta
+avuti
+avute
+
+# forms of essere, to be (not including the infinitive):
+sono
+sei
+è
+siamo
+siete
+sia
+siate
+siano
+sarò
+sarai
+sarà
+saremo
+sarete
+saranno
+sarei
+saresti
+sarebbe
+saremmo
+sareste
+sarebbero
+ero
+eri
+era
+eravamo
+eravate
+erano
+fui
+fosti
+fu
+fummo
+foste
+furono
+fossi
+fosse
+fossimo
+fossero
+essendo
+
+# forms of fare, to do (not including the infinitive, fa, fat-):
+faccio
+fai
+facciamo
+fanno
+faccia
+facciate
+facciano
+farò
+farai
+farà
+faremo
+farete
+faranno
+farei
+faresti
+farebbe
+faremmo
+fareste
+farebbero
+facevo
+facevi
+faceva
+facevamo
+facevate
+facevano
+feci
+facesti
+fece
+facemmo
+faceste
+fecero
+facessi
+facesse
+facessimo
+facessero
+facendo
+
+# forms of stare, to be (not including the infinitive):
+sto
+stai
+sta
+stiamo
+stanno
+stia
+stiate
+stiano
+starò
+starai
+starà
+staremo
+starete
+staranno
+starei
+staresti
+starebbe
+staremmo
+stareste
+starebbero
+stavo
+stavi
+stava
+stavamo
+stavate
+stavano
+stetti
+stesti
+stette
+stemmo
+steste
+stettero
+stessi
+stesse
+stessimo
+stessero
+stando
diff --git a/opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword/nl.txt b/opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword/nl.txt
new file mode 100644
index 000000000..805fe2a8f
--- /dev/null
+++ b/opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword/nl.txt
@@ -0,0 +1,121 @@
+# From https://snowballstem.org/algorithms/dutch/stop.txt
+# This file is distributed under the BSD License.
+# See https://snowballstem.org/license.html
+# Also see https://opensource.org/licenses/bsd-license.html
+#  - Encoding was converted to UTF-8.
+#  - This notice was added.
+#  - Comments were changed from `|` to `#` so that this list can be parsed by OpenNLP's stopword loader.
+#
+
+
+# A Dutch stop word list. Comments begin with `#`. Each stop
+# word is at the start of a line.
+
+# This is a ranked list (commonest to rarest) of stopwords derived from
+# a large sample of Dutch text.
+
+# Dutch stop words frequently exhibit homonym clashes. These are indicated
+# clearly below.
+
+de
+en
+van
+ik
+te
+dat
+die
+in
+een
+hij
+het
+niet
+zijn
+is
+was
+op
+aan
+met
+als
+voor
+had
+er
+maar
+om
+hem
+dan
+zou
+of
+wat
+mijn
+men
+dit
+zo
+door
+over
+ze
+zich
+bij
+ook
+tot
+je
+mij
+uit
+der
+daar
+haar
+naar
+heb
+hoe
+heeft
+hebben
+deze
+u
+want
+nog
+zal
+me
+zij
+nu
+ge
+geen
+omdat
+iets
+worden
+toch
+al
+waren
+veel
+meer
+doen
+toen
+moet
+ben
+zonder
+kan
+hun
+dus
+alles
+onder
+ja
+eens
+hier
+wie
+werd
+altijd
+doch
+wordt
+wezen
+kunnen
+ons
+zelf
+tegen
+na
+reeds
+wil
+kon
+niets
+uw
+iemand
+geweest
+andere
+
diff --git a/opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword/pt.txt b/opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword/pt.txt
new file mode 100644
index 000000000..e54eb08a3
--- /dev/null
+++ b/opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword/pt.txt
@@ -0,0 +1,253 @@
+# From https://snowballstem.org/algorithms/portuguese/stop.txt
+# This file is distributed under the BSD License.
+# See https://snowballstem.org/license.html
+# Also see https://opensource.org/licenses/bsd-license.html
+#  - Encoding was converted to UTF-8.
+#  - This notice was added.
+#  - Comments were changed from `|` to `#` so that this list can be parsed by OpenNLP's stopword loader.
+#
+
+# A Portuguese stop word list. Comments begin with `#`. Each stop
+# word is at the start of a line.
+
+
+# The following is a ranked list (commonest to rarest) of stopwords
+# deriving from a large sample of text.
+
+# Extra words have been added at the end.
+
+de
+a
+o
+que
+e
+do
+da
+em
+um
+para
+# é          from SER
+com
+não
+uma
+os
+no
+se
+na
+por
+mais
+as
+dos
+como
+mas
+# foi        from SER
+ao
+ele
+das
+# tem        from TER
+à
+seu
+sua
+ou
+# ser        from SER
+quando
+muito
+# há         from HAV
+nos
+já
+# está       from EST
+eu
+também
+só
+pelo
+pela
+até
+isso
+ela
+entre
+# era        from SER
+depois
+sem
+mesmo
+aos
+# ter        from TER
+seus
+quem
+nas
+me
+esse
+eles
+# estão      from EST
+você
+# tinha      from TER
+# foram      from SER
+essa
+num
+nem
+suas
+meu
+às
+minha
+# têm        from TER
+numa
+pelos
+elas
+# havia      from HAV
+# seja       from SER
+qual
+# será       from SER
+nós
+# tenho      from TER
+lhe
+deles
+essas
+esses
+pelas
+este
+# fosse      from SER
+dele
+
+# other words. There are many contractions such as naquele = em+aquele,
+# mo = me+o, but they are rare.
+# Indefinite article plural forms are also rare.
+
+tu
+te
+vocês
+vos
+lhes
+meus
+minhas
+teu
+tua
+teus
+tuas
+nosso
+nossa
+nossos
+nossas
+
+dela
+delas
+
+esta
+estes
+estas
+aquele
+aquela
+aqueles
+aquelas
+isto
+aquilo
+
+# forms of estar, to be (not including the infinitive):
+estou
+está
+estamos
+estão
+estive
+esteve
+estivemos
+estiveram
+estava
+estávamos
+estavam
+estivera
+estivéramos
+esteja
+estejamos
+estejam
+estivesse
+estivéssemos
+estivessem
+estiver
+estivermos
+estiverem
+
+# forms of haver, to have (not including the infinitive):
+hei
+há
+havemos
+hão
+houve
+houvemos
+houveram
+houvera
+houvéramos
+haja
+hajamos
+hajam
+houvesse
+houvéssemos
+houvessem
+houver
+houvermos
+houverem
+houverei
+houverá
+houveremos
+houverão
+houveria
+houveríamos
+houveriam
+
+# forms of ser, to be (not including the infinitive):
+sou
+somos
+são
+era
+éramos
+eram
+fui
+foi
+fomos
+foram
+fora
+fôramos
+seja
+sejamos
+sejam
+fosse
+fôssemos
+fossem
+for
+formos
+forem
+serei
+será
+seremos
+serão
+seria
+seríamos
+seriam
+
+# forms of ter, to have (not including the infinitive):
+tenho
+tem
+temos
+têm
+tinha
+tínhamos
+tinham
+tive
+teve
+tivemos
+tiveram
+tivera
+tivéramos
+tenha
+tenhamos
+tenham
+tivesse
+tivéssemos
+tivessem
+tiver
+tivermos
+tiverem
+terei
+terá
+teremos
+terão
+teria
+teríamos
+teriam
diff --git a/opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword/ru.txt b/opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword/ru.txt
new file mode 100644
index 000000000..311f57b0e
--- /dev/null
+++ b/opennlp-core/opennlp-runtime/src/main/resources/opennlp/tools/stopword/ru.txt
@@ -0,0 +1,244 @@
+# From https://snowballstem.org/algorithms/russian/stop.txt
+# This file is distributed under the BSD License.
+# See https://snowballstem.org/license.html
+# Also see https://opensource.org/licenses/bsd-license.html
+#  - Encoding was converted to UTF-8.
+#  - This notice was added.
+#  - Comments were changed from `|` to `#` so that this list can be parsed by OpenNLP's stopword loader.
+#
+
+
+# a russian stop word list. comments begin with `#`. each stop
+# word is at the start of a line.
+
+# this is a ranked list (commonest to rarest) of stopwords derived from
+# a large text sample.
+
+# letter `ё' is translated to `е'.
+
+и
+в
+во
+не
+что
+он
+на
+я
+с
+со
+как
+а
+то
+все
+она
+так
+его
+но
+да
+ты
+к
+у
+же
+вы
+за
+бы
+по
+только
+ее
+мне
+было
+вот
+от
+меня
+еще
+нет
+о
+из
+ему
+теперь
+когда
+даже
+ну
+вдруг
+ли
+если
+уже
+или
+ни
+быть
+был
+него
+до
+вас
+нибудь
+опять
+уж
+вам
+сказал
+ведь
+там
+потом
+себя
+ничего
+ей
+может
+они
+тут
+где
+есть
+надо
+ней
+для
+мы
+тебя
+их
+чем
+была
+сам
+чтоб
+без
+будто
+человек
+чего
+раз
+тоже
+себе
+под
+жизнь
+будет
+ж
+тогда
+кто
+этот
+говорил
+того
+потому
+этого
+какой
+совсем
+ним
+здесь
+этом
+один
+почти
+мой
+тем
+чтобы
+нее
+кажется
+сейчас
+были
+куда
+зачем
+сказать
+всех
+никогда
+сегодня
+можно
+при
+наконец
+два
+об
+другой
+хоть
+после
+над
+больше
+тот
+через
+эти
+нас
+про
+всего
+них
+какая
+много
+разве
+сказала
+три
+эту
+моя
+впрочем
+хорошо
+свою
+этой
+перед
+иногда
+лучше
+чуть
+том
+нельзя
+такой
+им
+более
+всегда
+конечно
+всю
+между
+
+
+# b: some paradigms
+#
+# personal pronouns
+#
+# я  меня  мне  мной  [мною]
+# ты  тебя  тебе  тобой  [тобою]
+# он  его  ему  им  [него, нему, ним]
+# она  ее  ей  ею  [нее, ней, нею]
+# оно  его  ему  им  [него, нему, ним]
+#
+# мы  нас  нам  нами
+# вы  вас  вам  вами
+# они  их  им  ими  [них, ним, ними]
+#
+#   себя  себе  собой   [собою]
+#
+# demonstrative pronouns: этот (this), тот (that)
+#
+# этот  эта  это  эти
+# этого  эту  это  эти
+# этого  этой  этого  этих
+# этому  этой  этому  этим
+# этим  этой  этим  [этою]  этими
+# этом  этой  этом  этих
+#
+# тот  та  то  те
+# того  ту  то  те
+# того  той  того  тех
+# тому  той  тому  тем
+# тем  той  тем  [тою]  теми
+# том  той  том  тех
+#
+# determinative pronouns
+#
+# (a) весь (all)
+#
+# весь  вся  все  все
+# всего  всю  все  все
+# всего  всей  всего  всех
+# всему  всей  всему  всем
+# всем  всей  всем  [всею]  всеми
+# всем  всей  всем  всех
+#
+# (b) сам (himself etc)
+#
+# сам  сама  само  сами
+# самого саму  само  самих
+# самого самой самого  самих
+# самому самой самому  самим
+# самим  самой  самим  [самою]  самими
+# самом самой самом  самих
+#
+# stems of verbs `to be', `to have', `to do' and modal
+#
+# быть  бы  буд  быв  есть  суть
+# име
+# дел
+# мог   мож  мочь
+# уме
+# хоч  хот
+# долж
+# можн
+# нужн
+# нельзя
+
diff --git a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/stopword/DictionaryStopwordFilterTest.java b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/stopword/DictionaryStopwordFilterTest.java
new file mode 100644
index 000000000..8f4683603
--- /dev/null
+++ b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/stopword/DictionaryStopwordFilterTest.java
@@ -0,0 +1,467 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.stopword;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.UncheckedIOException;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.Set;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import opennlp.tools.dictionary.Dictionary;
+import opennlp.tools.util.StringList;
+
+public class DictionaryStopwordFilterTest {
+
+  private static DictionaryStopwordFilter empty() {
+    return withEntries(); // no entries passed, so the builder is built without any add(...) call
+  }
+
+  private static DictionaryStopwordFilter withEntries(final String[]... entries) {
+    final DictionaryStopwordFilter.Builder builder = DictionaryStopwordFilter.builder();
+    for (int i = 0; i < entries.length; i++) { // each array is registered as one entry
+      builder.add(entries[i]);
+    }
+    return builder.build();
+  }
+
+  @Test
+  void testEmptyBuilderProducesCaseInsensitiveEmptyFilter() {
+    final DictionaryStopwordFilter untouched = DictionaryStopwordFilter.builder().build();
+    Assertions.assertFalse(untouched.isCaseSensitive()); // case-insensitive unless requested
+    Assertions.assertTrue(untouched.stopwords().isEmpty());
+    Assertions.assertFalse(untouched.isStopword("the"));
+  }
+
+  /** Without an explicit case setting, an entry matches tokens in any letter case. */
+  @Test
+  void testCaseInsensitiveMatching() {
+    final DictionaryStopwordFilter stopwords =
+        DictionaryStopwordFilter.builder().add("the").build();
+    for (final String variant : new String[] {"the", "THE", "The"}) {
+      Assertions.assertTrue(stopwords.isStopword(variant));
+    }
+  }
+
+  /** With case sensitivity enabled, only the exact casing of the registered entry matches. */
+  @Test
+  void testCaseSensitiveMatching() {
+    final DictionaryStopwordFilter.Builder builder = DictionaryStopwordFilter.builder();
+    final DictionaryStopwordFilter strict = builder.caseSensitive(true).add("the").build();
+    Assertions.assertTrue(strict.isCaseSensitive());
+    Assertions.assertTrue(strict.isStopword("the"));
+    for (final String otherCasing : new String[] {"The", "THE"}) {
+      Assertions.assertFalse(strict.isStopword(otherCasing));
+    }
+  }
+
+  /** Single-token stopwords are removed while the surviving tokens keep their relative order. */
+  @Test
+  void testFilterPreservesOrderAndDropsOneGramStopwords() {
+    final DictionaryStopwordFilter articles =
+        DictionaryStopwordFilter.builder().add("the").add("a").build();
+
+    final String[] sentence = {"the", "quick", "brown", "fox", "jumps", "over", "a", "lazy", "dog"};
+    final String[] remaining = articles.filter(sentence);
+
+    Assertions.assertArrayEquals(
+        new String[] {"quick", "brown", "fox", "jumps", "over", "lazy", "dog"},
+        remaining);
+  }
+
+  /** An entry that is added and then removed on the same builder is absent from the filter. */
+  @Test
+  void testBuilderRemoveUndoesAdd() {
+    final DictionaryStopwordFilter.Builder builder = DictionaryStopwordFilter.builder();
+    final DictionaryStopwordFilter afterUndo = builder.add("foo").remove("foo").build();
+    final boolean stillListed = afterUndo.isStopword("foo");
+    Assertions.assertFalse(stillListed);
+  }
+
+  @Test
+  void testBuilderAddAllAndRemoveAll() {
+    // Bulk add registers every listed entry.
+    final DictionaryStopwordFilter populated = DictionaryStopwordFilter.builder()
+        .addAll(Arrays.asList(new String[] {"alpha"}, new String[] {"beta"})).build();
+    Assertions.assertTrue(populated.isStopword("alpha"));
+    Assertions.assertTrue(populated.isStopword("beta"));
+    // Bulk removal of the same entries leaves neither of them registered.
+    final DictionaryStopwordFilter cleared = DictionaryStopwordFilter.builder()
+        .addAll(Arrays.asList(new String[] {"alpha"}, new String[] {"beta"}))
+        .removeAll(Arrays.asList(new String[] {"alpha"}, new String[] {"beta"})).build();
+    for (final String word : new String[] {"alpha", "beta"}) {
+      Assertions.assertFalse(cleared.isStopword(word));
+    }
+  }
+
+  /** A two-token entry is a stopword only as a whole; its tokens alone are not members. */
+  @Test
+  void testMultiWordIsStopwordAndIndividualTokensNotMembers() {
+    final DictionaryStopwordFilter bigramOnly =
+        DictionaryStopwordFilter.builder().add("of", "the").build();
+    Assertions.assertTrue(bigramOnly.isStopword("of", "the"));
+    for (final String single : new String[] {"of", "the"}) {
+      Assertions.assertFalse(bigramOnly.isStopword(single));
+    }
+  }
+
+  @Test
+  void testFilterDropsNGramMatches() {
+    // A registered bigram is dropped as a unit, leaving only the trailing token.
+    final DictionaryStopwordFilter bigram = withEntries(new String[] {"of", "the"});
+    Assertions.assertArrayEquals(new String[] {"cat"}, bigram.filter(new String[] {"of", "the", "cat"}));
+  }
+
+  /** With both "of" and "of the" registered, the two-token entry wins, so "the" is consumed too. */
+  @Test
+  void testFilterPrefersLongestMatchGreedy() {
+    final String[] tokens = {"of", "the", "cat"};
+    final String[] kept = withEntries(new String[] {"of"}, new String[] {"of", "the"}).filter(tokens);
+    Assertions.assertArrayEquals(new String[] {"cat"}, kept);
+  }
+
+  /** One-token and two-token entries are both removed by the same filter call. */
+  @Test
+  void testFilterMixedOneAndTwoGramEntries() {
+    final String[][] entries = {{"the"}, {"in", "spite"}};
+    final String[] tokens = {"the", "cat", "sat", "in", "spite", "of", "rain"};
+
+    final String[] kept = withEntries(entries).filter(tokens);
+
+    Assertions.assertArrayEquals(new String[] {"cat", "sat", "of", "rain"}, kept);
+  }
+
+  @Test
+  void testFilterNGramAtStartOfInput() {
+    final String[] phrase = {"as", "well", "as"}; // registered as one three-token entry
+    final String[] kept = withEntries(phrase).filter(new String[] {"as", "well", "as", "cats"});
+    Assertions.assertArrayEquals(new String[] {"cats"}, kept);
+  }
+
+  @Test
+  void testFilterNGramAtEndOfInput() {
+    // The bigram occupies the last two positions, so only the leading token remains.
+    final DictionaryStopwordFilter bigram = withEntries(new String[] {"of", "the"});
+    Assertions.assertArrayEquals(new String[] {"king"}, bigram.filter(new String[] {"king", "of", "the"}));
+  }
+
+  @Test
+  void testFilterNGramInMiddleOfInput() {
+    final String[] tokens = {"king", "of", "the", "hill"};
+    final String[] kept = withEntries(new String[] {"of", "the"}).filter(tokens);
+    Assertions.assertArrayEquals(new String[] {"king", "hill"}, kept); // neighbours on both sides stay
+  }
+
+  @Test
+  void testFilterPartialTailDoesNotMatch() {
+    // Only the first token of the bigram is present at the end of the input, so nothing is dropped.
+    final DictionaryStopwordFilter bigram = withEntries(new String[] {"of", "the"});
+    Assertions.assertArrayEquals(new String[] {"king", "of"}, bigram.filter(new String[] {"king", "of"}));
+  }
+
+  @Test
+  void testFilterWindowLongerThanInput() {
+    final String[] fourGram = {"a", "b", "c", "d"}; // entry is longer than the whole input below
+    final String[] kept = withEntries(fourGram).filter(new String[] {"a", "b"});
+    Assertions.assertArrayEquals(new String[] {"a", "b"}, kept);
+  }
+
+  /** Two different bigram entries occurring back to back are both removed. */
+  @Test
+  void testFilterTwoConsecutiveNGramMatches() {
+    final String[][] bigrams = {{"of", "the"}, {"in", "spite"}};
+    final String[] tokens = {"of", "the", "in", "spite", "rain"};
+    final String[] kept = withEntries(bigrams).filter(tokens);
+    Assertions.assertArrayEquals(new String[] {"rain"}, kept);
+  }
+
+  /** A three-token entry is removed as a unit from the middle of the input. */
+  @Test
+  void testFilterThreeGramEntry() {
+    final String[] tokens = {"won", "in", "spite", "of", "rain"};
+    final String[] kept = withEntries(new String[] {"in", "spite", "of"}).filter(tokens);
+    Assertions.assertArrayEquals(new String[] {"won", "rain"}, kept);
+  }
+
+  /** When "a b" and "a b c" both match at the same position, the three-token entry is used. */
+  @Test
+  void testFilterLongestMatchWhenShorterOverlapAlsoMatches() {
+    final String[][] overlapping = {{"a", "b"}, {"a", "b", "c"}};
+    final String[] kept = withEntries(overlapping).filter(new String[] {"a", "b", "c", "d"});
+    Assertions.assertArrayEquals(new String[] {"d"}, kept);
+  }
+
+  /** If "a b c" does not match, the shorter "a b" entry at the same position still applies. */
+  @Test
+  void testFilterFallsBackToShorterMatchWhenLongestDoesNotApply() {
+    final String[][] entries = {{"a", "b"}, {"a", "b", "c"}};
+    final String[] kept = withEntries(entries).filter(new String[] {"a", "b", "x", "d"});
+    Assertions.assertArrayEquals(new String[] {"x", "d"}, kept);
+  }
+
+  @Test
+  void testFilterNullElementInterruptsWindow() {
+    final String[] tokens = {"of", null, "the", "cat"}; // the null separates "of" from "the"
+    final String[] kept = withEntries(new String[] {"of", "the"}).filter(tokens);
+    Assertions.assertArrayEquals(new String[] {"of", null, "the", "cat"}, kept);
+  }
+
+  @Test
+  void testFilterLeadingNullPassesThrough() {
+    // The null element is kept in place while the stopword after it is still removed.
+    final String[] kept = withEntries(new String[] {"the"}).filter(new String[] {null, "the", "cat"});
+    Assertions.assertArrayEquals(new String[] {null, "cat"}, kept);
+  }
+
+  /** Multi-token entries match regardless of case when the builder's case setting is untouched. */
+  @Test
+  void testFilterNGramCaseInsensitiveByDefault() {
+    // Deliberately no caseSensitive(...) call, so this pins the builder's default behaviour.
+    final DictionaryStopwordFilter filter = DictionaryStopwordFilter.builder().add("of", "the").build();
+    Assertions.assertFalse(filter.isCaseSensitive());
+    final String[] result = filter.filter(new String[] {"Of", "THE", "cat"});
+    Assertions.assertArrayEquals(new String[] {"cat"}, result);
+  }
+
+  /** A case-sensitive filter removes a multi-token entry only when every token matches exactly. */
+  @Test
+  void testFilterNGramCaseSensitiveHonorsCasing() {
+    final DictionaryStopwordFilter strict =
+        DictionaryStopwordFilter.builder().caseSensitive(true).add("of", "the").build();
+
+    // Differently cased tokens are left untouched.
+    final String[] mixedCase = {"Of", "THE", "cat"};
+    Assertions.assertArrayEquals(new String[] {"Of", "THE", "cat"}, strict.filter(mixedCase));
+    // Exactly cased tokens are removed.
+    Assertions.assertArrayEquals(new String[] {"cat"}, strict.filter(new String[] {"of", "the", "cat"}));
+  }
+
+  @Test
+  void testFilterDoesNotEatRegisteredOneGramAfterAddingTwoGram() {
+    final String[] tokens = {"king", "of", "rain"}; // "of" appears without its bigram partner "the"
+    final String[] kept = withEntries(new String[] {"of", "the"}).filter(tokens);
+    Assertions.assertArrayEquals(new String[] {"king", "of", "rain"}, kept);
+  }
+
+  @Test
+  void testFilterEmptyDictionaryKeepsAllTokens() {
+    final String[] tokens = {"the", "quick", "brown", "fox"};
+    final String[] kept = empty().filter(tokens); // no entries registered, so nothing can match
+    Assertions.assertArrayEquals(new String[] {"the", "quick", "brown", "fox"}, kept);
+  }
+
+  @Test
+  void testFilterAdjacentSameNGramMatchesBoth() {
+    final String[] repeated = {"of", "the", "of", "the", "end"}; // same bigram twice in a row
+    final String[] kept = withEntries(new String[] {"of", "the"}).filter(repeated);
+    Assertions.assertArrayEquals(new String[] {"end"}, kept);
+  }
+
+  @Test
+  void testFilterNGramMatchAfterUnmatchedToken() {
+    final String[] tokens = {"x", "of", "the", "y"}; // bigram starts after a non-matching token
+    final String[] kept = withEntries(new String[] {"of", "the"}).filter(tokens);
+    Assertions.assertArrayEquals(new String[] {"x", "y"}, kept);
+  }
+
+  @Test
+  void testFilterReturnsNewArrayInstance() {
+    final String[] source = {"the", "cat"};
+    final String[] filtered = withEntries(new String[] {"the"}).filter(source);
+    Assertions.assertNotSame(source, filtered);
+    // Mutating the caller's array afterwards must not show through in the returned array.
+    source[1] = "dog";
+    Assertions.assertArrayEquals(new String[] {"cat"}, filtered);
+  }
+
+  @Test
+  void testFilterInputNullThrowsIllegalArgument() {
+    final DictionaryStopwordFilter emptyFilter = empty();
+    // A null token array must be rejected with an exception.
+    Assertions.assertThrows(IllegalArgumentException.class, () -> emptyFilter.filter(null));
+  }
+
+  @Test
+  void testInputStreamConstructorParsesBlanksCommentsAndMultiWordLines() throws Exception {
+    final String contents = String.join("\n",
+        "# this is a comment header", "",
+        "the",
+        "  and  ",
+        "# another comment",
+        "of the", "",
+        "by", "");
+
+    final DictionaryStopwordFilter parsed;
+    try (InputStream in = new ByteArrayInputStream(contents.getBytes(StandardCharsets.UTF_8))) {
+      parsed = new DictionaryStopwordFilter(in, StandardCharsets.UTF_8, false);
+    }
+
+    // Plain, whitespace-padded and multi-word lines are all expected to be registered.
+    Assertions.assertTrue(parsed.isStopword("the"));
+    Assertions.assertTrue(parsed.isStopword("and"));
+    Assertions.assertTrue(parsed.isStopword("by"));
+    Assertions.assertTrue(parsed.isStopword("of", "the"));
+
+    // The comment marker, the empty string and an unlisted word must not match.
+    Assertions.assertFalse(parsed.isStopword("#"));
+    Assertions.assertFalse(parsed.isStopword(""));
+    Assertions.assertFalse(parsed.isStopword("dog"));
+  }
+
+  @Test
+  void testBuilderLoadParsesStream() throws Exception {
+    final byte[] utf8 = "# bundled-style file\nthe\nof the\n".getBytes(StandardCharsets.UTF_8);
+    final DictionaryStopwordFilter built;
+    try (ByteArrayInputStream in = new ByteArrayInputStream(utf8)) {
+      // Entries read from the stream and the entry added by hand end up in one filter.
+      built = DictionaryStopwordFilter.builder()
+          .load(in, StandardCharsets.UTF_8)
+          .add("extra")
+          .build();
+    }
+    Assertions.assertTrue(built.isStopword("the"));
+    Assertions.assertTrue(built.isStopword("of", "the"));
+    Assertions.assertTrue(built.isStopword("extra"));
+  }
+
+  @Test
+  void testStopwordsViewIsUnmodifiable() {
+    final Set<String> exposed = withEntries(new String[] {"the"}).stopwords();
+    Assertions.assertTrue(exposed.contains("the"));
+    // Callers must not be able to add entries through the returned set.
+    Assertions.assertThrows(UnsupportedOperationException.class, () -> exposed.add("foo"));
+  }
+
+  @Test
+  void testFilterOnEmptyInput() {
+    final String[] noTokens = new String[0];
+    final String[] filtered = withEntries(new String[] {"the"}).filter(noTokens);
+    Assertions.assertEquals(0, filtered.length);  // empty in, empty out
+  }
+
+  @Test
+  void testDictionaryConstructorDefensiveCopy() {
+    final Dictionary backing = new Dictionary(false);
+    backing.put(new StringList("the"));
+    final DictionaryStopwordFilter snapshot = new DictionaryStopwordFilter(backing);
+
+    // An entry added to the dictionary after construction must stay invisible to the filter.
+    backing.put(new StringList("foo"));
+    Assertions.assertFalse(snapshot.isStopword("foo"));
+    Assertions.assertTrue(snapshot.isStopword("the"));
+  }
+
+  @Test
+  void testNullArgsToInputStreamConstructorThrowIae() {
+    Assertions.assertThrows(IllegalArgumentException.class,  // null stream
+        () -> new DictionaryStopwordFilter(null, StandardCharsets.UTF_8, false));
+    Assertions.assertThrows(IllegalArgumentException.class,  // null charset
+        () -> new DictionaryStopwordFilter(new ByteArrayInputStream(new byte[0]), null, false));
+  }
+
+  @Test
+  void testNullSourceToDictionaryConstructorThrowsIae() {
+    Assertions.assertThrows(IllegalArgumentException.class,  // null Dictionary argument
+        () -> new DictionaryStopwordFilter((Dictionary) null));
+  }
+
+  @Test
+  void testBuilderAddRejectsNullOrEmpty() {
+    Assertions.assertThrows(IllegalArgumentException.class,  // null token array
+        () -> DictionaryStopwordFilter.builder().add((String[]) null));
+    Assertions.assertThrows(IllegalArgumentException.class,  // zero-length token array
+        () -> DictionaryStopwordFilter.builder().add(new String[0]));
+  }
+
+  @Test
+  void testBuilderRemoveRejectsNullOrEmpty() {
+    Assertions.assertThrows(IllegalArgumentException.class,  // null token array
+        () -> DictionaryStopwordFilter.builder().remove((String[]) null));
+    Assertions.assertThrows(IllegalArgumentException.class,  // zero-length token array
+        () -> DictionaryStopwordFilter.builder().remove(new String[0]));
+  }
+
+  @Test
+  void testLoadUncheckedSuccessfullyParsesStream() {
+    final byte[] utf8 = "# header\nthe\nof the\nand\n".getBytes(StandardCharsets.UTF_8);
+    // The implicit close() declares IOException, which the catch below converts.
+    try (ByteArrayInputStream in = new ByteArrayInputStream(utf8)) {
+      final DictionaryStopwordFilter loaded =
+          DictionaryStopwordFilter.loadUnchecked(in, StandardCharsets.UTF_8, false);
+      Assertions.assertTrue(loaded.isStopword("the"));
+      Assertions.assertTrue(loaded.isStopword("and"));
+      Assertions.assertTrue(loaded.isStopword("of", "the"));
+      Assertions.assertFalse(loaded.isStopword("dog"));
+    } catch (IOException e) {
+      throw new AssertionError("close() should not have thrown", e);
+    }
+  }
+
+  @Test
+  void testLoadUncheckedNullArgsThrowIae() {
+    Assertions.assertThrows(IllegalArgumentException.class,  // null stream
+        () -> DictionaryStopwordFilter.loadUnchecked(
+            null, StandardCharsets.UTF_8, false));
+    Assertions.assertThrows(IllegalArgumentException.class,  // null charset
+        () -> DictionaryStopwordFilter.loadUnchecked(
+            new ByteArrayInputStream(new byte[0]), null, false));
+  }
+
+  @Test
+  void testLoadUncheckedWrapsIoExceptionAsUnchecked() {
+    final InputStream failing = new InputStream() {
+      @Override
+      public int read() throws IOException {
+        throw new IOException("simulated read failure");  // every read attempt fails
+      }
+    };
+    final UncheckedIOException thrown = Assertions.assertThrows(UncheckedIOException.class,
+        () -> DictionaryStopwordFilter.loadUnchecked(failing, StandardCharsets.UTF_8, false));
+    Assertions.assertNotNull(thrown.getCause());  // the original IOException is kept as cause
+    Assertions.assertEquals("simulated read failure", thrown.getCause().getMessage());
+  }
+
+  @Test
+  void testLoadUncheckedRespectsCaseSensitivity() {
+    final byte[] utf8 = "The\n".getBytes(StandardCharsets.UTF_8);
+    // Case-insensitive load: both casings of the single entry match.
+    try (ByteArrayInputStream in = new ByteArrayInputStream(utf8)) {
+      final DictionaryStopwordFilter insensitive =
+          DictionaryStopwordFilter.loadUnchecked(in, StandardCharsets.UTF_8, false);
+      Assertions.assertTrue(insensitive.isStopword("the"));
+      Assertions.assertTrue(insensitive.isStopword("The"));
+    } catch (IOException e) {
+      throw new AssertionError(e);
+    }
+
+    // Case-sensitive load: only the exact casing from the list matches.
+    try (ByteArrayInputStream in = new ByteArrayInputStream(utf8)) {
+      final DictionaryStopwordFilter sensitive =
+          DictionaryStopwordFilter.loadUnchecked(in, StandardCharsets.UTF_8, true);
+      Assertions.assertTrue(sensitive.isStopword("The"));
+      Assertions.assertFalse(sensitive.isStopword("the"));
+    } catch (IOException e) {
+      throw new AssertionError(e);
+    }
+  }
+}
diff --git a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/stopword/StopwordFilterStreamTest.java b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/stopword/StopwordFilterStreamTest.java
new file mode 100644
index 000000000..23f3a8311
--- /dev/null
+++ b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/stopword/StopwordFilterStreamTest.java
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.stopword;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Set;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import opennlp.tools.util.CollectionObjectStream;
+import opennlp.tools.util.ObjectStream;
+
+/**
+ * Unit tests for {@link StopwordFilterStream}.
+ */
+public class StopwordFilterStreamTest {
+
+  private static final Set<String> STOP = Set.of("the", "quick");
+
+  private static StopwordFilter newFilter() {
+    // Minimal in-test StopwordFilter: single tokens are matched case-insensitively
+    // against STOP; multi-token sequences are never treated as stopwords.
+    return new StopwordFilter() {
+      @Override
+      public boolean isStopword(CharSequence token) {
+        // Locale.ROOT keeps the lower-casing independent of the JVM default locale.
+        return token != null && STOP.contains(token.toString().toLowerCase(java.util.Locale.ROOT));
+      }
+
+      @Override
+      public boolean isStopword(String... tokens) {
+        // Only a sequence of exactly one token can match; null, empty and longer inputs do not.
+        return tokens != null && tokens.length == 1 && isStopword((CharSequence) tokens[0]);
+      }
+
+      @Override
+      public String[] filter(String[] tokens) {
+        // Lenient on null input: report no tokens instead of throwing.
+        if (tokens == null) {
+          return new String[0];
+        }
+        // Keep, in their original order, all tokens that are not single-word stopwords.
+        List<String> kept = new ArrayList<>(tokens.length);
+        for (String t : tokens) {
+          if (!isStopword((CharSequence) t)) {
+            kept.add(t);
+          }
+        }
+        return kept.toArray(new String[0]);
+      }
+
+      @Override
+      public boolean isCaseSensitive() {
+        return false;
+      }
+
+      @Override
+      public Set<String> stopwords() {
+        return Collections.unmodifiableSet(STOP);
+      }
+    };
+  }
+
+  private static ObjectStream<String[]> samplesStream() {
+    final String[] first = {"The", "quick", "brown", "fox"};
+    final String[] second = {"The", "lazy", "dog"};
+    // Two token arrays in a fixed order, wrapped as an ObjectStream.
+    return new CollectionObjectStream<>(Arrays.asList(first, second));
+  }
+
+  @Test
+  void readReturnsFilteredArraysInOrderAndThenNull() throws IOException {
+    try (StopwordFilterStream filtered = new StopwordFilterStream(samplesStream(), newFilter())) {
+      Assertions.assertArrayEquals(new String[] {"brown", "fox"}, filtered.read());
+      Assertions.assertArrayEquals(new String[] {"lazy", "dog"}, filtered.read());
+      Assertions.assertNull(filtered.read());  // both samples consumed
+    }
+  }
+
+  @Test
+  void resetRewindsToFirstFilteredArray() throws IOException {
+    try (StopwordFilterStream filtered = new StopwordFilterStream(samplesStream(), newFilter())) {
+      // Drain the stream completely first.
+      Assertions.assertArrayEquals(new String[] {"brown", "fox"}, filtered.read());
+      Assertions.assertArrayEquals(new String[] {"lazy", "dog"}, filtered.read());
+      Assertions.assertNull(filtered.read());
+      filtered.reset();
+      // After the reset, reading starts over at the first sample.
+      Assertions.assertArrayEquals(new String[] {"brown", "fox"}, filtered.read());
+    }
+  }
+
+  @Test
+  void nullSamplesThrowsIae() {
+    Assertions.assertThrows(IllegalArgumentException.class,  // null sample stream
+        () -> new StopwordFilterStream(null, newFilter()));
+  }
+
+  @Test
+  void nullFilterThrowsIae() {
+    Assertions.assertThrows(IllegalArgumentException.class,  // null filter
+        () -> new StopwordFilterStream(samplesStream(), null));
+  }
+}
diff --git a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/stopword/StopwordFilteringTokenizerTest.java b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/stopword/StopwordFilteringTokenizerTest.java
new file mode 100644
index 000000000..7ecf4d246
--- /dev/null
+++ b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/stopword/StopwordFilteringTokenizerTest.java
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.stopword;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Set;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import opennlp.tools.tokenize.SimpleTokenizer;
+import opennlp.tools.tokenize.Tokenizer;
+import opennlp.tools.util.Span;
+
+/**
+ * Unit tests for {@link StopwordFilteringTokenizer}.
+ */
+public class StopwordFilteringTokenizerTest {
+
+  private static final Set<String> STOP = Set.of("the", "quick");
+
+  /**
+   * Creates a minimal {@link StopwordFilter} for the basic tests so that they
+   * do not depend on a concrete filter implementation.
+   * <p>
+   * Single tokens are matched case-insensitively against {@code STOP};
+   * multi-token sequences are never reported as stopwords.
+   */
+  private static StopwordFilter newFilter() {
+    return new StopwordFilter() {
+      @Override
+      public boolean isStopword(CharSequence token) {
+        // Locale.ROOT keeps the lower-casing independent of the JVM default locale.
+        return token != null && STOP.contains(token.toString().toLowerCase(java.util.Locale.ROOT));
+      }
+
+      @Override
+      public boolean isStopword(String... tokens) {
+        // Only a sequence of exactly one token can match; null, empty and longer inputs do not.
+        return tokens != null && tokens.length == 1 && isStopword((CharSequence) tokens[0]);
+      }
+
+      @Override
+      public String[] filter(String[] tokens) {
+        // Lenient on null input: report no tokens instead of throwing.
+        if (tokens == null) {
+          return new String[0];
+        }
+        // Keep, in their original order, all tokens that are not single-word stopwords.
+        List<String> kept = new ArrayList<>(tokens.length);
+        for (String t : tokens) {
+          if (!isStopword((CharSequence) t)) {
+            kept.add(t);
+          }
+        }
+        return kept.toArray(new String[0]);
+      }
+
+      @Override
+      public boolean isCaseSensitive() {
+        return false;
+      }
+
+      @Override
+      public Set<String> stopwords() {
+        return Collections.unmodifiableSet(STOP);
+      }
+    };
+  }
+
+  @Test
+  void tokenizeRemovesStopwords() {
+    final Tokenizer filtering =
+        new StopwordFilteringTokenizer(SimpleTokenizer.INSTANCE, newFilter());
+    // "The" and "quick" are in STOP (matched ignoring case), so only two tokens remain.
+    final String[] expected = {"brown", "fox"};
+    Assertions.assertArrayEquals(expected, filtering.tokenize("The quick brown fox"));
+  }
+
+  @Test
+  void tokenizePosKeepsOffsetsForNonStopwordSpans() {
+    final String text = "The quick brown fox";
+    final Tokenizer filtering =
+        new StopwordFilteringTokenizer(SimpleTokenizer.INSTANCE, newFilter());
+    final Span[] kept = filtering.tokenizePos(text);
+    Assertions.assertEquals(2, kept.length);
+    // Offsets still index into the unfiltered text: "brown" is 10..15, "fox" is 16..19.
+    Assertions.assertEquals("brown", kept[0].getCoveredText(text).toString());
+    Assertions.assertEquals(10, kept[0].getStart());
+    Assertions.assertEquals(15, kept[0].getEnd());
+    Assertions.assertEquals("fox", kept[1].getCoveredText(text).toString());
+    Assertions.assertEquals(16, kept[1].getStart());
+    Assertions.assertEquals(19, kept[1].getEnd());
+  }
+
+  @Test
+  void tokenizePosReturnsEmptyArrayWhenAllAreStopwords() {
+    final Tokenizer filtering =
+        new StopwordFilteringTokenizer(SimpleTokenizer.INSTANCE, newFilter());
+    // Every token of the input is in STOP, so no span survives.
+    final Span[] kept = filtering.tokenizePos("the quick");
+    Assertions.assertEquals(0, kept.length);
+  }
+
+  @Test
+  void tokenizePosDropsMultiWordEntry() {
+    final StopwordFilter bigram = DictionaryStopwordFilter.builder().add("new", "york").build();
+    final Tokenizer filtering = new StopwordFilteringTokenizer(SimpleTokenizer.INSTANCE, bigram);
+
+    final String text = "I love New York city";
+    final Span[] spans = filtering.tokenizePos(text);
+
+    // Recover the surviving token strings from the returned spans.
+    final String[] covered = new String[spans.length];
+    int next = 0;
+    for (final Span span : spans) {
+      covered[next++] = span.getCoveredText(text).toString();
+    }
+    // "New" and "York" form a registered multi-word entry and are dropped
+    // together; tokenize() must agree with the span-based result.
+    Assertions.assertArrayEquals(new String[] {"I", "love", "city"}, covered);
+    Assertions.assertArrayEquals(covered, filtering.tokenize(text));
+    // Surviving span offsets still refer to positions in the original string.
+    final Span last = spans[spans.length - 1];
+    Assertions.assertEquals("city", text.substring(last.getStart(), last.getEnd()));
+  }
+
+  @Test
+  void tokenizePosRemovesFinnishParadigmFormsFromBundledList() {
+    // End-to-end check of the bundled Finnish list on the Span-based path: the
+    // pronoun forms (minä, sinä) and the conjunction (ja) must be dropped by
+    // tokenizePos exactly as by tokenize.
+    final Tokenizer finnish =
+        new StopwordFilteringTokenizer(SimpleTokenizer.INSTANCE, StopwordLists.forLanguage("fi"));
+    final String text = "minä ja sinä koira";
+
+    Assertions.assertArrayEquals(new String[] {"koira"}, finnish.tokenize(text));
+
+    final Span[] kept = finnish.tokenizePos(text);
+    Assertions.assertEquals(1, kept.length);
+    final Span koira = kept[0];
+    Assertions.assertEquals("koira", koira.getCoveredText(text).toString());
+    // "koira" follows the 13 characters of "minä ja sinä " and ends with the input.
+    Assertions.assertEquals(13, koira.getStart());
+    Assertions.assertEquals(18, koira.getEnd());
+  }
+
+  @Test
+  void nullDelegateThrowsIae() {
+    Assertions.assertThrows(IllegalArgumentException.class,  // null delegate tokenizer
+        () -> new StopwordFilteringTokenizer(null, newFilter()));
+  }
+
+  @Test
+  void nullFilterThrowsIae() {
+    Assertions.assertThrows(IllegalArgumentException.class,  // null filter
+        () -> new StopwordFilteringTokenizer(SimpleTokenizer.INSTANCE, null));
+  }
+}
diff --git a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/stopword/StopwordListsTest.java b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/stopword/StopwordListsTest.java
new file mode 100644
index 000000000..067e6288f
--- /dev/null
+++ b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/stopword/StopwordListsTest.java
@@ -0,0 +1,175 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.stopword;
+
+import java.util.HashSet;
+import java.util.Set;
+import java.util.stream.Stream;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+import org.junit.jupiter.params.provider.ValueSource;
+
+public class StopwordListsTest {
+
+  /**
+   * Supplies one (language code, stopword) pair per bundled language. Each word
+   * is a very common function word (usually "and" or an article) expected to be
+   * present in the corresponding stopword list shipped with OpenNLP.
+   */
+  static Stream<Arguments> bundledLanguages() {
+    final String[][] pairs = {
+        {"bg", "и"},     // Bulgarian: and
+        {"da", "og"},    // Danish: and
+        {"de", "und"},   // German: and
+        {"en", "the"},   // English: the
+        {"es", "y"},     // Spanish: and
+        {"fi", "ja"},    // Finnish: and
+        {"fr", "et"},    // French: and
+        {"it", "e"},     // Italian: and
+        {"nl", "de"},    // Dutch: the
+        {"pt", "e"},     // Portuguese: and
+        {"ru", "и"}      // Russian: and
+    };
+    return Stream.of(pairs).map(pair -> Arguments.of(pair[0], pair[1]));
+  }
+
+  @ParameterizedTest(name = "forLanguage(\"{0}\") contains stopword \"{1}\"")
+  @MethodSource("bundledLanguages")
+  void testBundledLanguageContainsKnownStopword(final String code, final String stopword) {
+    final StopwordFilter bundled = StopwordLists.forLanguage(code);
+    Assertions.assertNotNull(bundled, "filter for '" + code + "' must not be null");
+    final String caseMessage = "bundled filter for '" + code + "' should be case-insensitive";
+    Assertions.assertFalse(bundled.isCaseSensitive(), caseMessage);
+    Assertions.assertTrue(bundled.isStopword(stopword),
+        "expected '" + stopword + "' to be a stopword in '" + code + "'");
+  }
+
+  @ParameterizedTest(name = "forLanguage(\"{0}\") loads a non-empty list")
+  @ValueSource(strings = {"bg", "da", "de", "en", "es", "fi", "fr", "it", "nl", "pt", "ru"})
+  void testBundledLanguageLoadsNonEmptyList(final String code) {
+    final Set<String> words = StopwordLists.forLanguage(code).stopwords();
+    final String message = "bundled stopword list for '" + code + "' must not be empty";
+    Assertions.assertFalse(words.isEmpty(), message);
+  }
+
+  @ParameterizedTest(name = "forLanguage(\"{0}\") is case-insensitive: uppercase matches lowercase")
+  @MethodSource("bundledLanguages")
+  void testBundledLanguageIsCaseInsensitive(final String code, final String stopword) {
+    final String shouted = stopword.toUpperCase(java.util.Locale.ROOT);
+    final String message =
+        "expected uppercase '" + shouted + "' to match in '" + code + "' (case-insensitive)";
+    Assertions.assertTrue(StopwordLists.forLanguage(code).isStopword(shouted), message);
+  }
+
+  @Test
+  void testEnglishContainsCommonStopwords() {
+    final StopwordFilter english = StopwordLists.forLanguage("en");
+    for (final String word : new String[] {"the", "and", "of"}) {
+      Assertions.assertTrue(english.isStopword(word));
+    }
+  }
+
+  @Test
+  void testEnglishRejectsContentWord() {
+    final boolean flagged = StopwordLists.forLanguage("en").isStopword("dog");
+    Assertions.assertFalse(flagged);  // "dog" is a content word, not a function word
+  }
+
+  @Test
+  void testUnsupportedLanguageThrows() {
+    // "xx" is a two-letter code for which no stopword list is bundled.
+    Assertions.assertThrows(IllegalArgumentException.class, () -> StopwordLists.forLanguage("xx"));
+  }
+
+  @Test
+  void testInvalidLanguageCodeThrows() {
+    Assertions.assertThrows(IllegalArgumentException.class,  // not a valid language code
+        () -> StopwordLists.forLanguage("not-a-language"));
+  }
+
+  @Test
+  void testThreeLetterCodeMapsToTwoLetterEquivalent() {
+    final StopwordFilter twoLetter = StopwordLists.forLanguage("en");
+    final StopwordFilter threeLetter = StopwordLists.forLanguage("eng");
+
+    // Both codes must expose exactly the same stopword set.
+    Assertions.assertEquals(twoLetter.stopwords(), threeLetter.stopwords());
+
+    // Spot-check two entries through the three-letter filter.
+    Assertions.assertTrue(threeLetter.isStopword("the"));
+    Assertions.assertTrue(threeLetter.isStopword("and"));
+  }
+
+  /**
+   * The Snowball source list prints the Finnish pronoun and determiner
+   * paradigms as whitespace-separated columns. Because OpenNLP's loader reads
+   * whitespace within a line as one multi-word entry, those rows were expanded
+   * to one token per line. This test checks that the individual forms are
+   * registered as single-token stopwords, not just {@code ja}.
+   */
+  @ParameterizedTest(name = "fi stopword \"{0}\"")
+  @ValueSource(strings = {
+      "minä", "sinä", "hän", "me", "te", "he",  // personal pronouns
+      "minun", "sinun", "hänen",                // genitive forms
+      "tämä", "tuo", "se", "nämä", "nuo", "ne", // demonstratives
+      "kuka", "mikä", "ketkä", "mitkä",         // interrogatives
+      "joka", "jotka"                           // relatives
+  })
+  void testFinnishParadigmFormsAreIndividualStopwords(final String form) {
+    // Each form is probed on its own against the bundled Finnish filter.
+    final String message = "expected Finnish form '" + form + "' to be recognized as a stopword";
+    Assertions.assertTrue(StopwordLists.forLanguage("fi").isStopword(form), message);
+  }
+
+  @Test
+  void testForLanguageCachesInstancePerNormalizedCode() {
+    final StopwordFilter first = StopwordLists.forLanguage("en");
+    final StopwordFilter second = StopwordLists.forLanguage("en");
+    final StopwordFilter viaThreeLetter = StopwordLists.forLanguage("eng");
+
+    Assertions.assertSame(first, second,
+        "repeated forLanguage calls should return the cached instance");
+    // A three-letter code normalizes to the same key and shares the cached filter.
+    Assertions.assertSame(first, viaThreeLetter,
+        "three-letter code should resolve to the cached two-letter filter");
+  }
+
+  @Test
+  void testSupportedLanguagesReturnsElevenCodes() {
+    final Set<String> supported = StopwordLists.supportedLanguages();
+    Assertions.assertEquals(11, supported.size());
+
+    final Set<String> expected = Set.of(
+        "bg",
+        "da",
+        "de",
+        "en",
+        "es",
+        "fi",
+        "fr",
+        "it",
+        "nl",
+        "pt",
+        "ru");
+    Assertions.assertEquals(expected, new HashSet<>(supported));
+  }
+}
diff --git a/opennlp-docs/src/docbkx/opennlp.xml b/opennlp-docs/src/docbkx/opennlp.xml
index c40692f10..a857545bb 100644
--- a/opennlp-docs/src/docbkx/opennlp.xml
+++ b/opennlp-docs/src/docbkx/opennlp.xml
@@ -101,6 +101,7 @@ under the License.
 	<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="./langdetect.xml" />
 	<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="./sentdetect.xml"/>
 	<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="./tokenizer.xml" />
+	<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="./stopword.xml" />
 	<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="./namefinder.xml" />
 	<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="./doccat.xml" />
 	<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="./sentiment.xml" />
diff --git a/opennlp-docs/src/docbkx/stopword.xml b/opennlp-docs/src/docbkx/stopword.xml
new file mode 100644
index 000000000..4ece713c7
--- /dev/null
+++ b/opennlp-docs/src/docbkx/stopword.xml
@@ -0,0 +1,334 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V5.0//EN"
+"https://cdn.docbook.org/schema/5.0/dtd/docbook.dtd"[
+]>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+	license agreements. See the NOTICE file distributed with this work for additional
+	information regarding copyright ownership. The ASF licenses this file to
+	you under the Apache License, Version 2.0 (the "License"); you may not use
+	this file except in compliance with the License. You may obtain a copy of
+	the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+	by applicable law or agreed to in writing, software distributed under the
+	License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+	OF ANY KIND, either express or implied. See the License for the specific
+	language governing permissions and limitations under the License. -->
+
+<chapter xml:id="tools.stopword">
+
+	<title>Stopword Filtering</title>
+
+	<section xml:id="tools.stopword.overview">
+		<title>Overview</title>
+		<para>
+			Stopwords are very common words (such as articles, prepositions, pronouns
+			or conjunctions) that carry little semantic content of their own and are
+			frequently removed prior to downstream processing such as indexing,
+			classification, topic modeling, keyword extraction or similarity
+			computation. Filtering stopwords can reduce noise, lower memory
+			footprint and improve the signal-to-noise ratio of subsequent NLP
+			components.
+		</para>
+		<para>
+			OpenNLP provides a small, dependency-free stopword filtering API in the
+			<code>opennlp.tools.stopword</code> package. The central abstraction is
+			the <code>StopwordFilter</code> interface, with a
+			<code>Dictionary</code>-backed default implementation
+			<code>DictionaryStopwordFilter</code>. Bundled stopword lists are
+			available for eleven languages and can be loaded by ISO 639-1 code via
+			the <code>StopwordLists</code> factory. Users may also load custom lists
+			from any <code>InputStream</code>, mix them with the bundled defaults
+			and add or remove individual entries at runtime.
+		</para>
+		<para>
+			OpenNLP further offers a tokenizer decorator
+			(<code>StopwordFilteringTokenizer</code>) and an
+			<code>ObjectStream</code> adapter (<code>StopwordFilterStream</code>) to
+			plug stopword filtering into existing tokenization or training data
+			pipelines, plus a command-line tool for ad-hoc filtering.
+		</para>
+	</section>
+
+	<section xml:id="tools.stopword.bundled">
+		<title>Loading a bundled list</title>
+		<para>
+			A bundled stopword list can be obtained from the
+			<code>StopwordLists</code> factory by passing the desired ISO 639-1
+			language code; a three-letter code such as <code>eng</code> resolves to
+			the same cached filter. The returned <code>DictionaryStopwordFilter</code>
+			is case-insensitive by default.
+			<programlisting language="java">
+<![CDATA[StopwordFilter filter = StopwordLists.forLanguage("en");
+
+boolean isStop = filter.isStopword("the");        // true
+String[] kept  = filter.filter(new String[] {
+    "the", "quick", "brown", "fox"
+});
+// kept => { "quick", "brown", "fox" }]]>
+			</programlisting>
+			The set of supported languages can be queried at runtime:
+			<programlisting language="java">
+<![CDATA[Set<String> supported = StopwordLists.supportedLanguages();]]>
+			</programlisting>
+		</para>
+	</section>
+
+	<section xml:id="tools.stopword.custom">
+		<title>Loading a custom list</title>
+		<para>
+			Custom stopword lists can be loaded from any <code>InputStream</code>
+			via <code>StopwordLists.load</code>. The method takes the input stream,
+			a character set and a flag indicating whether matching should be
+			case-sensitive.
+			<programlisting language="java">
+<![CDATA[try (InputStream in = new FileInputStream("my-stopwords.txt")) {
+  StopwordFilter filter = StopwordLists.load(
+      in, StandardCharsets.UTF_8, false /* case-insensitive */);
+}]]>
+			</programlisting>
+			The expected format is a plain text file, encoded in the character set
+			passed to <code>load</code>, with the following conventions:
+			<itemizedlist>
+				<listitem>
+					<para>One stopword entry per line.</para>
+				</listitem>
+				<listitem>
+					<para>Whitespace on a line separates the tokens of a multi-word
+						entry, i.e. a line containing more than one token defines an
+						n-gram stopword.</para>
+				</listitem>
+				<listitem>
+					<para>Blank lines are ignored.</para>
+				</listitem>
+				<listitem>
+					<para>Lines starting with <code>#</code> are treated as comments
+						and ignored.</para>
+				</listitem>
+			</itemizedlist>
+			A small example file:
+			<programlisting>
+<![CDATA[# my custom English stopwords
+the
+a
+an
+
+# multi-word entries are defined by whitespace
+of the
+in spite of]]>
+			</programlisting>
+		</para>
+	</section>
+
+	<section xml:id="tools.stopword.extending">
+		<title>Extending or overriding defaults</title>
+		<para>
+			A <code>DictionaryStopwordFilter</code> is immutable once constructed.
+			To tailor a bundled list to a specific domain &#8212; for example to add
+			project-specific noise terms or to retain a particular word that would
+			otherwise be filtered &#8212; use the nested
+			<code>DictionaryStopwordFilter.Builder</code>. The builder loads the
+			bundled resource, layers user-supplied <code>add</code> /
+			<code>remove</code> operations on top, and produces a fresh immutable
+			filter from <code>build()</code>.
+			<programlisting language="java">
+<![CDATA[InputStream bundled =
+    StopwordLists.class.getResourceAsStream("/opennlp/tools/stopword/en.txt");
+
+StopwordFilter filter = DictionaryStopwordFilter.builder()
+    .load(bundled, StandardCharsets.UTF_8)
+    .add("foo")            // mark a new word as a stopword
+    .remove("the")         // keep "the" in the output
+    .build();]]>
+			</programlisting>
+			Bulk operations are also available via
+			<code>addAll(Collection&lt;String[]&gt;)</code> and
+			<code>removeAll(Collection&lt;String[]&gt;)</code>, which accept
+			multi-token entries. <code>build()</code> applies all queued additions
+			first, then all queued removals.
+		</para>
+	</section>
+
+	<section xml:id="tools.stopword.multiword">
+		<title>Multi-word stopwords</title>
+		<para>
+			Beyond single tokens, a <code>StopwordFilter</code> can match n-grams.
+			The overloaded <code>isStopword(String...)</code> method accepts a
+			sequence of tokens and returns <code>true</code> if the entire sequence
+			is registered as a multi-word stopword entry.
+			<programlisting language="java">
+<![CDATA[StopwordFilter filter = DictionaryStopwordFilter.builder()
+    .add("of", "the")
+    .add("in", "spite", "of")
+    .build();
+
+filter.isStopword("of", "the");        // true
+filter.isStopword("in", "spite", "of"); // true
+filter.isStopword("of");                // false]]>
+			</programlisting>
+			Multi-word entries are matched as exact, contiguous sequences and are
+			subject to the same case-sensitivity setting as the rest of the
+			dictionary.
+		</para>
+		<para>
+			The <code>filter(String[])</code> method honors both 1-gram and n-gram
+			entries. It performs a greedy, left-to-right window scan: at each
+			position the longest registered window is tried first, then shorter
+			ones; the first window that matches is dropped entirely and scanning
+			resumes after it. If no window matches, the head token is kept and
+			scanning advances by one. Tokens that are <code>null</code> are kept
+			in place and never participate in a window match.
+			<programlisting language="java">
+<![CDATA[StopwordFilter filter = DictionaryStopwordFilter.builder()
+    .add("the")               // 1-gram
+    .add("of", "the")         // 2-gram
+    .add("in", "spite", "of") // 3-gram
+    .build();
+
+String[] kept = filter.filter(new String[] {
+    "the", "king", "of", "the", "hill",
+    "won", "in", "spite", "of", "rain"
+});
+// kept => { "king", "hill", "won", "rain" }]]>
+			</programlisting>
+			When both a shorter and a longer registered entry could match at the
+			same position, the longer match wins. If the longest window does not
+			match, shorter windows down to length one are tried in turn.
+		</para>
+	</section>
+
+	<section xml:id="tools.stopword.tokenizer">
+		<title>Decorating a Tokenizer</title>
+		<para>
+			Any existing <code>Tokenizer</code> implementation can be wrapped in a
+			<code>StopwordFilteringTokenizer</code> to transparently remove
+			stopwords from its output. The decorator delegates tokenization to the
+			wrapped instance and then runs the resulting token array through the
+			provided <code>StopwordFilter</code>.
+			<programlisting language="java">
+<![CDATA[StopwordFilter filter = StopwordLists.forLanguage("en");
+Tokenizer tokenizer = new StopwordFilteringTokenizer(
+    SimpleTokenizer.INSTANCE, filter);
+
+String[] tokens = tokenizer.tokenize("The quick brown fox jumps over the lazy dog");
+// tokens => { "quick", "brown", "fox", "jumps", "lazy", "dog" }]]>
+			</programlisting>
+			The decorated tokenizer can be used anywhere a <code>Tokenizer</code>
+			is expected, including in downstream OpenNLP pipelines.
+		</para>
+	</section>
+
+	<section xml:id="tools.stopword.stream">
+		<title>Streaming usage</title>
+		<para>
+			For training-data and bulk-processing scenarios that already operate on
+			<code>ObjectStream&lt;String[]&gt;</code> (for example tokenized
+			sentences), the <code>StopwordFilterStream</code> adapter provides a
+			drop-in filter that strips stopwords from each emitted token array.
+			<programlisting language="java">
+<![CDATA[StopwordFilter filter = StopwordLists.forLanguage("en");
+
+try (ObjectStream<String[]> tokenStream = ...;
+     ObjectStream<String[]> filtered = new StopwordFilterStream(tokenStream, filter)) {
+
+  String[] tokens;
+  while ((tokens = filtered.read()) != null) {
+    // tokens no longer contains any stopwords
+  }
+}]]>
+			</programlisting>
+			Empty arrays produced by removing every token of a short sentence are
+			preserved so that the stream's record boundaries remain aligned with
+			the source.
+		</para>
+	</section>
+
+	<section xml:id="tools.stopword.cmdline">
+		<title>Command-line usage</title>
+		<para>
+			Stopword filtering is also exposed as a CLI tool. The
+			<code>StopwordFilter</code> command reads whitespace-separated tokens
+			from standard input and writes the non-stopword tokens to standard
+			output. The single argument is either the ISO 639-1 code of a bundled
+			list, or a path to a custom stopword list file (same format as the
+			Java API: one entry per line, with <code>#</code> comments and blank
+			lines ignored, loaded case-insensitively).
+			<screen>
+<![CDATA[$ echo "the quick brown fox" | opennlp StopwordFilter en
+quick brown fox]]>
+			</screen>
+			To use a custom list, pass its file path instead of a language code:
+			<screen>
+<![CDATA[$ echo "the quick brown fox" | opennlp StopwordFilter my-stopwords.txt]]>
+			</screen>
+			A bundled language code takes precedence over a file of the same name;
+			to force loading a file whose name happens to be a language code,
+			qualify it with a path, e.g. <code>./en</code>.
+			The tool is intended for quick interactive checks and for use inside
+			shell pipelines, for example chained behind a tokenizer:
+			<screen>
+<![CDATA[$ opennlp SimpleTokenizer < article.txt | opennlp StopwordFilter en > filtered.txt]]>
+			</screen>
+		</para>
+	</section>
+
+	<section xml:id="tools.stopword.languages">
+		<title>Bundled languages</title>
+		<para>
+			The following stopword lists are bundled with OpenNLP and can be
+			loaded by their ISO 639-1 code through
+			<code>StopwordLists.forLanguage</code>:
+			<informaltable frame="all">
+				<tgroup cols="2">
+					<colspec colname="code" colwidth="1*"/>
+					<colspec colname="lang" colwidth="3*"/>
+					<thead>
+						<row>
+							<entry>ISO 639-1 code</entry>
+							<entry>Language</entry>
+						</row>
+					</thead>
+					<tbody>
+						<row><entry>bg</entry><entry>Bulgarian</entry></row>
+						<row><entry>da</entry><entry>Danish</entry></row>
+						<row><entry>de</entry><entry>German</entry></row>
+						<row><entry>en</entry><entry>English</entry></row>
+						<row><entry>es</entry><entry>Spanish</entry></row>
+						<row><entry>fi</entry><entry>Finnish</entry></row>
+						<row><entry>fr</entry><entry>French</entry></row>
+						<row><entry>it</entry><entry>Italian</entry></row>
+						<row><entry>nl</entry><entry>Dutch</entry></row>
+						<row><entry>pt</entry><entry>Portuguese</entry></row>
+						<row><entry>ru</entry><entry>Russian</entry></row>
+					</tbody>
+				</tgroup>
+			</informaltable>
+		</para>
+		<para>
+			The bundled lists are derived from the stopword files shipped with Apache
+			Lucene (Snowball lists, plus Jacques Savoy's Bulgarian list) and retain
+			their original BSD license; see the <filename>NOTICE</filename> and
+			<filename>LICENSE</filename> files for the attribution and license text.
+		</para>
+	</section>
+
+	<section xml:id="tools.stopword.threadsafety">
+		<title>Thread-safety</title>
+		<para>
+			<code>DictionaryStopwordFilter</code> is immutable once constructed and
+			is therefore safe to share across threads without external
+			synchronization. A filter returned by
+			<code>StopwordLists.forLanguage(...)</code> or assembled via the
+			<code>Builder</code> can be stored in a static field and accessed
+			concurrently from any number of readers.
+		</para>
+		<para>
+			<code>StopwordFilteringTokenizer</code> and
+			<code>StopwordFilterStream</code> are also stateless decorators with
+			only <code>final</code> fields, so they inherit the thread-safety of the
+			components they wrap. When paired with a
+			<code>DictionaryStopwordFilter</code> and a thread-safe delegate
+			tokenizer (e.g. <code>SimpleTokenizer.INSTANCE</code> or
+			<code>WhitespaceTokenizer.INSTANCE</code>) the resulting pipeline is
+			fully thread-safe.
+		</para>
+	</section>
+</chapter>
diff --git a/rat-excludes b/rat-excludes
index 3ad3ae4df..888a3d387 100644
--- a/rat-excludes
+++ b/rat-excludes
@@ -58,3 +58,4 @@ src/test/resources/*.info
 
 <!-- other licence -->
 src/main/java/opennlp/tools/stemmer/snowball/*.java
+src/main/resources/opennlp/tools/stopword/*.txt