fix: Go back to excluding fields in element_text. Remove full LazyHTML.text (#273)

germsvel · web-flow · commit bb3380f56439 · 2025-11-11T09:02:44.000-05:00
What changed? ============= Commits 37bc521 (released in 0.8.3) and d19c6cf (released in 0.9.0) tried to improve what was `Html.inner_text` to be more coherent in text parsing (i.e. what is `Html.element_text` vs `Html.text`). But in doing so, we introduced regressions from the behavior we had in 0.8.2. This commit goes back to behavior that more closely matches 0.8.2 by making `Html.element_text` (what `inner_text` is now called) always include text inside nested tags (with the exception of `select` and `textarea` if they aren't the top-level tag in the query). And we no longer use `LazyHTML.text` at all, since that doesn't seem to exclude newlines the same way our internal parsing does.
diff --git a/lib/phoenix_test/html.ex b/lib/phoenix_test/html.ex
@@ -13,10 +13,6 @@ defmodule PhoenixTest.Html do
     LazyHTML.from_fragment(html)
   end
 
-  def text(%LazyHTML{} = element) do
-    LazyHTML.text(element)
-  end
-
   def element_text(%LazyHTML{} = element) do
     element
     |> LazyHTML.to_tree(skip_whitespace_nodes: true)
@@ -25,8 +21,7 @@ defmodule PhoenixTest.Html do
     |> normalize_whitespace()
   end
 
-  # combination of tags listed in "Text Content" and "Inline Text Semantics" in https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements
-  @text_tags ~w[a abbr b bdo blockquote br cite code dfn dd div dl dt em i figcaption figure hr kbd li mark menu ol p pre q rp rt s samp small span strong sub sup time u ul var wbr]
+  @dont_include_children_tags ~w[select textarea]
   defp text_from_text_nodes(tree, acc \\ "")
 
   defp text_from_text_nodes([], acc), do: acc
@@ -37,7 +32,7 @@ defmodule PhoenixTest.Html do
         text when is_binary(text) ->
           acc <> text
 
-        {tag, _, children} when tag in @text_tags ->
+        {tag, _, children} when tag not in @dont_include_children_tags ->
           acc <> " " <> text_from_text_nodes(children)
 
         {_tag, _, children} ->
diff --git a/lib/phoenix_test/query.ex b/lib/phoenix_test/query.ex
@@ -79,16 +79,9 @@ defmodule PhoenixTest.Query do
       |> Html.parse_fragment()
       |> Html.all(selector)
 
-    text_filter_fun =
-      if selector == "label" do
-        &filter_by_element_text(&1, text, opts)
-      else
-        &filter_by_text(&1, text, opts)
-      end
-
     elements_matched_selector
     |> filter_by_position(opts)
-    |> text_filter_fun.()
+    |> filter_by_element_text(text, opts)
     |> case do
       [] -> {:not_found, elements_matched_selector}
       [found] -> {:found, found}
@@ -542,19 +535,6 @@ defmodule PhoenixTest.Query do
         &(Html.element_text(&1) =~ text)
       end
 
-    Enum.filter(elements, &(&1 |> LazyHTML.filter(":not(select)") |> filter_fun.()))
-  end
-
-  defp filter_by_text(elements, text, opts) do
-    exact_match = Keyword.get(opts, :exact, false)
-
-    filter_fun =
-      if exact_match do
-        &(Html.text(&1) == text)
-      else
-        &(Html.text(&1) =~ text)
-      end
-
     Enum.filter(elements, filter_fun)
   end
 
diff --git a/test/phoenix_test/html_test.exs b/test/phoenix_test/html_test.exs
@@ -4,10 +4,11 @@ defmodule PhoenixTest.HtmlTest do
   alias PhoenixTest.Html
 
   describe "element_text" do
-    test "extracts text from parsed html, removing extra whitespace" do
+    test "extracts text from parsed html, removing extra tags & whitespace" do
       html = """
       <label>
         hello
+        <br />
         <em>world!</em>
       </label>
       """
@@ -20,18 +21,18 @@ defmodule PhoenixTest.HtmlTest do
       assert result == "hello world!"
     end
 
-    test "extracts the text from the top level element, but includes known text elements)" do
+    test "extracts the text from the top level element along with nested text" do
       html = """
       <div>
         hello
         <a href="/">elixir</a>
         <span>and</span>
         <small>phoenix</small>
-        <em>world!</em>
-
-        <label>excluded text</label>
-        <form>also excluded</form>
-        <textarea>also excluded</textarea>
+        </br>
+        <em>
+          test
+          world!
+        </em>
       </div>
       """
 
@@ -40,7 +41,7 @@ defmodule PhoenixTest.HtmlTest do
         |> Html.parse_fragment()
         |> Html.element_text()
 
-      assert result == "hello elixir and phoenix world!"
+      assert result == "hello elixir and phoenix test world!"
     end
 
     test "extracts text but excludes select elements and their options" do