Skip to content

Commit bb3380f

Browse files
authored
fix: Go back to excluding fields in element_text. Remove full LazyHTML.text (#273)
What changed? ============= Commits 37bc521 (released in 0.8.3) and d19c6cf (released in 0.9.0) tried to improve what was `Html.inner_text` to be more coherent in text parsing (i.e. what is `Html.element_text` vs `Html.text`). But in doing so, we introduced regressions from the behavior we had in 0.8.2. This commit goes back to behavior that more closely matches 0.8.2 by making `Html.element_text` (what `inner_text` is now called) always include text inside nested tags (with the exception of `select` and `textarea` if they aren't the top-level tag in the query). And we no longer use `LazyHTML.text` at all, since that doesn't seem to exclude newlines the same way we were doing so in our internal parsing.
1 parent 0a4fa39 commit bb3380f

File tree

3 files changed

+12
-36
lines changed

3 files changed

+12
-36
lines changed

lib/phoenix_test/html.ex

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,6 @@ defmodule PhoenixTest.Html do
1313
LazyHTML.from_fragment(html)
1414
end
1515

16-
def text(%LazyHTML{} = element) do
17-
LazyHTML.text(element)
18-
end
19-
2016
def element_text(%LazyHTML{} = element) do
2117
element
2218
|> LazyHTML.to_tree(skip_whitespace_nodes: true)
@@ -25,8 +21,7 @@ defmodule PhoenixTest.Html do
2521
|> normalize_whitespace()
2622
end
2723

28-
# combination of tags listed in "Text Content" and "Inline Text Semantics" in https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements
29-
@text_tags ~w[a abbr b bdo blockquote br cite code dfn dd div dl dt em i figcaption figure hr kbd li mark menu ol p pre q rp rt s samp small span strong sub sup time u ul var wbr]
24+
@dont_include_children_tags ~w[select textarea]
3025
defp text_from_text_nodes(tree, acc \\ "")
3126

3227
defp text_from_text_nodes([], acc), do: acc
@@ -37,7 +32,7 @@ defmodule PhoenixTest.Html do
3732
text when is_binary(text) ->
3833
acc <> text
3934

40-
{tag, _, children} when tag in @text_tags ->
35+
{tag, _, children} when tag not in @dont_include_children_tags ->
4136
acc <> " " <> text_from_text_nodes(children)
4237

4338
{_tag, _, children} ->

lib/phoenix_test/query.ex

Lines changed: 1 addition & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -79,16 +79,9 @@ defmodule PhoenixTest.Query do
7979
|> Html.parse_fragment()
8080
|> Html.all(selector)
8181

82-
text_filter_fun =
83-
if selector == "label" do
84-
&filter_by_element_text(&1, text, opts)
85-
else
86-
&filter_by_text(&1, text, opts)
87-
end
88-
8982
elements_matched_selector
9083
|> filter_by_position(opts)
91-
|> text_filter_fun.()
84+
|> filter_by_element_text(text, opts)
9285
|> case do
9386
[] -> {:not_found, elements_matched_selector}
9487
[found] -> {:found, found}
@@ -542,19 +535,6 @@ defmodule PhoenixTest.Query do
542535
&(Html.element_text(&1) =~ text)
543536
end
544537

545-
Enum.filter(elements, &(&1 |> LazyHTML.filter(":not(select)") |> filter_fun.()))
546-
end
547-
548-
defp filter_by_text(elements, text, opts) do
549-
exact_match = Keyword.get(opts, :exact, false)
550-
551-
filter_fun =
552-
if exact_match do
553-
&(Html.text(&1) == text)
554-
else
555-
&(Html.text(&1) =~ text)
556-
end
557-
558538
Enum.filter(elements, filter_fun)
559539
end
560540

test/phoenix_test/html_test.exs

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,11 @@ defmodule PhoenixTest.HtmlTest do
44
alias PhoenixTest.Html
55

66
describe "element_text" do
7-
test "extracts text from parsed html, removing extra whitespace" do
7+
test "extracts text from parsed html, removing extra tags & whitespace" do
88
html = """
99
<label>
1010
hello
11+
<br />
1112
<em>world!</em>
1213
</label>
1314
"""
@@ -20,18 +21,18 @@ defmodule PhoenixTest.HtmlTest do
2021
assert result == "hello world!"
2122
end
2223

23-
test "extracts the text from the top level element, but includes known text elements)" do
24+
test "extracts the text from the top level element along with nested text" do
2425
html = """
2526
<div>
2627
hello
2728
<a href="/">elixir</a>
2829
<span>and</span>
2930
<small>phoenix</small>
30-
<em>world!</em>
31-
32-
<label>excluded text</label>
33-
<form>also excluded</form>
34-
<textarea>also excluded</textarea>
31+
</br>
32+
<em>
33+
test
34+
world!
35+
</em>
3536
</div>
3637
"""
3738

@@ -40,7 +41,7 @@ defmodule PhoenixTest.HtmlTest do
4041
|> Html.parse_fragment()
4142
|> Html.element_text()
4243

43-
assert result == "hello elixir and phoenix world!"
44+
assert result == "hello elixir and phoenix test world!"
4445
end
4546

4647
test "extracts text but excludes select elements and their options" do

0 commit comments

Comments
 (0)