Skip to content

Commit

Permalink
Use Iterator.remove to avoid ConcurrentModificationException
Browse files Browse the repository at this point in the history
Add improved test coverage
  • Loading branch information
jhy committed Jan 23, 2023
1 parent da20d2b commit b5de45f
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 6 deletions.
3 changes: 3 additions & 0 deletions CHANGES
Expand Up @@ -44,6 +44,9 @@ Release 1.15.4 [PENDING]
body.
<https://github.com/jhy/jsoup/issues/1778>

* Bugfix: fixed an issue in Safelist.removeAttributes which could throw a ConcurrentModificationException when using
the ":all" pseudo-attribute.

* Change: deprecated the unused Document#normalise() method. Normalization occurs during the HTML tree construction,
and no longer as a distinct phase.

Expand Down
16 changes: 10 additions & 6 deletions src/main/java/org/jsoup/safety/Safelist.java
Expand Up @@ -12,6 +12,7 @@ Thank you to Ryan Grove (wonko.com) for the Ruby HTML cleaner http://github.com/

import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

Expand Down Expand Up @@ -63,6 +64,7 @@ XSS attack examples (that jsoup will safegaurd against the default Cleaner and S
</p>
*/
public class Safelist {
private static final String All = ":all";
private final Set<TagName> tagNames; // tags allowed, lower case. e.g. [p, br, span]
private final Map<TagName, Set<AttributeKey>> attributes; // tag -> attribute[]. allowed attributes [href] for a tag.
private final Map<TagName, Map<AttributeKey, AttributeValue>> enforcedAttributes; // always set these attribute values
Expand Down Expand Up @@ -342,14 +344,16 @@ public Safelist removeAttributes(String tag, String... attributes) {
if(currentSet.isEmpty()) // Remove tag from attribute map if no attributes are allowed for tag
this.attributes.remove(tagName);
}
if(tag.equals(":all")) // Attribute needs to be removed from all individually set tags
for(TagName name: this.attributes.keySet()) {
Set<AttributeKey> currentSet = this.attributes.get(name);
if(tag.equals(All)) { // Attribute needs to be removed from all individually set tags
Iterator<Map.Entry<TagName, Set<AttributeKey>>> it = this.attributes.entrySet().iterator();
while (it.hasNext()) {
Map.Entry<TagName, Set<AttributeKey>> entry = it.next();
Set<AttributeKey> currentSet = entry.getValue();
currentSet.removeAll(attributeSet);

if(currentSet.isEmpty()) // Remove tag from attribute map if no attributes are allowed for tag
this.attributes.remove(name);
it.remove();
}
}
return this;
}

Expand Down Expand Up @@ -555,7 +559,7 @@ protected boolean isSafeAttribute(String tagName, Element el, Attribute attr) {
}
}
// no attributes defined for tag, try :all tag
return !tagName.equals(":all") && isSafeAttribute(":all", el, attr);
return !tagName.equals(All) && isSafeAttribute(All, el, attr);
}

private boolean testValidProtocol(Element el, Attribute attr, Set<Protocol> protocols) {
Expand Down
26 changes: 26 additions & 0 deletions src/test/java/org/jsoup/safety/CleanerTest.java
Expand Up @@ -67,6 +67,32 @@ public class CleanerTest {
assertEquals("<p>Nice</p><blockquote>Hello</blockquote>", TextUtil.stripNewlines(cleanHtml));
}

@Test void allAttributes() {
String h = "<div class=foo data=true><p class=bar>Text</p></div><blockquote cite='https://example.com'>Foo";
Safelist safelist = Safelist.relaxed();
safelist.addAttributes(":all", "class");
safelist.addAttributes("div", "data");

String clean1 = Jsoup.clean(h, safelist);
assertEquals("<div class=\"foo\" data=\"true\"><p class=\"bar\">Text</p></div><blockquote cite=\"https://example.com\">Foo</blockquote>", TextUtil.stripNewlines(clean1));

safelist.removeAttributes(":all", "class", "cite");

String clean2 = Jsoup.clean(h, safelist);
assertEquals("<div data=\"true\"><p>Text</p></div><blockquote>Foo</blockquote>", TextUtil.stripNewlines(clean2));
}

@Test void removeProtocols() {
String h = "<a href='any://example.com'>Link</a>";
Safelist safelist = Safelist.relaxed();
String clean1 = Jsoup.clean(h, safelist);
assertEquals("<a>Link</a>", clean1);

safelist.removeProtocols("a", "href", "ftp", "http", "https", "mailto");
String clean2 = Jsoup.clean(h, safelist); // all removed means any will work
assertEquals("<a href=\"any://example.com\">Link</a>", clean2);
}

@Test public void testRemoveEnforcedAttributes() {
String h = "<div><p><A HREF='HTTP://nice.com'>Nice</a></p><blockquote>Hello</blockquote>";
String cleanHtml = Jsoup.clean(h, Safelist.basic().removeEnforcedAttribute("a", "rel"));
Expand Down

0 comments on commit b5de45f

Please sign in to comment.