Skip to content

Commit

Permalink
ISSUE-1773 Tokeniser: recycle string builders too
Browse files Browse the repository at this point in the history
  • Loading branch information
chibenwa committed Jul 1, 2022
1 parent 32ebe91 commit d09a106
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 2 deletions.
23 changes: 23 additions & 0 deletions src/main/java/org/jsoup/helper/StringBuilderRecycler.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package org.jsoup.helper;

import java.util.ArrayList;

public class StringBuilderRecycler {
protected final ArrayList<StringBuilder> stringBuilders = new ArrayList<>();

public StringBuilder get(int minSize) {
if (!stringBuilders.isEmpty()) {
StringBuilder stringBuilder = stringBuilders.remove(stringBuilders.size() - 1);
// Too small string builders are thrown away
if (stringBuilder.capacity() >= minSize) {
stringBuilder.setLength(0);
return stringBuilder;
}
}
return new StringBuilder(minSize);
}

public void releaseByteBuffer(StringBuilder stringBuilder) {
stringBuilders.add(stringBuilder);
}
}
27 changes: 25 additions & 2 deletions src/main/java/org/jsoup/parser/Tokeniser.java
Original file line number Diff line number Diff line change
@@ -1,16 +1,33 @@
package org.jsoup.parser;

import org.jsoup.helper.StringBuilderRecycler;
import org.jsoup.helper.Validate;
import org.jsoup.internal.StringUtil;
import org.jsoup.nodes.Entities;

import javax.annotation.Nullable;

import java.lang.ref.SoftReference;
import java.util.Arrays;

/**
* Reads the input stream into tokens.
*/
final class Tokeniser {
// Per-thread slot holding a SoftReference to that thread's StringBuilderRecycler.
// It starts out unset; getStringBuilderCache() populates it on first use and again
// whenever the soft reference has been cleared.
protected static final ThreadLocal<SoftReference<StringBuilderRecycler>> stringCacheRef = new ThreadLocal<>();

/**
 * Returns the calling thread's {@link StringBuilderRecycler}, creating and registering a new
 * one when the thread has none yet or when its soft reference has been cleared.
 *
 * @return the recycler for the current thread; never {@code null}
 */
public static StringBuilderRecycler getStringBuilderCache() {
    final SoftReference<StringBuilderRecycler> cached = stringCacheRef.get();
    if (cached != null) {
        final StringBuilderRecycler existing = cached.get();
        if (existing != null) {
            return existing;
        }
    }

    // Nothing usable is registered for this thread: create a recycler and remember it softly.
    final StringBuilderRecycler fresh = new StringBuilderRecycler();
    stringCacheRef.set(new SoftReference<>(fresh));
    return fresh;
}

static final char replacementChar = '\uFFFD'; // replaces null character
// NOTE(review): despite the name, the literal order below is not ascending ('\r' = 13 comes
// before '\f' = 12). Presumably the array is sorted elsewhere before any binary search — confirm.
private static final char[] notCharRefCharsSorted = new char[]{'\t', '\n', '\r', '\f', ' ', '<', '&'};

Expand All @@ -37,8 +54,8 @@ final class Tokeniser {
@Nullable private Token emitPending = null; // the token we are about to emit on next read
private boolean isEmitPending = false;
@Nullable private String charsString = null; // characters pending an emit. Will fall to charsBuilder if more than one
// NOTE(review): charsBuilder and dataBuffer are each declared twice below. The first pair
// (fresh 1024-capacity allocations) looks like the pre-change side of the diff and the second
// pair the post-change side — confirm that only one pair exists in the real file.
private final StringBuilder charsBuilder = new StringBuilder(1024); // buffers characters to output as one token, if more than one emit per read
StringBuilder dataBuffer = new StringBuilder(1024); // buffers data looking for </script>
// These two are requested from the per-thread recycler with a minimum capacity of 1024, and
// release() hands them back to it.
private StringBuilder charsBuilder = getStringBuilderCache().get(1024); // buffers characters to output as one token, if more than one emit per read
StringBuilder dataBuffer = getStringBuilderCache().get(1024); // buffers data looking for </script>

// Start and end tag tokens held by the tokeniser — presumably reused between reads; confirm.
Token.StartTag startPending = new Token.StartTag();
Token.EndTag endPending = new Token.EndTag();
Expand Down Expand Up @@ -360,4 +377,10 @@ String unescapeEntities(boolean inAttribute) {
}
return StringUtil.releaseBuilder(builder);
}

/**
 * Hands this tokeniser's character buffers back to the current thread's recycler.
 *
 * <p>The buffer fields are cleared afterwards, so calling this method again is a no-op and a
 * released buffer cannot be reached through this tokeniser once the recycler may have given it
 * to another user. The tokeniser must not be used for further reads after this call.</p>
 */
void release() {
    StringBuilderRecycler stringBuilderCache = getStringBuilderCache();
    if (charsBuilder != null) {
        stringBuilderCache.releaseByteBuffer(charsBuilder);
        charsBuilder = null; // drop our reference: the recycler owns the buffer now
    }
    if (dataBuffer != null) {
        stringBuilderCache.releaseByteBuffer(dataBuffer);
        dataBuffer = null;
    }
}
}
1 change: 1 addition & 0 deletions src/main/java/org/jsoup/parser/TreeBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ Document parse(Reader input, String baseUri, Parser parser) {
// tidy up - as the Parser and Treebuilder are retained in document for settings / fragments
reader.close();
reader = null;
tokeniser.release();
tokeniser = null;
stack = null;
seenTags = null;
Expand Down

0 comments on commit d09a106

Please sign in to comment.