Skip to content

Commit

Permalink
Bug 59885 - Optimize css parsing for embedded resources download by i…
Browse files Browse the repository at this point in the history
…ntroducing a cache

Based on PR 219 contributed by Benoit Wiart (b.wiart at ubik-ingenierie.com)
This closes #219 on github.
Bugzilla Id: 59885

git-svn-id: https://svn.apache.org/repos/asf/jmeter/trunk@1754678 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information
pmouawad committed Jul 31, 2016
1 parent db1a75c commit d0abd88
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 40 deletions.
7 changes: 7 additions & 0 deletions bin/jmeter.properties
Original file line number Diff line number Diff line change
Expand Up @@ -726,6 +726,13 @@ HTTPResponse.parsers=htmlParser wmlParser cssParser
# CSS Parser based on ph-css
cssParser.className=org.apache.jmeter.protocol.http.parser.CssParser
cssParser.types=text/css

# CSS parser LRU cache size
# This cache stores the URLs found in a CSS to avoid continuously parsing the CSS
# By default the cache size is 400
# It can be disabled by setting its value to 0
#css.parser.cache.size=400

#---------------------------------------------------------------------------
# HTML Parser configuration
#---------------------------------------------------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,13 @@
import java.net.URL;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.collections.map.LRUMap;
import org.apache.commons.lang3.StringUtils;
import org.apache.jmeter.util.JMeterUtils;
import org.apache.jorphan.logging.LoggingManager;
Expand All @@ -50,7 +54,20 @@
public class CssParser implements LinkExtractorParser {
private static final boolean IGNORE_UNRECOVERABLE_PARSING_ERROR = JMeterUtils.getPropDefault("httpsampler.ignore_failed_embedded_resource", false); //$NON-NLS-1$
private static final Logger LOG = LoggingManager.getLoggerForClass();

/**
*
*/
private static final int CSS_URL_CACHE_MAX_SIZE = JMeterUtils.getPropDefault("css.parser.cache.size", 400);

/**
*
*/
@SuppressWarnings("unchecked")
private static Map<String, URLCollection> CSS_URL_CACHE =
CSS_URL_CACHE_MAX_SIZE > 0 ? Collections.synchronizedMap(new LRUMap(CSS_URL_CACHE_MAX_SIZE)) : null;


private static final class CustomLoggingCSSParseExceptionCallback extends LoggingCSSParseExceptionCallback {
/**
*
Expand All @@ -76,6 +93,7 @@ public void onException(ParseException ex) {
}
}
}

/**
*
*/
Expand All @@ -93,40 +111,55 @@ public CssParser() {
public Iterator<URL> getEmbeddedResourceURLs(String userAgent, byte[] data,
final URL baseUrl, String encoding) throws LinkExtractorParseException {
try {
String cssContent = new String(data, encoding);
final CascadingStyleSheet aCSS = CSSReader.readFromStringStream(cssContent,
new CSSReaderSettings()
.setBrowserCompliantMode(true)
.setFallbackCharset(Charset.forName(encoding))
.setCSSVersion (ECSSVersion.CSS30)
.setCustomErrorHandler(new LoggingCSSParseErrorHandler())
.setCustomExceptionHandler (new CustomLoggingCSSParseExceptionCallback(baseUrl)));
final List<URLString> list = new ArrayList<>();
final URLCollection urlCollection = new URLCollection(list);
if(aCSS != null) {
CSSVisitor.visitCSSUrl(aCSS, new DefaultCSSUrlVisitor() {
@Override
public void onImport(final CSSImportRule importRule) {
String location = importRule.getLocationString();
if(!StringUtils.isEmpty(location)) {
urlCollection.addURL(location, baseUrl);
boolean cacheEnabled = CSS_URL_CACHE_MAX_SIZE > 0;
String md5Key = null;
URLCollection urlCollection = null;
if(cacheEnabled) {
md5Key = DigestUtils.md5Hex(data);
urlCollection = CSS_URL_CACHE.get(md5Key);
}

if(urlCollection == null) {
String cssContent = new String(data, encoding);
final CascadingStyleSheet aCSS = CSSReader.readFromStringStream(cssContent,
new CSSReaderSettings()
.setBrowserCompliantMode(true)
.setFallbackCharset(Charset.forName(encoding))
.setCSSVersion (ECSSVersion.CSS30)
.setCustomErrorHandler(new LoggingCSSParseErrorHandler())
.setCustomExceptionHandler (new CustomLoggingCSSParseExceptionCallback(baseUrl)));
final List<URLString> list = new ArrayList<>();
urlCollection = new URLCollection(list);
final URLCollection localCollection = urlCollection;
if(aCSS != null) {
CSSVisitor.visitCSSUrl(aCSS, new DefaultCSSUrlVisitor() {
@Override
public void onImport(final CSSImportRule importRule) {
String location = importRule.getLocationString();
if(!StringUtils.isEmpty(location)) {
localCollection.addURL(location, baseUrl);
}
}
// Call for URLs outside of URLs
@Override
public void onUrlDeclaration(
final ICSSTopLevelRule aTopLevelRule,
final CSSDeclaration aDeclaration,
final CSSExpressionMemberTermURI aURITerm) {
// NOOP
// Browser fetch such urls only when CSS rule matches
// so we disable this code
//urlCollection.addURL(aURITerm.getURIString(), baseUrl);
}
});
if(cacheEnabled) {
CSS_URL_CACHE.put(md5Key, urlCollection);
}
// Call for URLs outside of URLs
@Override
public void onUrlDeclaration(
final ICSSTopLevelRule aTopLevelRule,
final CSSDeclaration aDeclaration,
final CSSExpressionMemberTermURI aURITerm) {
// NOOP
// Browser fetch such urls only when CSS rule matches
// so we disable this code
//urlCollection.addURL(aURITerm.getURIString(), baseUrl);
}
});
} else {
LOG.warn("Failed parsing url:"+baseUrl+", got null CascadingStyleSheet");
} else {
LOG.warn("Failed parsing url:"+baseUrl+", got null CascadingStyleSheet");
}
}

if(LOG.isDebugEnabled()) {
StringBuilder builder = new StringBuilder();
for (Iterator<URL> iterator = urlCollection.iterator(); iterator.hasNext();) {
Expand All @@ -135,6 +168,7 @@ public void onUrlDeclaration(
}
LOG.debug("Parsed:"+baseUrl+", got:"+builder.toString());
}

return urlCollection.iterator();
} catch (Exception e) {
throw new LinkExtractorParseException(e);
Expand Down
3 changes: 2 additions & 1 deletion xdocs/changes.xml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ Summary
<ch_section>Incompatible changes</ch_section>

<ul>
<li>Sample change...</li>
<li>A cache for CSS Parsing of URLs has been introduced in this version, it is enabled by default. It is controlled by property <code>css.parser.cache.size</code>. It can be disabled by setting its value to 0. See <bugzilla>59885</bugzilla></li>
</ul>

<h3>Deprecated and removed elements</h3>
Expand All @@ -80,6 +80,7 @@ Summary
<h3>HTTP Samplers and Test Script Recorder</h3>
<ul>
<li><bug>59882</bug>Reduce memory allocations for better throughput. Contributed by Benoit Wiart (b.wiart at ubik-ingenierie.com) through <pr>217</pr></li>
<li><bug>59885</bug>Optimize css parsing for embedded resources download by introducing a cache. Contributed by Benoit Wiart (b.wiart at ubik-ingenierie.com) through <pr>219</pr></li>
</ul>

<h3>Other samplers</h3>
Expand Down
20 changes: 12 additions & 8 deletions xdocs/usermanual/properties_reference.xml
Original file line number Diff line number Diff line change
Expand Up @@ -445,14 +445,18 @@ Uncomment this line if you put anything in httpclient.parameters file</property>
</section>
<section name="&sect-num;.24 HTML Parser configuration" anchor="parser_config">
<properties>
<property name="HTTPResponse.parsers"> Space-separated list of parser groups<br/>, defaults to:htmlParser wmlParser cssParser</property>
<property name="cssParser.className"> for each parser, there should be a parser.types and a parser.className property<br/> CSS Parser based on ph-css<br/>, defaults to:org.apache.jmeter.protocol.http.parser.CssParser</property>
<property name="cssParser.types">, defaults to:text/css</property>
<property name=" see https://bz.apache.org/bugzilla/show_bug.cgi?id"> Define the HTML parser to be used.<br/> Default parser:<br/> This new parser (since 2.10) should perform better than all others<br/>, defaults to:55632</property>
<property name="htmlParser.className"> Do not comment this property<br/>, defaults to:org.apache.jmeter.protocol.http.parser.LagartoBasedHtmlParser</property>
<property name="htmlParser.className"> Other parsers:<br/> Default parser before 2.10<br/>, defaults to:org.apache.jmeter.protocol.http.parser.JTidyHTMLParser</property>
<property name="htmlParser.className"> Note that Regexp extractor may detect references that have been commented out.<br/> In many cases it will work OK, but you should be aware that it may generate <br/> additional references.<br/>, defaults to:org.apache.jmeter.protocol.http.parser.RegexpHTMLParser</property>
<property name="htmlParser.className"> This parser is based on JSoup, it should be the most accurate but less performant<br/> than LagartoBasedHtmlParser<br/>, defaults to:org.apache.jmeter.protocol.http.parser.JsoupBasedHtmlParser</property>
<property name="HTTPResponse.parsers">Space-separated list of parser groups<br/>, defaults to:htmlParser wmlParser cssParser. For each parser, there should be a parser.types and a parser.className property</property>
<property name="cssParser.className"> CSS Parser based on ph-css<br/>, defaults to:org.apache.jmeter.protocol.http.parser.CssParser</property>
<property name="cssParser.types">content types handled by cssParser, defaults to:text/css</property>
<property name="css.parser.cache.size">CSS parser LRU cache size. This cache stores the URLs found in a CSS to avoid continuously parsing the CSS. By default the cache size is 400. It can be disabled by setting its value to 0.</property>
<property name="htmlParser.className">Define the HTML parser to be used. This new parser (since 2.10) should perform better than all others. see https://bz.apache.org/bugzilla/show_bug.cgi?id=55632. Do not comment this property<br/>, defaults to:org.apache.jmeter.protocol.http.parser.LagartoBasedHtmlParser</property>
Other parsers:<br/>
<ul>
<li>org.apache.jmeter.protocol.http.parser.JTidyHTMLParser : Default parser before 2.10<br/>
<li>org.apache.jmeter.protocol.http.parser.RegexpHTMLParser : Note that Regexp extractor may detect references that have been commented out.<br/> In many cases it will work OK, but you should be aware that it may generate additional references.</li>
<li>org.apache.jmeter.protocol.http.parser.JsoupBasedHtmlParser:This parser is based on JSoup, it should be the most accurate but less performant than LagartoBasedHtmlParser, defaults to:org.apache.jmeter.protocol.http.parser.JsoupBasedHtmlParser</li>
</li>
</ul>
<property name="htmlParser.types">Used by HTTPSamplerBase to associate htmlParser with content types below <br/>, defaults to:text/html application/xhtml+xml application/xml text/xml</property>
<property name="wmlParser.className">, defaults to:org.apache.jmeter.protocol.http.parser.RegexpHTMLParser</property>
<property name="wmlParser.types">Used by HTTPSamplerBase to associate wmlParser with content types below <br/>, defaults to:text/vnd.wap.wml </property>
Expand Down

0 comments on commit d0abd88

Please sign in to comment.