Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Merge pull request #284 from opax/neko-enhancements

Neko enhancements
  • Loading branch information...
commit cac6e19675a76010df0e01aea55eae01153de049 2 parents 0c19163 + 879c7e0
@dizzzz dizzzz authored
View
17 extensions/modules/src/org/exist/xquery/modules/httpclient/BaseHTTPClientFunction.java
@@ -111,6 +111,7 @@
final static String NAMESPACE_URI = HTTPClientModule.NAMESPACE_URI;
final static String PREFIX = HTTPClientModule.PREFIX;
final static String HTTP_MODULE_PERSISTENT_STATE = HTTPClientModule.HTTP_MODULE_PERSISTENT_STATE;
+ final static String HTTP_MODULE_PERSISTENT_OPTIONS = HTTPClientModule.HTTP_MODULE_PERSISTENT_OPTIONS;
final static String HTTP_EXCEPTION_STATUS_CODE = "500";
@@ -153,21 +154,29 @@ protected void setHeaders(final HttpMethod method, final Node headers) throws XP
/**
* Performs a HTTP Request.
*
- * @param context The context of the calling XQuery
- * @param method The HTTP method for the request
- * @param persistState If true existing HTTP state (cookies, credentials, etc) are re-used and athe state is persisted for future HTTP Requests
+ * @param context The context of the calling XQuery
+ * @param method The HTTP method for the request
+ * @param persistState If true, existing HTTP state (cookies, credentials, etc.) is re-used and the state is persisted for future HTTP requests
+ * @param parserFeatures Map of NekoHtml parser features to be used for the HTML parser. If null, the session-wide options will be used.
+ * @param parserProperties Map of NekoHtml parser properties to be used for the HTML parser. If null, the session-wide options will be used.
*
* @return DOCUMENT ME!
*
* @throws IOException
* @throws XPathException
*/
- protected Sequence doRequest(final XQueryContext context, final HttpMethod method, final boolean persistState, final Map<String, Boolean> parserFeatures, final Map<String, String> parserProperties) throws IOException, XPathException {
+ protected Sequence doRequest(final XQueryContext context, final HttpMethod method, boolean persistState, Map<String, Boolean> parserFeatures, Map<String, String> parserProperties) throws IOException, XPathException {
Sequence encodedResponse = null;
final HttpClient http = new HttpClient();
+ FeaturesAndProperties defaultFeaturesAndProperties = (FeaturesAndProperties) context.getXQueryContextVar(HTTP_MODULE_PERSISTENT_OPTIONS);
+ if (defaultFeaturesAndProperties != null) {
+ if (parserFeatures == null) parserFeatures = defaultFeaturesAndProperties.getFeatures();
+ if (parserProperties == null) parserProperties = defaultFeaturesAndProperties.getProperties();
+ }
+
//execute the request
try {
View
6 extensions/modules/src/org/exist/xquery/modules/httpclient/HTTPClientModule.java
@@ -44,7 +44,8 @@
public final static String INCLUSION_DATE = "2007-09-06";
public final static String RELEASED_IN_VERSION = "eXist-1.2";
- public final static String HTTP_MODULE_PERSISTENT_STATE = "_eXist_httpclient_module_persistent_state";
+ public final static String HTTP_MODULE_PERSISTENT_STATE = "_eXist_httpclient_module_persistent_state";
+ public final static String HTTP_MODULE_PERSISTENT_OPTIONS = "_eXist_httpclient_module_persistent_options";
private final static FunctionDef[] functions = {
@@ -57,7 +58,8 @@
new FunctionDef( POSTFunction.signatures[1], POSTFunction.class ),
new FunctionDef( HEADFunction.signature, HEADFunction.class ),
new FunctionDef( OPTIONSFunction.signature, OPTIONSFunction.class ),
- new FunctionDef( ClearFunction.signatures[0], ClearFunction.class )
+ new FunctionDef( ClearFunction.signatures[0], ClearFunction.class ),
+ new FunctionDef( SetOptionsFunction.signatures[0], SetOptionsFunction.class)
};
View
66 extensions/modules/src/org/exist/xquery/modules/httpclient/SetOptionsFunction.java
@@ -0,0 +1,66 @@
+package org.exist.xquery.modules.httpclient;
+
+import org.apache.log4j.Logger;
+
+import org.exist.dom.QName;
+
+import org.exist.xquery.Cardinality;
+import org.exist.xquery.FunctionSignature;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.NodeValue;
+import org.exist.xquery.value.Sequence;
+import org.exist.xquery.value.SequenceType;
+import org.exist.xquery.value.Type;
+
+import org.apache.commons.httpclient.HttpState;
+
+/**
+ * Set default options for the NekoHtml parser for all subsequent requests in
+ * the same XQuery context
+ *
+ * The options are kept in the XQuery context variable named by
+ * HTTP_MODULE_PERSISTENT_OPTIONS. BaseHTTPClientFunction.doRequest reads that
+ * variable and falls back to its features/properties whenever the per-call
+ * parser features or properties are null. Calling this function with an
+ * empty argument stores null under the variable.
+ *
+ * @see <a href='http://nekohtml.sourceforge.net/settings.html'>NekoHtml Parser Settings</a>
+ *
+ * @author O.Pax <o.pax@web.de>
+ * @version 2.1
+ * @serial 20140526
+ */
+public class SetOptionsFunction extends BaseHTTPClientFunction {
+
+ protected static final Logger logger = Logger.getLogger( SetOptionsFunction.class );
+
+ public final static FunctionSignature[] signatures = { // single signature: set-parser-options(options), declared to return an empty result
+ new FunctionSignature(
+ new QName( "set-parser-options", NAMESPACE_URI, PREFIX ),
+ "Sets default options for the HTML parser for all subsequent requests in this session",
+ new SequenceType[] {
+ OPTIONS_PARAM
+ },
+ new SequenceType( Type.ITEM, Cardinality.EMPTY )
+ )
+ };
+
+
+ public SetOptionsFunction(XQueryContext context, FunctionSignature signature)
+ {
+ super( context, signature );
+ }
+
+
+ public Sequence eval( Sequence[] args, Sequence contextSequence ) throws XPathException // always returns Sequence.EMPTY_SEQUENCE; the effect is the stored context variable
+ {
+ if( isCalledAs( "set-parser-options" ) ) {
+
+ FeaturesAndProperties featuresAndProperties = null; // stays null when no options node is supplied
+
+ if (args.length > 0 && !args[0].isEmpty()) {
+ featuresAndProperties = getParserFeaturesAndProperties(((NodeValue)args[0].itemAt(0)).getNode()); // only the first item of the argument is used; helper is inherited (not visible here)
+ }
+
+ context.setXQueryContextVar( HTTP_MODULE_PERSISTENT_OPTIONS, featuresAndProperties ); // stored unconditionally, so an empty argument overwrites earlier options with null
+ }
+
+ return( Sequence.EMPTY_SEQUENCE );
+ }
+
+}
View
12 src/org/exist/memtree/SAXAdapter.java
@@ -45,6 +45,7 @@
{
private MemTreeBuilder builder;
private HashMap<String, String> namespaces = null;
+ private boolean replaceAttributeFlag;
public SAXAdapter() {
setBuilder(new MemTreeBuilder());
@@ -80,6 +81,9 @@ public void endDocument() throws SAXException
public void startDocument() throws SAXException // starts the builder's document, then applies the replace-attribute setting
{
builder.startDocument();
+
+ if (replaceAttributeFlag) // only a true flag is forwarded; when false the builder is left as it is
+ builder.setReplaceAttributeFlag(replaceAttributeFlag);
}
@@ -239,4 +243,12 @@ public void startEntity( String name ) throws SAXException
public void startDTD( String name, String publicId, String systemId ) throws SAXException
{
}
+
+ public void setReplaceAttributeFlag(boolean replaceAttributeFlag) { // records the flag on this adapter; startDocument() passes it to the builder when it is true
+ this.replaceAttributeFlag = replaceAttributeFlag;
+ }
+
+ public boolean isReplaceAttributeFlag() { // returns the flag last set via setReplaceAttributeFlag (false unless set, as the field has no initializer)
+ return replaceAttributeFlag;
+ }
}
View
5 src/org/exist/xquery/modules/ModuleUtils.java
@@ -277,6 +277,11 @@ public static DocumentImpl htmlToXHtml(XQueryContext context, String url, InputS
}
final SAXAdapter adapter = new SAXAdapter();
+
+ // allow multiple attributes of the same name attached to the same element
+ // to enhance resilience against bad HTML. The last attribute value wins.
+ adapter.setReplaceAttributeFlag(true);
+
reader.setContentHandler(adapter);
reader.parse(srcHtml);
final Document doc = adapter.getDocument();
Please sign in to comment.
Something went wrong with that request. Please try again.