Skip to content

Commit

Permalink
Bad SYSTEM for DTD DocType and Entity breaks the XML validation
Browse files Browse the repository at this point in the history
Fixes #1169

Signed-off-by: azerr <azerr@redhat.com>
  • Loading branch information
angelozerr authored and fbricon committed Feb 8, 2022
1 parent 4a1b529 commit b46cb1f
Show file tree
Hide file tree
Showing 8 changed files with 460 additions and 104 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,15 @@
import java.util.HashMap;
import java.util.Map;

import org.apache.xerces.impl.XMLEntityManager;
import org.apache.xerces.util.URI.MalformedURIException;
import org.apache.xerces.xni.XMLLocator;
import org.eclipse.lemminx.commons.BadLocationException;
import org.eclipse.lemminx.dom.DOMDocument;
import org.eclipse.lemminx.dom.DOMDocumentType;
import org.eclipse.lemminx.dom.DOMElement;
import org.eclipse.lemminx.dom.DOMRange;
import org.eclipse.lemminx.dom.DTDEntityDecl;
import org.eclipse.lemminx.extensions.contentmodel.participants.codeactions.ElementDeclUnterminatedCodeAction;
import org.eclipse.lemminx.extensions.contentmodel.participants.codeactions.EntityNotDeclaredCodeAction;
import org.eclipse.lemminx.extensions.contentmodel.participants.codeactions.FixMissingSpaceCodeAction;
Expand All @@ -35,6 +38,7 @@
import org.eclipse.lsp4j.Position;
import org.eclipse.lsp4j.Range;
import org.eclipse.lsp4j.ResourceOperationKind;
import org.w3c.dom.NamedNodeMap;

/**
* DTD error code.
Expand Down Expand Up @@ -212,19 +216,47 @@ public static Range toLSPRange(XMLLocator location, DTDErrorCode code, Object[]
}

case dtd_not_found: {
// Check if DTD location comes from a xml-model/@href

// Check if DTD location is declared with xml-model/@href
// ex : <xml-model href="http://www.docbook.org/xml/4.4/docbookx.dtd" [
String hrefLocation = (String) arguments[1];
DOMRange locationRange = XMLModelUtils.getHrefNode(document, hrefLocation);
if (locationRange != null) {
return XMLPositionUtility.createRange(locationRange);
}

try {
DOMDocumentType docType = document.getDoctype();
return new Range(document.positionAt(docType.getSystemIdNode().getStart()),
document.positionAt(docType.getSystemIdNode().getEnd()));
if (docType != null) {

// Check if DTD location is declared with <!DOCTYPE SYSTEM
// ex : <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.4//EN"
// "http://www.docbook.org/xml/4.4/docbookx.dtd" [
String documentURI = document.getDocumentURI();
String dtdLocation = getResolvedLocation(documentURI, docType.getSystemIdWithoutQuotes());
if (hrefLocation.equals(dtdLocation)) {
return new Range(document.positionAt(docType.getSystemIdNode().getStart()),
document.positionAt(docType.getSystemIdNode().getEnd()));
}

// Check if DTD location is declared with <!ENTITY SYSTEM
// ex : <!ENTITY % document SYSTEM "document.ent">
NamedNodeMap entities = docType.getEntities();
for (int i = 0; i < entities.getLength(); i++) {
DTDEntityDecl entity = (DTDEntityDecl) entities.item(i);
String entityLocation = getResolvedLocation(documentURI, entity.getSystemId());
if (hrefLocation.equals(entityLocation)) {
return XMLPositionUtility.createRange(entity.getSystemIdNode());
}
}
}
} catch (BadLocationException e) {
}
return null;

// DTD location cannot be found (ex : the DTD not found comes from an ENTITY
// which is declared in another included file.
// the error is displayed on the XML root element
return XMLPositionUtility.selectRootStartTag(document);
}
default:
try {
Expand All @@ -235,6 +267,22 @@ public static Range toLSPRange(XMLLocator location, DTDErrorCode code, Object[]
return null;
}

/**
* Returns the expanded system location
*
* @return the expanded system location
*/
private static String getResolvedLocation(String documentURI, String location) {
if (location == null) {
return null;
}
try {
return XMLEntityManager.expandSystemId(location, documentURI, false);
} catch (MalformedURIException e) {
return location;
}
}

public static void registerCodeActionParticipants(Map<String, ICodeActionParticipant> codeActions,
SharedSettings sharedSettings) {
codeActions.put(ElementDeclUnterminated.getCode(), new ElementDeclUnterminatedCodeAction());
Expand All @@ -248,4 +296,4 @@ public static void registerCodeActionParticipants(Map<String, ICodeActionPartici
codeActions.put(MSG_SPACE_REQUIRED_BEFORE_ELEMENT_TYPE_IN_ELEMENTDECL.getCode(), fixMissingSpace);
codeActions.put(MSG_SPACE_REQUIRED_BEFORE_ENTITY_NAME_IN_ENTITYDECL.getCode(), fixMissingSpace);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,36 +11,28 @@
*******************************************************************************/
package org.eclipse.lemminx.extensions.contentmodel.participants.diagnostics;

import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.List;
import java.io.IOException;

import org.apache.xerces.impl.Constants;
import org.apache.xerces.impl.XMLEntityManager;
import org.apache.xerces.impl.XMLErrorReporter;
import org.apache.xerces.impl.dtd.DTDGrammar;
import org.apache.xerces.impl.dtd.XMLDTDDescription;
import org.apache.xerces.impl.dtd.XMLEntityDecl;
import org.apache.xerces.impl.validation.ValidationManager;
import org.apache.xerces.parsers.SAXParser;
import org.apache.xerces.xni.Augmentations;
import org.apache.xerces.xni.NamespaceContext;
import org.apache.xerces.xni.XMLLocator;
import org.apache.xerces.xni.XNIException;
import org.apache.xerces.xni.parser.XMLInputSource;
import org.apache.xerces.xni.parser.XMLParserConfiguration;
import org.eclipse.lemminx.extensions.contentmodel.participants.DTDErrorCode;
import org.eclipse.lemminx.extensions.xerces.xmlmodel.msg.XMLModelMessageFormatter;
import org.eclipse.lemminx.utils.FilesUtils;
import org.eclipse.lemminx.dom.DOMDocument;
import org.eclipse.lemminx.dom.DOMDocumentType;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;

/**
* Extension of Xerces SAX Parser to fix some Xerces bugs:
*
* <ul>
* <li>[BUG 1]: when the DTD file path is wrong on DOCTYPE, Xerces breaks all
* validation like syntax validation</li>
* <li>[BUG 2]: when Xerces XML grammar pool is used, the second validation
* ignore the existing of entities. See
* https://github.com/redhat-developer/vscode-xml/issues/234</li>
Expand All @@ -56,27 +48,22 @@ public class LSPSAXParser extends SAXParser {

protected static final String ENTITY_MANAGER = Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_MANAGER_PROPERTY;

private final LSPErrorReporterForXML reporter;

private final LSPXMLGrammarPool grammarPool;

private List<XMLDTDDescription> grammarDescs;
private final DOMDocument document;

public LSPSAXParser(LSPErrorReporterForXML reporter, XMLParserConfiguration config, LSPXMLGrammarPool grammarPool) {
public LSPSAXParser(LSPErrorReporterForXML reporter, XMLParserConfiguration config, LSPXMLGrammarPool grammarPool,
DOMDocument document) {
super(config);
this.reporter = reporter;
this.grammarPool = grammarPool;
this.document = document;
init(reporter);
}

private void init(LSPErrorReporterForXML reporter) {
try {
// Add LSP error reporter to fill LSP diagnostics from Xerces errors
super.setProperty("http://apache.org/xml/properties/internal/error-reporter", reporter);
super.setFeature("http://apache.org/xml/features/continue-after-fatal-error", false); //$NON-NLS-1$
super.setFeature("http://xml.org/sax/features/namespace-prefixes", true); //$NON-NLS-1$
super.setFeature("http://xml.org/sax/features/namespaces", true); //$NON-NLS-1$
super.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", true);
} catch (SAXNotRecognizedException | SAXNotSupportedException e) {
// Should never occur.
}
Expand All @@ -88,84 +75,61 @@ private void init(LSPErrorReporterForXML reporter) {
public void startDocument(XMLLocator locator, String encoding, NamespaceContext namespaceContext,
Augmentations augs) throws XNIException {
this.locator = locator;
if (grammarPool != null) {
DOMDocumentType docType = document.getDoctype();
if (docType != null) {
String systemId = document.getDoctype().getSystemIdWithoutQuotes();
if (systemId != null) {
// ex : <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.4//EN"
// "http://www.docbook.org/xml/4.4/docbookx.dtd"
String rootElement = document.getDocumentElement().getTagName();
String publicId = document.getDoctype().getPublicIdWithoutQuotes();
XMLDTDDescription grammarDesc = createGrammarDescription(rootElement, publicId, systemId);

// Try to get the DTD grammar from the xerces cache
DTDGrammar grammar = (DTDGrammar) grammarPool.retrieveGrammar(grammarDesc);
if (grammar != null) {
// Internal subset means:
// ex : <!DOCTYPE chapter... [
// some internal subset content like <!ENTITY ...
//

// Compare the current internal subset with the DTD grammar internal subset
if (grammarPool.setInternalSubset(grammarDesc, docType.getInternalSubset())) {
// The internal subset which can defines some entities changed, remove the
// grammar from the cache
grammarPool.removeGrammar(grammarDesc);
}
}
}
}
}
super.startDocument(locator, encoding, namespaceContext, augs);
}

@Override
public void doctypeDecl(String rootElement, String publicId, String systemId, Augmentations augs)
throws XNIException {
if (systemId != null) {
// There a declared DTD in the DOCTYPE
if (systemId != null && grammarPool != null) {
// There is a declared DTD in the DOCTYPE
// <!DOCTYPE root-element SYSTEM "./extended.dtd" []>

XMLEntityManager entityManager = (XMLEntityManager) fConfiguration.getProperty(ENTITY_MANAGER);
XMLDTDDescription grammarDesc = createGrammarDescription(rootElement, publicId, systemId);

String eid = grammarDesc.getExpandedSystemId();

try {
XMLInputSource input = entityManager.resolveEntity(grammarDesc);
String resolvedSystemId = input.getSystemId();
if (resolvedSystemId != null && resolvedSystemId.startsWith(FilesUtils.FILE_SCHEME)) {
// The resolved DTD is a file, check if the file exists.
if (!FilesUtils.toFile(resolvedSystemId).exists()) {
// The declared DTD file doesn't exist
// <!DOCTYPE root-element SYSTEM "./dtd-doesnt-exist.dtd" []>
throw new FileNotFoundException(resolvedSystemId);
}
}

if (grammarPool != null) {
// FIX [BUG 2]
// DTD exists, get the DTD grammar from the cache
// FIX [BUG 2]
// DTD exists, get the DTD grammar from the cache

DTDGrammar grammar = (DTDGrammar) grammarPool.retrieveGrammar(grammarDesc);
if (grammar != null) {
// The DTD grammar is in cache, we need to fill XML entity manager with the
// entities declared in the cached DTD grammar
fillEntities(grammar, entityManager);
}
}
} catch (Exception e) {
reporter.reportError(locator, XMLModelMessageFormatter.XML_MODEL_DOMAIN,
DTDErrorCode.dtd_not_found.getCode(), new Object[] { null, eid },
XMLErrorReporter.SEVERITY_ERROR, e);

// FIX [BUG 1]
// To avoid breaking the validation (ex : syntax validation) we mark
// the cache DTD as true to avoid having an IOException error which breaks the
// validation.
// boolean readExternalSubset must be false in
// Xerces
// https://github.com/apache/xerces2-j/blob/e5a239b96fd2cff6566a29e7a4a3a4a2bbf9b0d4/src/org/apache/xerces/impl/XMLDocumentScannerImpl.java#L950
ValidationManager fValidationManager = (ValidationManager) fConfiguration
.getProperty(VALIDATION_MANAGER);
if (fValidationManager != null) {
fValidationManager.setCachedDTD(true);
}

// As we don't throw an error when DTD content is downloaded, we need to remove
// the DTDGrammar from the cache (which is cached by Xerces).
if (grammarPool != null) {
if (grammarDescs == null) {
grammarDescs = new ArrayList<>();
}
grammarDescs.add(grammarDesc);
}
DTDGrammar grammar = (DTDGrammar) grammarPool.retrieveGrammar(grammarDesc);
if (grammar != null) {
// The DTD grammar is in cache, we need to fill XML entity manager with the
// entities declared in the cached DTD grammar
fillEntities(grammar, entityManager);
}
}
super.doctypeDecl(rootElement, publicId, systemId, augs);
}

@Override
public void endDTD(Augmentations augs) throws XNIException {
super.endDTD(augs);
// Remove invalid DTDGrammar from the cache.
if (grammarDescs != null) {
grammarDescs.forEach(desc -> grammarPool.removeGrammar(desc));
}
}

/**
* Create DTD grammar description by expanding the system id.
*
Expand Down Expand Up @@ -198,15 +162,25 @@ private static void fillEntities(DTDGrammar grammar, XMLEntityManager entityMana
@Override
public void setValues(String name, String publicId, String systemId, String baseSystemId, String notation,
String value, boolean isPE, boolean inExternal) {
if (inExternal) {
// Only entities declared in the cached DTD grammar must be added in the XML
// entity manager.
if (systemId != null) {
if (notation != null) {
entityManager.addUnparsedEntity(name, publicId, systemId, baseSystemId, notation);
} else {
try {
entityManager.addExternalEntity(name, publicId, systemId, baseSystemId);
} catch (IOException e) {
// Do nothing
}
}
} else {
entityManager.addInternalEntity(name, value);
}
};
}
};
while (grammar.getEntityDecl(index, entityDecl)) {
while (grammar.getEntityDecl(index, entityDecl))

{
index++;
}
}
}
}
Loading

0 comments on commit b46cb1f

Please sign in to comment.