Skip to content

Commit

Permalink
partial impl of pure Java version for c14n
Browse files Browse the repository at this point in the history
  • Loading branch information
yokolet committed Nov 17, 2011
1 parent b2a6af5 commit 1ca0a1e
Show file tree
Hide file tree
Showing 5 changed files with 145 additions and 21 deletions.
22 changes: 22 additions & 0 deletions ext/java/nokogiri/XmlDocument.java
Expand Up @@ -38,6 +38,8 @@
import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
import static nokogiri.internals.NokogiriHelpers.stringOrNil;

import java.util.List;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

Expand All @@ -50,11 +52,13 @@
import org.jruby.RubyClass;
import org.jruby.RubyFixnum;
import org.jruby.RubyNil;
import org.jruby.RubyString;
import org.jruby.anno.JRubyClass;
import org.jruby.anno.JRubyMethod;
import org.jruby.javasupport.JavaUtil;
import org.jruby.javasupport.util.RuntimeHelpers;
import org.jruby.runtime.Arity;
import org.jruby.runtime.Block;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
import org.w3c.dom.Attr;
Expand Down Expand Up @@ -518,4 +522,22 @@ public static IRubyObject wrapJavaDocument(ThreadContext context, IRubyObject kl
/**
 * Exposes the DOM document held in {@code node} to Ruby code.
 *
 * @param context the current thread context; its runtime is used for the conversion
 * @return the result of {@code JavaUtil.convertJavaToUsableRubyObject} for the document
 */
public IRubyObject toJavaDocument(ThreadContext context) {
    final org.w3c.dom.Document domDocument = (org.w3c.dom.Document) node;
    return JavaUtil.convertJavaToUsableRubyObject(context.getRuntime(), domDocument);
}

/**
 * Serializes this document through a {@code SaveContextVisitor} created in
 * canonical mode and returns the serialized text.
 *
 * <p>When a block is given, it is called once for every node the visitor
 * collected, with the wrapped node and its wrapped parent as arguments. Only
 * the value returned by the last call is consulted afterwards.
 *
 * @param context the current thread context
 * @param args    optional Ruby arguments; not read by this method
 * @param block   optional block invoked with (node, parent) for each collected node
 * @return the visitor output via {@code stringOrNil} when the last block
 *         result (or {@code true} if no block call happened) is truthy,
 *         otherwise an empty Ruby string
 */
@JRubyMethod(optional=3)
public IRubyObject canonicalize(ThreadContext context, IRubyObject[] args, Block block) {
    // NO_DECL | NO_EMPTY | AS_XML == 38
    final int saveOptions = 38;
    SaveContextVisitor c14nVisitor =
        new SaveContextVisitor(saveOptions, null, "UTF-8", false, false, true);
    accept(context, c14nVisitor);

    Ruby runtime = context.getRuntime();
    // Stays true unless a block call replaces it.
    IRubyObject verdict = runtime.getTrue();
    if (block.isGiven()) {
        for (Node visited : c14nVisitor.getC14nNodeList()) {
            IRubyObject wrappedNode = getCachedNodeOrCreate(runtime, visited);
            IRubyObject wrappedParent = getCachedNodeOrCreate(runtime, visited.getParentNode());
            verdict = block.call(context, wrappedNode, wrappedParent);
        }
    }

    if (!verdict.isTrue()) {
        return RubyString.newEmptyString(runtime);
    }
    return stringOrNil(runtime, c14nVisitor.toString());
}
}
2 changes: 1 addition & 1 deletion ext/java/nokogiri/XmlNode.java
Expand Up @@ -1026,7 +1026,7 @@ public IRubyObject native_write_to(ThreadContext context,

SaveContextVisitor visitor =
new SaveContextVisitor((Integer)options.toJava(Integer.class), rubyStringToString(indentString), encString,
isHtmlDoc(context), isFragment());
isHtmlDoc(context), isFragment(), false);
accept(context, visitor);
IRubyObject rubyString = stringOrNil(context.getRuntime(), visitor.toString());
RuntimeHelpers.invoke(context, io, "write", rubyString);
Expand Down
13 changes: 1 addition & 12 deletions ext/java/nokogiri/XmlSaxParserContext.java
Expand Up @@ -32,7 +32,7 @@

package nokogiri;

import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
import static nokogiri.internals.NokogiriHelpers.isWhitespaceText;
import static org.jruby.javasupport.util.RuntimeHelpers.invoke;

import java.io.IOException;
Expand Down Expand Up @@ -340,17 +340,6 @@ protected void maybeTrimLeadingAndTrailingWhitespace(ThreadContext context,
((XmlNode) doc).normalize();
}

/**
 * Tells whether the given Ruby object is a text node whose content consists
 * solely of characters removed by {@link String#trim()}.
 *
 * @param context the current thread context, used to read the node content
 * @param obj     the object to inspect; may be {@code null} or nil
 * @return {@code true} only when {@code obj} is an {@code XmlText} whose
 *         content trims to the empty string; {@code false} for {@code null},
 *         nil, and any object that is not an {@code XmlText}
 */
protected boolean isWhitespaceText(ThreadContext context, IRubyObject obj) {
    if (obj == null || obj.isNil()) return false;

    // Check the type before casting: an unconditional (XmlNode) cast would
    // throw ClassCastException for an object that is not an XmlNode at all.
    if (!(obj instanceof XmlText)) return false;

    XmlNode node = (XmlNode) obj;
    String content = rubyStringToString(node.content(context));
    return content.trim().length() == 0;
}

@JRubyMethod(name="column")
public IRubyObject column(ThreadContext context) {
Integer number = handler.getColumn();
Expand Down
33 changes: 33 additions & 0 deletions ext/java/nokogiri/internals/NokogiriHelpers.java
Expand Up @@ -58,6 +58,7 @@
import org.jruby.RubyClass;
import org.jruby.RubyEncoding;
import org.jruby.RubyString;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
import org.jruby.util.ByteList;
import org.w3c.dom.Attr;
Expand Down Expand Up @@ -572,6 +573,38 @@ public static boolean isNonDefaultNamespace(Node node) {
/**
 * Reports whether an attribute name is one of the two names this helper
 * treats as a base reference: {@code xml:base} or {@code xlink:href}.
 *
 * @param attrName the attribute name to test; {@code null} yields {@code false}
 * @return {@code true} when the name equals either of the two literals
 */
public static boolean isXmlBase(String attrName) {
    if ("xml:base".equals(attrName)) {
        return true;
    }
    return "xlink:href".equals(attrName);
}

/**
 * Tells whether the given Ruby object is a text node whose content consists
 * solely of characters removed by {@link String#trim()}.
 *
 * @param context the current thread context, used to read the node content
 * @param obj     the object to inspect; may be {@code null} or nil
 * @return {@code true} only when {@code obj} is an {@code XmlText} whose
 *         content trims to the empty string; {@code false} for {@code null},
 *         nil, and any object that is not an {@code XmlText}
 */
public static boolean isWhitespaceText(ThreadContext context, IRubyObject obj) {
    if (obj == null || obj.isNil()) return false;

    // Check the type before casting: an unconditional (XmlNode) cast would
    // throw ClassCastException for an object that is not an XmlNode at all.
    if (!(obj instanceof XmlText)) return false;

    XmlNode node = (XmlNode) obj;
    String content = rubyStringToString(node.content(context));
    return content.trim().length() == 0;
}

/**
 * Tells whether a string would be empty after {@link String#trim()}, i.e.
 * whether every character in it has a code point of U+0020 or below.
 *
 * @param s the string to test; must not be {@code null}
 * @return {@code true} for the empty string and for strings made up only of
 *         characters that {@code trim()} strips
 */
public static boolean isWhitespaceText(String s) {
    final int length = s.length();
    for (int pos = 0; pos < length; pos++) {
        // Anything above the space character survives trim().
        if (s.charAt(pos) > ' ') {
            return false;
        }
    }
    return true;
}

/**
 * Returns the input with every line feed after the first one removed.
 *
 * <p>The first {@code '\n'} in the string is kept where it is; all later
 * {@code '\n'} characters are dropped, whether or not they are adjacent to
 * it. Every other character is copied unchanged.
 *
 * @param s the text to process; must not be {@code null}
 * @return the text containing at most one line feed
 */
public static String canonicalizeWhitespce(String s) {
    final int firstBreak = s.indexOf('\n');
    if (firstBreak < 0) {
        // No line feed at all: nothing to drop.
        return s;
    }
    final String throughFirstBreak = s.substring(0, firstBreak + 1);
    final String remainder = s.substring(firstBreak + 1).replace("\n", "");
    return throughFirstBreak + remainder;
}

public static String newQName(String newPrefix, Node node) {
if(newPrefix == null) {
Expand Down
96 changes: 88 additions & 8 deletions ext/java/nokogiri/internals/SaveContextVisitor.java
Expand Up @@ -32,9 +32,16 @@

package nokogiri.internals;

import static nokogiri.internals.NokogiriHelpers.canonicalizeWhitespce;
import static nokogiri.internals.NokogiriHelpers.encodeJavaString;
import static nokogiri.internals.NokogiriHelpers.isNamespace;
import static nokogiri.internals.NokogiriHelpers.isNotXmlEscaped;
import static nokogiri.internals.NokogiriHelpers.isWhitespaceText;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Stack;

import org.cyberneko.html.HTMLElements;
Expand Down Expand Up @@ -65,7 +72,8 @@ public class SaveContextVisitor {
private Stack<String> indentation;
private String encoding, indentString;
private boolean format, noDecl, noEmpty, noXhtml, asXhtml, asXml, asHtml, asBuilder, htmlDoc, fragment;

private boolean canonical;
private List<Node> c14nNodeList;
/*
* U can't touch this.
* http://www.youtube.com/watch?v=WJ2ZFVx6A4Q
Expand All @@ -82,12 +90,14 @@ public class SaveContextVisitor {
public static final int AS_HTML = 64;
public static final int AS_BUILDER = 128;

public SaveContextVisitor(int options, String indent, String encoding, boolean htmlDoc, boolean fragment) {
public SaveContextVisitor(int options, String indent, String encoding, boolean htmlDoc, boolean fragment, boolean canonical) {
buffer = new StringBuffer();
this.encoding = encoding;
indentation = new Stack<String>(); indentation.push("");
this.htmlDoc = htmlDoc;
this.fragment = fragment;
this.canonical = canonical;
c14nNodeList = new ArrayList<Node>();
format = (options & FORMAT) == FORMAT;

noDecl = (options & NO_DECL) == NO_DECL;
Expand Down Expand Up @@ -117,6 +127,10 @@ public void setEncoding(String encoding) {
this.encoding = encoding;
}

/**
 * Gives access to the nodes recorded while visiting in canonical mode.
 *
 * @return the {@code c14nNodeList} field itself (not a copy)
 */
public List<Node> getC14nNodeList() {
    return this.c14nNodeList;
}

public boolean enter(Node node) {
if (node instanceof Document) {
return enter((Document)node);
Expand Down Expand Up @@ -275,6 +289,10 @@ public void leave(CDATASection cdata) {
}

public boolean enter(Comment comment) {
if (canonical) {
c14nNodeList.add(comment);
return true;
}
buffer.append("<!--");
buffer.append(comment.getData());
buffer.append("-->");
Expand Down Expand Up @@ -306,6 +324,10 @@ public void leave(Document document) {
}

public boolean enter(DocumentType docType) {
if (canonical) {
c14nNodeList.add(docType);
return true;
}
String name = docType.getName();
String pubId = docType.getPublicId();
String sysId = docType.getSystemId();
Expand Down Expand Up @@ -334,6 +356,12 @@ public void leave(DocumentType docType) {
}

public boolean enter(Element element) {
if (canonical) {
c14nNodeList.add(element);
if (element == element.getOwnerDocument().getDocumentElement()) {
c14nNodeList.add(element.getOwnerDocument());
}
}
String current = indentation.peek();
buffer.append(current);
if (needIndent()) {
Expand All @@ -342,12 +370,23 @@ public boolean enter(Element element) {
String name = element.getTagName();
buffer.append("<" + name);
NamedNodeMap attrs = element.getAttributes();
for (int i=0; i<attrs.getLength(); i++) {
Attr attr = (Attr) attrs.item(i);
if (attr.getSpecified()) {
buffer.append(" ");
enter(attr);
leave(attr);
if (canonical) {
Attr[] sorted = canonicalizeAttrOrder(attrs);
for (Attr attr : sorted) {
if (attr.getSpecified()) {
buffer.append(" ");
enter(attr);
leave(attr);
}
}
} else {
for (int i = 0; i < attrs.getLength(); i++) {
Attr attr = (Attr) attrs.item(i);
if (attr.getSpecified()) {
buffer.append(" ");
enter(attr);
leave(attr);
}
}
}
if (element.hasChildNodes()) {
Expand Down Expand Up @@ -388,6 +427,39 @@ private boolean isEmpty(String name) {
return element.isEmpty();
}

/**
 * Arranges an element's attributes for canonical output.
 *
 * <p>Attributes whose node name {@code isNamespace} accepts are placed
 * first, followed by all remaining attributes; each group is ordered by
 * {@code getSortedArray}.
 *
 * @param attrs the attribute map of an element; may be {@code null}
 * @return a new array holding every attribute in the arranged order, or an
 *         empty array when {@code attrs} is {@code null} or empty
 */
private Attr[] canonicalizeAttrOrder(NamedNodeMap attrs) {
    if (attrs == null) return new Attr[0];
    final int count = attrs.getLength();
    if (count == 0) return new Attr[0];

    List<Attr> namespaceDecls = new ArrayList<Attr>();
    List<Attr> plainAttrs = new ArrayList<Attr>();
    for (int idx = 0; idx < count; idx++) {
        Attr candidate = (Attr) attrs.item(idx);
        List<Attr> bucket = isNamespace(candidate.getNodeName()) ? namespaceDecls : plainAttrs;
        bucket.add(candidate);
    }

    Attr[] sortedNamespaces = getSortedArray(namespaceDecls);
    Attr[] sortedPlain = getSortedArray(plainAttrs);

    // Namespace group first, then the remaining attributes.
    Attr[] merged = new Attr[sortedNamespaces.length + sortedPlain.length];
    System.arraycopy(sortedNamespaces, 0, merged, 0, sortedNamespaces.length);
    System.arraycopy(sortedPlain, 0, merged, sortedNamespaces.length, sortedPlain.length);
    return merged;
}

/**
 * Copies the given attributes into an array ordered by node name.
 *
 * <p>Names are compared with {@link String#compareTo(String)}, so the order
 * is case-sensitive and based on character values.
 *
 * @param attrList the attributes to order; the list itself is not modified
 * @return a new array containing the same attributes in ascending name order
 */
private Attr[] getSortedArray(List<Attr> attrList) {
    Comparator<Attr> byNodeName = new Comparator<Attr>() {
        @Override
        public int compare(Attr left, Attr right) {
            String leftName = left.getNodeName();
            return leftName.compareTo(right.getNodeName());
        }
    };
    Attr[] sorted = attrList.toArray(new Attr[attrList.size()]);
    Arrays.sort(sorted, byNodeName);
    return sorted;
}

public void leave(Element element) {
String name = element.getTagName();
if (element.hasChildNodes()) {
Expand Down Expand Up @@ -502,6 +574,7 @@ public boolean enter(ProcessingInstruction pi) {
if (asHtml) buffer.append(">");
else buffer.append("?>");
buffer.append("\n");
if (canonical) c14nNodeList.add(pi);
return true;
}

Expand All @@ -512,6 +585,13 @@ public void leave(ProcessingInstruction pi) {
private static char lineSeparator = '\n'; // System.getProperty("line.separator"); ?
public boolean enter(Text text) {
String textContent = text.getNodeValue();
if (canonical) {
c14nNodeList.add(text);
if (isWhitespaceText(textContent)) {
buffer.append(canonicalizeWhitespce(textContent));
return true;
}
}
if (needIndentText() && "".equals(textContent.trim())) return true;
if (needIndentText()) {
String current = indentation.peek();
Expand Down

0 comments on commit 1ca0a1e

Please sign in to comment.