Skip to content

Commit

Permalink
TIKA-1782 allow XHTMLContentHandler to pass attributes of html element via Markus Jelsma
Browse files Browse the repository at this point in the history

git-svn-id: https://svn.apache.org/repos/asf/tika/trunk@1710799 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information
tballison committed Oct 27, 2015
1 parent 63351d1 commit f43de5a
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 1 deletion.
3 changes: 3 additions & 0 deletions CHANGES.txt
@@ -1,5 +1,8 @@
Release 1.12 - Current Development

* Allow XHTMLContentHandler to pass attributes of html element
via Markus Jelsma (TIKA-1782).

* Fix regression with spacing in PPT via Andreas Beeker (TIKA-1777).


Expand Down
Expand Up @@ -60,7 +60,7 @@ public class XHTMLContentHandler extends SafeContentHandler {
* skip them if they get sent to startElement/endElement by mistake.
*/
private static final Set<String> AUTO =
unmodifiableSet("html", "head", "frameset");
unmodifiableSet("head", "frameset");

/**
* The elements that get prepended with the {@link #TAB} character.
Expand Down
Expand Up @@ -140,6 +140,23 @@ public void testAttributesOnBody() throws Exception {

assertTrue(toHTMLContentHandler.toString().contains("itemscope"));
}

/**
 * Starts and ends an {@code html} element carrying {@code itemscope} and
 * {@code itemtype} attributes through an {@link XHTMLContentHandler} that
 * wraps a {@link ToHTMLContentHandler}, then checks that the serialized
 * output contains the {@code itemscope} attribute name.
 *
 * @throws Exception if any of the handler calls fails
 */
@Test
public void testAttributesOnHtml() throws Exception {
    // Attributes to attach to the html element.
    AttributesImpl htmlAttributes = new AttributesImpl();
    htmlAttributes.addAttribute(XHTMLContentHandler.XHTML, "itemscope", "itemscope", "", "");
    htmlAttributes.addAttribute(XHTMLContentHandler.XHTML, "itemtype", "itemtype", "", "http://schema.org/Event");

    // The wrapped handler collects whatever the XHTML handler forwards to it.
    ToHTMLContentHandler sink = new ToHTMLContentHandler();
    XHTMLContentHandler xhtml = new XHTMLContentHandler(sink, new Metadata());

    xhtml.startDocument();
    xhtml.startElement(XHTMLContentHandler.XHTML, "html", "html", htmlAttributes);
    xhtml.endElement("html");
    xhtml.endDocument();

    String serialized = sink.toString();
    assertTrue(serialized.contains("itemscope"));
}

/**
* Return array of non-zerolength words. Splitting on whitespace will get us
Expand Down

0 comments on commit f43de5a

Please sign in to comment.