Skip to content

Commit

Permalink
prep for 1.22 rc2
Browse files Browse the repository at this point in the history
(cherry picked from commit 22ff756)
  • Loading branch information
tballison committed Jul 24, 2019
1 parent ed461e1 commit 81c21ab
Show file tree
Hide file tree
Showing 6 changed files with 89 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -520,7 +520,10 @@ private static PoolDOMBuilder acquireDOMBuilder()
if (builder != null) {
return builder;
}
LOG.log(Level.WARNING, "Contention waiting for a DOMParser. "+
"Consider increasing the XMLReaderUtils.POOL_SIZE");
waiting++;

if (waiting > 3000) {
//freshen the pool. Something went very wrong...
setPoolSize(POOL_SIZE);
Expand Down Expand Up @@ -586,6 +589,9 @@ private static PoolSAXParser acquireSAXParser()
if (parser != null) {
return parser;
}
//this is the SAX parser pool, so the warning must name the SAXParser
//(the earlier text was copied from acquireDOMBuilder and misreported a DOMParser)
LOG.log(Level.WARNING, "Contention waiting for a SAXParser. "+
"Consider increasing the XMLReaderUtils.POOL_SIZE");

waiting++;
if (waiting > 3000) {
//freshen the pool. Something went very wrong...
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,16 @@ public void parse(InputStream stream, ContentHandler handler,

xhtml.startDocument();
try {
XMLReaderUtils.parseSAX(
//need to get new SAXParser because
//an attachment might require another SAXParser
//mid-parse
XMLReaderUtils.getSAXParser().parse(
new CloseShieldInputStream(stream),
new OfflineContentHandler(new EmbeddedContentHandler(
new Word2006MLDocHandler(xhtml, metadata, context))),
context);
new Word2006MLDocHandler(xhtml, metadata, context))));
} catch (SAXException e) {
throw new TikaException("XML parse error", e);
}
xhtml.endDocument();
xhtml.endDocument();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -98,11 +98,13 @@ public void parse(

TaggedContentHandler tagged = new TaggedContentHandler(xhtml);
try {
XMLReaderUtils.parseSAX(
//need to get new SAXParser because
//an attachment might require another SAXParser
//mid-parse
XMLReaderUtils.getSAXParser().parse(
new CloseShieldInputStream(stream),
new OfflineContentHandler(new EmbeddedContentHandler(
getContentHandler(tagged, metadata, context))),
context);
getContentHandler(tagged, metadata, context))));
} catch (SAXException e) {
tagged.throwIfCauseOf(e);
throw new TikaException("XML parse error", e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.iwork.IWorkPackageParser;
import org.apache.tika.sax.OfflineContentHandler;
import org.apache.tika.utils.XMLReaderUtils;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
Expand Down Expand Up @@ -207,7 +208,9 @@ public void startElement(String uri, String localName,
public static MediaType parseOOXMLContentTypes(InputStream is) {
ContentTypeHandler contentTypeHandler = new ContentTypeHandler();
try {
XMLReaderUtils.parseSAX(is, contentTypeHandler, new ParseContext());
XMLReaderUtils.parseSAX(is,
new OfflineContentHandler(contentTypeHandler),
new ParseContext());
} catch (SecurityException e) {
throw e;
} catch (Exception e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,33 @@

import static org.junit.Assert.assertEquals;

import java.io.File;
import java.io.FileFilter;
import java.util.List;

import org.apache.tika.MultiThreadedTikaTest;
import org.apache.tika.TikaTest;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Office;
import org.apache.tika.metadata.OfficeOpenXMLCore;
import org.apache.tika.metadata.OfficeOpenXMLExtended;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.RecursiveParserWrapper;
import org.apache.tika.parser.microsoft.OfficeParserConfig;
import org.apache.tika.utils.XMLReaderUtils;
import org.junit.AfterClass;
import org.junit.Test;


public class Word2006MLParserTest extends TikaTest {
public class Word2006MLParserTest extends MultiThreadedTikaTest {

/**
 * Restores the shared {@link XMLReaderUtils} pool size to
 * {@link XMLReaderUtils#DEFAULT_POOL_SIZE} once every test in this class has run;
 * {@code testMultiThreaded} changes the pool size via {@code setPoolSize(4)}.
 *
 * @throws TikaException declared because {@code XMLReaderUtils.setPoolSize} is
 *                       called here; presumably thrown if the pool cannot be rebuilt
 */
@AfterClass
public static void tearDown() throws TikaException {
XMLReaderUtils.setPoolSize(XMLReaderUtils.DEFAULT_POOL_SIZE);
}

@Test
public void basicTest() throws Exception {
Expand Down Expand Up @@ -167,5 +179,26 @@ public void testSkipDeletedAndMoveFrom() throws Exception {

}

/**
 * Runs the inherited multi-threaded harness against {@code testWORD_2006ml.xml}
 * with the XML reader pool set to 4 and twice as many threads as the pool
 * reports, so threads outnumber pooled parsers. Fails if not finished
 * within 60 seconds.
 */
@Test(timeout = 60000)
public void testMultiThreaded() throws Exception {
    XMLReaderUtils.setPoolSize(4);
    //deliberately oversubscribe: two threads per pooled parser
    int numThreads = XMLReaderUtils.getPoolSize()*2;

    //one independent ParseContext per thread
    ParseContext[] perThreadContexts = new ParseContext[numThreads];
    for (int slot = perThreadContexts.length - 1; slot >= 0; slot--) {
        perThreadContexts[slot] = new ParseContext();
    }

    //restrict the run to the single Word 2006 XML test document
    FileFilter word2006mlOnly = new FileFilter() {
        @Override
        public boolean accept(File pathname) {
            return pathname.getName().equals("testWORD_2006ml.xml");
        }
    };

    //the literal 2 is presumably the per-thread iteration count -- confirm
    //against MultiThreadedTikaTest
    testMultiThreaded(new AutoDetectParser(), perThreadContexts, numThreads, 2,
            word2006mlOnly);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,31 @@

import static org.junit.Assert.assertEquals;

import java.io.File;
import java.io.FileFilter;
import java.util.Arrays;
import java.util.List;

import org.apache.tika.MultiThreadedTikaTest;
import org.apache.tika.TikaTest;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Office;
import org.apache.tika.metadata.OfficeOpenXMLCore;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.RecursiveParserWrapper;
import org.apache.tika.utils.XMLReaderUtils;
import org.junit.AfterClass;
import org.junit.Test;

public class XML2003ParserTest extends TikaTest {
public class XML2003ParserTest extends MultiThreadedTikaTest {

/**
 * Restores the shared {@link XMLReaderUtils} pool size to
 * {@link XMLReaderUtils#DEFAULT_POOL_SIZE} once every test in this class has run;
 * {@code testMultiThreaded} changes the pool size via {@code setPoolSize(4)}.
 *
 * @throws TikaException declared because {@code XMLReaderUtils.setPoolSize} is
 *                       called here; presumably thrown if the pool cannot be rebuilt
 */
@AfterClass
public static void tearDown() throws TikaException {
XMLReaderUtils.setPoolSize(XMLReaderUtils.DEFAULT_POOL_SIZE);
}

@Test
public void testBasicWord() throws Exception {
Expand Down Expand Up @@ -107,4 +119,25 @@ public void testBasicExcel() throws Exception {

}

/**
 * Runs the inherited multi-threaded harness against {@code testWORD2003.xml}
 * with the XML reader pool set to 4 and twice as many threads as the pool
 * reports, so threads outnumber pooled parsers. Fails if not finished
 * within 60 seconds.
 */
@Test(timeout = 60000)
public void testMultiThreaded() throws Exception {
    XMLReaderUtils.setPoolSize(4);
    //deliberately oversubscribe: two threads per pooled parser
    int numThreads = XMLReaderUtils.getPoolSize()*2;

    //one independent ParseContext per thread
    ParseContext[] perThreadContexts = new ParseContext[numThreads];
    for (int slot = perThreadContexts.length - 1; slot >= 0; slot--) {
        perThreadContexts[slot] = new ParseContext();
    }

    //restrict the run to the single Word 2003 XML test document
    FileFilter word2003Only = new FileFilter() {
        @Override
        public boolean accept(File pathname) {
            return pathname.getName().equals("testWORD2003.xml");
        }
    };

    //the literal 2 is presumably the per-thread iteration count -- confirm
    //against MultiThreadedTikaTest
    testMultiThreaded(new AutoDetectParser(), perThreadContexts, numThreads, 2,
            word2003Only);
}
}

0 comments on commit 81c21ab

Please sign in to comment.