Permalink
Browse files

TIKA-2692 -- minimal upgrades to pass ossindex-maven module -- except for tika-nlp module, which requires significant work.
  • Loading branch information...
tballison committed Jul 26, 2018
1 parent 57ff879 commit de2057b2326aaa68c913e2a4a929538d6cc507f8
View
@@ -89,7 +89,7 @@
<dependency>
<groupId>org.apache.jackrabbit</groupId>
<artifactId>jackrabbit-jcr-server</artifactId>
<version>2.3.6</version>
<version>2.17.4</version>
<exclusions>
<exclusion>
<groupId>org.apache.tika</groupId>
@@ -108,7 +108,7 @@
<dependency>
<groupId>org.apache.jackrabbit</groupId>
<artifactId>jackrabbit-core</artifactId>
<version>2.3.6</version>
<version>2.17.4</version>
<exclusions>
<exclusion>
<groupId>org.apache.tika</groupId>
View
@@ -309,7 +309,7 @@
<commons.compress.version>1.17</commons.compress.version>
<commons.io.version>2.6</commons.io.version>
<gson.version>2.8.1</gson.version>
<cxf.version>3.0.16</cxf.version>
<cxf.version>3.2.5</cxf.version>
<slf4j.version>1.7.24</slf4j.version>
<jackson.version>2.9.5</jackson.version>
<jaxb.version>2.3.0</jaxb.version>
View
@@ -50,9 +50,9 @@
<sis.version>0.8</sis.version>
<parso.version>2.0.9</parso.version>
<!-- used by POI, PDFBox and Jackcess ...try to sync -->
<bouncycastle.version>1.54</bouncycastle.version>
<bouncycastle.version>1.60</bouncycastle.version>
<commonsexec.version>1.3</commonsexec.version>
<httpcomponents.version>4.5.4</httpcomponents.version>
<httpcomponents.version>4.5.6</httpcomponents.version>
</properties>
<dependencies>
View
@@ -97,17 +97,7 @@
<artifactId>cxf-rt-rs-security-cors</artifactId>
<version>${cxf.version}</version>
</dependency>
<dependency>
<groupId>javax.mail</groupId>
<artifactId>mail</artifactId>
<version>1.4.4</version>
<exclusions>
<exclusion>
<groupId>javax.activation</groupId>
<artifactId>activation</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
@@ -24,17 +24,21 @@
import java.util.List;
import java.util.Map;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import org.apache.tika.detect.CompositeDetector;
import org.apache.tika.detect.Detector;
import org.apache.tika.server.HTMLHelper;
import org.eclipse.jetty.util.ajax.JSON;
/**
* <p>Provides details of all the {@link Detector}s registered with
* Apache Tika, similar to <em>--list-detectors</em> with the Tika CLI.
*/
@Path("/detectors")
public class TikaDetectors {
private static final Gson GSON = new GsonBuilder().disableHtmlEscaping().create();
private HTMLHelper html;
public TikaDetectors() {
@@ -76,7 +80,7 @@ private void detectorAsHTML(Detector d, StringBuffer html, int level) {
public String getDetectorsJSON() {
Map<String, Object> details = new HashMap<String, Object>();
detectorAsMap(TikaResource.getConfig().getDetector(), details);
return JSON.toString(details);
return GSON.toJson(details);
}
private void detectorAsMap(Detector d, Map<String, Object> details) {
@@ -26,19 +26,23 @@
import java.util.SortedMap;
import java.util.TreeMap;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MediaTypeRegistry;
import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.Parser;
import org.apache.tika.server.HTMLHelper;
import org.eclipse.jetty.util.ajax.JSON;
/**
* <p>Provides details of all the mimetypes known to Apache Tika,
* similar to <em>--list-supported-types</em> with the Tika CLI.
*/
@Path("/mime-types")
public class TikaMimeTypes {
private static final Gson GSON = new GsonBuilder().disableHtmlEscaping().create();
private HTMLHelper html;
public TikaMimeTypes() {
@@ -96,9 +100,9 @@ public String getMimeTypesJSON() {
for (MediaTypeDetails type : getMediaTypes()) {
Map<String, Object> typeDets = new HashMap<String, Object>();
typeDets.put("alias", type.aliases);
typeDets.put("alias", copyToStringArray(type.aliases));
if (type.supertype != null) {
typeDets.put("supertype", type.supertype);
typeDets.put("supertype", type.supertype.toString());
}
if (type.parser != null) {
typeDets.put("parser", type.parser);
@@ -107,7 +111,15 @@ public String getMimeTypesJSON() {
details.put(type.type.toString(), typeDets);
}
return JSON.toString(details);
return GSON.toJson(details);
}
/**
 * Builds a string array from the given media types by calling
 * {@code toString()} on each element.
 *
 * @param aliases the media types to convert
 * @return a newly allocated array with the same length and ordering as
 *         {@code aliases}, holding the string form of each element
 */
private static String[] copyToStringArray(MediaType[] aliases) {
    final String[] names = new String[aliases.length];
    int next = 0;
    for (MediaType alias : aliases) {
        names[next++] = alias.toString();
    }
    return names;
}
@GET
@@ -28,13 +28,14 @@
import java.util.Map;
import java.util.Set;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.ParserDecorator;
import org.apache.tika.server.HTMLHelper;
import org.eclipse.jetty.util.ajax.JSON;
/**
* <p>Provides details of all the {@link Parser}s registered with
@@ -44,6 +45,7 @@
@Path("/parsers")
public class TikaParsers {
private static final ParseContext EMPTY_PC = new ParseContext();
private static final Gson GSON = new GsonBuilder().disableHtmlEscaping().create();
private HTMLHelper html;
public TikaParsers() {
@@ -127,7 +129,8 @@ public String getParsersJSON() {
protected String getParsersJSON(boolean withMimeTypes) {
Map<String, Object> details = new HashMap<String, Object>();
parserAsMap(new ParserDetails(TikaResource.getConfig().getParser()), withMimeTypes, details);
return JSON.toString(details);
return GSON.toJson(details);
}
private void parserAsMap(ParserDetails p, boolean withMimeTypes, Map<String, Object> details) {
@@ -17,10 +17,36 @@
package org.apache.tika.server.resource;
import static java.nio.charset.StandardCharsets.UTF_8;
import org.apache.commons.lang.StringUtils;
import org.apache.cxf.attachment.ContentDisposition;
import org.apache.cxf.jaxrs.ext.multipart.Attachment;
import org.apache.poi.extractor.ExtractorFactory;
import org.apache.tika.Tika;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.detect.Detector;
import org.apache.tika.exception.EncryptedDocumentException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.DigestingParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.ParserDecorator;
import org.apache.tika.parser.PasswordProvider;
import org.apache.tika.parser.html.BoilerpipeContentHandler;
import org.apache.tika.parser.ocr.TesseractOCRConfig;
import org.apache.tika.parser.pdf.PDFParserConfig;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.ExpandedTitleContentHandler;
import org.apache.tika.sax.RichTextContentHandler;
import org.apache.tika.server.InputStreamFactory;
import org.apache.tika.server.TikaServerParseException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import javax.mail.internet.ContentDisposition;
import javax.mail.internet.ParseException;
import javax.ws.rs.Consumes;
import javax.ws.rs.GET;
import javax.ws.rs.POST;
@@ -51,34 +77,7 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.apache.cxf.jaxrs.ext.multipart.Attachment;
import org.apache.poi.extractor.ExtractorFactory;
import org.apache.tika.Tika;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.detect.Detector;
import org.apache.tika.exception.EncryptedDocumentException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.DigestingParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.ParserDecorator;
import org.apache.tika.parser.PasswordProvider;
import org.apache.tika.parser.html.BoilerpipeContentHandler;
import org.apache.tika.parser.ocr.TesseractOCRConfig;
import org.apache.tika.parser.pdf.PDFParserConfig;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.ExpandedTitleContentHandler;
import org.apache.tika.sax.RichTextContentHandler;
import org.apache.tika.server.InputStreamFactory;
import org.apache.tika.server.TikaServerParseException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import static java.nio.charset.StandardCharsets.UTF_8;
@Path("/tika")
public class TikaResource {
@@ -133,19 +132,14 @@ public static String detectFilename(MultivaluedMap<String, String> httpHeaders)
String disposition = httpHeaders.getFirst("Content-Disposition");
if (disposition != null) {
try {
ContentDisposition c = new ContentDisposition(disposition);
ContentDisposition c = new ContentDisposition(disposition);
// only support "attachment" dispositions
if ("attachment".equals(c.getDisposition())) {
String fn = c.getParameter("filename");
if (fn != null) {
return fn;
}
// only support "attachment" dispositions
if ("attachment".equals(c.getType())) {
String fn = c.getParameter("filename");
if (fn != null) {
return fn;
}
} catch (ParseException e) {
// not a valid content-disposition field
LOG.warn("Parse exception {} determining content disposition", e.getMessage(), e);
}
}
@@ -23,20 +23,27 @@
import javax.ws.rs.core.Response;
import java.io.InputStream;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
import org.apache.tika.mime.MimeTypes;
import org.apache.tika.parser.microsoft.POIFSContainerDetector;
import org.apache.tika.parser.pkg.ZipContainerDetector;
import org.apache.tika.server.resource.TikaDetectors;
import org.eclipse.jetty.util.ajax.JSON;
import org.gagravarr.tika.OggDetector;
import org.junit.Test;
public class TikaDetectorsTest extends CXFTestBase {
private static final Gson GSON = new GsonBuilder().create();
private static final String DETECTORS_PATH = "/detectors";
@Override
@@ -100,7 +107,7 @@ public void testGetJSON() throws Exception {
.get();
String jsonStr = getStringFromInputStream((InputStream) response.getEntity());
Map<String, Object> json = (Map<String, Object>) JSON.parse(jsonStr);
Map<String, Object> json = (Map<String, Object>) GSON.fromJson(jsonStr, Map.class);
// Should have a nested structure
assertTrue(json.containsKey("name"));
@@ -110,8 +117,8 @@ public void testGetJSON() throws Exception {
assertEquals(Boolean.TRUE, json.get("composite"));
// At least 4 child detectors, none of them composite
Object[] children = (Object[]) json.get("children");
assertTrue(children.length >= 4);
List<Object> children = (List) json.get("children");
assertTrue(children.size() >= 4);
boolean hasOgg = false, hasPOIFS = false, hasZIP = false, hasMime = false;
for (Object o : children) {
Map<String, Object> d = (Map<String, Object>) o;
@@ -139,4 +146,5 @@ public void testGetJSON() throws Exception {
assertTrue(hasZIP);
assertTrue(hasMime);
}
}
@@ -23,16 +23,21 @@
import javax.ws.rs.core.Response;
import java.io.InputStream;
import java.util.List;
import java.util.Map;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
import org.apache.tika.server.resource.TikaMimeTypes;
import org.eclipse.jetty.util.ajax.JSON;
import org.junit.Test;
public class TikaMimeTypesTest extends CXFTestBase {
private static final Gson GSON = new GsonBuilder().create();
private static final String MIMETYPES_PATH = "/mime-types";
@Override
@@ -97,7 +102,8 @@ public void testGetJSON() throws Exception {
.get();
String jsonStr = getStringFromInputStream((InputStream) response.getEntity());
Map<String, Map<String, Object>> json = (Map<String, Map<String, Object>>) JSON.parse(jsonStr);
Map<String, Map<String, Object>> json = (Map<String, Map<String, Object>>)
GSON.fromJson(jsonStr, Map.class);
assertEquals(true, json.containsKey("text/plain"));
assertEquals(true, json.containsKey("application/xml"));
@@ -106,10 +112,11 @@ public void testGetJSON() throws Exception {
Map<String, Object> bmp = json.get("image/bmp");
assertEquals(true, bmp.containsKey("alias"));
Object[] aliases = (Object[]) bmp.get("alias");
assertEquals(2, aliases.length);
assertEquals("image/x-bmp", aliases[0]);
assertEquals("image/x-ms-bmp", aliases[1]);
List<Object> aliases = (List) bmp.get("alias");
assertEquals(2, aliases.size());
assertEquals("image/x-bmp", aliases.get(0));
assertEquals("image/x-ms-bmp", aliases.get(1));
String whichParser = bmp.get("parser").toString();
assertTrue("Which parser", whichParser.equals("org.apache.tika.parser.ocr.TesseractOCRParser") ||
Oops, something went wrong.

0 comments on commit de2057b

Please sign in to comment.