From 4557a3cc6c56b98420b2389a44b2b4fc3c133a5d Mon Sep 17 00:00:00 2001 From: Lewis John McGibbney Date: Tue, 19 Apr 2016 17:17:50 -0700 Subject: [PATCH 1/5] SOLR-8716 Upgrade to Apache Tika 1.12 --- lucene/ivy-versions.properties | 25 ++++++++++++++----------- solr/contrib/extraction/ivy.xml | 2 +- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/lucene/ivy-versions.properties b/lucene/ivy-versions.properties index 000a8ab84ac8..25029d31c1bf 100644 --- a/lucene/ivy-versions.properties +++ b/lucene/ivy-versions.properties @@ -18,11 +18,11 @@ com.codahale.metrics.version = 3.0.1 /com.codahale.metrics/metrics-healthchecks = ${com.codahale.metrics.version} /com.cybozu.labs/langdetect = 1.1-20120112 -/com.drewnoakes/metadata-extractor = 2.6.2 +/com.drewnoakes/metadata-extractor = 2.8.1 /com.facebook.presto/presto-parser = 0.122 -com.fasterxml.jackson.core.version = 2.5.4 +com.fasterxml.jackson.core.version = 2.7.1 /com.fasterxml.jackson.core/jackson-annotations = ${com.fasterxml.jackson.core.version} /com.fasterxml.jackson.core/jackson-core = ${com.fasterxml.jackson.core.version} /com.fasterxml.jackson.core/jackson-databind = ${com.fasterxml.jackson.core.version} @@ -38,7 +38,7 @@ com.google.inject.guice.version = 3.0 /com.google.protobuf/protobuf-java = 2.5.0 /com.googlecode.juniversalchardet/juniversalchardet = 1.0.3 -/com.googlecode.mp4parser/isoparser = 1.0.2 +/com.googlecode.mp4parser/isoparser = 1.1.7 /com.ibm.icu/icu4j = 56.1 /com.pff/java-libpst = 0.8.1 @@ -50,6 +50,7 @@ com.sun.jersey.version = 1.9 /com.sun.jersey/jersey-server = ${com.sun.jersey.version} /com.sun.mail/gimap = 1.5.1 +# Tika uses /javax.mail/mail/1.4.4 /com.sun.mail/javax.mail = 1.5.1 /com.sun.xml.bind/jaxb-impl = 2.2.3-1 @@ -58,7 +59,7 @@ com.sun.jersey.version = 1.9 /com.thoughtworks.paranamer/paranamer = 2.3 /com.typesafe/config = 1.0.2 /commons-beanutils/commons-beanutils = 1.8.3 -/commons-cli/commons-cli = 1.2 +/commons-cli/commons-cli = 1.3.1 /commons-codec/commons-codec = 1.10 /commons-collections/commons-collections = 3.2.2 /commons-configuration/commons-configuration = 1.6 @@ -92,7 +93,7 @@ com.sun.jersey.version = 1.9 /org.apache.ant/ant = 1.8.2 /org.apache.avro/avro = 1.7.5 -/org.apache.commons/commons-compress = 1.8.1 +/org.apache.commons/commons-compress = 1.11 /org.apache.commons/commons-exec = 1.3 /org.apache.commons/commons-math3 = 3.4.1 @@ -173,18 +174,19 @@ org.apache.james.apache.mime4j.version = 0.7.2 /org.apache.mina/mina-core = 2.0.0-M5 /org.apache.mrunit/mrunit = 1.0.0 -org.apache.pdfbox.version = 1.8.8 +#jempbox needs to be upgraded in sync with pdfbox +org.apache.pdfbox.version = 2.0.0 /org.apache.pdfbox/fontbox = ${org.apache.pdfbox.version} -/org.apache.pdfbox/jempbox = ${org.apache.pdfbox.version} +/org.apache.pdfbox/jempbox = 1.8.11 /org.apache.pdfbox/pdfbox = ${org.apache.pdfbox.version} -org.apache.poi.version = 3.11 +org.apache.poi.version = 3.15-beta1 /org.apache.poi/poi = ${org.apache.poi.version} /org.apache.poi/poi-ooxml = ${org.apache.poi.version} /org.apache.poi/poi-ooxml-schemas = ${org.apache.poi.version} /org.apache.poi/poi-scratchpad = ${org.apache.poi.version} -org.apache.tika.version = 1.7 +org.apache.tika.version = 1.12 /org.apache.tika/tika-core = ${org.apache.tika.version} /org.apache.tika/tika-java7 = ${org.apache.tika.version} /org.apache.tika/tika-parsers = ${org.apache.tika.version} @@ -246,7 +248,7 @@ org.eclipse.jetty.version = 9.3.8.v20160314 /org.fusesource.leveldbjni/leveldbjni = 1.8 -org.gagravarr.vorbis.java.version = 0.6 +org.gagravarr.vorbis.java.version = 0.8 /org.gagravarr/vorbis-java-core = ${org.gagravarr.vorbis.java.version} /org.gagravarr/vorbis-java-tika = ${org.gagravarr.vorbis.java.version} @@ -269,6 +271,7 @@ org.kitesdk.kite-morphlines.version = 1.1.0 /org.locationtech.spatial4j/spatial4j = 0.6 +#Tika has /org.mockito/mockito-core = 1.7 /org.mockito/mockito-core = 1.9.5 org.mortbay.jetty.version = 6.1.26 @@ -296,6 +299,6 @@ org.slf4j.version = 1.7.7 /org.tukaani/xz = 1.5 /org.xerial.snappy/snappy-java = 1.0.5 -/rome/rome = 1.0 +/com.rometools/rome = 1.5.1 /xerces/xercesImpl = 2.9.1 diff --git a/solr/contrib/extraction/ivy.xml b/solr/contrib/extraction/ivy.xml index 8fafb0a5b9de..7f91428ce0a2 100644 --- a/solr/contrib/extraction/ivy.xml +++ b/solr/contrib/extraction/ivy.xml @@ -52,7 +52,7 @@ - + From 006fa6f73ae2211fab0e20acafacca7fb3a4d304 Mon Sep 17 00:00:00 2001 From: Lewis John McGibbney Date: Tue, 19 Apr 2016 17:20:21 -0700 Subject: [PATCH 2/5] SOLR-8716 Upgrade to Apache Tika 1.12 --- solr/contrib/extraction/ivy.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/contrib/extraction/ivy.xml b/solr/contrib/extraction/ivy.xml index 7f91428ce0a2..de1ecade7aa3 100644 --- a/solr/contrib/extraction/ivy.xml +++ b/solr/contrib/extraction/ivy.xml @@ -28,7 +28,7 @@ - + From 09247cb01dce9563905e9cd84ae5d9c64181bc5b Mon Sep 17 00:00:00 2001 From: Lewis John McGibbney Date: Wed, 20 Apr 2016 15:14:07 -0700 Subject: [PATCH 3/5] SOLR-8716 Upgrade to Apache Tika 1.12 --- lucene/ivy-versions.properties | 20 +++++++++++++++++++- solr/contrib/extraction/ivy.xml | 10 ++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/lucene/ivy-versions.properties b/lucene/ivy-versions.properties index 25029d31c1bf..a491ec414b1f 100644 --- a/lucene/ivy-versions.properties +++ b/lucene/ivy-versions.properties @@ -30,6 +30,8 @@ com.fasterxml.jackson.core.version = 2.7.1 /com.github.ben-manes.caffeine/caffeine = 1.0.1 +/com.github.junrar/junrar = 0.7 + /com.google.guava/guava = 14.0.1 com.google.inject.guice.version = 3.0 @@ -39,8 +41,13 @@ com.google.inject.guice.version = 3.0 /com.google.protobuf/protobuf-java = 2.5.0 /com.googlecode.juniversalchardet/juniversalchardet = 1.0.3 /com.googlecode.mp4parser/isoparser = 1.1.7 + +/com.healthmarketscience.jackcess/jackcess = 2.1.3 +/com.healthmarketscience.jackcess/jackcess-encrypt = 2.1.1 + /com.ibm.icu/icu4j = 56.1 /com.pff/java-libpst = 0.8.1 +/com.rometools/rome = 1.5.1 com.sun.jersey.version = 1.9 /com.sun.jersey.contribs/jersey-guice = ${com.sun.jersey.version} @@ -101,6 +108,8 @@ org.apache.curator.version = 2.8.0 /org.apache.curator/curator-client = ${org.apache.curator.version} /org.apache.curator/curator-framework = ${org.apache.curator.version} +/org.apache.cxf/cxf-rt-rs-client = 3.0.3 + /org.apache.derby/derby = 10.9.1.0 /org.apache.directory.api/api-all = 1.0.0-M20 @@ -174,6 +183,8 @@ org.apache.james.apache.mime4j.version = 0.7.2 /org.apache.mina/mina-core = 2.0.0-M5 /org.apache.mrunit/mrunit = 1.0.0 +/org.apache.opennlp/opennlp-tools = 1.5.3 + #jempbox needs to be upgraded in sync with pdfbox org.apache.pdfbox.version = 2.0.0 /org.apache.pdfbox/fontbox = ${org.apache.pdfbox.version} @@ -186,6 +197,11 @@ org.apache.poi.version = 3.15-beta1 /org.apache.poi/poi-ooxml-schemas = ${org.apache.poi.version} /org.apache.poi/poi-scratchpad = ${org.apache.poi.version} +org.apache.sis.version = 0.6 +/org.apache.sis.core/sis-utility = ${org.apache.sis.version} +/org.apache.sis.storage/sis-netcdf = ${org.apache.sis.version} +/org.apache.sis.core/sis-metadata = ${org.apache.sis.version} + org.apache.tika.version = 1.12 /org.apache.tika/tika-core = ${org.apache.tika.version} /org.apache.tika/tika-java7 = ${org.apache.tika.version} @@ -280,6 +296,7 @@ org.mortbay.jetty.version = 6.1.26 /org.noggit/noggit = 0.6 /org.objenesis/objenesis = 1.2 +/org.opengis/geoapi = 3.0.0 org.ow2.asm.version = 5.0.4 /org.ow2.asm/asm = ${org.ow2.asm.version} @@ -298,7 +315,8 @@ org.slf4j.version = 1.7.7 /org.slf4j/slf4j-log4j12 = ${org.slf4j.version} /org.tukaani/xz = 1.5 +/org.xerial/sqlite-jdbc = 3.8.11.2 /org.xerial.snappy/snappy-java = 1.0.5 -/com.rometools/rome = 1.5.1 + /xerces/xercesImpl = 2.9.1 diff --git a/solr/contrib/extraction/ivy.xml b/solr/contrib/extraction/ivy.xml index de1ecade7aa3..6c597777865e 100644 --- a/solr/contrib/extraction/ivy.xml +++ b/solr/contrib/extraction/ivy.xml @@ -59,6 +59,16 @@ + + + + + + + + + + From 77428cd3e1a38a8a3bc3920d3a1e80116440e3c6 Mon Sep 17 00:00:00 2001 From: Lewis John McGibbney Date: Fri, 22 Apr 2016 10:58:21 -0700 Subject: [PATCH 4/5] SOLR-8716 Upgrade to Apache Tika 1.12 --- lucene/ivy-versions.properties | 8 -------- solr/NOTICE.txt | 29 +++++++++++++++++++++++++++-- solr/contrib/extraction/ivy.xml | 5 ----- 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/lucene/ivy-versions.properties b/lucene/ivy-versions.properties index a491ec414b1f..638ff698f304 100644 --- a/lucene/ivy-versions.properties +++ b/lucene/ivy-versions.properties @@ -30,8 +30,6 @@ com.fasterxml.jackson.core.version = 2.7.1 /com.github.ben-manes.caffeine/caffeine = 1.0.1 -/com.github.junrar/junrar = 0.7 - /com.google.guava/guava = 14.0.1 com.google.inject.guice.version = 3.0 @@ -197,11 +195,6 @@ org.apache.poi.version = 3.15-beta1 /org.apache.poi/poi-ooxml-schemas = ${org.apache.poi.version} /org.apache.poi/poi-scratchpad = ${org.apache.poi.version} -org.apache.sis.version = 0.6 -/org.apache.sis.core/sis-utility = ${org.apache.sis.version} -/org.apache.sis.storage/sis-netcdf = ${org.apache.sis.version} -/org.apache.sis.core/sis-metadata = ${org.apache.sis.version} - org.apache.tika.version = 1.12 /org.apache.tika/tika-core = ${org.apache.tika.version} /org.apache.tika/tika-java7 = ${org.apache.tika.version} @@ -296,7 +289,6 @@ org.mortbay.jetty.version = 6.1.26 /org.noggit/noggit = 0.6 /org.objenesis/objenesis = 1.2 -/org.opengis/geoapi = 3.0.0 org.ow2.asm.version = 5.0.4 /org.ow2.asm/asm = ${org.ow2.asm.version} diff --git a/solr/NOTICE.txt b/solr/NOTICE.txt index 25e2aa377c8a..9939335cbba2 100644 --- a/solr/NOTICE.txt +++ b/solr/NOTICE.txt @@ -380,8 +380,6 @@ Copyright (c) 1995-2005 International Business Machines Corporation and others Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved. -Copyright 2004 Sun Microsystems, Inc. (Rome JAR) - Copyright 2002-2008 by John Cowan (TagSoup -- http://ccil.org/~cowan/XML/tagsoup/) Copyright (C) 1994-2007 by the Xiph.org Foundation, http://www.xiph.org/ (OggVorbis) @@ -396,6 +394,33 @@ https://github.com/rjohnsondev/java-libpst JMatIO is a JAVA library to read/write/manipulate with Matlab binary MAT-files. http://www.sourceforge.net/projects/jmatio +metadata-extractor is a straightforward Java library for reading metadata +from image files. +https://github.com/drewnoakes/metadata-extractor + +Java MP4 Parser; A Java API to read, write and create MP4 container +https://github.com/sannies/mp4parser + +Jackcess; is a pure Java library for reading from and writing to MS Access +databases +http://jackcess.sourceforge.net/ + +Jackcess Encrypt; an extension library for the Jackcess project which +implements support for some forms of Microsoft Access and Microsoft +Money encryption +http://jackcessencrypt.sourceforge.net/ + +ROME; is a Java framework for RSS and Atom feeds +(https://github.com/rometools/rome) + +VorbisJava; Ogg and Vorbis Tools for Java +Copyright 2012 Nick Burch +https://github.com/Gagravarr/VorbisJava + +SQLite JSDC Driver; is a library for accessing and creating SQLite +database files in Java +https://bitbucket.org/xerial/sqlite-jdbc + ========================================================================= == Language Detection Notices == ========================================================================= diff --git a/solr/contrib/extraction/ivy.xml b/solr/contrib/extraction/ivy.xml index 6c597777865e..b03ea40dd893 100644 --- a/solr/contrib/extraction/ivy.xml +++ b/solr/contrib/extraction/ivy.xml @@ -59,15 +59,10 @@ - - - - - From ae7123eadfa86d7be1288c21923e2bce5eb58b54 Mon Sep 17 00:00:00 2001 From: Lewis John McGibbney Date: Fri, 22 Apr 2016 14:12:49 -0700 Subject: [PATCH 5/5] SOLR-8716 Upgrade to Apache Tika 1.12 --- solr/NOTICE.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/NOTICE.txt b/solr/NOTICE.txt index 9939335cbba2..0236293aa5f4 100644 --- a/solr/NOTICE.txt +++ b/solr/NOTICE.txt @@ -417,7 +417,7 @@ VorbisJava; Ogg and Vorbis Tools for Java Copyright 2012 Nick Burch https://github.com/Gagravarr/VorbisJava -SQLite JSDC Driver; is a library for accessing and creating SQLite +SQLite JDBC Driver; is a library for accessing and creating SQLite database files in Java https://bitbucket.org/xerial/sqlite-jdbc