Skip to content

Commit

Permalink
TIKA-2773 upgrade sqlite version
Browse files Browse the repository at this point in the history
  • Loading branch information
tballison committed Nov 8, 2018
1 parent 33d960c commit 0c49c85
Show file tree
Hide file tree
Showing 9 changed files with 83 additions and 9 deletions.
5 changes: 5 additions & 0 deletions CHANGES.txt
@@ -1,5 +1,10 @@
Release 1.20 - ???


* Upgrade sqlite "provided" dependency to 3.25.2 (TIKA-2773).

* Remove duplication of notes in PPT slides (TIKA-2735)

* Use -javaHome or $JAVA_HOME (if they exist) when
spawning child in tika-server's -spawnChild mode.

Expand Down
@@ -0,0 +1,31 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.exception;

/**
* This exception should be thrown when the parse absolutely, positively has to stop.
* This exception must not be caught and swallowed if an embedded parser throws it.
*/
public class CorruptedFileException extends TikaException {
public CorruptedFileException(String msg) {
super(msg);
}

public CorruptedFileException(String msg, Throwable cause) {
super(msg, cause);
}
}
Expand Up @@ -22,8 +22,10 @@
import java.io.InputStream;

import org.apache.tika.exception.EncryptedDocumentException;
import org.apache.tika.exception.CorruptedFileException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.CloseShieldInputStream;
import org.apache.tika.io.IOExceptionWithCause;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
Expand Down Expand Up @@ -106,6 +108,8 @@ public void parseEmbedded(
} catch (EncryptedDocumentException ede) {
// TODO: can we log a warning that we lack the password?
// For now, just skip the content
} catch (CorruptedFileException e) {
throw new IOExceptionWithCause(e);
} catch (TikaException e) {
// TODO: can we log a warning somehow?
// Could not parse the entry, just skip the content
Expand Down
Expand Up @@ -17,6 +17,7 @@
* limitations under the License.
*/

import org.apache.tika.exception.CorruptedFileException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.FilenameUtils;
import org.apache.tika.metadata.Metadata;
Expand Down Expand Up @@ -147,7 +148,8 @@ public RecursiveParserWrapper(Parser wrappedParser) {
* @param wrappedParser parser to wrap
* @param catchEmbeddedExceptions whether or not to catch+record embedded exceptions.
* If set to <code>false</code>, embedded exceptions will be thrown and
* the rest of the file will not be parsed
* the rest of the file will not be parsed. The following will not be ignored:
* {@link CorruptedFileException}, {@link RuntimeException}
*/
public RecursiveParserWrapper(Parser wrappedParser, boolean catchEmbeddedExceptions) {
super(wrappedParser);
Expand Down Expand Up @@ -378,6 +380,8 @@ public void parse(InputStream stream, ContentHandler ignore,
throw e;
}
}
} catch(CorruptedFileException e) {
throw e;
} catch (TikaException e) {
if (catchEmbeddedExceptions) {
ParserUtils.recordParserFailure(this, e, metadata);
Expand Down
2 changes: 1 addition & 1 deletion tika-parsers/pom.xml
Expand Up @@ -375,7 +375,7 @@
<dependency>
<groupId>org.xerial</groupId>
<artifactId>sqlite-jdbc</artifactId>
<version>3.19.3</version>
<version>3.25.2</version>
<scope>provided</scope>
</dependency>

Expand Down
Expand Up @@ -25,7 +25,9 @@
import java.util.Set;

import org.apache.commons.io.IOExceptionWithCause;
import org.apache.tika.exception.CorruptedFileException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.EmbeddedDocumentUtil;
import org.apache.tika.metadata.Database;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
Expand Down Expand Up @@ -54,6 +56,7 @@ public void parse(InputStream stream, ContentHandler handler, Metadata metadata,
connection = getConnection(stream, metadata, context);
XHTMLContentHandler xHandler = null;
List<String> tableNames = null;
EmbeddedDocumentUtil embeddedDocumentUtil = new EmbeddedDocumentUtil(context);
try {
tableNames = getTableNames(connection, metadata, context);
} catch (SQLException e) {
Expand All @@ -62,6 +65,11 @@ public void parse(InputStream stream, ContentHandler handler, Metadata metadata,
} catch (SQLException sqlE) {
//swallow
}
if (e.getClass().toString().contains("SQLiteException") && e.getMessage() != null
&& (e.getMessage().contains("[SQLITE_ERROR]") || e.getMessage().contains("[SQLITE_CORRUPT]"))) {
throw new CorruptedFileException("Corrupt SQLITE", e);
}

throw new IOExceptionWithCause(e);
}
for (String tableName : tableNames) {
Expand All @@ -73,7 +81,7 @@ public void parse(InputStream stream, ContentHandler handler, Metadata metadata,

try {
for (String tableName : tableNames) {
JDBCTableReader tableReader = getTableReader(connection, tableName, context);
JDBCTableReader tableReader = getTableReader(connection, tableName, embeddedDocumentUtil);
xHandler.startElement("table", "name", tableReader.getTableName());
xHandler.startElement("thead");
xHandler.startElement("tr");
Expand Down Expand Up @@ -180,8 +188,22 @@ abstract protected List<String> getTableNames(Connection connection, Metadata me
*
* @param connection
* @param tableName
* @return
* @return a reader
* @deprecated use {@link #getTableReader(Connection, String, EmbeddedDocumentUtil)}
*/
@Deprecated
abstract protected JDBCTableReader getTableReader(Connection connection, String tableName, ParseContext parseContext);

/**
* Given a connection and a table name, return the JDBCTableReader for this db.
*
* @param connection
* @param tableName
* @param embeddedDocumentUtil embedded doc util
* @return
*/
abstract protected JDBCTableReader getTableReader(Connection connection,
String tableName,
EmbeddedDocumentUtil embeddedDocumentUtil);

}
Expand Up @@ -62,10 +62,11 @@ class JDBCTableReader {
ResultSet results = null;
int rows = 0;
private final EmbeddedDocumentUtil embeddedDocumentUtil;
public JDBCTableReader(Connection connection, String tableName, ParseContext context) {
public JDBCTableReader(Connection connection, String tableName, EmbeddedDocumentUtil embeddedDocumentUtil) {
System.out.println("new table: "+tableName);
this.connection = connection;
this.tableName = tableName;
embeddedDocumentUtil = new EmbeddedDocumentUtil(context);
this.embeddedDocumentUtil = embeddedDocumentUtil;
}

public boolean nextRow(ContentHandler handler, ParseContext context) throws IOException, SAXException {
Expand Down
Expand Up @@ -30,6 +30,7 @@
import java.util.Set;

import org.apache.commons.io.IOExceptionWithCause;
import org.apache.tika.extractor.EmbeddedDocumentUtil;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
Expand Down Expand Up @@ -130,6 +131,11 @@ protected List<String> getTableNames(Connection connection, Metadata metadata,

@Override
public JDBCTableReader getTableReader(Connection connection, String tableName, ParseContext context) {
return new SQLite3TableReader(connection, tableName, context);
return new SQLite3TableReader(connection, tableName, new EmbeddedDocumentUtil(context));
}

@Override
protected JDBCTableReader getTableReader(Connection connection, String tableName, EmbeddedDocumentUtil embeddedDocumentUtil) {
return new SQLite3TableReader(connection, tableName, embeddedDocumentUtil);
}
}
Expand Up @@ -23,6 +23,7 @@
import java.sql.ResultSet;
import java.sql.SQLException;

import org.apache.tika.extractor.EmbeddedDocumentUtil;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.xml.sax.ContentHandler;
Expand All @@ -39,8 +40,8 @@
class SQLite3TableReader extends JDBCTableReader {


public SQLite3TableReader(Connection connection, String tableName, ParseContext context) {
super(connection, tableName, context);
public SQLite3TableReader(Connection connection, String tableName, EmbeddedDocumentUtil embeddedDocumentUtil) {
super(connection, tableName, embeddedDocumentUtil);
}


Expand Down

0 comments on commit 0c49c85

Please sign in to comment.