Skip to content
Permalink
Browse files
Merge pull request #570 from apache/OAK-9758
OAK-9758 Oak run indexing silently broken without tika.jar
  • Loading branch information
thomasmueller committed May 17, 2022
2 parents 43b90a1 + 0021d83 commit 4f99576158aa9b190879291d79002e20aef28ee4
Showing 2 changed files with 16 additions and 26 deletions.
@@ -79,8 +79,6 @@ class TextExtractor implements Closeable {
private boolean initialized;
private BinaryStats stats;
private boolean closed;
- private boolean linkageErrorFound;
- private boolean throwableErrorFound;

public TextExtractor(TextWriter textWriter) {
this.textWriter = textWriter;
@@ -272,24 +270,21 @@ public InputStream get() {
// not being present. This is equivalent to disabling
// selected media types in configuration, so we can simply
// ignore these errors.
- String format = "Failed to extract text from a binary property: {}."
- + " This often happens when some media types are disabled by configuration."
- + " The stack trace is included to flag some 'unintended' failures";
- log.warn(format, linkageErrorFound ? path : new Object[]{path, e});
- linkageErrorFound = true;
+ String format = "Failed to extract text from a binary property: {}. "
+ + "This often happens when the media types is disabled by configuration.";
+ log.info(format, path);
+ log.debug(format, path, e);
parserErrorCount.incrementAndGet();
return ERROR_TEXT;
} catch (Throwable t) {
// Capture and report any other full text extraction problems.
// The special STOP exception is used for normal termination.
if (!handler.isWriteLimitReached(t)) {
parserErrorCount.incrementAndGet();
- String format = "Failed to extract text from a binary property: {}"
- + " This is a fairly common case, and nothing to"
- + " worry about. The stack trace is included to"
- + " help improve the text extraction feature.";
- parserError.info(format, throwableErrorFound ? path : new Object[]{path, t});
- throwableErrorFound = true;
+ String format = "Failed to extract text from a binary property: {}. "
+ + "This is quite common, and usually nothing to worry about.";
+ parserError.info(format, path);
+ parserError.debug(format, path, t);
return ERROR_TEXT;
} else {
parserError.debug("Extracted text size exceeded configured limit({})", maxExtractedLength);
@@ -74,8 +74,6 @@ public class FulltextBinaryTextExtractor {
private final boolean reindex;
private Parser parser;
private TikaConfigHolder tikaConfig;
- private boolean linkageErrorFound;
- private boolean throwableErrorFound;
/**
* The media types supported by the parser used.
*/
@@ -177,13 +175,12 @@ public Void call() throws Exception {
// not being present. This is equivalent to disabling
// selected media types in configuration, so we can simply
// ignore these errors.
- String format = "[{}] Failed to extract text from a binary property: {}."
- + " This often happens when some media types are disabled by configuration."
- + " The stack trace is included to flag some 'unintended' failures";
+ String format = "[{}] Failed to extract text from a binary property: {}. "
+ + "This often happens when the media types is disabled by configuration.";
String indexName = getIndexName();
- log.warn(format, linkageErrorFound ? new Object[]{indexName, path} : new Object[]{indexName, path, e});
+ log.info(format, indexName, path);
+ log.debug(format, indexName, path, e);
extractedTextCache.put(v, ExtractedText.ERROR);
- linkageErrorFound = true;
return TEXT_EXTRACTION_ERROR;
} catch (TimeoutException t) {
log.warn(
@@ -196,14 +193,12 @@ public Void call() throws Exception {
// Capture and report any other full text extraction problems.
// The special STOP exception is used for normal termination.
if (!handler.isWriteLimitReached(t)) {
- String format = "[{}] Failed to extract text from a binary property: {}."
- + " This is a fairly common case, and nothing to"
- + " worry about. The stack trace is included to"
- + " help improve the text extraction feature.";
+ String format = "[{}] Failed to extract text from a binary property: {}. "
+ + "This is quite common, and usually nothing to worry about.";
String indexName = getIndexName();
- log.info(format, throwableErrorFound ? new Object[]{indexName, path} : new Object[]{indexName, path, t});
+ log.info(format, indexName, path);
+ log.debug(format, indexName, path, t);
extractedTextCache.put(v, ExtractedText.ERROR);
- throwableErrorFound = true;
return TEXT_EXTRACTION_ERROR;
} else {
log.debug("Extracted text size exceeded configured limit({})", definition.getMaxExtractLength());

0 comments on commit 4f99576

Please sign in to comment.