Permalink
Browse files

NUTCH-1383 IndexingFiltersChecker to show error message instead of null pointer exception

git-svn-id: https://svn.apache.org/repos/asf/nutch/trunk@1397308 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information...
sebastian-nagel committed Oct 11, 2012
1 parent 8edb66f commit 1cd24babb126d4a670b1ebc724d8c04a38746094
Showing with 11 additions and 4 deletions.
  1. +2 −0 CHANGES.txt
  2. +9 −4 src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java
View
@@ -2,6 +2,8 @@ Nutch Change Log
(trunk) Current Development:
+* NUTCH-1383 IndexingFiltersChecker to show error message instead of null pointer exception (snagel)
+
* NUTCH-1476 SegmentReader getStats should set parsed = -1 if no parsing took place (snagel)
* NUTCH-1252 SegmentReader -get shows wrong data (snagel)
@@ -84,9 +84,6 @@ public int run(String[] args) throws Exception {
Content content = protocol.getProtocolOutput(new Text(url), datum)
.getContent();
- // store the guessed content type in the crawldatum
- if (content.getContentType() != null) datum.getMetaData().put(new Text(Metadata.CONTENT_TYPE), new Text(content.getContentType()));
-
if (content == null) {
System.out.println("No content for " + url);
return 0;
@@ -98,6 +95,9 @@ public int run(String[] args) throws Exception {
return -1;
}
+ // store the guessed content type in the crawldatum
+ datum.getMetaData().put(new Text(Metadata.CONTENT_TYPE), new Text(contentType));
+
if (LOG.isInfoEnabled()) {
LOG.info("parsing: " + url);
LOG.info("contentType: " + contentType);
@@ -111,11 +111,16 @@ public int run(String[] args) throws Exception {
Inlinks inlinks = null;
Parse parse = parseResult.get(urlText);
try {
- indexers.filter(doc, parse, urlText, datum, inlinks);
+ doc = indexers.filter(doc, parse, urlText, datum, inlinks);
} catch (IndexingException e) {
e.printStackTrace();
}
+ if (doc == null) {
+ System.out.println("Document discarded by indexing filter");
+ return 0;
+ }
+
for (String fname : doc.getFieldNames()) {
List<Object> values = doc.getField(fname).getValues();
if (values != null) {

0 comments on commit 1cd24ba

Please sign in to comment.