From 5a6766587c8490e1c7b31eae54bce91b4411a3fa Mon Sep 17 00:00:00 2001 From: Sujen Shah Date: Sat, 16 May 2015 14:02:36 +0530 Subject: [PATCH 1/3] Creation of FetchNodes is off by default if NutchServer is not used --- .../apache/nutch/fetcher/FetcherThread.java | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/java/org/apache/nutch/fetcher/FetcherThread.java b/src/java/org/apache/nutch/fetcher/FetcherThread.java index 53a834b1a6..323677dfa1 100644 --- a/src/java/org/apache/nutch/fetcher/FetcherThread.java +++ b/src/java/org/apache/nutch/fetcher/FetcherThread.java @@ -57,6 +57,7 @@ import org.apache.nutch.protocol.ProtocolStatus; import org.apache.nutch.scoring.ScoringFilterException; import org.apache.nutch.scoring.ScoringFilters; +import org.apache.nutch.service.NutchServer; import org.apache.nutch.util.StringUtil; import org.apache.nutch.util.URLUtil; import org.slf4j.Logger; @@ -192,7 +193,8 @@ public void run() { while (true) { // creating FetchNode for storing in FetchNodeDb - this.fetchNode = new FetchNode(); + if(NutchServer.getInstance().isRunning()) + this.fetchNode = new FetchNode(); // check whether must be stopped if (isHalted()) { @@ -290,9 +292,11 @@ public void run() { String urlString = fit.url.toString(); //used for FetchNode - fetchNode.setStatus(status.getCode()); - fetchNode.setFetchTime(System.currentTimeMillis()); - fetchNode.setUrl(fit.url); + if(fetchNode!=null){ + fetchNode.setStatus(status.getCode()); + fetchNode.setFetchTime(System.currentTimeMillis()); + fetchNode.setUrl(fit.url); + } reporter.incrCounter("FetcherStatus", status.getName(), 1); @@ -620,11 +624,12 @@ private ParseStatus output(Text key, CrawlDatum datum, Content content, fromHost = null; } - //used by fetchNode - fetchNode.setOutlinks(links); - fetchNode.setTitle(parseData.getTitle()); - FetchNodeDb.getInstance().put(fetchNode.getUrl().toString(), fetchNode); - + //used by fetchNode + if(fetchNode!=null){ + fetchNode.setOutlinks(links); + fetchNode.setTitle(parseData.getTitle()); + FetchNodeDb.getInstance().put(fetchNode.getUrl().toString(), fetchNode); + } int validCount = 0; // Process all outlinks, normalize, filter and deduplicate From dc44f2aaa097e251e963e6fbd55581016167b857 Mon Sep 17 00:00:00 2001 From: Sujen Shah Date: Fri, 29 May 2015 16:35:12 -0700 Subject: [PATCH 2/3] Added check for fetcher.parse --- src/java/org/apache/nutch/fetcher/FetcherThread.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/java/org/apache/nutch/fetcher/FetcherThread.java b/src/java/org/apache/nutch/fetcher/FetcherThread.java index 323677dfa1..d519b14095 100644 --- a/src/java/org/apache/nutch/fetcher/FetcherThread.java +++ b/src/java/org/apache/nutch/fetcher/FetcherThread.java @@ -127,6 +127,7 @@ public class FetcherThread extends Thread { //Used by the REST service private FetchNode fetchNode; + private boolean reportToNutchServer; public FetcherThread(Configuration conf, AtomicInteger activeThreads, FetchItemQueues fetchQueues, QueueFeeder feeder, AtomicInteger spinWaiting, AtomicLong lastRequestStart, Reporter reporter, @@ -193,8 +194,11 @@ public void run() { while (true) { // creating FetchNode for storing in FetchNodeDb - if(NutchServer.getInstance().isRunning()) + //checking for the server to be running and fetcher.parse to be true + if(parsing && NutchServer.getInstance().isRunning()) this.fetchNode = new FetchNode(); + else + this.fetchNode = null; // check whether must be stopped if (isHalted()) { From 7c616078ee30f95357ce531ccf7eed4d42ee88b4 Mon Sep 17 00:00:00 2001 From: Sujen Shah Date: Fri, 29 May 2015 16:47:36 -0700 Subject: [PATCH 3/3] Put server running check out of the while loop --- src/java/org/apache/nutch/fetcher/FetcherThread.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/java/org/apache/nutch/fetcher/FetcherThread.java b/src/java/org/apache/nutch/fetcher/FetcherThread.java index d519b14095..39f33ec697 100644 --- a/src/java/org/apache/nutch/fetcher/FetcherThread.java +++ b/src/java/org/apache/nutch/fetcher/FetcherThread.java @@ -191,11 +191,13 @@ public void run() { FetchItem fit = null; try { - + //checking for the server to be running and fetcher.parse to be true + if(parsing && NutchServer.getInstance().isRunning()) + reportToNutchServer = true; + while (true) { - // creating FetchNode for storing in FetchNodeDb - //checking for the server to be running and fetcher.parse to be true - if(parsing && NutchServer.getInstance().isRunning()) + // creating FetchNode for storing in FetchNodeDb + if(reportToNutchServer) this.fetchNode = new FetchNode(); else this.fetchNode = null;