Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
NUTCH-2272 Index checker server to optionally keep client connection open
  • Loading branch information
Markus Jelsma committed Jun 3, 2016
1 parent 7956dae commit beb48a8
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 11 deletions.
1 change: 1 addition & 0 deletions CHANGES.txt
Expand Up @@ -37,6 +37,7 @@ Bug

Improvement

[NUTCH-2272] - Index checker server to optionally keep client connection open
[NUTCH-1233] - Rely on Tika for outlink extraction
[NUTCH-1712] - Use MultipleInputs in Injector to make it a single mapreduce job
[NUTCH-2172] - index-more: document format of contenttype-mapping.txt
Expand Down
35 changes: 24 additions & 11 deletions src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java
Expand Up @@ -69,6 +69,7 @@ public class IndexingFiltersChecker extends Configured implements Tool {
protected URLNormalizers normalizers = null;
protected boolean dumpText = false;
protected boolean followRedirects = false;
protected boolean keepClientCnxOpen = false;
// used to simulate the metadata propagated from injection
protected HashMap<String, String> metadata = new HashMap<String, String>();
protected int tcpPort = -1;
Expand All @@ -82,7 +83,7 @@ public IndexingFiltersChecker() {

public int run(String[] args) throws Exception {
String url = null;
String usage = "Usage: IndexingFiltersChecker [-normalize] [-followRedirects] [-dumpText] [-md key=value] [-listen <port>] <url>";
String usage = "Usage: IndexingFiltersChecker [-normalize] [-followRedirects] [-dumpText] [-md key=value] [-listen <port>] [-keepClientCnxOpen]";

if (args.length == 0) {
System.err.println(usage);
Expand All @@ -96,6 +97,8 @@ public int run(String[] args) throws Exception {
tcpPort = Integer.parseInt(args[++i]);
} else if (args[i].equals("-followRedirects")) {
followRedirects = true;
} else if (args[i].equals("-keepClientCnxOpen")) {
keepClientCnxOpen = true;
} else if (args[i].equals("-dumpText")) {
dumpText = true;
} else if (args[i].equals("-md")) {
Expand Down Expand Up @@ -164,7 +167,23 @@ private class Worker implements Runnable {
LOG.info(client.toString());
}

public void run(){
/**
 * Serves the connected client.
 *
 * <p>When {@code keepClientCnxOpen} is {@code false}, a single
 * {@link #readWrite()} exchange is performed and the client socket is then
 * closed; a failure to close is logged and otherwise ignored.
 *
 * <p>When {@code keepClientCnxOpen} is {@code true}, {@link #readWrite()} is
 * invoked repeatedly and this method never closes the socket itself.
 */
public void run() {
  if (!keepClientCnxOpen) {
    // One-shot mode: answer a single request, then hang up on our side.
    readWrite();

    try {
      client.close();
    } catch (Exception e) {
      LOG.error(e.toString());
    }
    return;
  }

  // Persistent mode: keep answering requests on the same connection.
  // NOTE(review): this loop has no exit condition of its own; presumably it
  // keeps calling readWrite() even after the peer has disconnected -- confirm
  // how readWrite() behaves at end-of-stream.
  for (;;) {
    readWrite();
  }
}

protected void readWrite() {
String line;
BufferedReader in = null;
PrintWriter out = null;
Expand All @@ -185,14 +204,6 @@ public void run(){
}catch (Exception e) {
LOG.error("Read/Write failed: " + e);
}

try {
client.close();
} catch (Exception e){
LOG.error(e.toString());
}

return;
}
}

Expand Down Expand Up @@ -331,6 +342,8 @@ protected int fetch(String url, StringBuilder output) throws Exception {
}
}
}

output.append("\n"); // For readability if keepClientCnxOpen

if (getConf().getBoolean("doIndex", false) && doc != null) {
IndexWriters writers = new IndexWriters(getConf());
Expand All @@ -355,4 +368,4 @@ public static void main(String[] args) throws Exception {
new IndexingFiltersChecker(), args);
System.exit(res);
}
}
}

0 comments on commit beb48a8

Please sign in to comment.