Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

revert NUTCH-1360

git-svn-id: https://svn.apache.org/repos/asf/nutch/trunk@1359760 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information...
commit 25afea3cc06a5ce4b16f97cb99f85b5053f2faff 1 parent c42ddd7
Lewis John McGibbney authored
View
2  CHANGES.txt
@@ -32,8 +32,6 @@ Nutch Change Log
* NUTCH-1364 Add a counter in Generator for malformed urls (lewismc)
-* NUTCH-1360 Suport the storing of IP address connected to when web crawling (lewismc)
-
* NUTCH-1262 Map `duplicating` content-types to a single type (markus)
* NUTCH-1385 More robust plug-in order properties in nutch-site.xml (Andy Xue via markus)
View
7 conf/nutch-default.xml
@@ -255,13 +255,6 @@
</description>
</property>
-<property>
- <name>http.store.ip.address</name>
- <value>false</value>
- <description>Enables us to capture the specific IP address of the
- host which we connect to to fetch a page.</description>
-</property>
-
<!-- FTP properties -->
<property>
View
2  src/java/org/apache/nutch/metadata/HttpHeaders.java
@@ -48,7 +48,5 @@
public final static String LAST_MODIFIED = "Last-Modified";
public final static String LOCATION = "Location";
-
- public final static String IP_ADDRESS = "_ip";
}
View
9 src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
@@ -80,9 +80,6 @@
/** The "Accept" request header value. */
protected String accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
- /** The "_ip" request header value. */
- protected boolean ip_header = false;
-
/** The default logger */
private final static Logger LOGGER = LoggerFactory.getLogger(HttpBase.class);
@@ -123,7 +120,6 @@ public void setConf(Configuration conf) {
.get("http.agent.description"), conf.get("http.agent.url"), conf.get("http.agent.email"));
this.acceptLanguage = conf.get("http.accept.language", acceptLanguage);
this.accept = conf.get("http.accept", accept);
- this.ip_header = conf.getBoolean("http.store.ip.address", false);
// backward-compatible default setting
this.useHttp11 = conf.getBoolean("http.useHttp11", false);
this.robots.setConf(conf);
@@ -251,10 +247,6 @@ public boolean getUseHttp11() {
return useHttp11;
}
- public boolean getIP_Header(){
- return ip_header;
- }
-
private static String getAgentString(String agentName,
String agentVersion,
String agentDesc,
@@ -309,7 +301,6 @@ protected void logConf() {
logger.info("http.agent = " + userAgent);
logger.info("http.accept.language = " + acceptLanguage);
logger.info("http.accept = " + accept);
- logger.info("http.store.ip.address = " + ip_header);
}
}
View
12 src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java
@@ -93,9 +93,7 @@ public HttpResponse(HttpBase http, URL url, CrawlDatum datum)
int sockPort = http.useProxy() ? http.getProxyPort() : port;
InetSocketAddress sockAddr= new InetSocketAddress(sockHost, sockPort);
socket.connect(sockAddr, http.getTimeout());
-
- headers.set("_ip", socket.getInetAddress().getHostAddress());
-
+
// make request
OutputStream req = socket.getOutputStream();
@@ -112,12 +110,6 @@ public HttpResponse(HttpBase http, URL url, CrawlDatum datum)
reqStr.append(host);
reqStr.append(portString);
reqStr.append("\r\n");
-
- if(this.http.getConf().getBoolean("http.store.ip.address", true)) {
- reqStr.append("_ip: ");
- reqStr.append(http.getIP_Header());
- reqStr.append("\r\n");
- }
reqStr.append("Accept-Encoding: x-gzip, gzip, deflate\r\n");
@@ -440,5 +432,5 @@ private static int peek(PushbackInputStream in) throws IOException {
in.unread(value);
return value;
}
-
+
}
Please sign in to comment.
Something went wrong with that request. Please try again.