-
Notifications
You must be signed in to change notification settings - Fork 762
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #7 from travisfw/IpAddressSetDecideRule
Ip address set decide rule
- Loading branch information
Showing
1 changed file
with
90 additions
and
0 deletions.
There are no files selected for viewing
90 changes: 90 additions & 0 deletions
90
modules/src/main/java/org/archive/modules/deciderules/IpAddressSetDecideRule.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
package org.archive.modules.deciderules; | ||
|
||
import static org.archive.modules.CoreAttributeConstants.A_DNS_SERVER_IP_LABEL; | ||
|
||
import java.net.InetAddress; | ||
import java.util.Collections; | ||
import java.util.Set; | ||
|
||
import org.archive.modules.CrawlURI; | ||
import org.archive.modules.net.CrawlHost; | ||
import org.archive.modules.net.ServerCache; | ||
import org.springframework.beans.factory.annotation.Autowired; | ||
|
||
/** | ||
* <pre> | ||
* <bean class="org.archive.modules.deciderules.IpAddressSetDecideRule"> | ||
* <property name="ipAddresses"> | ||
* <set> | ||
* <value>127.0.0.1</value> | ||
* <value>69.89.27.209</value> | ||
* </set> | ||
* </property> | ||
* <property name='decision' value='REJECT' /> | ||
* </bean> | ||
* </pre> | ||
* | ||
* @contributor Travis Wellman <travis@archive.org> | ||
*/ | ||
|
||
public class IpAddressSetDecideRule extends PredicatedDecideRule { | ||
|
||
// private static final Logger LOGGER = Logger.getLogger(IpAddressSetDecideRule.class.getCanonicalName()); | ||
private static final long serialVersionUID = -3670434739183271441L; | ||
private Set<String> ipAddresses; | ||
|
||
/** | ||
* @return the addresses being matched | ||
*/ | ||
public Set<String> getIpAddresses() { | ||
return Collections.unmodifiableSet(ipAddresses); | ||
} | ||
|
||
/** | ||
* @param ipAddresses the addresses to match | ||
*/ | ||
public void setIpAddresses(Set<String> ipAddresses) { | ||
this.ipAddresses = ipAddresses; | ||
} | ||
|
||
@Override | ||
protected boolean evaluate(CrawlURI curi) { | ||
String hostAddress = getHostAddress(curi); | ||
return hostAddress != null && | ||
ipAddresses.contains(hostAddress.intern()); | ||
} | ||
|
||
transient protected ServerCache serverCache; | ||
public ServerCache getServerCache() { | ||
return this.serverCache; | ||
} | ||
@Autowired | ||
public void setServerCache(ServerCache serverCache) { | ||
this.serverCache = serverCache; | ||
} | ||
|
||
/** | ||
* from WriterPoolProcessor | ||
* | ||
* @param curi CrawlURI | ||
* @return String of IP address | ||
*/ | ||
protected String getHostAddress(CrawlURI curi) { | ||
// special handling for DNS URIs: want address of DNS server | ||
if (curi.getUURI().getScheme().toLowerCase().equals("dns")) { | ||
return (String)curi.getData().get(A_DNS_SERVER_IP_LABEL); | ||
} | ||
// otherwise, host referenced in URI | ||
// TODO:FIXME: have fetcher insert exact IP contacted into curi, | ||
// use that rather than inferred by CrawlHost lookup | ||
CrawlHost crlh = getServerCache().getHostFor(curi.getUURI()); | ||
if (crlh == null) { | ||
return null; | ||
} | ||
InetAddress inetadd = crlh.getIP(); | ||
if (inetadd == null) { | ||
return null; | ||
} | ||
return inetadd.getHostAddress(); | ||
} | ||
} |