From b0ce4a157dbd0bfd8ea368f3fa230a90c7117ae2 Mon Sep 17 00:00:00 2001
From: Asitang Mishra
Date: Wed, 17 Jun 2015 09:11:42 -0700
Subject: [PATCH 1/5] patch 1.0 for NUTCH-2038

---
 .classpath                                    | 855 ++++++++++++++++++
 .project                                      |  17 +
 build.xml                                     |   3 +
 conf/nutch-default.xml                        |  22 +
 ivy/ivy.xml                                   |   8 +-
 src/java/org/apache/nutch/net/URLFilters.java |  58 +-
 .../nutch/parse/ModelURLFilterAbstract.java   |  12 +
 .../org/apache/nutch/parse/ParseSegment.java  |  46 +-
 src/plugin/build.xml                          |   2 +
 src/plugin/urlfilter-model/build.xml          |  22 +
 src/plugin/urlfilter-model/ivy.xml            |  41 +
 src/plugin/urlfilter-model/plugin.xml         |  41 +
 .../nutch/urlfilter/model/ModelURLFilter.java | 158 ++++
 .../nutch/urlfilter/model/NBClassifier.java   | 234 +++++
 .../nutch/urlfilter/model/package-info.java   |  25 +
 15 files changed, 1524 insertions(+), 20 deletions(-)
 create mode 100644 .classpath
 create mode 100644 .project
 create mode 100644 src/java/org/apache/nutch/parse/ModelURLFilterAbstract.java
 create mode 100644 src/plugin/urlfilter-model/build.xml
 create mode 100644 src/plugin/urlfilter-model/ivy.xml
 create mode 100644 src/plugin/urlfilter-model/plugin.xml
 create mode 100644 src/plugin/urlfilter-model/src/java/org/apache/nutch/urlfilter/model/ModelURLFilter.java
 create mode 100644 src/plugin/urlfilter-model/src/java/org/apache/nutch/urlfilter/model/NBClassifier.java
 create mode 100644 src/plugin/urlfilter-model/src/java/org/apache/nutch/urlfilter/model/package-info.java

diff --git a/.classpath b/.classpath
new file mode 100644
index 0000000000..51cf515586
--- /dev/null
+++ b/.classpath
@@ -0,0 +1,855 @@
[855 added lines of Eclipse .classpath XML; the markup did not survive extraction]

diff --git a/.project b/.project
new file mode 100644
index 0000000000..0978facd4c
--- /dev/null
+++ b/.project
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+  <name>NUTCH-CLONE</name>
+  <comment></comment>
+  <projects>
+  </projects>
+  <buildSpec>
+    <buildCommand>
+      <name>org.eclipse.jdt.core.javabuilder</name>
+      <arguments>
+      </arguments>
+    </buildCommand>
+  </buildSpec>
+  <natures>
+    <nature>org.eclipse.jdt.core.javanature</nature>
+  </natures>
+</projectDescription>

diff --git
a/build.xml b/build.xml index be49b4f520..7cb8e87d4a 100644 --- a/build.xml +++ b/build.xml @@ -211,6 +211,7 @@ + @@ -621,6 +622,7 @@ + @@ -1037,6 +1039,7 @@ + diff --git a/conf/nutch-default.xml b/conf/nutch-default.xml index e8ccf42d10..92dd165419 100644 --- a/conf/nutch-default.xml +++ b/conf/nutch-default.xml @@ -1135,6 +1135,28 @@ + + parser.modelfilter.trainfile + tweets-train.tsv + + + + + + parser.modelfilter.dictionaryfile + wordlist.txt + + + + + + parser.modelfilter + true + + + + + parse.plugin.file parse-plugins.xml diff --git a/ivy/ivy.xml b/ivy/ivy.xml index 3850c0c692..74c305c760 100644 --- a/ivy/ivy.xml +++ b/ivy/ivy.xml @@ -78,7 +78,11 @@ - + + + + + @@ -100,6 +104,8 @@ + + diff --git a/src/java/org/apache/nutch/net/URLFilters.java b/src/java/org/apache/nutch/net/URLFilters.java index 3deccca8ec..7d793ffb9b 100644 --- a/src/java/org/apache/nutch/net/URLFilters.java +++ b/src/java/org/apache/nutch/net/URLFilters.java @@ -23,22 +23,44 @@ /** Creates and caches {@link URLFilter} implementing plugins. */ public class URLFilters { - public static final String URLFILTER_ORDER = "urlfilter.order"; - private URLFilter[] filters; - - public URLFilters(Configuration conf) { - this.filters = (URLFilter[]) PluginRepository.get(conf).getOrderedPlugins( - URLFilter.class, URLFilter.X_POINT_ID, URLFILTER_ORDER); - } - - /** Run all defined filters. Assume logical AND. */ - public String filter(String urlString) throws URLFilterException { - for (int i = 0; i < this.filters.length; i++) { - if (urlString == null) - return null; - urlString = this.filters[i].filter(urlString); - - } - return urlString; - } + public static final String URLFILTER_ORDER = "urlfilter.order"; + private URLFilter[] filters; + private URLFilter filter = null; + + public URLFilters(Configuration conf) { + this.filters = (URLFilter[]) PluginRepository.get(conf) + .getOrderedPlugins(URLFilter.class, URLFilter.X_POINT_ID, + URLFILTER_ORDER); + } + + /** Run all defined filters. Assume logical AND. 
*/ + public String filter(String urlString) throws URLFilterException { + for (int i = 0; i < this.filters.length; i++) { + if (urlString == null) + return null; + urlString = this.filters[i].filter(urlString); + + } + return urlString; + } + + /**Get a filter with the full classname if only it is activated through the nutchsite.xml*/ + public URLFilter getFilter(String pid) { + + if (filter == null) { + + for (int i = 0; i < this.filters.length; i++) { + + if (filters[i].getClass().getName().equals(pid)) { + + filter = filters[i]; + break; + } + + } + + } + return filter; + + } } diff --git a/src/java/org/apache/nutch/parse/ModelURLFilterAbstract.java b/src/java/org/apache/nutch/parse/ModelURLFilterAbstract.java new file mode 100644 index 0000000000..6c6bead2cc --- /dev/null +++ b/src/java/org/apache/nutch/parse/ModelURLFilterAbstract.java @@ -0,0 +1,12 @@ +package org.apache.nutch.parse; + +import org.apache.nutch.net.URLFilter; + +public abstract class ModelURLFilterAbstract implements URLFilter{ + + + public abstract void filterParse(String text); + public abstract boolean filterUrl(String url) ; + public abstract void configure(String[] args) ; + +} diff --git a/src/java/org/apache/nutch/parse/ParseSegment.java b/src/java/org/apache/nutch/parse/ParseSegment.java index b1ed1092c9..08472b8b48 100644 --- a/src/java/org/apache/nutch/parse/ParseSegment.java +++ b/src/java/org/apache/nutch/parse/ParseSegment.java @@ -29,11 +29,12 @@ import org.apache.hadoop.conf.*; import org.apache.nutch.metadata.Metadata; import org.apache.nutch.metadata.Nutch; +import org.apache.nutch.net.URLFilters; import org.apache.nutch.net.protocols.Response; import org.apache.nutch.protocol.*; import org.apache.nutch.scoring.ScoringFilterException; import org.apache.nutch.scoring.ScoringFilters; -import org.apache.hadoop.fs.FileSystem; + import org.apache.nutch.util.*; import org.apache.hadoop.fs.Path; @@ -56,6 +57,14 @@ public class ParseSegment extends NutchTool implements Tool, private ParseUtil parseUtil; private boolean skipTruncated; + + public static final String PARSER_MODELFILTER="parser.modelfilter"; + public static final String TRAINFILE_MODELFILTER="parser.modelfilter.trainfile"; + public static final String DICTFILE_MODELFILTER="parser.modelfilter.dictionaryfile"; + + private boolean filterflag; + private URLFilters filters; + private ModelURLFilterAbstract filter; public ParseSegment() { this(null); @@ -69,6 +78,18 @@ public void configure(JobConf job) { setConf(job); this.scfilters = new ScoringFilters(job); skipTruncated = job.getBoolean(SKIP_TRUNCATED, true); + + filterflag = job.getBoolean(PARSER_MODELFILTER, true); + if(filterflag){ + String[] args=new String[2]; + args[0]=getConf().get(TRAINFILE_MODELFILTER); + args[1]=getConf().get(DICTFILE_MODELFILTER); + + filters = new URLFilters(job); + filter=(ModelURLFilterAbstract) filters.getFilter("org.apache.nutch.urlfilter.model.ModelURLFilter"); + filter.configure(args); + + } } public void close() { @@ -140,6 +161,29 @@ public void map(WritableComparable key, Content content, LOG.warn("Error passing score: " + url + ": " + e.getMessage()); } } + +if(filterflag){ + + + + filter.filterParse(parse.getText()); + + ArrayList tempOutlinks= new ArrayList(); + Outlink[] out=null; + for(int i=0;i + @@ -174,6 +175,7 @@ + diff --git a/src/plugin/urlfilter-model/build.xml b/src/plugin/urlfilter-model/build.xml new file mode 100644 index 0000000000..a7135bf29b --- /dev/null +++ b/src/plugin/urlfilter-model/build.xml @@ -0,0 +1,22 @@ + + + + + + + diff --git 
a/src/plugin/urlfilter-model/ivy.xml b/src/plugin/urlfilter-model/ivy.xml new file mode 100644 index 0000000000..1a86d68030 --- /dev/null +++ b/src/plugin/urlfilter-model/ivy.xml @@ -0,0 +1,41 @@ + + + + + + + + + + Apache Nutch + + + + + + + + + + + + + + + + diff --git a/src/plugin/urlfilter-model/plugin.xml b/src/plugin/urlfilter-model/plugin.xml new file mode 100644 index 0000000000..43b41d2132 --- /dev/null +++ b/src/plugin/urlfilter-model/plugin.xml @@ -0,0 +1,41 @@ + + + + + + + + + + + + + + + + + + + diff --git a/src/plugin/urlfilter-model/src/java/org/apache/nutch/urlfilter/model/ModelURLFilter.java b/src/plugin/urlfilter-model/src/java/org/apache/nutch/urlfilter/model/ModelURLFilter.java new file mode 100644 index 0000000000..50109b44b3 --- /dev/null +++ b/src/plugin/urlfilter-model/src/java/org/apache/nutch/urlfilter/model/ModelURLFilter.java @@ -0,0 +1,158 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.nutch.urlfilter.model; + + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.nutch.parse.ModelURLFilterAbstract; + + +import java.io.Reader; + +import java.io.BufferedReader; + +import java.io.IOException; + +import java.util.ArrayList; + +/** + * Filters URLs based on a file of URL prefixes. The file is named by (1) + * property "urlfilter.prefix.file" in ./conf/nutch-default.xml, and (2) + * attribute "file" in plugin.xml of this plugin Attribute "file" has higher + * precedence if defined. + * + *

+ * The wordlist file format is one word per line. +

+ */ +public class ModelURLFilter extends ModelURLFilterAbstract { + + private static final Logger LOG = LoggerFactory + .getLogger(ModelURLFilter.class); + + private boolean relevent = false; + private Configuration conf; + private String inputFilePath; + private String dictionaryFile; + private ArrayList wordlist = new ArrayList(); + + public ModelURLFilter() throws Exception { + + } + + public void configure(String[] args) { + + inputFilePath = args[0]; + dictionaryFile = args[1]; + BufferedReader br = null; + + try { + + String CurrentLine; + + Reader reader = conf.getConfResourceAsReader(dictionaryFile); + br = new BufferedReader(reader); + while ((CurrentLine = br.readLine()) != null) { + wordlist.add(CurrentLine); + } + + } catch (IOException e) { + + e.printStackTrace(); + } finally { + try { + if (br != null) + br.close(); + } catch (IOException ex) { + ex.printStackTrace(); + } + } + + try { + + train(); + } catch (Exception e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + + public void filterParse(String text) { + + try { + relevent = classify(text); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + } + + public boolean filterUrl(String url) { + + if (!relevent) { + if (!containsWord(url, wordlist)) { + return false; + } + } + + return true; + } + + public String filter(String url) { + + return url; + + } + + public boolean classify(String text) throws IOException { + + // if classified as relevent "1" then return true + if (NBClassifier.classify(text).equals("1")) + return true; + return false; + } + + public void train() throws Exception { + + // check if the model file exists, if it does then don't train + NBClassifier.createModel(inputFilePath); + + } + + public boolean containsWord(String url, ArrayList wordlist) { + for (String word : wordlist) { + if (url.contains(word)) { + return true; + } + } + + return false; + } + + public void setConf(Configuration conf) { + this.conf = conf; + + } + + public Configuration getConf() { + return this.conf; + } + +} diff --git a/src/plugin/urlfilter-model/src/java/org/apache/nutch/urlfilter/model/NBClassifier.java b/src/plugin/urlfilter-model/src/java/org/apache/nutch/urlfilter/model/NBClassifier.java new file mode 100644 index 0000000000..714aaa4164 --- /dev/null +++ b/src/plugin/urlfilter-model/src/java/org/apache/nutch/urlfilter/model/NBClassifier.java @@ -0,0 +1,234 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nutch.urlfilter.model; + +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.IOException; +import java.io.StringReader; +import java.util.HashMap; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.SequenceFile.Writer; +import org.apache.hadoop.io.Text; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.util.Version; +import org.apache.mahout.classifier.naivebayes.BayesUtils; +import org.apache.mahout.classifier.naivebayes.NaiveBayesModel; +import org.apache.mahout.classifier.naivebayes.StandardNaiveBayesClassifier; +import org.apache.mahout.classifier.naivebayes.training.TrainNaiveBayesJob; +import org.apache.mahout.common.Pair; +import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterable; +import org.apache.mahout.math.RandomAccessSparseVector; +import org.apache.mahout.math.Vector; +import org.apache.mahout.math.Vector.Element; +import org.apache.mahout.vectorizer.SparseVectorsFromSequenceFiles; +import org.apache.mahout.vectorizer.TFIDF; + +import com.google.common.collect.ConcurrentHashMultiset; +import com.google.common.collect.Multiset; + +public class NBClassifier { + + public static Map readDictionnary(Configuration conf, + Path dictionnaryPath) { + Map dictionnary = new HashMap(); + for (Pair pair : new SequenceFileIterable( + dictionnaryPath, true, conf)) { + dictionnary.put(pair.getFirst().toString(), pair.getSecond().get()); + } + return dictionnary; + } + + public static Map readDocumentFrequency(Configuration conf, + Path documentFrequencyPath) { + Map documentFrequency = new HashMap(); + for (Pair pair : new SequenceFileIterable( + documentFrequencyPath, true, conf)) { + documentFrequency + .put(pair.getFirst().get(), pair.getSecond().get()); + } + return documentFrequency; + } + + public static void createModel(String inputTrainFilePath) throws Exception { + + String[] args1 = new String[4]; + + args1[0] = "-i"; + args1[1] = "outseq"; + args1[2] = "-o"; + args1[3] = "vectors"; + + String[] args2 = new String[9]; + + args2[0] = "-i"; + args2[1] = "vectors/tfidf-vectors"; + args2[2] = "-el"; + args2[3] = "-li"; + args2[4] = "labelindex"; + args2[5] = "-o"; + args2[6] = "model"; + args2[7] = "-ow"; + args2[8] = "-c"; + + convertToSeq(inputTrainFilePath, "outseq"); + + SparseVectorsFromSequenceFiles.main(args1); + + TrainNaiveBayesJob.main(args2); + } + + public static String classify(String text) throws IOException { + return classify(text, "model", "labelindex", + "vectors/dictionary.file-0", "vectors/df-count/part-r-00000"); + } + + public static String classify(String text, String modelPath, + String labelIndexPath, String dictionaryPath, + String documentFrequencyPath) throws IOException { + + Configuration configuration = new Configuration(); + + // model is a matrix (wordId, labelId) => probability score + NaiveBayesModel model = NaiveBayesModel.materialize( + new Path(modelPath), configuration); + + StandardNaiveBayesClassifier classifier = new StandardNaiveBayesClassifier( + model); + + // labels is a map label => classId + Map labels = 
BayesUtils.readLabelIndex(configuration, + new Path(labelIndexPath)); + Map dictionary = readDictionnary(configuration, + new Path(dictionaryPath)); + Map documentFrequency = readDocumentFrequency( + configuration, new Path(documentFrequencyPath)); + + // analyzer used to extract word from text + Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); + // int labelCount = labels.size(); + int documentCount = documentFrequency.get(-1).intValue(); + + Multiset words = ConcurrentHashMultiset.create(); + + // extract words from text + TokenStream ts = analyzer.tokenStream("text", new StringReader(text)); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); + ts.reset(); + int wordCount = 0; + while (ts.incrementToken()) { + if (termAtt.length() > 0) { + String word = ts.getAttribute(CharTermAttribute.class) + .toString(); + Integer wordId = dictionary.get(word); + // if the word is not in the dictionary, skip it + if (wordId != null) { + words.add(word); + wordCount++; + } + } + } + + ts.end(); + ts.close(); + // create vector wordId => weight using tfidf + Vector vector = new RandomAccessSparseVector(10000); + TFIDF tfidf = new TFIDF(); + for (Multiset.Entry entry : words.entrySet()) { + String word = entry.getElement(); + int count = entry.getCount(); + Integer wordId = dictionary.get(word); + Long freq = documentFrequency.get(wordId); + double tfIdfValue = tfidf.calculate(count, freq.intValue(), + wordCount, documentCount); + vector.setQuick(wordId, tfIdfValue); + } + // one score for each label + + Vector resultVector = classifier.classifyFull(vector); + double bestScore = -Double.MAX_VALUE; + int bestCategoryId = -1; + for (Element element : resultVector.all()) { + int categoryId = element.index(); + double score = element.get(); + if (score > bestScore) { + bestScore = score; + bestCategoryId = categoryId; + } + + } + + analyzer.close(); + return labels.get(bestCategoryId); + + } + + static void convertToSeq(String inputFileName, String outputDirName) + throws IOException { + Configuration configuration = new Configuration(); + FileSystem fs = FileSystem.get(configuration); + Writer writer = new SequenceFile.Writer(fs, configuration, new Path( + outputDirName + "/chunk-0"), Text.class, Text.class); + + BufferedReader reader = new BufferedReader( + new FileReader(inputFileName)); + Text key = new Text(); + Text value = new Text(); + while (true) { + String line = reader.readLine(); + if (line == null) { + break; + } + String[] tokens = line.split("\t", 3); + if (tokens.length != 3) { + // System.out.println("Skip line: " + line); + continue; + } + String category = tokens[0]; + String id = tokens[1]; + String message = tokens[2]; + key.set("/" + category + "/" + id); + value.set(message); + writer.append(key, value); + + } + reader.close(); + writer.close(); + + } + + public static void main(String args[]) throws Exception { + + // createModel("data/tweets-train.tsv"); + + // example + // String result=classify("how are you doing here bro"); + + // System.out.println(result); + } +} diff --git a/src/plugin/urlfilter-model/src/java/org/apache/nutch/urlfilter/model/package-info.java b/src/plugin/urlfilter-model/src/java/org/apache/nutch/urlfilter/model/package-info.java new file mode 100644 index 0000000000..a74d4bebad --- /dev/null +++ b/src/plugin/urlfilter-model/src/java/org/apache/nutch/urlfilter/model/package-info.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * URL filter plugin to include only URLs which match an element in a given list of
+ * domain suffixes, domain names, and/or host names.
+ * See {@link org.apache.nutch.urlfilter.domainblacklist} for the counterpart
+ * (exclude URLs by host or domain).
+ */
+package org.apache.nutch.urlfilter.model;

From e243cc5e626106a4cd8dfca8d9c2ec93e9648560 Mon Sep 17 00:00:00 2001
From: Asitang Mishra
Date: Wed, 17 Jun 2015 09:14:37 -0700
Subject: [PATCH 2/5] patch 1.0 for NUTCH-2038

---
 .classpath | 855 -----------------------------------------------------
 .project   |  17 --
 2 files changed, 872 deletions(-)
 delete mode 100644 .classpath
 delete mode 100644 .project

diff --git a/.classpath b/.classpath
deleted file mode 100644
index 51cf515586..0000000000
--- a/.classpath
+++ /dev/null
@@ -1,855 +0,0 @@
[855 removed lines of Eclipse .classpath XML; the markup did not survive extraction]

diff --git a/.project b/.project
deleted file mode 100644
index 0978facd4c..0000000000
--- a/.project
+++ /dev/null
@@ -1,17 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<projectDescription>
-  <name>NUTCH-CLONE</name>
-  <comment></comment>
-  <projects>
-  </projects>
-  <buildSpec>
-    <buildCommand>
-      <name>org.eclipse.jdt.core.javabuilder</name>
-      <arguments>
-      </arguments>
-    </buildCommand>
-  </buildSpec>
-  <natures>
-    <nature>org.eclipse.jdt.core.javanature</nature>
-  </natures>
-</projectDescription>

From 711f44d8d4af51538ff1764145ac743445b6f43b Mon Sep 17 00:00:00 2001
From: Asitang Mishra
Date: Wed, 17 Jun 2015 09:35:28 -0700
Subject: [PATCH 3/5] patch 1.0 for NUTCH-2038

---
 src/java/org/apache/nutch/net/URLFilters.java | 56 +++++++++----------
 1 file changed, 27 insertions(+), 29 deletions(-)

diff --git a/src/java/org/apache/nutch/net/URLFilters.java b/src/java/org/apache/nutch/net/URLFilters.java
index 7d793ffb9b..9ed436978a 100644
--- a/src/java/org/apache/nutch/net/URLFilters.java
+++ b/src/java/org/apache/nutch/net/URLFilters.java
@@ -23,44 +23,42 @@
 /** Creates and caches {@link URLFilter} implementing plugins. */
 public class URLFilters {
-  public static final String URLFILTER_ORDER = "urlfilter.order";
-  private URLFilter[] filters;
-  private URLFilter filter = null;
+  public static final String URLFILTER_ORDER = "urlfilter.order";
+  private URLFilter[] filters;
-  public URLFilters(Configuration conf) {
-    this.filters = (URLFilter[]) PluginRepository.get(conf)
-        .getOrderedPlugins(URLFilter.class, URLFilter.X_POINT_ID,
-            URLFILTER_ORDER);
-  }
+  public URLFilters(Configuration conf) {
+    this.filters = (URLFilter[]) PluginRepository.get(conf).getOrderedPlugins(
+        URLFilter.class, URLFilter.X_POINT_ID, URLFILTER_ORDER);
+  }
-  /** Run all defined filters. Assume logical AND. */
-  public String filter(String urlString) throws URLFilterException {
-    for (int i = 0; i < this.filters.length; i++) {
-      if (urlString == null)
-        return null;
-      urlString = this.filters[i].filter(urlString);
+  /** Run all defined filters. Assume logical AND. */
+  public String filter(String urlString) throws URLFilterException {
+    for (int i = 0; i < this.filters.length; i++) {
+      if (urlString == null)
+        return null;
+      urlString = this.filters[i].filter(urlString);
-    }
-    return urlString;
-  }
+    }
+    return urlString;
+  }
+/**Get a filter with the full classname if only it is activated through the nutchsite.xml*/
+  public URLFilter getFilter(String pid) {
-  /**Get a filter with the full classname if only it is activated through the nutchsite.xml*/
-  public URLFilter getFilter(String pid) {
+    if (filter == null) {
-    if (filter == null) {
+      for (int i = 0; i < this.filters.length; i++) {
-      for (int i = 0; i < this.filters.length; i++) {
+        if (filters[i].getClass().getName().equals(pid)) {
-        if (filters[i].getClass().getName().equals(pid)) {
+          filter = filters[i];
+          break;
+        }
-          filter = filters[i];
-          break;
-        }
+      }
-      }
+    }
+    return filter;
-    }
-    return filter;
+  }
-  }
 }

From e0e924e15c247d3fa3dd92f387fe53ba7effd78a Mon Sep 17 00:00:00 2001
From: Asitang Mishra
Date: Thu, 18 Jun 2015 08:09:30 -0700
Subject: [PATCH 4/5] final commit for patch 1.0

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 1af3f31c5f..5b3c687303 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,4 @@ conf/slaves
 build/
 runtime/
 logs/
+/bin/

From cca768bc1c790a976594136433485fe899465cb8 Mon Sep 17 00:00:00 2001
From: Asitang Mishra
Date: Fri, 19 Jun 2015 13:13:34 -0700
Subject: [PATCH 5/5] Patch 2.0 for NUTCH-2038

---
 conf/nutch-default.xml                        |  50 ++--
 src/java/org/apache/nutch/net/URLFilters.java |  13 +-
 .../nutch/parse/ModelURLFilterAbstract.java   |  45 +++-
 .../org/apache/nutch/parse/ParseSegment.java  |  96 ++++---
 .../nutch/urlfilter/model/ModelURLFilter.java | 170 +++++++------
 .../nutch/urlfilter/model/NBClassifier.java   | 234 ------------------
 .../urlfilter/model/NaiveBayesClassifier.java | 229 +++++++++++++++++
 .../nutch/urlfilter/model/package-info.java   |  10 +-
 8 files changed, 455 insertions(+), 392 deletions(-)
 delete mode 100644 src/plugin/urlfilter-model/src/java/org/apache/nutch/urlfilter/model/NBClassifier.java
 create mode 100644 src/plugin/urlfilter-model/src/java/org/apache/nutch/urlfilter/model/NaiveBayesClassifier.java

diff --git a/conf/nutch-default.xml b/conf/nutch-default.xml
index 92dd165419..867b87b413 100644
--- a/conf/nutch-default.xml
+++ b/conf/nutch-default.xml
@@ -1135,28 +1135,6 @@
-<property>
-  <name>parser.modelfilter.trainfile</name>
-  <value>tweets-train.tsv</value>
-  <description>
-  </description>
-</property>
-
-<property>
-  <name>parser.modelfilter.dictionaryfile</name>
-  <value>wordlist.txt</value>
-  <description>
-  </description>
-</property>
-
-<property>
-  <name>parser.modelfilter</name>
-  <value>true</value>
-  <description>
-  </description>
-</property>
-
 <property>
   <name>parse.plugin.file</name>
   <value>parse-plugins.xml</value>
@@ -1280,6 +1258,34 @@
+<property>
+  <name>urlfilter.model.trainfile</name>
+  <value></value>
+  <description>Set the name of the file to be used for Naive Bayes training.
+The format is: each line contains two tab-separated parts (columns):
+1. "1" or "0": "1" for a relevant and "0" for an irrelevant document.
+2. The text that will be used for training.
+
+Each row is considered a new "document" by the classifier.
+  </description>
+</property>
+
+<property>
+  <name>urlfilter.model.wordlist</name>
+  <value></value>
+  <description>The name of the file to be used as a list of "hot words" to be
+matched in the URL by the model filter. The format is one word per line.
+  </description>
+</property>
+
+<property>
+  <name>urlfilter.model.filter</name>
+  <value>false</value>
+  <description>A boolean. Set it to true to enable the model filter.</description>
+</property>
+
 <property>
   <name>urlfilter.domain.file</name>
   <value>domain-urlfilter.txt</value>

diff --git a/src/java/org/apache/nutch/net/URLFilters.java b/src/java/org/apache/nutch/net/URLFilters.java
index 9ed436978a..16051e53d4 100644
--- a/src/java/org/apache/nutch/net/URLFilters.java
+++ b/src/java/org/apache/nutch/net/URLFilters.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements. See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -25,6 +25,7 @@ public class URLFilters {
   public static final String URLFILTER_ORDER = "urlfilter.order";
   private URLFilter[] filters;
+  private URLFilter filter = null;
 
   public URLFilters(Configuration conf) {
     this.filters = (URLFilter[]) PluginRepository.get(conf).getOrderedPlugins(
@@ -41,14 +42,18 @@ public String filter(String urlString) throws URLFilterException {
     }
     return urlString;
   }
-/**Get a filter with the full classname if only it is activated through the nutchsite.xml*/
-  public URLFilter getFilter(String pid) {
+
+  /**
+   * Get a filter with the full classname, only if it is activated through the
+   * nutch-site.xml
+   */
+  public URLFilter getFilter(String classname) {
 
     if (filter == null) {
 
       for (int i = 0; i < this.filters.length; i++) {
 
-        if (filters[i].getClass().getName().equals(pid)) {
+        if (filters[i].getClass().getName().equals(classname)) {
 
           filter = filters[i];
           break;

diff --git a/src/java/org/apache/nutch/parse/ModelURLFilterAbstract.java b/src/java/org/apache/nutch/parse/ModelURLFilterAbstract.java
index 6c6bead2cc..58b647ee1d 100644
--- a/src/java/org/apache/nutch/parse/ModelURLFilterAbstract.java
+++ b/src/java/org/apache/nutch/parse/ModelURLFilterAbstract.java
@@ -1,12 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.nutch.parse;
 
 import org.apache.nutch.net.URLFilter;
 
-public abstract class ModelURLFilterAbstract implements URLFilter{
+/**
+ * An abstract class that exposes additional methods of a URL filter plugin
+ * to the core Nutch classes
+ */
+public abstract class ModelURLFilterAbstract implements URLFilter {
+
+  /** Classifies the given parse text; returns true if the text is relevant */
+  public abstract boolean filterParse(String text);
+
+  /**
+   * Can be called instead of the generic filter(String url) in any job other
+   * than the generator or injector, so that the generic function can be
+   * short-circuited, i.e. the filter will not run for the generator
+   */
+  public abstract boolean filterUrl(String url);
+
+  /**
+   * Configure the filter once before using the filtering functions, e.g.
+   * train the classifier once
+   */
+  public abstract void configure(String[] args) throws Exception;
-
-  public abstract void filterParse(String text);
-  public abstract boolean filterUrl(String url) ;
-  public abstract void configure(String[] args) ;
-
 }

diff --git a/src/java/org/apache/nutch/parse/ParseSegment.java b/src/java/org/apache/nutch/parse/ParseSegment.java
index 08472b8b48..1c3ae552c9 100644
--- a/src/java/org/apache/nutch/parse/ParseSegment.java
+++ b/src/java/org/apache/nutch/parse/ParseSegment.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements. See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -34,7 +34,6 @@ import org.apache.nutch.protocol.*; import org.apache.nutch.scoring.ScoringFilterException; import org.apache.nutch.scoring.ScoringFilters; - import org.apache.nutch.util.*; import org.apache.hadoop.fs.Path; @@ -57,11 +56,11 @@ public class ParseSegment extends NutchTool implements Tool, private ParseUtil parseUtil; private boolean skipTruncated; - - public static final String PARSER_MODELFILTER="parser.modelfilter"; - public static final String TRAINFILE_MODELFILTER="parser.modelfilter.trainfile"; - public static final String DICTFILE_MODELFILTER="parser.modelfilter.dictionaryfile"; - + + public static final String PARSER_MODELFILTER = "urlfilter.model.filter"; + public static final String TRAINFILE_MODELFILTER = "urlfilter.model.trainfile"; + public static final String DICTFILE_MODELFILTER = "urlfilter.model.wordlist"; + private boolean filterflag; private URLFilters filters; private ModelURLFilterAbstract filter; @@ -80,16 +79,33 @@ public void configure(JobConf job) { skipTruncated = job.getBoolean(SKIP_TRUNCATED, true); filterflag = job.getBoolean(PARSER_MODELFILTER, true); - if(filterflag){ - String[] args=new String[2]; - args[0]=getConf().get(TRAINFILE_MODELFILTER); - args[1]=getConf().get(DICTFILE_MODELFILTER); - - filters = new URLFilters(job); - filter=(ModelURLFilterAbstract) filters.getFilter("org.apache.nutch.urlfilter.model.ModelURLFilter"); - filter.configure(args); - - } + if (filterflag) { + String[] args = new String[2]; + args[0] = getConf().get(TRAINFILE_MODELFILTER); + args[1] = getConf().get(DICTFILE_MODELFILTER); + + if (args[0] == null || args[0].trim().length() == 0 || args[1] == null + || args[1].trim().length() == 0) { + String message = "Model URLFilter: trainfile or wordlist not set in the urlfilter.model.trainfile or urlfilter.model.wordlist"; + if (LOG.isErrorEnabled()) { + filterflag = false; + LOG.error(message); + } + throw new IllegalArgumentException(message); + } else { + try { + filters = new URLFilters(job); + filter = (ModelURLFilterAbstract) filters + .getFilter("org.apache.nutch.urlfilter.model.ModelURLFilter"); + filter.configure(args); + } catch (Exception e) { + // TODO: handle exception + LOG.warn("There was some problem while getting the model filter or training it. 
Not using the filter"); + filterflag = false; + } + + } + } } public void close() { @@ -162,28 +178,36 @@ public void map(WritableComparable key, Content content, } } -if(filterflag){ - - - - filter.filterParse(parse.getText()); - - ArrayList tempOutlinks= new ArrayList(); - Outlink[] out=null; - for(int i=0;i tempOutlinks = new ArrayList(); + Outlink[] out = null; + for (int i = 0; i < parse.getData().getOutlinks().length; i++) { + LOG.info("ModelURLFilter: Outlink to check:: " + + parse.getData().getOutlinks()[i].getToUrl()); + if (filter.filterUrl(parse.getData().getOutlinks()[i].getToUrl())) { + tempOutlinks.add(parse.getData().getOutlinks()[i]); + LOG.info("ModelURLFilter: found relevent"); + + } else { + LOG.info("ModelURLFilter: found irrelevent"); + } } + out = new Outlink[tempOutlinks.size()]; + for (int i = 0; i < tempOutlinks.size(); i++) { + out[i] = tempOutlinks.get(i); } - out=new Outlink[tempOutlinks.size()]; - for(int i=0;i wordlist = new ArrayList(); - - public ModelURLFilter() throws Exception { + private static final Logger LOG = LoggerFactory + .getLogger(ModelURLFilter.class); - } + private Configuration conf; + private String inputFilePath; + private String dictionaryFile; + private ArrayList wordlist = new ArrayList(); - public void configure(String[] args) { + public ModelURLFilter() throws Exception { - inputFilePath = args[0]; - dictionaryFile = args[1]; - BufferedReader br = null; + } - try { + public void configure(String[] args) throws Exception { - String CurrentLine; + inputFilePath = args[0]; + dictionaryFile = args[1]; + BufferedReader br = null; - Reader reader = conf.getConfResourceAsReader(dictionaryFile); - br = new BufferedReader(reader); - while ((CurrentLine = br.readLine()) != null) { - wordlist.add(CurrentLine); - } + try { - } catch (IOException e) { + String CurrentLine; - e.printStackTrace(); - } finally { - try { - if (br != null) - br.close(); - } catch (IOException ex) { - ex.printStackTrace(); - } - } + Reader reader = conf.getConfResourceAsReader(dictionaryFile); + br = new BufferedReader(reader); + while ((CurrentLine = br.readLine()) != null) { + wordlist.add(CurrentLine); + } - try { + } catch (IOException e) { + LOG.error("Error occured while reading the wordlist"); + throw new Exception("Error occured while reading the wordlist"); + } finally { + try { + if (br != null) + br.close(); + } catch (IOException ex) { + ex.printStackTrace(); + } + } - train(); - } catch (Exception e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - } + try { - public void filterParse(String text) { + train(); + } catch (Exception e) { + // TODO Auto-generated catch block + LOG.error("Error occured while training"); + throw new Exception("Error occured while training"); + } + } - try { - relevent = classify(text); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } + public boolean filterParse(String text) { - } + try { + return classify(text); + } catch (IOException e) { + // TODO Auto-generated catch block + LOG.error("Error occured while classifying:: " + text); - public boolean filterUrl(String url) { + } - if (!relevent) { - if (!containsWord(url, wordlist)) { - return false; - } - } + return false; + } - return true; - } + public boolean filterUrl(String url) { - public String filter(String url) { + return containsWord(url, wordlist); - return url; + } - } + public String filter(String url) { - public boolean classify(String text) throws IOException { + return url; - // if classified as relevent 
"1" then return true - if (NBClassifier.classify(text).equals("1")) - return true; - return false; - } + } - public void train() throws Exception { + public boolean classify(String text) throws IOException { - // check if the model file exists, if it does then don't train - NBClassifier.createModel(inputFilePath); + // if classified as relevent "1" then return true + if (NaiveBayesClassifier.classify(text).equals("1")) + return true; + return false; + } - } + public void train() throws Exception { + // check if the model file exists, if it does then don't train + if (!FileSystem.get(conf).exists(new Path("model"))) { + LOG.info("Training the Naive Bayes Model"); + NaiveBayesClassifier.createModel(inputFilePath); + } else { + LOG.info("Model already exists. Skipping training."); + } + } - public boolean containsWord(String url, ArrayList wordlist) { - for (String word : wordlist) { - if (url.contains(word)) { - return true; - } - } + public boolean containsWord(String url, ArrayList wordlist) { + for (String word : wordlist) { + if (url.contains(word)) { + return true; + } + } - return false; - } + return false; + } - public void setConf(Configuration conf) { - this.conf = conf; + public void setConf(Configuration conf) { + this.conf = conf; - } + } - public Configuration getConf() { - return this.conf; - } + public Configuration getConf() { + return this.conf; + } } diff --git a/src/plugin/urlfilter-model/src/java/org/apache/nutch/urlfilter/model/NBClassifier.java b/src/plugin/urlfilter-model/src/java/org/apache/nutch/urlfilter/model/NBClassifier.java deleted file mode 100644 index 714aaa4164..0000000000 --- a/src/plugin/urlfilter-model/src/java/org/apache/nutch/urlfilter/model/NBClassifier.java +++ /dev/null @@ -1,234 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.nutch.urlfilter.model; - -import java.io.BufferedReader; -import java.io.FileReader; -import java.io.IOException; -import java.io.StringReader; -import java.util.HashMap; -import java.util.Map; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.SequenceFile; -import org.apache.hadoop.io.SequenceFile.Writer; -import org.apache.hadoop.io.Text; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.util.Version; -import org.apache.mahout.classifier.naivebayes.BayesUtils; -import org.apache.mahout.classifier.naivebayes.NaiveBayesModel; -import org.apache.mahout.classifier.naivebayes.StandardNaiveBayesClassifier; -import org.apache.mahout.classifier.naivebayes.training.TrainNaiveBayesJob; -import org.apache.mahout.common.Pair; -import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterable; -import org.apache.mahout.math.RandomAccessSparseVector; -import org.apache.mahout.math.Vector; -import org.apache.mahout.math.Vector.Element; -import org.apache.mahout.vectorizer.SparseVectorsFromSequenceFiles; -import org.apache.mahout.vectorizer.TFIDF; - -import com.google.common.collect.ConcurrentHashMultiset; -import com.google.common.collect.Multiset; - -public class NBClassifier { - - public static Map readDictionnary(Configuration conf, - Path dictionnaryPath) { - Map dictionnary = new HashMap(); - for (Pair pair : new SequenceFileIterable( - dictionnaryPath, true, conf)) { - dictionnary.put(pair.getFirst().toString(), pair.getSecond().get()); - } - return dictionnary; - } - - public static Map readDocumentFrequency(Configuration conf, - Path documentFrequencyPath) { - Map documentFrequency = new HashMap(); - for (Pair pair : new SequenceFileIterable( - documentFrequencyPath, true, conf)) { - documentFrequency - .put(pair.getFirst().get(), pair.getSecond().get()); - } - return documentFrequency; - } - - public static void createModel(String inputTrainFilePath) throws Exception { - - String[] args1 = new String[4]; - - args1[0] = "-i"; - args1[1] = "outseq"; - args1[2] = "-o"; - args1[3] = "vectors"; - - String[] args2 = new String[9]; - - args2[0] = "-i"; - args2[1] = "vectors/tfidf-vectors"; - args2[2] = "-el"; - args2[3] = "-li"; - args2[4] = "labelindex"; - args2[5] = "-o"; - args2[6] = "model"; - args2[7] = "-ow"; - args2[8] = "-c"; - - convertToSeq(inputTrainFilePath, "outseq"); - - SparseVectorsFromSequenceFiles.main(args1); - - TrainNaiveBayesJob.main(args2); - } - - public static String classify(String text) throws IOException { - return classify(text, "model", "labelindex", - "vectors/dictionary.file-0", "vectors/df-count/part-r-00000"); - } - - public static String classify(String text, String modelPath, - String labelIndexPath, String dictionaryPath, - String documentFrequencyPath) throws IOException { - - Configuration configuration = new Configuration(); - - // model is a matrix (wordId, labelId) => probability score - NaiveBayesModel model = NaiveBayesModel.materialize( - new Path(modelPath), configuration); - - StandardNaiveBayesClassifier classifier = new StandardNaiveBayesClassifier( - model); - - // labels is a map label => classId - Map labels = 
BayesUtils.readLabelIndex(configuration, - new Path(labelIndexPath)); - Map dictionary = readDictionnary(configuration, - new Path(dictionaryPath)); - Map documentFrequency = readDocumentFrequency( - configuration, new Path(documentFrequencyPath)); - - // analyzer used to extract word from text - Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); - // int labelCount = labels.size(); - int documentCount = documentFrequency.get(-1).intValue(); - - Multiset words = ConcurrentHashMultiset.create(); - - // extract words from text - TokenStream ts = analyzer.tokenStream("text", new StringReader(text)); - CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); - ts.reset(); - int wordCount = 0; - while (ts.incrementToken()) { - if (termAtt.length() > 0) { - String word = ts.getAttribute(CharTermAttribute.class) - .toString(); - Integer wordId = dictionary.get(word); - // if the word is not in the dictionary, skip it - if (wordId != null) { - words.add(word); - wordCount++; - } - } - } - - ts.end(); - ts.close(); - // create vector wordId => weight using tfidf - Vector vector = new RandomAccessSparseVector(10000); - TFIDF tfidf = new TFIDF(); - for (Multiset.Entry entry : words.entrySet()) { - String word = entry.getElement(); - int count = entry.getCount(); - Integer wordId = dictionary.get(word); - Long freq = documentFrequency.get(wordId); - double tfIdfValue = tfidf.calculate(count, freq.intValue(), - wordCount, documentCount); - vector.setQuick(wordId, tfIdfValue); - } - // one score for each label - - Vector resultVector = classifier.classifyFull(vector); - double bestScore = -Double.MAX_VALUE; - int bestCategoryId = -1; - for (Element element : resultVector.all()) { - int categoryId = element.index(); - double score = element.get(); - if (score > bestScore) { - bestScore = score; - bestCategoryId = categoryId; - } - - } - - analyzer.close(); - return labels.get(bestCategoryId); - - } - - static void convertToSeq(String inputFileName, String outputDirName) - throws IOException { - Configuration configuration = new Configuration(); - FileSystem fs = FileSystem.get(configuration); - Writer writer = new SequenceFile.Writer(fs, configuration, new Path( - outputDirName + "/chunk-0"), Text.class, Text.class); - - BufferedReader reader = new BufferedReader( - new FileReader(inputFileName)); - Text key = new Text(); - Text value = new Text(); - while (true) { - String line = reader.readLine(); - if (line == null) { - break; - } - String[] tokens = line.split("\t", 3); - if (tokens.length != 3) { - // System.out.println("Skip line: " + line); - continue; - } - String category = tokens[0]; - String id = tokens[1]; - String message = tokens[2]; - key.set("/" + category + "/" + id); - value.set(message); - writer.append(key, value); - - } - reader.close(); - writer.close(); - - } - - public static void main(String args[]) throws Exception { - - // createModel("data/tweets-train.tsv"); - - // example - // String result=classify("how are you doing here bro"); - - // System.out.println(result); - } -} diff --git a/src/plugin/urlfilter-model/src/java/org/apache/nutch/urlfilter/model/NaiveBayesClassifier.java b/src/plugin/urlfilter-model/src/java/org/apache/nutch/urlfilter/model/NaiveBayesClassifier.java new file mode 100644 index 0000000000..dc0f1f6f0e --- /dev/null +++ b/src/plugin/urlfilter-model/src/java/org/apache/nutch/urlfilter/model/NaiveBayesClassifier.java @@ -0,0 +1,229 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license 
agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nutch.urlfilter.model; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.StringReader; +import java.util.HashMap; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.SequenceFile.Writer; +import org.apache.hadoop.io.Text; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.util.Version; +import org.apache.mahout.classifier.naivebayes.BayesUtils; +import org.apache.mahout.classifier.naivebayes.NaiveBayesModel; +import org.apache.mahout.classifier.naivebayes.StandardNaiveBayesClassifier; +import org.apache.mahout.classifier.naivebayes.training.TrainNaiveBayesJob; +import org.apache.mahout.common.Pair; +import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterable; +import org.apache.mahout.math.RandomAccessSparseVector; +import org.apache.mahout.math.Vector; +import org.apache.mahout.math.Vector.Element; +import org.apache.mahout.vectorizer.SparseVectorsFromSequenceFiles; +import org.apache.mahout.vectorizer.TFIDF; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.collect.ConcurrentHashMultiset; +import com.google.common.collect.Multiset; + +public class NaiveBayesClassifier { + + private static final Logger LOG = LoggerFactory + .getLogger(NaiveBayesClassifier.class); + + public static Map readDictionnary(Configuration conf, + Path dictionnaryPath) { + Map dictionnary = new HashMap(); + for (Pair pair : new SequenceFileIterable( + dictionnaryPath, true, conf)) { + dictionnary.put(pair.getFirst().toString(), pair.getSecond().get()); + } + return dictionnary; + } + + public static Map readDocumentFrequency(Configuration conf, + Path documentFrequencyPath) { + Map documentFrequency = new HashMap(); + for (Pair pair : new SequenceFileIterable( + documentFrequencyPath, true, conf)) { + documentFrequency.put(pair.getFirst().get(), pair.getSecond().get()); + } + return documentFrequency; + } + + public static void createModel(String inputTrainFilePath) throws Exception { + + String[] args1 = new String[4]; + + args1[0] = "-i"; + args1[1] = "outseq"; + args1[2] = "-o"; + args1[3] = "vectors"; + + String[] args2 = new String[9]; + + args2[0] = "-i"; + args2[1] = "vectors/tfidf-vectors"; + args2[2] = "-el"; + args2[3] = "-li"; + args2[4] = "labelindex"; + args2[5] = "-o"; + args2[6] = "model"; + args2[7] = "-ow"; + args2[8] = "-c"; + + 
convertToSeq(inputTrainFilePath, "outseq"); + + SparseVectorsFromSequenceFiles.main(args1); + + TrainNaiveBayesJob.main(args2); + } + + public static String classify(String text) throws IOException { + return classify(text, "model", "labelindex", "vectors/dictionary.file-0", + "vectors/df-count/part-r-00000"); + } + + public static String classify(String text, String modelPath, + String labelIndexPath, String dictionaryPath, String documentFrequencyPath) + throws IOException { + + Configuration configuration = new Configuration(); + + // model is a matrix (wordId, labelId) => probability score + NaiveBayesModel model = NaiveBayesModel.materialize(new Path(modelPath), + configuration); + + StandardNaiveBayesClassifier classifier = new StandardNaiveBayesClassifier( + model); + + // labels is a map label => classId + Map labels = BayesUtils.readLabelIndex(configuration, + new Path(labelIndexPath)); + Map dictionary = readDictionnary(configuration, new Path( + dictionaryPath)); + Map documentFrequency = readDocumentFrequency(configuration, + new Path(documentFrequencyPath)); + + // analyzer used to extract word from text + Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); + // int labelCount = labels.size(); + int documentCount = documentFrequency.get(-1).intValue(); + + Multiset words = ConcurrentHashMultiset.create(); + + // extract words from text + TokenStream ts = analyzer.tokenStream("text", new StringReader(text)); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); + ts.reset(); + int wordCount = 0; + while (ts.incrementToken()) { + if (termAtt.length() > 0) { + String word = ts.getAttribute(CharTermAttribute.class).toString(); + Integer wordId = dictionary.get(word); + // if the word is not in the dictionary, skip it + if (wordId != null) { + words.add(word); + wordCount++; + } + } + } + + ts.end(); + ts.close(); + // create vector wordId => weight using tfidf + Vector vector = new RandomAccessSparseVector(10000); + TFIDF tfidf = new TFIDF(); + for (Multiset.Entry entry : words.entrySet()) { + String word = entry.getElement(); + int count = entry.getCount(); + Integer wordId = dictionary.get(word); + Long freq = documentFrequency.get(wordId); + double tfIdfValue = tfidf.calculate(count, freq.intValue(), wordCount, + documentCount); + vector.setQuick(wordId, tfIdfValue); + } + // one score for each label + + Vector resultVector = classifier.classifyFull(vector); + double bestScore = -Double.MAX_VALUE; + int bestCategoryId = -1; + for (Element element : resultVector.all()) { + int categoryId = element.index(); + double score = element.get(); + if (score > bestScore) { + bestScore = score; + bestCategoryId = categoryId; + } + + } + + analyzer.close(); + return labels.get(bestCategoryId); + + } + + static void convertToSeq(String inputFileName, String outputDirName) + throws IOException { + Configuration configuration = new Configuration(); + FileSystem fs = FileSystem.get(configuration); + Writer writer = new SequenceFile.Writer(fs, configuration, new Path( + outputDirName + "/chunk-0"), Text.class, Text.class); + BufferedReader reader = null; + reader = new BufferedReader( + configuration.getConfResourceAsReader(inputFileName)); + Text key = new Text(); + Text value = new Text(); + long uniqueid=0; + while (true) { + uniqueid++; + String line = reader.readLine(); + if (line == null) { + break; + } + String[] tokens = line.split("\t", 2); + if (tokens.length != 2) { + continue; + } + String category = tokens[0]; + String id = ""+uniqueid; + String message = 
tokens[1];
+      key.set("/" + category + "/" + id);
+      value.set(message);
+      writer.append(key, value);
+
+    }
+    reader.close();
+    writer.close();
+
+  }
+
+}

diff --git a/src/plugin/urlfilter-model/src/java/org/apache/nutch/urlfilter/model/package-info.java b/src/plugin/urlfilter-model/src/java/org/apache/nutch/urlfilter/model/package-info.java
index a74d4bebad..fb59e507d8 100644
--- a/src/plugin/urlfilter-model/src/java/org/apache/nutch/urlfilter/model/package-info.java
+++ b/src/plugin/urlfilter-model/src/java/org/apache/nutch/urlfilter/model/package-info.java
@@ -16,10 +16,12 @@
  */
 
 /**
- * URL filter plugin to include only URLs which match an element in a given list of
- * domain suffixes, domain names, and/or host names.
- * See {@link org.apache.nutch.urlfilter.domainblacklist} for the counterpart
- * (exclude URLs by host or domain).
+ * URL filter plugin with a two-tier filtering architecture.
+ * The filter is called from the parser and looks at the page that was just parsed:
+ * it runs a Naive Bayes classification on the page text and decides whether the
+ * page is relevant. If the page is relevant, all of its outlinks pass; if not, a
+ * second check kicks in, which looks for "hot words" (from a user-provided wordlist)
+ * in each outlink URL itself, and only outlinks that match a hot word are let through.
  */
 package org.apache.nutch.urlfilter.model;
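
---

Usage note (illustrative, not part of the diffs above): with patch 2.0 applied, the
filter is configured through the three urlfilter.model.* properties added to
nutch-default.xml, and the plugin must also be enabled through plugin.includes in
nutch-site.xml. A minimal sketch, assuming the plugin id is "urlfilter-model"
(taken from the plugin directory name; the plugin.xml content did not survive in
this copy of the patch, so the exact id is an assumption, as is the rest of the
plugin list shown):

    <!-- nutch-site.xml (sketch) -->
    <property>
      <name>plugin.includes</name>
      <!-- append urlfilter-model to whatever plugin list is already configured -->
      <value>protocol-http|urlfilter-(regex|model)|parse-(html|tika)|index-(basic|anchor)|scoring-opic</value>
    </property>
    <property>
      <name>urlfilter.model.filter</name>
      <value>true</value>
    </property>
    <property>
      <name>urlfilter.model.trainfile</name>
      <value>tweets-train.tsv</value>
    </property>
    <property>
      <name>urlfilter.model.wordlist</name>
      <value>wordlist.txt</value>
    </property>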
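
Per the urlfilter.model.trainfile description, the patch 2.0 training format is one
document per line with two tab-separated columns: the label first ("1" relevant,
"0" irrelevant) and the text second; NaiveBayesClassifier.convertToSeq() splits
each line with split("\t", 2) and generates its own document ids. The file contents
below are invented examples for illustration only:

    tweets-train.tsv (columns are tab-separated):
    1	flooding reported along the river, evacuation routes are open
    1	volunteers needed for hurricane relief at the coastal shelters
    0	huge weekend sale on sneakers and running shoes

    wordlist.txt (one "hot word" per line; ModelURLFilter.containsWord() does a
    substring match of each word against the outlink URL):
    flood
    hurricane
    relief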