Permalink
Browse files

NUTCH-1232 Remove site field from index-basic

git-svn-id: https://svn.apache.org/repos/asf/nutch/trunk@1226409 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information...
Markus Jelsma
Markus Jelsma committed Jan 2, 2012
1 parent ef198ff commit 943aed79faeec1759a3f617b14f9023ec8b84214
View
@@ -1,5 +1,7 @@
Nutch Change Log
+* NUTCH-1232 Remove site field from index-basic (markus)
+
* NUTCH-1239 Webgraph should remove deleted pages from segment input (markus)
* NUTCH-1238 Fetcher throughput threshold must start before feeder finished (markus)
View
@@ -310,7 +310,6 @@
<!-- fields for index-basic plugin -->
<field name="host" type="url" stored="false" indexed="true"/>
- <field name="site" type="string" stored="false" indexed="true"/>
<field name="url" type="url" stored="true" indexed="true" required="true"/>
<!-- stored=true for highlighting, use term vectors and positions for fast highlighting -->
<field name="content" type="text_general" stored="true" indexed="true"/>
View
@@ -22,6 +22,7 @@
https://issues.apache.org/jira/browse/NUTCH-994
https://issues.apache.org/jira/browse/NUTCH-997
https://issues.apache.org/jira/browse/NUTCH-1058
+ https://issues.apache.org/jira/browse/NUTCH-1232
and
http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/
example/solr/conf/schema.xml?view=markup
@@ -73,8 +74,7 @@
<field name="boost" type="float" stored="true" indexed="false"/>
<!-- fields for index-basic plugin -->
- <field name="host" type="url" stored="false" indexed="true"/>
- <field name="site" type="string" stored="false" indexed="true"/>
+ <field name="host" type="string" stored="false" indexed="true"/>
<field name="url" type="url" stored="true" indexed="true"
required="true"/>
<field name="content" type="text" stored="false" indexed="true"/>
@@ -32,7 +32,6 @@
-->
<fields>
<field dest="content" source="content"/>
- <field dest="site" source="site"/>
<field dest="title" source="title"/>
<field dest="host" source="host"/>
<field dest="segment" source="segment"/>
@@ -67,7 +67,6 @@ public NutchDocument filter(NutchDocument doc, Parse parse, Text url, CrawlDatum
if (host != null) {
doc.add("host", host);
- doc.add("site", host);
}
doc.add("url", reprUrlString == null ? urlString : reprUrlString);

0 comments on commit 943aed7

Please sign in to comment.