Skip to content

Commit

Permalink
NUTCH-2957 indexer-solr / Solr schema.xml
Browse files Browse the repository at this point in the history
- add fall-back field definitions for unknown index fields
- update comments and descriptions
- fix indentation
  • Loading branch information
sebastian-nagel committed Aug 17, 2022
1 parent edebfe4 commit c0f723e
Showing 1 changed file with 15 additions and 8 deletions.
23 changes: 15 additions & 8 deletions src/plugin/indexer-solr/schema.xml
Expand Up @@ -15,23 +15,26 @@
See the License for the specific language governing permissions and
limitations under the License.
-->

<!--
Description: This document contains Solr 4.x schema definition to
Description: This document contains the Solr schema definition to
be used with the Solr integration currently built into Nutch.
This schema is not minimal: some useful field type definitions have been left in,
and the set of fields and their flags (indexed/stored/term vectors) can be
further optimized depending on needs. See
http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/solr/conf/schema.xml?view=markup
further optimized depending on needs. See
https://github.com/apache/solr/blob/main/solr/server/solr/configsets/_default/conf/managed-schema.xml
for more info.
-->

<schema name="nutch" version="1.5">
<schema name="nutch" version="1.6">

<types>

<!-- StrField values are not analyzed; they are indexed/stored verbatim.
"strings" is the multi-valued variant with docValues enabled. -->
<fieldType name="string" class="solr.StrField" omitNorms="true" sortMissingLast="true"/>
<fieldType name="strings" class="solr.StrField" multiValued="true" docValues="true" sortMissingLast="true"/>

<!-- Binary data type. The data should be sent/retrieved as Base64-encoded strings. -->
<fieldType name="binary" class="solr.BinaryField"/>


Expand Down Expand Up @@ -91,7 +94,7 @@

<!-- A Trie based date field for faster date range queries and date faceting.
"tdate" omits norms; "tdates" is declared multiValued with docValues enabled.
NOTE(review): "tdates" is listed twice in this view (it looks like the old and new
side of a whitespace-only change); confirm only one definition exists in the real schema. -->
<fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>
<fieldType name="tdates" class="solr.TrieDateField" docValues="true" precisionStep="6" positionIncrementGap="0" multiValued="true"/>
<fieldType name="tdates" class="solr.TrieDateField" docValues="true" precisionStep="6" positionIncrementGap="0" multiValued="true"/>


<!-- solr.TextField allows the specification of custom text analyzers
Expand Down Expand Up @@ -293,7 +296,7 @@
</fieldType>

<!-- since fields of this type are by default not stored or indexed,
any data added to them will be ignored outright. -->
any data added to them will be ignored outright. -->
<fieldType name="ignored" class="solr.StrField" stored="false" indexed="false" multiValued="true"/>

<!-- boolean type: "true" or "false" -->
Expand Down Expand Up @@ -423,8 +426,11 @@
<!-- Holds the segment's raw binary content when indexing is run with -addBinaryContent;
the value is stored but not indexed. -->
<field name="binaryContent" type="binary" indexed="false" stored="true"/>

</fields>
<uniqueKey>id</uniqueKey>
<!-- Fall-back for fields not defined explicitly by the schema whose names end in "_str".
Values are kept as docValues only: not indexed, not stored, and the docValues
are not treated as stored (useDocValuesAsStored="false"). -->
<dynamicField name="*_str"
              type="strings"
              indexed="false"
              stored="false"
              docValues="true"
              useDocValuesAsStored="false"/>
</fields>

<uniqueKey>id</uniqueKey>

<!-- copyField commands copy one field to another at the time a document
is added to the index. It's used either to index the same field differently,
Expand All @@ -436,4 +442,5 @@
<!-- Copy anchor and author values into "text", and latLon into "location". -->
<copyField source="anchor" dest="text"/>
<copyField source="author" dest="text"/>
<copyField source="latLon" dest="location"/>

</schema>

0 comments on commit c0f723e

Please sign in to comment.