Skip to content

Commit

Permalink
Initial checkin
Browse files Browse the repository at this point in the history
  • Loading branch information
ianbarber committed Mar 9, 2011
0 parents commit 79348ee
Show file tree
Hide file tree
Showing 16 changed files with 738 additions and 0 deletions.
1 change: 1 addition & 0 deletions README
@@ -0,0 +1 @@
Some basic code and config files from the slides of my "Solr: Beyond the Basics" talk.
28 changes: 28 additions & 0 deletions conf/db-data-config.xml
@@ -0,0 +1,28 @@
<dataConfig>
  <!--
    DataImportHandler configuration: pulls published stories (status_id = 1)
    from the MySQL database "orig" and maps them onto the index fields.
    NOTE(review): the root credentials below are hard-coded; consider a
    dedicated read-only account before using this outside a demo.
  -->
  <dataSource driver="com.mysql.jdbc.Driver"
              url="jdbc:mysql://localhost:3306/orig"
              user="root"
              password="password" />
  <document>
    <!--
      One document per story row.
      * query            : full-import; publish_date is converted with FROM_UNIXTIME.
      * deltaQuery       : ids of stories modified since the last import. The timestamp
                           variable is quoted so it is substituted as a SQL string literal.
                           NOTE(review): this assumes "modified" is a DATETIME/TIMESTAMP
                           column; if it is a Unix epoch integer like publish_date, wrap the
                           variable in UNIX_TIMESTAMP(...) instead. Confirm against the table.
      * deltaImportQuery : re-reads a single changed story; it selects the same columns,
                           with the same publish_date conversion, as the full-import query
                           so both import paths produce identical documents.
    -->
    <entity name="story"
            query="SELECT s.id, s.content, s.slug, FROM_UNIXTIME(s.publish_date) as publish_date, s.title, s.description, CONCAT(u.first_name, ' ', u.last_name) as author, s.author_id FROM stories as s JOIN users as u ON u.id = s.author_id WHERE s.status_id = 1"
            deltaImportQuery="SELECT s.id, s.content, s.slug, FROM_UNIXTIME(s.publish_date) as publish_date, s.title, s.description, CONCAT(u.first_name, ' ', u.last_name) as author, s.author_id FROM stories as s JOIN users as u ON u.id = s.author_id WHERE s.status_id = 1 AND s.id = ${dataimporter.delta.id}"
            deltaQuery="SELECT id FROM stories WHERE modified > '${dataimporter.last_index_time}'"
            transformer="TemplateTransformer,HTMLStripTransformer">
      <!-- permalink is synthesised from the story slug by the TemplateTransformer -->
      <field column="permalink" name="permalink" template="http://fooweb.com/${story.slug}" />
      <field column="publish_date" name="date" />
      <field column="title" />
      <!-- HTML markup is stripped from the lead paragraph and the body before indexing -->
      <field column="description" name="lead_para" stripHTML="true" />
      <field column="content" name="body" stripHTML="true" />
      <field column="author" />

      <!-- Tags attached to the current story -->
      <entity name="tag"
              query="SELECT t.title as tag FROM tags as t JOIN story_tags as st ON st.tag_id = t.id WHERE st.story_id = ${story.id}">
        <field column="tag" />
      </entity>

      <!-- Topics attached to the current story; the join table alias must match the
           "st." references used in the ON and WHERE clauses -->
      <entity name="topic"
              query="SELECT t.title as category FROM topics as t JOIN story_topics as st ON st.topic_id = t.id WHERE st.item_id = ${story.id}">
        <field column="category" />
      </entity>

      <!-- Constant marker identifying which system the document came from -->
      <field column="source_site" template="cms" />
    </entity>
  </document>
</dataConfig>
2 changes: 2 additions & 0 deletions conf/protwords.txt
@@ -0,0 +1,2 @@
fooweb
frobulator
145 changes: 145 additions & 0 deletions conf/schema.xml
@@ -0,0 +1,145 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
  Solr schema for the "fooweb" index.
  Documents are keyed by permalink; the catch-all "text" field is the default
  search field and is populated through the copyField rules further down.
-->
<schema name="fooweb" version="1.2">
  <types>
    <!-- Plain, unanalysed string type. Used by the permalink, category and
         source_site fields, which previously referenced an undeclared type. -->
    <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>

    <!-- Lowercased identifier field type: the whole value is kept as one token -->
    <fieldType name="lowercase" class="solr.TextField">
      <analyzer>
        <tokenizer class="solr.KeywordTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>

    <!-- General text field type: stop words, word-delimiter splitting,
         lowercasing and English stemming (terms in protwords.txt are not stemmed) -->
    <fieldType name="text" class="solr.TextField" stored="true">
      <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="stopwords.txt"
                enablePositionIncrements="true"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1"
                generateNumberParts="1"
                catenateWords="1"
                catenateNumbers="1"
                catenateAll="0"
                splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
      </analyzer>
    </fieldType>

    <!-- Unstemmed text field, for unknown foreign text for example -->
    <fieldType name="text_unstemmed" class="solr.TextField" positionIncrementGap="100">
      <analyzer type="index">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="stopwords.txt"
                enablePositionIncrements="true"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1"
                generateNumberParts="1"
                catenateWords="1"
                catenateNumbers="1"
                catenateAll="0"
                splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>

    <!-- Reversed-token text type backing the text_rev field, which previously
         referenced an undeclared type. Tokens are additionally indexed reversed
         so that leading-wildcard queries can be answered efficiently; the
         reversal filter is applied at index time only. -->
    <fieldType name="text_rev" class="solr.TextField" positionIncrementGap="100">
      <analyzer type="index">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="stopwords.txt"
                enablePositionIncrements="true"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1"
                generateNumberParts="1"
                catenateWords="1"
                catenateNumbers="1"
                catenateAll="0"
                splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.ReversedWildcardFilterFactory"
                withOriginal="true"
                maxPosAsterisk="3"
                maxPosQuestion="2"
                maxFractionAsterisk="0.33"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="stopwords.txt"
                enablePositionIncrements="true"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1"
                generateNumberParts="1"
                catenateWords="1"
                catenateNumbers="1"
                catenateAll="0"
                splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>

    <!-- Phonetics text field: tokens are replaced by their Double Metaphone codes -->
    <fieldType name="phonetic" stored="false" indexed="true" class="solr.TextField">
      <analyzer>
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
      </analyzer>
    </fieldType>

    <!-- Spelling: synonyms are expanded at query time only -->
    <fieldType name="textSpell" class="solr.TextField" positionIncrementGap="100" omitNorms="true">
      <analyzer type="index">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StandardFilterFactory"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StandardFilterFactory"/>
      </analyzer>
    </fieldType>

    <!-- Trie based Date field for faster range searching -->
    <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>

    <!-- Trie based int -->
    <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>

    <!-- Neither indexed nor stored: used by the ignored_* dynamic field below -->
    <fieldType name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField"/>
  </types>

  <fields>
    <field name="permalink" type="string" indexed="true" stored="true" required="true"/>
    <field name="category" type="string" indexed="true" stored="true" multiValued="true"/>
    <field name="tag" type="lowercase" indexed="true" stored="true" multiValued="true"/>
    <field name="title" type="text" indexed="true" stored="true" required="true" termVector="true"/>
    <field name="lead_para" type="text" indexed="true" stored="true"/>
    <field name="image_text" type="text" indexed="true" stored="true"/>
    <field name="body" type="text" indexed="true" stored="true" required="true" termVector="true"/>
    <field name="author" type="lowercase" indexed="true" stored="false" required="true" multiValued="true"/>
    <field name="date" type="tdate" indexed="true" stored="true" multiValued="true"/>
    <!-- Aggregate fields, filled only through the copyField rules below -->
    <field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
    <field name="title_sort" type="lowercase" indexed="true" stored="false"/>
    <field name="text_rev" type="text_rev" indexed="true" stored="false" multiValued="true"/>
    <field name="phonetic" type="phonetic" indexed="true" stored="false" multiValued="true"/>
    <field name="source_site" type="string" indexed="true" stored="true" required="true"/>
    <field name="comment" type="text_unstemmed" indexed="true" stored="false" multiValued="true"/>
    <!--field name="location" type="latlon" indexed="true" stored="true" /-->
    <field name="address" type="text" indexed="true" stored="false"/>
    <field name="price" type="tint" indexed="true" stored="false"/>

    <!-- Dynamic Fields -->
    <dynamicField name="*_tus" type="text_unstemmed" indexed="true" stored="true"/>
    <dynamicField name="ignored_*" type="ignored"/>
  </fields>

  <!-- Copy Fields -->
  <!-- Title sort field -->
  <copyField source="title" dest="title_sort"/>

  <!-- Aggregate the searchable fields into the catch-all text field so the
       general text analysis applies to all of them -->
  <copyField source="permalink" dest="text"/>
  <copyField source="category" dest="text"/>
  <copyField source="title" dest="text"/>
  <copyField source="lead_para" dest="text"/>
  <copyField source="body" dest="text"/>
  <copyField source="author" dest="text"/>
  <copyField source="address" dest="text"/>

  <!-- Copy Text to Text_Rev -->
  <copyField source="permalink" dest="text_rev"/>
  <copyField source="category" dest="text_rev"/>
  <copyField source="title" dest="text_rev"/>
  <copyField source="lead_para" dest="text_rev"/>
  <copyField source="body" dest="text_rev"/>
  <copyField source="author" dest="text_rev"/>

  <!-- Copy Text to Phonetic -->
  <copyField source="category" dest="phonetic"/>
  <copyField source="title" dest="phonetic"/>
  <copyField source="lead_para" dest="phonetic"/>
  <copyField source="body" dest="phonetic"/>
  <copyField source="author" dest="phonetic"/>

  <!-- ID -->
  <uniqueKey>permalink</uniqueKey>

  <!-- Queries without an explicit field search the aggregate text field;
       terms are OR-ed together unless the query says otherwise -->
  <defaultSearchField>text</defaultSearchField>
  <solrQueryParser defaultOperator="OR"/>

</schema>
6 changes: 6 additions & 0 deletions conf/solr.xml
@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<!--
  Multi-core container configuration.
  Registers a single core called "main" (instance directory "main", flagged as
  the default core) and exposes core administration under /admin/cores.
  Per the Solr documentation, persistent="true" lets runtime core changes be
  written back to this file, and sharedLib names a library directory shared by
  all cores.
-->
<solr persistent="true"
      sharedLib="/var/solr/lib">
  <cores adminPath="/admin/cores">
    <core name="main"
          instanceDir="main"
          default="true"/>
  </cores>
</solr>

0 comments on commit 79348ee

Please sign in to comment.