Skip to content

Commit

Permalink
Item10657:Item11402:
Browse files Browse the repository at this point in the history
   * using ClassicTokenizerFactory and Filter now instead of plain WhitespaceTokenizerFactory
   * added support for UIMA-based information extraction to the default Solr processing chain
   * improved spellchecking params to return better spelling corrections even for smaller text corpora
   * added full German translation (needs http://foswiki.org/Development/Enhancei18nArchitecture to be resolved)
   * reimplemented date faceting by means of the newer more generic range facets
   * fixed search for some queries involving phrases
   * fixed indexing of multi-valued formfields by actually using the Form API properly
   * fixed SolrPlugin crashing on save when {SupportedLanguages} wasn't configured
   



git-svn-id: http://svn.foswiki.org/trunk/SolrPlugin@13570 0b4bb1d4-4e5a-0410-9cc4-b2b747904278
  • Loading branch information
MichaelDaum authored and MichaelDaum committed Jan 9, 2012
1 parent 970b61e commit 606e823
Show file tree
Hide file tree
Showing 11 changed files with 858 additions and 141 deletions.
18 changes: 9 additions & 9 deletions data/System/SolrSearchBaseTemplate.txt
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,7 @@ jQuery(function($) {
%SOLRFORMAT{"solr1"
header="<div class='solrNumResults'>
$percntIF{\"$count>0\"
then=\"$percntMAKETEXT{\"Results [_1] of <strong>[_2]</strong>\" args=\"$from - $to, $count\"}$percnt\"
then=\"<nop>$percntMAKETEXT{\"Results [_1] of <strong>[_2]</strong>\" args=\"$from - $to, $count\"}$percnt\"
else=\"<span class='foswikiAlert'>%MAKETEXT{"nothing found"}%</span>\"
}$percnt
</div>"
Expand Down Expand Up @@ -799,7 +799,7 @@ jQuery(function($) {
format="\"$1\""
footer="]}"
separator=", "
}%' name='search' size='60' value='%URLPARAM{"search" encode="entity"}%' style='margin-right:8px;'/>
}%' name='search' size='50' value='%URLPARAM{"search" encode="entity"}%' style='margin-right:8px;'/>
</td>
<td>
%BUTTON{"%MAKETEXT{"Search"}%" type="submit" title="%MAKETEXT{"submit the query"}%" icon="find" class="solrSubmitButton"}%
Expand Down Expand Up @@ -839,7 +839,7 @@ jQuery(function($) {
<select class='foswikiSelect' id='solrSorting'>
<option class='foswikiOption' value='score desc' %IF{"'%URLPARAM{"sort" default="%TMPL:P{"solr::defaultsort"}%"}%'='sort desc'" then="selected"}%>%MAKETEXT{"Relevance"}%</option>
<option class='foswikiOption' value='date desc' %IF{"'%URLPARAM{"sort" default="%TMPL:P{"solr::defaultsort"}%"}%'='date desc'" then="selected"}%>%MAKETEXT{"Last changed"}%</option>
<option class='foswikiOption' value='topic_sort asc' %IF{"'%URLPARAM{"sort" default="%TMPL:P{"solr::defaultsort"}%"}%'='topic_sort asc'" then="selected"}%>%MAKETEXT{"Name"}%</option>
<option class='foswikiOption' value='title_sort asc' %IF{"'%URLPARAM{"sort" default="%TMPL:P{"solr::defaultsort"}%"}%'='title_sort asc'" then="selected"}%>%MAKETEXT{"Name"}%</option>
</select>
</td>
</tr>
Expand Down Expand Up @@ -926,7 +926,7 @@ jQuery(function($) {
%TMPL:DEF{"solr::format::list::topic"}%<!-- -->
<div class='solrSearchHit solrTopicHit'>
<h3 style='background-image:url(%icon%)' title='%type%'>
[[%web%.%topic%][%topic%]]
[[%web%.%topic%]]
<span class='foswikiGrayText'>
%WEBLINK{"%web%"
format="<a class='webLink' href='$url'>%MAKETEXT{"in [_1]" args="$title"}%</a>"
Expand Down Expand Up @@ -969,7 +969,7 @@ jQuery(function($) {
<div class='solrSearchHit solrAttachmentHit'>
<h3 style='background-image:url(%icon%)' title='%type%'>
[[%url%][%name%]]
<span class='foswikiGrayText'>%MAKETEXT{"in [_1]" args="[[%web%.%topic%][%topic%]]"}%</span>
<span class='foswikiGrayText'>%MAKETEXT{"in [_1]" args="[[%web%.%topic%]]"}%</span>
%IF{"'%comment%'!=''" then="<div class='foswikiGrayText'>%comment%</div>" else="<!-- -->"}%
</h3>
<div class='solrHilite'>
Expand Down Expand Up @@ -1007,7 +1007,7 @@ jQuery(function($) {
</div>
<h3>
[[%url%][%name%]]
<span class='foswikiGrayText'>%MAKETEXT{"in [_1]" args="[[%web%.%topic%][%topic%]]"}%</span>
<span class='foswikiGrayText'>%MAKETEXT{"in [_1]" args="[[%web%.%topic%]]"}%</span>
%IF{"'%comment%'!=''" then="<div class='foswikiGrayText'>%comment%</div>" else="<!-- -->"}%
</h3>
<div class='solrHilite'>
Expand All @@ -1030,7 +1030,7 @@ jQuery(function($) {
<div class='solrSearchHit solrCommentHot'>
<h3 style='background-image:url(%icon%)' type='%type%'>
[[%url%][%title%]]
<span class='foswikiGrayText'>%MAKETEXT{"in [_1]" args="[[%web%.%topic%][%topic%]]"}%</span>
<span class='foswikiGrayText'>%MAKETEXT{"in [_1]" args="[[%web%.%topic%]]"}%</span>
</h3>
<div class='solrHilite'>
<literal>%hilite%</literal>
Expand Down Expand Up @@ -1073,7 +1073,7 @@ jQuery(function($) {
<h3>
[[%url%][%SPACEOUT{%title%}%]]
</h3>
<div class='foswikiGrayText'>in&nbsp;[[%web%.%topic%][%topic%]]</div>
<div class='foswikiGrayText'>in&nbsp;[[%web%.%topic%]]</div>
<div class='solrRevision'>
%date%, %USERINFO{"%author%" format="[[$wikiusername][$wikiname]]"}%
</div>
Expand All @@ -1099,7 +1099,7 @@ jQuery(function($) {
}%
</div>
<div class='solrImageCaption'>
[[%web%.%topic%][%topic%]]
[[%web%.%topic%]]
</div>
</div>
<!-- -->%TMPL:END%
Expand Down
13 changes: 10 additions & 3 deletions lib/Foswiki/Plugins/SolrPlugin/Base.pm
Original file line number Diff line number Diff line change
Expand Up @@ -275,9 +275,7 @@ sub inlineError {
sub fromUtf8 {
my ($this, $string) = @_;

my $charset = $Foswiki::cfg{Site}{CharSet};

return Encode::encode($charset, $string);
return Encode::decode_utf8($string);
}

##############################################################################
Expand All @@ -287,11 +285,20 @@ sub toUtf8 {
my $charset = $Foswiki::cfg{Site}{CharSet};
return $string if $charset =~ /^utf-?8$/i;


my $octets = Encode::decode($charset, $string);
$octets = Encode::encode('utf-8', $octets);
return $octets;
}

##############################################################################
# Encodes $string with Encode::encode, using the encoding named in
# $Foswiki::cfg{Site}{CharSet}, and returns the encoded result.
sub toSiteCharSet {
  my $this = shift;
  my $string = shift;

  my $siteCharSet = $Foswiki::cfg{Site}{CharSet};
  my $encoded = Encode::encode($siteCharSet, $string);

  return $encoded;
}


##############################################################################
sub entityDecode {
my ($this, $text) = @_;
Expand Down
4 changes: 2 additions & 2 deletions lib/Foswiki/Plugins/SolrPlugin/Config.spec
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,12 @@ $Foswiki::cfg{SolrPlugin}{AutoStartDaemon} = 0;
# **COMMAND**
# Command used to start the solr instance. Note that <code>solrstart</code> is a shell script that
# wraps the actual startup routine
$Foswiki::cfg{SolrPlugin}{SolrStartCmd} = $Foswiki::cfg{ToolsDir}.'/solrstart %SOLRHOME|F%';
$Foswiki::cfg{SolrPlugin}{SolrStartCmd} = '$Foswiki::cfg{ToolsDir}/solrstart %SOLRHOME|F%';

# **PATH**
# Path to the directory containing the <code>start.jar</code> file. That's where the jetty engine is
# located and where solr puts its data further down the directory structure
$Foswiki::cfg{SolrPlugin}{SolrHome} = '/home/www-data/foswiki/solr';
$Foswiki::cfg{SolrPlugin}{SolrHome} = '';

# **STRING**
# Default collection in which to store foswiki content (including topic text as well as all attachments)
Expand Down
27 changes: 19 additions & 8 deletions lib/Foswiki/Plugins/SolrPlugin/Index.pm
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ sub index {
# mode to run in parallel

try {
$this->lock();
# $this->lock();

my $query = Foswiki::Func::getCgiQuery();
my $web = $query->param('web') || 'all';
Expand All @@ -108,7 +108,7 @@ sub index {
}

finally {
$this->unlock();
# $this->unlock();
}
}

Expand Down Expand Up @@ -281,8 +281,12 @@ sub indexTopic {

# parent data
my $parent = $meta->getParent();
$parent =~ s/\//\./g;
$this->_addLink(\%outgoingLinks, $web, $topic, undef, $parent);
my $parentWeb;
my $parentTopic;
if ($parent) {
($parentWeb, $parentTopic) = $this->normalizeWebTopicName($web, $parent);
$this->_addLink(\%outgoingLinks, $web, $topic, $parentWeb, $parentTopic);
}

# get all outgoing links from topic text
$this->extractOutgoingLinks($web, $topic, $origText, \%outgoingLinks);
Expand Down Expand Up @@ -358,9 +362,10 @@ sub indexTopic {
createdate => $createDate,
type => 'topic',
# topic specific
parent => $parent,
);

$doc->add_fields(parent => "$parentWeb.$parentTopic") if $parent;

# tag and analyze language
my $contentLanguage = $this->getContentLanguage($web, $topic);
if (defined $contentLanguage) {
Expand Down Expand Up @@ -396,6 +401,7 @@ sub indexTopic {
my $attrs = $fieldDef->{attributes}; # TODO: check for Facet
my $name = $fieldDef->{name};
my $type = $fieldDef->{type};
my $isMultiValued = $fieldDef->isMultiValued;
my $field = $meta->get('FIELD', $name);
next unless $field;

Expand Down Expand Up @@ -428,8 +434,7 @@ sub indexTopic {
}

# multi-valued types
elsif ($type =~ /^(checkbox|select|radio|textboxlist)/ ||
$name =~ /TopicType/) { # TODO: make this configurable
elsif ($isMultiValued || $name =~ /TopicType/) { # TODO: make this configurable

$doc->add_fields(
'field_'.$name.'_lst' => [split(/\s*,\s*/, $value)]
Expand Down Expand Up @@ -551,6 +556,11 @@ sub indexTopic {
sub getContentLanguage {
my ($this, $web, $topic) = @_;

unless (defined $Foswiki::cfg{SolrPlugin}{SupportedLanguages}) {
Foswiki::Func::writeWarning("{SolrPlugin}{SupportedLanguages} not defined. Please run configure.");
return;
}

my $donePush = 0;
if ($web ne $this->{session}{webName} || $topic ne $this->{session}{topicName}) {
Foswiki::Func::pushTopicContext($web, $topic);
Expand Down Expand Up @@ -578,7 +588,7 @@ sub extractOutgoingLinks {
$text = $this->takeOutBlocks($text, 'noautolink', $removed);

# normal wikiwords
$text =~ s#$STARTWW(?:($Foswiki::regex{webNameRegex})\.)?($Foswiki::regex{wikiWordRegex}|$Foswiki::regex{abbrevRegex})#$this->_addLink($outgoingLinks, $web, $topic, $1, $2)#gexom;
$text =~ s#(?:($Foswiki::regex{webNameRegex})\.)?($Foswiki::regex{wikiWordRegex}|$Foswiki::regex{abbrevRegex})#$this->_addLink($outgoingLinks, $web, $topic, $1, $2)#gexom;

# square brackets
$text =~ s#\[\[([^\]\[\n]+)\]\]#$this->_addLink($outgoingLinks, $web, $topic, undef, $1)#ge;
Expand All @@ -595,6 +605,7 @@ sub _addLink {

my $link = $web.".".$topic;
return '' if $link =~ /^http|ftp/; # don't index external links
return '' unless Foswiki::Func::topicExists($web, $topic);

$link =~ s/\%SCRIPTURL(PATH)?{.*?}\%\///g;
$link =~ s/%WEB%/$baseWeb/g;
Expand Down
2 changes: 2 additions & 0 deletions lib/Foswiki/Plugins/SolrPlugin/MANIFEST
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
locale/SolrPlugin/de.po 0644
locale/SolrPlugin/Foswiki.pot 0644
bin/solrsearch 0755
data/System/NatSkinWebChangesViewTemplate.txt 0644
data/System/PatternSkinSolrSearchViewTemplate.txt 0644
Expand Down
41 changes: 22 additions & 19 deletions lib/Foswiki/Plugins/SolrPlugin/Search.pm
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ sub handleSOLRSEARCH {
}
}


return $this->formatResponse($params, $theWeb, $theTopic, $response);
}

Expand Down Expand Up @@ -331,26 +330,27 @@ HERE

# date facets
elsif ($this->isDateField($facetID)) {
my $facet = $facets->{facet_dates}{$facetLabel};
my $facet = $facets->{facet_ranges}{$facetLabel};
next unless $facet;
$facet = $facet->{counts};

# count rows
my $len = 0;
foreach my $key (keys %$facet) { # SMELL: sorting lost in perl interface
next if $key =~ /^(gap|end|before)$/;
my $count = $facet->{$key};
for(my $i = 0; $i < scalar(@$facet); $i+=2) {
my $key = $facet->[$i];
my $count = $facet->[$i+1];
next unless $count;
next if $theFacetExclude && $key =~ /$theFacetExclude/;
next if $theFacetInclude && $key !~ /$theFacetInclude/;
$len++;
}

unless ($hideSingleFacets{$facetID} && $len <= 1) {
foreach my $key (reverse sort keys %$facet) { # SMELL: sorting lost in perl interface
my $count = $facet->{$key};
for(my $i = 0; $i < scalar(@$facet); $i+=2) {
my $key = $facet->[$i];
my $count = $facet->[$i+1];
next unless $count;
$key = $this->fromUtf8($key);
next if $key =~ /^(gap|end|before)$/;
next if $theFacetExclude && $key =~ /$theFacetExclude/;
next if $theFacetInclude && $key !~ /$theFacetInclude/;
$facetTotal += $count;
Expand Down Expand Up @@ -466,7 +466,7 @@ HERE

#$this->log("result=$result");

return $result;
return $this->toSiteCharSet($result);
}

##############################################################################
Expand Down Expand Up @@ -611,7 +611,6 @@ sub restSOLRAUTOCOMPLETE {
my $thePrefix;
my $foundPrefix = 0;


my $wikiUser = Foswiki::Func::getWikiName();
my @filter = $this->parseFilter($theFilter);
push(@filter, "(access_granted:$wikiUser OR access_granted:all)")
Expand All @@ -623,6 +622,8 @@ sub restSOLRAUTOCOMPLETE {
$theQuery =~ s/([$Foswiki::regex{lowerAlpha}])([$Foswiki::regex{upperAlpha}$Foswiki::regex{numeric}]+)/$1 $2/go;
$theQuery =~ s/([$Foswiki::regex{numeric}])([$Foswiki::regex{upperAlpha}])/$1 $2/go;

# work around solr not doing case-insensitive facet queries
$theQuery = lc($theQuery);

if ($theQuery =~ /^(.+) (.+?)$/) {
$theQuery = $1;
Expand Down Expand Up @@ -650,7 +651,7 @@ sub restSOLRAUTOCOMPLETE {

if ($theRaw) {
my $result = $response->raw_response->content()."\n\n";
$result = $this->fromUtf8($result);
#$result = $this->fromUtf8($result);
return $result;
}
$this->log($response->raw_response->content()) if DEBUG;
Expand All @@ -659,7 +660,7 @@ sub restSOLRAUTOCOMPLETE {
return '' unless $facets;

# format autocompletion
$theQuery = $this->fromUtf8($theQuery);
#$theQuery = $this->fromUtf8($theQuery);

my @result = ();
foreach my $facet (keys %{$facets->{facet_fields}}) {
Expand Down Expand Up @@ -1003,7 +1004,7 @@ sub doSearch {
# gather different types of filters
foreach my $item (@tmpfilter) {

if ($item =~ /^(.*):"?(.*?)"?$/) {
if ($item =~ /^(.*):(.*?)$/) {
my $facetName = $1;
my $facetValue = $2;

Expand Down Expand Up @@ -1199,12 +1200,14 @@ sub getFacetParams {
}

# date facets params
# TODO: provide general interface to range facets
if ($dateFacets) {
$solrParams->{"facet.date"} = $dateFacets;
$solrParams->{"facet.date.start"} = $params->{facetdatestart} || 'NOW/DAY-7DAYS';
$solrParams->{"facet.date.end"} = $params->{facetdateend} || 'NOW/DAY+1DAYS';
$solrParams->{"facet.date.gap"} = $params->{facetdategap} || '+1DAY';
$solrParams->{"facet.date.other"} = $params->{facetdateother} || 'before';
$solrParams->{"facet.range"} = $dateFacets;
$solrParams->{"facet.range.start"} = $params->{facetdatestart} || 'NOW/DAY-7DAYS';
$solrParams->{"facet.range.end"} = $params->{facetdateend} || 'NOW/DAY+1DAYS';
$solrParams->{"facet.range.gap"} = $params->{facetdategap} || '+1DAY';
$solrParams->{"facet.range.other"} = $params->{facetdateother} || 'before';
$solrParams->{"facet.range.hardend"} = 'true'; # TODO
}

$solrParams->{"facet.query"} = $queryFacets if $queryFacets;
Expand Down Expand Up @@ -1553,7 +1556,7 @@ sub parseFilter {
}
$item =~ s/\$field/$field/g;
$item =~ s/\$value/$value/g;
#print STDERR "...adding=$item\n";
#print STDERR "... adding=$item\n";
push(@filter, $item);
}
}
Expand Down
Loading

0 comments on commit 606e823

Please sign in to comment.