Skip to content

Commit

Permalink
Item14815: add a field macro to the solr index
Browse files Browse the repository at this point in the history
Item14806: Solr too much unrelated results

Item14807: Solr language detection does not work

i.e.:

   * improved language detection in solr
   * added fields =name_std= and =name_search= for better searchability of attachments
   * added field =macro= to capture use of wiki macros
   * don't display wiki markup in search result summaries
   * reduce amount of presumably unrelated search results
  • Loading branch information
MichaelDaum committed Jan 31, 2019
1 parent abcadcd commit f14b99d
Show file tree
Hide file tree
Showing 67 changed files with 1,748 additions and 1,308 deletions.
3 changes: 2 additions & 1 deletion data/System/PatternSkinSolrSearchViewTemplate.txt
@@ -1,7 +1,8 @@
%META:TOPICINFO{author="ProjectContributor" comment="" date="1505725909" format="1.1" version="1"}%
%META:TOPICINFO{author="ProjectContributor" comment="" date="1548932840" format="1.1" version="1"}%
%META:TOPICPARENT{name="SolrSearchBase"}%
%{<verbatim class="tml">}%
%TMPL:INCLUDE{"SolrSearchBase"}%
%TMPL:DEF{"bodyclassname"}%patternSolrPage%TMPL:END%
%{</verbatim>}%

%{<verbatim class="tml">}%
Expand Down
3 changes: 2 additions & 1 deletion data/System/PatternSkinWebChangesViewTemplate.txt
@@ -1,6 +1,7 @@
%META:TOPICINFO{author="ProjectContributor" comment="" date="1505725909" format="1.1" version="1"}%
%META:TOPICINFO{author="ProjectContributor" comment="" date="1548932840" format="1.1" version="1"}%
%{<verbatim class="tml">}%
%TMPL:INCLUDE{"WebChangesView"}%
%TMPL:DEF{"bodyclassname"}%patternSolrPage%TMPL:END%
%{</verbatim>}%

%{<verbatim class="tml">}%
Expand Down
26 changes: 17 additions & 9 deletions data/System/SolrPlugin.txt
@@ -1,4 +1,4 @@
%META:TOPICINFO{author="ProjectContributor" comment="" date="1539266447" format="1.1" version="1"}%
%META:TOPICINFO{author="ProjectContributor" comment="" date="1548932840" format="1.1" version="1"}%
---+ Solr Plugin
%FORMFIELD{"Description"}%

Expand Down Expand Up @@ -238,7 +238,7 @@ The service is configured by placing below configuration script at =/etc/iwatch/
<watchlist>
<title>Foswiki</title>
<contactpoint email="root@localhost" name="Administrator"/>
<path type="recursive" filter=".*\.txt$" alert="off" syslog="on" exec="su -l <httpd-user> -c '<foswiki-dir>/tools/solrjob'"><foswiki-dir>/data</path>
<path type="recursive" filter=".*\.txt$" alert="off" syslog="on" exec="su <httpd-user> -c '<foswiki-dir>/tools/solrjob'"><foswiki-dir>/data</path>
<path type="regexception">\.tmp|\.sw\w|\.svn|\.lease|\.lock|,$|\.changes|,v|^_[0-9]|^log|^Temporary|^UnitTestCheck</path>
</watchlist>
</config>
Expand All @@ -257,12 +257,12 @@ For !VirtualHostingContrib use:
<contactpoint email="root@localhost" name="Administrator"/>

<!-- watch directories shared among all virtual domains -->
<path type="recursive" filter=".*\.txt$" alert="off" syslog="on" exec="sudo -u <httpd-user> <foswiki-dir>/tools/solrjob --host all"><foswiki-dir>/data/System</path>
<!-- <path type="recursive" filter=".*\.txt$" alert="off" syslog="on" exec="sudo -u <httpd-user> <foswiki-dir>/tools/solrjob --host all"><foswiki-dir>/data/Applications</path> -->
<path type="recursive" filter=".*\.txt$" alert="off" syslog="on" exec="su <httpd-user> -c '<foswiki-dir>/tools/solrjob --host all'"><foswiki-dir>/data/System</path>
<!-- <path type="recursive" filter=".*\.txt$" alert="off" syslog="on" exec="su <httpd-user> -c '<foswiki-dir>/tools/solrjob --host all'"><foswiki-dir>/data/Applications</path> -->

<!-- watch each virtual domain for changes -->
<path type="recursive" filter=".*\.txt$" alert="off" syslog="on" exec="su -l <httpd-user> -c '<foswiki-dir>/tools/solrjob --host <domain1>'"><vhosts-dir>/<domain1>/data</path>
<!-- <path type="recursive" filter=".*\.txt$" alert="off" syslog="on" exec="su -l <httpd-user> -c '<foswiki-dir>/tools/solrjob --host <domain2>'"><vhosts-dir>/<domain2>/data</path> -->
<path type="recursive" filter=".*\.txt$" alert="off" syslog="on" exec="su <httpd-user> -c '<foswiki-dir>/tools/solrjob --host <domain1>'"><vhosts-dir>/<domain1>/data</path>
<!-- <path type="recursive" filter=".*\.txt$" alert="off" syslog="on" exec="su <httpd-user> -c '<foswiki-dir>/tools/solrjob --host <domain2>'"><vhosts-dir>/<domain2>/data</path> -->

<path type="regexception">\.tmp|\.sw\w|\.svn|\.lease|\.lock|,$|\.changes|,v|^_[0-9]|^log|^Temporary|^UnitTestCheck</path>
</watchlist>
Expand Down Expand Up @@ -721,6 +721,7 @@ useful for spatial search.
| icon | string | | stored | icon to identify the rendition for this document |
| id | string | | stored | unique identifier for each document; this is the _external_ id usable in applications; there's an internal solr document id not related to this field |
| language | string | | stored | language of the current document; this may be specified explicitly using the =CONTENT_LANGUAGE= preference, or set to "detect" to let the solr update chain detect the language automatically |
| macro | string | multivalued | | list of wiki macros being used in this topic |
| name | string | | stored | filename of an attachment |
| outgoing | string | multivalued | stored | list of all outgoing links; this information is used to detect backlinks |
| parent | string | | stored | parent topic of the current topic |
Expand Down Expand Up @@ -830,6 +831,8 @@ specific search applications. The destination fields are then analysed using the
| name | charnorm |
| name | phonetic |
| name | spell |
| name | name_std |
| name | name_search |
| tag | catchall |
| tag | charnorm |
| tag | phonetic |
Expand Down Expand Up @@ -882,6 +885,11 @@ specific search applications. The destination fields are then analysed using the

---++ Change History
%TABLE{columnwidths="7em" tablewidth="100%"}%
| 31 Jan 2019: | reduce amount of presumably unrelated search results; \
improved language detection in solr; \
added fields =name_std= and =name_search= for better searchability of attachments; \
don't display wiki markup in search result summaries; \
added field =macro= to capture use of wiki macros |
| 10 Oct 2018: | mime types are now multivalued, e.g. an image is now tagged type: =["gif", "image", "attachment"]=; \
better support for attachments listed in the autosuggest drop down box; \
the rudimentary type mapping is now based on the system mime types table and not using a typemap file in solr's config anymore; \
Expand Down Expand Up @@ -1024,9 +1032,9 @@ specific search applications. The destination fields are then analysed using the
%META:FIELD{name="Release" title="Release" value="%25$RELEASE%25"}%
%META:FIELD{name="Description" title="Description" value="%25$SHORTDESCRIPTION%25"}%
%META:FIELD{name="Repository" title="Repository" value="https://github.com/foswiki/SolrPlugin"}%
%META:FIELD{name="Copyright" title="Copyright" value="&copy; 2009-2018, Michael Daum http://michaeldaumconsulting.com"}%
%META:FIELD{name="Copyright" title="Copyright" value="&copy; 2009-2019, Michael Daum http://michaeldaumconsulting.com"}%
%META:FIELD{name="License" title="License" value="GPL ([[http://www.gnu.org/copyleft/gpl.html][GNU General Public License]])"}%
%META:FIELD{name="Home" title="Home" value="Foswiki:Extensions/SolrPlugin"}%
%META:FIELD{name="Support" title="Support" value="Foswiki:Support/SolrPlugin"}%
%META:FILEATTACHMENT{name="SolrPluginSnap1.png" attr="" comment="" date="1539266447" size="93552" user="ProjectContributor" version="1"}%
%META:FILEATTACHMENT{name="SolrPluginSnap2.png" attr="" comment="" date="1539266447" size="158013" user="ProjectContributor" version="1"}%
%META:FILEATTACHMENT{name="SolrPluginSnap1.png" attr="" comment="" date="1548932840" size="93552" user="ProjectContributor" version="1"}%
%META:FILEATTACHMENT{name="SolrPluginSnap2.png" attr="" comment="" date="1548932840" size="158013" user="ProjectContributor" version="1"}%
11 changes: 7 additions & 4 deletions data/System/SolrSearchBaseTemplate.txt
@@ -1,4 +1,4 @@
%META:TOPICINFO{author="ProjectContributor" comment="" date="1539265770" format="1.1" version="1"}%
%META:TOPICINFO{author="ProjectContributor" comment="" date="1548932840" format="1.1" version="1"}%
%META:TOPICPARENT{name="SolrPlugin"}%
%{<verbatim class="tml">}%
%{ ###########################################################################
Expand All @@ -13,13 +13,15 @@
}%
%TMPL:DEF{"solr::defaultweb"}%all%TMPL:END%
%TMPL:DEF{"solr::instantsearch"}%false%TMPL:END%
%TMPL:DEF{"solr::defaultexactsearch"}%false%TMPL:END%
%TMPL:DEF{"solr::defaultrows"}%10%TMPL:END%
%TMPL:DEF{"solr::defaultquerytype"}%edismax%TMPL:END%
%TMPL:DEF{"solr::defaultsort"}%score desc%TMPL:END%
%TMPL:DEF{"solr::dateformat"}%dddd, Do MMMM YYYY, HH:mm%TMPL:END%
%TMPL:DEF{"solr::fields"}%%TMPL:END%
%TMPL:DEF{"solr::queryfields"}%%TMPL:END%
%TMPL:DEF{"solr::extrafilter"}%<literal>%SOLR_EXTRAFILTER{default=""}%</literal> %IF{ "not $USERNAME ingroup 'AdminGroup'" then="-web_search:Applications" }%%TMPL:END%
%TMPL:DEF{"solr::exactsearch"}%title_std^7 catchall%TMPL:END%
%TMPL:DEF{"solr::facet::include::web"}%%TMPL:END%
%TMPL:DEF{"solr::facet::exclude::web"}%^(_.*%IF{ "not $USERNAME ingroup 'AdminGroup'" then="|Application.*" }%)$%TMPL:END%
%{</verbatim>}%
Expand All @@ -29,7 +31,7 @@
content: main definition that puts together all pieces
}%
%TMPL:DEF{"content"}%<noautolink>
<div id='solrSearch' style='display:none' data-more-fields='%TMPL:P{"solr::fields"}%' %FORMATLIST{"%TMPL:P{"solr::queryfields"}%" header="data-query-fields='" footer="'" separator=" " split=" "}% data-solr-url='%SCRIPTURL{"rest"}%/SolrPlugin/proxy' data-solr-params='{"topic": "%WEB%.%TOPIC%", "qt":"%TMPL:P{"solr::defaultquerytype"}%", "rows": %TMPL:P{"solr::defaultrows"}%}' data-extra-filter='%TMPL:P{"solr::extrafilter"}%'>
<div id='solrSearch' style='display:none' data-more-fields='%TMPL:P{"solr::fields"}%' %FORMATLIST{"%TMPL:P{"solr::queryfields"}%" header="data-query-fields='" footer="'" separator=" " split=" "}% data-solr-url='%SCRIPTURL{"rest"}%/SolrPlugin/proxy' data-solr-params='{"topic": "%WEB%.%TOPIC%", "qt":"%TMPL:P{"solr::defaultquerytype"}%", "rows": %TMPL:P{"solr::defaultrows"}% %IF{"'%TMPL:P{"solr::defaultexactsearch"}%'='true'" then=", \"qf\": \"$percntTMPL:P{\"solr::exactsearch\"}$percnt\""}%}' data-extra-filter='%TMPL:P{"solr::extrafilter"}%' >
%TMPL:P{"solr::title"}%%{}%
%TMPL:P{"solr::header"}%%{}%
%TMPL:P{"solr::hits"}%%{}%
Expand Down Expand Up @@ -68,7 +70,7 @@
%FLEXWEBLIST{
webs="public"
exclude="Trash.*"
format="\"$web\": \"$title\""
format="\"$web\": \"$percntENCODE{\"$title\" type=\"quote\"}$percnt\""
subheader=",$n"
separator=",$n"
}%,
Expand Down Expand Up @@ -819,7 +821,7 @@
solr::input::checkboxes::exactsearch - display an "Exact Search" checkbox
}%
%TMPL:DEF{"solr::input::checkboxes::exactsearch"}%%{}%
<span class='solrToggle' data-field='qf' data-value='title_std^7 catchall' data-title='%MAKETEXT{"Exact search"}%'></span>
<span class='solrToggle' data-field='qf' data-value='%TMPL:P{"solr::exactsearch"}%' data-title='%MAKETEXT{"Exact search"}%'></span>
%{}%%TMPL:END%
%{</verbatim>}%

Expand Down Expand Up @@ -851,4 +853,5 @@
</div>
%{</verbatim>}%

%META:PREFERENCE{name="ALLOWTOPICVIEW" title="ALLOWTOPICVIEW" type="Set" value="*"}%
%META:PREFERENCE{name="PERMSET_VIEW" title="PERMSET_VIEW" type="Local" value="everybody"}%
6 changes: 3 additions & 3 deletions lib/Foswiki/Plugins/SolrPlugin.pm
@@ -1,6 +1,6 @@
# Plugin for Foswiki - The Free and Open Source Wiki, http://foswiki.org/
#
# Copyright (C) 2009-2018 Michael Daum http://michaeldaumconsulting.com
# Copyright (C) 2009-2019 Michael Daum http://michaeldaumconsulting.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
Expand Down Expand Up @@ -30,8 +30,8 @@ BEGIN {
}
}

our $VERSION = '7.20';
our $RELEASE = '11 Oct 2018';
our $VERSION = '7.30';
our $RELEASE = '31 Jan 2019';
our $SHORTDESCRIPTION = 'Enterprise Search Engine for Foswiki based on Solr';
our $NO_PREFS_IN_TOPIC = 1;
our %searcher;
Expand Down
2 changes: 1 addition & 1 deletion lib/Foswiki/Plugins/SolrPlugin/Autosuggest.pm
@@ -1,6 +1,6 @@
# Plugin for Foswiki - The Free and Open Source Wiki, http://foswiki.org/
#
# Copyright (C) 2013-2018 Michael Daum http://michaeldaumconsulting.com
# Copyright (C) 2013-2019 Michael Daum http://michaeldaumconsulting.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
Expand Down
59 changes: 50 additions & 9 deletions lib/Foswiki/Plugins/SolrPlugin/Base.pm
@@ -1,6 +1,6 @@
# Plugin for Foswiki - The Free and Open Source Wiki, http://foswiki.org/
#
# Copyright (C) 2009-2018 Michael Daum http://michaeldaumconsulting.com
# Copyright (C) 2009-2019 Michael Daum http://michaeldaumconsulting.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
Expand Down Expand Up @@ -332,6 +332,14 @@ sub getTopicSummary {
$summary = $field->{value} if $field && $field->{value};
}

# unless ($summary) {
# $summary = $this->getSection($web, $topic, $text, "teaser");
# }
#
# unless ($summary) {
# $summary = $this->getSection($web, $topic, $text, "summary");
# }

return '' unless defined $summary;

$summary = $this->plainify($summary, $web, $topic);
Expand All @@ -340,6 +348,38 @@ sub getTopicSummary {
return $summary;
}

################################################################################
# Returns the text of a named section of a topic.
#
# Parameters:
#   $this  - object holding the per-topic section cache in $this->{_sections}
#   $web   - web name of the topic
#   $topic - topic name
#   $text  - (optional) topic text; when undefined and the topic's sections
#            are not cached yet, the text is read via Foswiki::Func::readTopic
#   $name  - name of the section to look up; a false value yields no result
#   $type  - (optional) when true, only a section whose {type} equals this
#            value is accepted
#
# Returns the text of the first section matching $name (and $type, if given),
# or nothing (empty list / undef) when no such section exists.
sub getSection {
  my ($this, $web, $topic, $text, $name, $type) = @_;

  return unless $name;

  # parsed sections are cached per "web::topic"
  my $key = $web . '::' . $topic;
  my $sections = $this->{_sections}{$key};

  unless (defined $sections) {

    # readTopic returns the list ($meta, $text): pick the text explicitly
    # instead of relying on what a scalar-context call happens to yield
    (undef, $text) = Foswiki::Func::readTopic($web, $topic) unless defined $text;

    my $ntext;
    ($ntext, $sections) = Foswiki::parseSections($text); # SMELL: parseSection should be part of Foswiki::Func

    # {start} and {end} are used as offsets into the text returned by
    # parseSections; keep each section's own text next to its record
    foreach my $s (@$sections) {
      $s->{text} = substr($ntext, $s->{start}, $s->{end} - $s->{start});
    }

    $this->{_sections}{$key} = $sections;
  }
  return unless defined $sections;

  foreach my $s (@$sections) {
    next if $s->{name} ne $name;
    next if $type && $s->{type} ne $type;
    return $s->{text};
  }

  return;
}

################################################################################
# wrapper around Foswiki::Func::getScriptUrlPath
# that really, _really_, __really__ returns a relative path even when
Expand Down Expand Up @@ -376,9 +416,10 @@ sub plainify {
$text =~ s/%TOPIC%/$topic/g;
$text =~ s/%WIKITOOLNAME%/$wtn/g;

# don't remove ALL macros, only some, todo: add some more
# $text =~ s/%$Foswiki::regex{tagNameRegex}({.*?})?%//g;
$text =~ s/%(?:STARTSECTION|BEGINSECTION|ENDSECTION|STOPSECTION|STARTINCLUDE|STOPINCLUDE|TOC|JQICON|FORMFIELD|CLEAR|SCRIPTURLPATH|SCRIPTURL|TWISTY|BUTTON)(?:\{.*?\})?%//g;
while ($text =~ s/((?:%|\$perce?nt)$Foswiki::regex{tagNameRegex}(?:\{.*?\})?(?:%|\$perce?nt))//gs) {
# nop
}
$text =~ s/["']?\}%["']?|["']?%\{["']?//g; # some leftsobers

# Format e-mail to add spam padding (HTML tags removed later)
$text =~ s/$STARTWW((mailto\:)?[a-zA-Z0-9-_.+]+@[a-zA-Z0-9-_.]+\.[a-zA-Z0-9-_]+)$ENDWW//gm;
Expand Down Expand Up @@ -506,11 +547,6 @@ sub getMappedMimeType {
$type = 'chart';
}

# documents
elsif ($subType =~ /document|ms\-?word|rtf/) {
$type = 'document';
}

# presentation
elsif ($subType =~ /powerpoint|presentation|slide/) {
$type = 'presentation';
Expand All @@ -521,6 +557,11 @@ sub getMappedMimeType {
$type = 'spreadsheet';
}

# documents
elsif ($subType =~ /document|ms\-?word|rtf/) {
$type = 'document';
}

# pdf
elsif ($subType =~ /pdf|postscript/) {
$type = 'pdf';
Expand Down
2 changes: 1 addition & 1 deletion lib/Foswiki/Plugins/SolrPlugin/Crawler.pm
@@ -1,6 +1,6 @@
# Plugin for Foswiki - The Free and Open Source Wiki, http://foswiki.org/
#
# Copyright (C) 2012-2018 Michael Daum http://michaeldaumconsulting.com
# Copyright (C) 2012-2019 Michael Daum http://michaeldaumconsulting.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
Expand Down
@@ -1,6 +1,6 @@
# Plugin for Foswiki - The Free and Open Source Wiki, http://foswiki.org/
#
# Copyright (C) 2012-2018 Michael Daum http://michaeldaumconsulting.com
# Copyright (C) 2012-2019 Michael Daum http://michaeldaumconsulting.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
Expand Down

0 comments on commit f14b99d

Please sign in to comment.