Skip to content

Commit

Permalink
Item8543: convert include, exclude, web, and search strings into a ni…
Browse files Browse the repository at this point in the history
…ce hash of array elements that we can then convert into a more efficient single query. Much faster, but we still need to re-order to reduce the set before the JavaScript is called

git-svn-id: http://svn.foswiki.org/trunk/MongoDBPlugin@6569 0b4bb1d4-4e5a-0410-9cc4-b2b747904278
  • Loading branch information
SvenDowideit authored and SvenDowideit committed Feb 28, 2010
1 parent a81197e commit 7328975
Showing 1 changed file with 95 additions and 65 deletions.
160 changes: 95 additions & 65 deletions lib/Foswiki/Store/SearchAlgorithms/MongoDB.pm
Expand Up @@ -78,12 +78,30 @@ sub query {
. scalar( @{ $query->{tokens} } ) . " : "
. join( ',', @{ $query->{tokens} } ) . "\n";

# AND search - search once for each token, ANDing result together
#TODO: this is stupid. suggested re-impl:
#TODO:
# the query & search functions in the query&search algo just _create_ the hash for the query
# and this is stored in the topic Set. When the topic set is 'evaluated' the query is sent (by the topic set)
# and from there the cursor is used.
#nonetheless, the rendering of 2000 results takes much longer than the querying, but as the 2 are on separate servers, everything is golden :)

my %elements;



#TODO: the MongoDB advanced query documentation indicates that /^a/ is faster than /^a.*/ and /^a.*$/, so we should refactor to that.
my $includeTopicsRegex = Foswiki::Search::InfoCache::convertTopicPatternToRegex($options->{topic});
my $excludeTopicsRegex = Foswiki::Search::InfoCache::convertTopicPatternToRegex( $options->{excludetopic} );
if ($includeTopicsRegex ne '') {
push(@{$elements{_topic}}, { '$regex' => "$includeTopicsRegex" } );
}
if ($excludeTopicsRegex ne '') {
push(@{$elements{_topic}}, { '$not' => { '$regex' => "$excludeTopicsRegex" } } );
}

push(@{$elements{_web}}, $web );

my $casesensitive = defined($options->{casesensitive})?$options->{casesensitive}:1;

foreach my $token ( @{ $query->{tokens} } ) {

# flag for AND NOT search
Expand All @@ -101,97 +119,107 @@ sub query {
$searchString = quotemeta($searchString);
}

my $cursor =
doMongoSearch( $web, $options, '_topic', $searchString);

#TODO: this will go into the custom TopicSet
while ( my $topic = $cursor->next ) {
$topicMatches{ $topic->{_topic} } = 1;
if ($invertSearch) {
push(@{$elements{_topic}}, { '$not' => { '$regex' => "$searchString", '$options' => ($casesensitive? 'i' : '') } } );
} else {
push(@{$elements{_topic}}, { '$regex' => "$searchString", '$options' => ($casesensitive? 'i' : '') } );
}

}
print STDERR "after topic scope search\n";

# scope='text', e.g. grep search on topic text:
my $textMatches;
unless ( $options->{'scope'} eq 'topic' ) {
$textMatches =
search( $token, $web, $topicSet, $session->{store}, $options );
}

#bring the text matches into the topicMatch hash
if ($textMatches) {
@topicMatches{ keys %$textMatches } = values %$textMatches;
}
my $searchString = $token;
if ( $options->{type} && $options->{type} eq 'regex' ) {

my @scopeTextList = ();
if ($invertSearch) {
$topicSet->reset();
while ( $topicSet->hasNext() ) {
my $topic = $topicSet->next();
# Escape /, used as delimiter. This also blocks any attempt to use
# the search string to execute programs on the server.
$searchString =~ s!/!\\/!g;
}
else {

if ( $topicMatches{$topic} ) {
}
else {
push( @scopeTextList, $topic );
}
# Escape non-word chars in search string for plain text search
$searchString =~ s/(\W)/\\$1/g;
}
}
else {

#TODO: the sad thing about this is we lose info
@scopeTextList = keys(%topicMatches);
# Convert GNU grep \< \> syntax to \b
$searchString =~ s/(?<!\\)\\[<>]/\\b/g;
$searchString =~ s/^(.*)$/\\b$1\\b/go if $options->{'wordboundaries'};

if ($invertSearch) {
push(@{$elements{_text}}, { '$not' => { '$regex' => "$searchString", '$options' => ($casesensitive? 'i' : '') } } );
} else {
push(@{$elements{_text}}, { '$regex' => "$searchString", '$options' => ($casesensitive? 'i' : '') } );
}
}
} #end foreach

my $cursor =
doMongoSearch( $web, $options, \%elements);

$topicSet =
new Foswiki::Search::InfoCache( $Foswiki::Plugins::SESSION, $web,
\@scopeTextList );
my @answer;
while ( my $topic = $cursor->next ) {
push(@answer, $topic->{_topic});
}

$topicSet =
new Foswiki::Search::InfoCache( $Foswiki::Plugins::SESSION, $web,
\@answer );

return $topicSet;
}

sub doMongoSearch {
my $web = shift;
my $options = shift;
my $scope = shift;
my $searchString = shift;
my $casesensitive = defined($options->{casesensitive})?$options->{casesensitive}:1;
my $elements = shift;

print STDERR
"######## Search::MongoDB search ($web) tokens $searchString \n";
"######## Search::MongoDB search ($web) \n";
require Foswiki::Plugins::MongoDBPlugin;
require Foswiki::Plugins::MongoDBPlugin::DB;
my $collection =
Foswiki::Plugins::MongoDBPlugin::getMongoDB()->_getCollection('current');

my $mongoQuery = Tie::IxHash->new(
_web => $web,
$scope => { '$regex' => $searchString, '$options' => ($casesensitive? 'i' : '') }
);

#TODO: the MongoDB advanced query documentation indicates that /^a/ is faster than /^a.*/ and /^a.*$/, so we should refactor to that.
my $includeTopicsRegex = Foswiki::Search::InfoCache::convertTopicPatternToRegex($options->{topic});
my $excludeTopicsRegex = Foswiki::Search::InfoCache::convertTopicPatternToRegex( $options->{excludeTopics} );
#don't need the IxHash here, but when we come to use MapReduce we will.
#my $mongoQuery = Tie::IxHash->new(
# _web => $web,
# );
my %mongoQuery = ();
my $mongoJavascriptFunc = '';
my $counter = 1;

#It's probably more productive to convert the inc and excl into SearchString bits and push it all through the same loops.
#In fact, what if I convert it all to javascript - that can be used in mapreduce later too,
# and then 'guess' which term might give the most performance to extract into the non js query portions.
if ((($scope eq '_topic')and (($includeTopicsRegex ne '') or ($excludeTopicsRegex ne ''))) or
(($includeTopicsRegex ne '') and ($excludeTopicsRegex ne ''))){
$mongoQuery->Push('$where' => 'function() {
'.convertQueryToJavascript('excludetopics','_topic', $excludeTopicsRegex, '', '!' ).'
'.convertQueryToJavascript('includetopics','_topic', $includeTopicsRegex, '', '' ).'
}');
} else {
if ($includeTopicsRegex ne '') {
$mongoQuery->Push( _topic => { '$regex' => "$includeTopicsRegex" } );
} elsif ($includeTopicsRegex ne '') {
$mongoQuery->Push( _topic => { '$not' => { '$regex' => "$excludeTopicsRegex" } } );
#pop off the first query element for each scope and use that literally
foreach my $scope (keys(%{$elements})) {
foreach my $elem (@{$elements->{$scope}}) {
if (!defined($mongoQuery{$scope})) {
$mongoQuery{$scope} = $elem;
} else {
my $not = $elem->{'$not'};
if (defined($not)) {
$elem = $not;
$not = '!';
}
my $casesensitive = $elem->{'$options'};
my $reg = $elem->{'$regex'};
$mongoJavascriptFunc .= convertQueryToJavascript('query'.$counter,
$scope,
$reg,
$casesensitive,
$not );
$counter++;
}
}
}
if ($counter > 1) {
$mongoJavascriptFunc = 'function() {'.
$mongoJavascriptFunc.
'return (1==1);}';
$mongoQuery{'$where'} = $mongoJavascriptFunc;
print STDERR "------$mongoJavascriptFunc\n";
}

my $cursor = $collection->query($mongoQuery);
my $cursor = $collection->query(\%mongoQuery);

print STDERR "found " . $cursor->count . "\n";

Expand All @@ -204,15 +232,17 @@ sub convertQueryToJavascript {
my $regex = shift;
my $regexoptions = shift || '';
my $not = shift || '';
my $invertedNot = ($not eq '!')?'' : '!';


return '' if ($regex eq '');

return <<"HERE";
{
$name = /$regex/$regexoptions ;
matched = $name.test(this.$scope);
if (matched) {
return $not(matched);
if ($invertedNot(matched)) {
return (1==0);
}
}
HERE
Expand Down

0 comments on commit 7328975

Please sign in to comment.