Skip to content

Commit

Permalink
simple stopwords support
Browse files Browse the repository at this point in the history
git-svn-id: http://code.phpbb.com/svn/sphinx/trunk@10 058e8d10-4797-47b5-994a-8fa6acaf3df0
  • Loading branch information
naderman committed Feb 29, 2008
1 parent 95e4777 commit ff01bf0
Show file tree
Hide file tree
Showing 4 changed files with 267 additions and 5 deletions.
251 changes: 251 additions & 0 deletions sphinx_plugin/config/sphinx_stopwords.txt
@@ -0,0 +1,251 @@
a
about
after
ago
all
almost
along
alot
also
am
an
and
answer
any
anybody
anybodys
anywhere
are
arent
around
as
ask
askd
at
bad
be
because
been
before
being
best
better
between
big
btw
but
by
can
cant
come
could
couldnt
day
days
days
did
didnt
do
does
doesnt
dont
down
each
etc
either
else
even
ever
every
everybody
everybodys
everyone
far
find
for
found
from
get
go
going
gone
good
got
gotten
had
has
have
havent
having
her
here
hers
him
his
home
how
hows
href
I
Ive
if
in
ini
into
is
isnt
it
its
its
just
know
large
less
like
liked
little
looking
look
looked
looking
lot
maybe
many
me
more
most
much
must
mustnt
my
near
need
never
new
news
no
none
not
nothing
now
of
off
often
old
on
once
only
oops
or
other
our
ours
out
over
page
please
put
question
questions
questioned
quote
rather
really
recent
said
saw
say
says
she
see
sees
should
sites
small
so
some
something
sometime
somewhere
soon
take
than
true
thank
that
thatd
thats
the
their
theirs
theres
theirs
them
then
there
these
they
theyll
theyd
theyre
this
those
though
through
thus
time
times
to
too
under
until
untrue
up
upon
use
users
version
very
via
want
was
way
we
well
went
were
werent
what
when
where
which
who
whom
whose
why
wide
will
with
within
without
wont
world
worse
worst
would
wrote
www
yes
yet
you
youd
youll
your
youre
yours
AFAIK
IIRC
LOL
ROTF
ROTFLMAO
YMMV
10 changes: 9 additions & 1 deletion sphinx_plugin/includes/search/fulltext_sphinx.php
Expand Up @@ -253,7 +253,7 @@ function config_updated()
array('source', "source_phpbb_{$this->id}_main"),
array('docinfo', 'extern'),
array('morphology', 'none'),
array('stopwords', (isset($config['fulltext_sphinx_stop_words_file'])) ? $config['fulltext_sphinx_stop_words_file'] : ''),
array('stopwords', (file_exists($config['fulltext_sphinx_config_path'] . 'sphinx_stopwords.txt')) ? $config['fulltext_sphinx_config_path'] . 'sphinx_stopwords.txt' : ''),
array('min_word_len', '2'),
array('charset_type', 'utf-8'),
array('charset_table', 'U+FF10..U+FF19->0..9, 0..9, U+FF41..U+FF5A->a..z, U+FF21..U+FF3A->a..z, A..Z->a..z, a..z, U+0149, U+017F, U+0138, U+00DF, U+00FF, U+00C0..U+00D6->U+00E0..U+00F6, U+00E0..U+00F6, U+00D8..U+00DE->U+00F8..U+00FE, U+00F8..U+00FE, U+0100->U+0101, U+0101, U+0102->U+0103, U+0103, U+0104->U+0105, U+0105, U+0106->U+0107, U+0107, U+0108->U+0109, U+0109, U+010A->U+010B, U+010B, U+010C->U+010D, U+010D, U+010E->U+010F, U+010F, U+0110->U+0111, U+0111, U+0112->U+0113, U+0113, U+0114->U+0115, U+0115, U+0116->U+0117, U+0117, U+0118->U+0119, U+0119, U+011A->U+011B, U+011B, U+011C->U+011D, U+011D, U+011E->U+011F, U+011F, U+0130->U+0131, U+0131, U+0132->U+0133, U+0133, U+0134->U+0135, U+0135, U+0136->U+0137, U+0137, U+0139->U+013A, U+013A, U+013B->U+013C, U+013C, U+013D->U+013E, U+013E, U+013F->U+0140, U+0140, U+0141->U+0142, U+0142, U+0143->U+0144, U+0144, U+0145->U+0146, U+0146, U+0147->U+0148, U+0148, U+014A->U+014B, U+014B, U+014C->U+014D, U+014D, U+014E->U+014F, U+014F, U+0150->U+0151, U+0151, U+0152->U+0153, U+0153, U+0154->U+0155, U+0155, U+0156->U+0157, U+0157, U+0158->U+0159, U+0159, U+015A->U+015B, U+015B, U+015C->U+015D, U+015D, U+015E->U+015F, U+015F, U+0160->U+0161, U+0161, U+0162->U+0163, U+0163, U+0164->U+0165, U+0165, U+0166->U+0167, U+0167, U+0168->U+0169, U+0169, U+016A->U+016B, U+016B, U+016C->U+016D, U+016D, U+016E->U+016F, U+016F, U+0170->U+0171, U+0171, U+0172->U+0173, U+0173, U+0174->U+0175, U+0175, U+0176->U+0177, U+0177, U+0178->U+00FF, U+00FF, U+0179->U+017A, U+017A, U+017B->U+017C, U+017C, U+017D->U+017E, U+017E, U+4E00..U+9FFF'),
Expand Down Expand Up @@ -1104,11 +1104,19 @@ function acp()
}
}

/**
* @todo check whether stopwords are really activated/make it an option
*/

$tpl = '
<dl>
<dt><label for="fulltext_sphinx_config_path">' . $user->lang['FULLTEXT_SPHINX_CONFIG_PATH'] . ':</label><br /><span>' . $user->lang['FULLTEXT_SPHINX_CONFIG_PATH_EXPLAIN'] . '</span></dt>
<dd><input id="fulltext_sphinx_config_path" type="text" size="40" maxlength="255" name="config[fulltext_sphinx_config_path]" value="' . $config['fulltext_sphinx_config_path'] . '" /></dd>
</dl>
<dl>
<dt><label for="fulltext_sphinx_data_path">' . $user->lang['FULLTEXT_SPHINX_STOPWORDS_FILE'] . ':</label><br /><span>' . $user->lang['FULLTEXT_SPHINX_STOPWORDS_FILE_EXPLAIN'] . '</span></dt>
<dd><strong>' . ((file_exists($config['fulltext_sphinx_config_path'] . 'sphinx_stopwords.txt')) ? $user->lang['YES'] : $user->lang['NO']) . '</strong></dd>
</dl>
<dl>
<dt><label for="fulltext_sphinx_data_path">' . $user->lang['FULLTEXT_SPHINX_DATA_PATH'] . ':</label><br /><span>' . $user->lang['FULLTEXT_SPHINX_DATA_PATH_EXPLAIN'] . '</span></dt>
<dd><input id="fulltext_sphinx_data_path" type="text" size="40" maxlength="255" name="config[fulltext_sphinx_data_path]" value="' . $config['fulltext_sphinx_data_path'] . '" /></dd>
Expand Down
9 changes: 6 additions & 3 deletions sphinx_plugin/language/en/mods/fulltext_sphinx.php
Expand Up @@ -35,19 +35,22 @@
'FULLTEXT_SPHINX_BIN_PATH_EXPLAIN' => 'If this path could not be determined automatically you have to enter the path to the directory in which the sphinx executables <samp>indexer</samp> and <samp>searchd</samp> reside.',
'FULLTEXT_SPHINX_CONFIG_PATH' => 'Path to configuration directory',
'FULLTEXT_SPHINX_CONFIG_PATH_EXPLAIN' => 'You should create this config directory outside the web-accessible directories. It has to be writable by the user your webserver runs as (often www-data or nobody).',
'FULLTEXT_SPHINX_CONFIGURE_FIRST' => 'Before you create an index you have to enable and configure sphinx under GENERAL -> SERVER CONFIGURATION -> Search settings.',
'FULLTEXT_SPHINX_DATA_PATH' => 'Path to data directory',
'FULLTEXT_SPHINX_DATA_PATH_EXPLAIN' => 'You should create this directory outside the web-accessible directories. It has to be writable by the user your webserver runs as (often www-data or nobody). It will be used to store the indexes and log files.',
'FULLTEXT_SPHINX_DELTA_POSTS' => 'Number of posts in frequently updated delta index',
'FULLTEXT_SPHINX_DIRECTORY_NOT_FOUND' => 'The directory <strong>%s</strong> does not exist. Please correct your path settings.',
'FULLTEXT_SPHINX_FILE_NOT_EXECUTABLE' => 'The file <strong>%s</strong> is not executable for the webserver.',
'FULLTEXT_SPHINX_FILE_NOT_FOUND' => 'The file <strong>%s</strong> does not exist. Please correct your path settings.',
'FULLTEXT_SPHINX_FILE_NOT_WRITABLE' => 'The file <strong>%s</strong> cannot be written by the webserver.',
'FULLTEXT_SPHINX_LAST_SEARCHES' => 'Recent search queries',
'FULLTEXT_SPHINX_MAIN_POSTS' => 'Number of posts in main index',
'FULLTEXT_SPHINX_PORT' => 'Sphinx search daemon port',
'FULLTEXT_SPHINX_PORT_EXPLAIN' => 'Port on which the sphinx search daemon on localhost listens. Leave empty to use the default 3312.',
'FULLTEXT_SPHINX_REQUIRES_EXEC' => 'The sphinx plugin for phpBB requires PHP’s <code>exec</code> function which is disabled on your system.',
'FULLTEXT_SPHINX_WRONG_DATABASE' => 'The sphinx plugin for phpBB currently only supports MySQL',
'FULLTEXT_SPHINX_MAIN_POSTS' => 'Number of posts in main index',
'FULLTEXT_SPHINX_DELTA_POSTS' => 'Number of posts in frequently updated delta index',
'FULLTEXT_SPHINX_LAST_SEARCHES' => 'Recent search queries',
'FULLTEXT_SPHINX_STOPWORDS_FILE' => 'Stopwords activated',
'FULLTEXT_SPHINX_STOPWORDS_FILE_EXPLAIN' => 'You can place a file called sphinx_stopwords.txt containing one word in each line in your config directory. If this file is present these words will be excluded from the indexing process.',
));

?>
2 changes: 1 addition & 1 deletion sphinx_plugin/scripts/create_links
Expand Up @@ -13,7 +13,7 @@ if [ "$1" = "--help" -o "$1" = "-h" -o "$1" == "" ]; then
fi

if [ "$plugin_path" == "" ]; then
plugin_path="."
plugin_path="../"
fi

if [ "$phpbb_path" == "" ]; then
Expand Down

0 comments on commit ff01bf0

Please sign in to comment.