Permalink
Browse files

Support XPath method to do a Porter stem operation on text

  • Loading branch information...
hughsie committed Nov 6, 2018
1 parent aa925c2 commit ac6e6a2a62ce7fe4c4912d5e34346919e3b500af
Showing with 102 additions and 2 deletions.
  1. +8 −0 meson.build
  2. +1 −0 meson_options.txt
  3. +14 −1 src/xb-self-test.c
  4. +79 −1 src/xb-silo.c
@@ -114,6 +114,14 @@ libxmlb_deps = [
uuid,
]
# Optional support for stemming of search tokens: when the 'stemmer' build
# option is enabled, look up the 'stemmer' library using the C compiler, add
# it to the libxmlb dependency list and set HAVE_LIBSTEMMER to 1 in conf.
if get_option('stemmer')
cc = meson.get_compiler('c')
stemmer = cc.find_library('stemmer')
libxmlb_deps += stemmer
conf.set('HAVE_LIBSTEMMER', 1)
endif
gnome = import('gnome')
conf.set_quoted('PACKAGE_NAME', meson.project_name())
@@ -1,3 +1,4 @@
option('gtkdoc', type : 'boolean', value : true, description : 'enable developer documentation')
option('introspection', type : 'boolean', value : true, description : 'generate GObject Introspection data')
option('tests', type : 'boolean', value : true, description : 'enable tests')
option('stemmer', type : 'boolean', value : true, description : 'enable stemmer support')
@@ -232,7 +232,8 @@ xb_predicate_func (void)
NULL
};
xb_machine_set_debug_flags (xb_silo_get_machine (silo),
XB_MACHINE_DEBUG_FLAG_SHOW_STACK);
XB_MACHINE_DEBUG_FLAG_SHOW_STACK |
XB_MACHINE_DEBUG_FLAG_SHOW_PARSING);
for (guint i = 0; tests[i].pred != NULL; i++) {
g_autofree gchar *str = NULL;
g_autoptr(GError) error = NULL;
@@ -1246,6 +1247,18 @@ xb_xpath_func (void)
g_assert_cmpstr (xb_node_get_text (n), ==, "gimp.desktop");
g_clear_object (&n);
/* query with stem */
xb_machine_set_debug_flags (xb_silo_get_machine (silo),
XB_MACHINE_DEBUG_FLAG_SHOW_STACK |
XB_MACHINE_DEBUG_FLAG_SHOW_PARSING);
n = xb_silo_query_first (silo, "components/component/id[text()~=stem('gimping')]", &error);
g_assert_no_error (error);
g_assert_nonnull (n);
g_assert_cmpstr (xb_node_get_text (n), ==, "gimp.desktop");
g_clear_object (&n);
xb_machine_set_debug_flags (xb_silo_get_machine (silo),
XB_MACHINE_DEBUG_FLAG_SHOW_STACK);
/* query with text:integer */
n = xb_silo_query_first (silo, "components/component/id['123'=123]", &error);
g_assert_no_error (error);
@@ -12,11 +12,15 @@
#include <glib-object.h>
#include <gio/gio.h>
#ifdef HAVE_LIBSTEMMER
#include <libstemmer.h>
#endif
#include "xb-builder.h"
#include "xb-node-private.h"
#include "xb-opcode-private.h"
#include "xb-silo-private.h"
#include "xb-stack.h"
#include "xb-stack-private.h"
#include "xb-string-private.h"
typedef struct {
@@ -36,6 +40,10 @@ typedef struct {
XbMachine *machine;
XbSiloProfileFlags profile_flags;
GString *profile_str;
#ifdef HAVE_LIBSTEMMER
struct sb_stemmer *stemmer_ctx;
GMutex stemmer_mutex;
#endif
} XbSiloPrivate;
typedef struct {
@@ -89,6 +97,37 @@ xb_silo_add_profile (XbSilo *self, GTimer *timer, const gchar *fmt, ...)
g_timer_reset (timer);
}
/*
 * xb_silo_stem: (private)
 * @self: a #XbSilo
 * @value: NUL-terminated UTF-8 text
 *
 * Case-folds @value with g_utf8_casefold() and, when built with
 * HAVE_LIBSTEMMER and a stemmer context exists, runs the folded text through
 * sb_stemmer_stem(). Access to the stemmer context is serialised with
 * the per-silo stemmer mutex.
 *
 * The case-folded text is returned unchanged when no stemmer context is
 * available, when the stemmer fails, or when the stemmed length equals the
 * input length.
 *
 * Returns: a newly allocated string, free with g_free()
 */
static gchar *
xb_silo_stem (XbSilo *self, const gchar *value)
{
#ifdef HAVE_LIBSTEMMER
	/* declared inside the #ifdef: it is only used by the stemmer code path
	 * and would otherwise be an unused variable in non-stemmer builds */
	XbSiloPrivate *priv = GET_PRIVATE (self);
	const gchar *tmp;
	gsize len_dst;
	gsize len_src;
	g_autofree gchar *value_casefold = NULL;
	g_autoptr(GMutexLocker) locker = g_mutex_locker_new (&priv->stemmer_mutex);

	/* always normalise the case, even if stemming is not possible */
	value_casefold = g_utf8_casefold (value, -1);

	/* not enabled */
	if (priv->stemmer_ctx == NULL)
		return g_steal_pointer (&value_casefold);

	/* stem */
	len_src = strlen (value_casefold);
	tmp = (const gchar *) sb_stemmer_stem (priv->stemmer_ctx,
					       (guchar *) value_casefold,
					       (gint) len_src);

	/* the stemmer reports failure (e.g. out of memory) by returning NULL;
	 * fall back to the unstemmed text rather than handing NULL to the caller */
	if (tmp == NULL)
		return g_steal_pointer (&value_casefold);

	/* equal length is treated as "nothing was stemmed", which avoids a copy.
	 * NOTE(review): a stemmer may also substitute a suffix of the same
	 * length, in which case the unstemmed text is returned here -- confirm
	 * this is intended */
	len_dst = (gsize) sb_stemmer_length (priv->stemmer_ctx);
	if (len_src == len_dst)
		return g_steal_pointer (&value_casefold);

	/* tmp is owned by the stemmer context, so hand back a private copy */
	return g_strndup (tmp, len_dst);
#else
	/* no stemmer support compiled in: case-folding only */
	(void) self;
	return g_utf8_casefold (value, -1);
#endif
}
/* private */
const gchar *
xb_silo_from_strtab (XbSilo *self, guint32 offset)
@@ -958,6 +997,33 @@ xb_silo_machine_func_attr_cb (XbMachine *self,
return TRUE;
}
/*
 * Machine method callback for stem(): pops one opcode from @stack and, if it
 * is a string opcode, pushes the result of xb_silo_stem() on its text back
 * onto the stack with xb_machine_stack_push_text_steal().
 *
 * @user_data is the #XbSilo the method was registered with; @result and
 * @exec_data are not used here.
 *
 * Returns TRUE on success; otherwise sets @error to
 * G_IO_ERROR_NOT_SUPPORTED and returns FALSE.
 */
static gboolean
xb_silo_machine_func_stem_cb (XbMachine *self,
			      XbStack *stack,
			      gboolean *result,
			      gpointer user_data,
			      gpointer exec_data,
			      GError **error)
{
	XbSilo *owner = XB_SILO (user_data);
	g_autoptr(XbOpcode) operand = xb_machine_stack_pop (self, stack);
	gchar *stemmed;

	/* only TEXT can be stemmed, anything else is a failure */
	if (!xb_opcode_cmp_str (operand)) {
		g_set_error (error,
			     G_IO_ERROR,
			     G_IO_ERROR_NOT_SUPPORTED,
			     "%s type not supported",
			     xb_opcode_kind_to_string (xb_opcode_get_kind (operand)));
		return FALSE;
	}

	/* TEXT: the stemmed copy is handed over to the stack */
	stemmed = xb_silo_stem (owner, xb_opcode_get_str (operand));
	xb_machine_stack_push_text_steal (self, stack, stemmed);
	return TRUE;
}
static gboolean
xb_silo_machine_func_text_cb (XbMachine *self,
XbStack *stack,
@@ -1156,9 +1222,16 @@ xb_silo_init (XbSilo *self)
g_mutex_init (&priv->nodes_mutex);
#ifdef HAVE_LIBSTEMMER
priv->stemmer_ctx = sb_stemmer_new ("en", NULL);
g_mutex_init (&priv->stemmer_mutex);
#endif
priv->machine = xb_machine_new ();
xb_machine_add_method (priv->machine, "attr", 1,
xb_silo_machine_func_attr_cb, self, NULL);
xb_machine_add_method (priv->machine, "stem", 1,
xb_silo_machine_func_stem_cb, self, NULL);
xb_machine_add_method (priv->machine, "text", 0,
xb_silo_machine_func_text_cb, self, NULL);
xb_machine_add_method (priv->machine, "first", 0,
@@ -1186,6 +1259,11 @@ xb_silo_finalize (GObject *obj)
g_mutex_clear (&priv->nodes_mutex);
#ifdef HAVE_LIBSTEMMER
sb_stemmer_delete (priv->stemmer_ctx);
g_mutex_clear (&priv->stemmer_mutex);
#endif
g_free (priv->guid);
g_string_free (priv->profile_str, TRUE);
g_object_unref (priv->machine);

0 comments on commit ac6e6a2

Please sign in to comment.