diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6979e74bf..64b0c53be 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -111,6 +111,11 @@ else()
   install(DIRECTORY include TYPE INCLUDE)
   install(DIRECTORY ${CMAKE_BINARY_DIR}/include TYPE INCLUDE)
   install(FILES ${CMAKE_BINARY_DIR}/pocketsphinx.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
+
+  option(BUILD_GSTREAMER "Build GStreamer plugin" OFF)
+  if(BUILD_GSTREAMER)
+    add_subdirectory(gst)
+  endif()
 endif()
 
 # Can print this at the end, just to know what it was
diff --git a/MANIFEST.in b/MANIFEST.in
index 7465d6f71..414beadf8 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -15,6 +15,7 @@ include setup.cfg
 include setup.py
 include sphinx_config.h.in
 recursive-include cython *
+recursive-include gst *
 recursive-include docs *
 recursive-include doxygen *
 recursive-include examples *
diff --git a/gst/CMakeLists.txt b/gst/CMakeLists.txt
new file mode 100644
index 000000000..952484911
--- /dev/null
+++ b/gst/CMakeLists.txt
@@ -0,0 +1,25 @@
+# GStreamer plugin wrapping the PocketSphinx decoder (enabled by BUILD_GSTREAMER).
+find_package(PkgConfig REQUIRED)
+# IMPORTED_TARGET carries include dirs, link dirs and flags together; REQUIRED
+# makes a missing GStreamer fail at configure time instead of at compile time.
+pkg_check_modules(GOBJECT REQUIRED IMPORTED_TARGET gobject-2.0)
+pkg_check_modules(GSTREAMER REQUIRED IMPORTED_TARGET gstreamer-1.0 gstreamer-base-1.0)
+
+add_library(gstpocketsphinx SHARED gstpocketsphinx.c)
+# The plugin is a shared object, so the pocketsphinx code linked into it must be PIC.
+set_property(TARGET pocketsphinx PROPERTY POSITION_INDEPENDENT_CODE ON)
+target_link_libraries(gstpocketsphinx PUBLIC
+  pocketsphinx
+  PkgConfig::GSTREAMER
+  PkgConfig::GOBJECT
+)
+target_include_directories(gstpocketsphinx
+  PRIVATE
+    ${CMAKE_BINARY_DIR}
+    ${CMAKE_SOURCE_DIR}/src
+  PUBLIC
+    ${CMAKE_SOURCE_DIR}/include
+    ${CMAKE_BINARY_DIR}/include
+)
+message(STATUS "Installing GStreamer plugin to ${CMAKE_INSTALL_FULL_LIBDIR}/gstreamer-1.0")
+install(TARGETS gstpocketsphinx LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/gstreamer-1.0)
diff
--git a/gst/gstpocketsphinx.c b/gst/gstpocketsphinx.c new file mode 100644 index 000000000..b3497f60e --- /dev/null +++ b/gst/gstpocketsphinx.c @@ -0,0 +1,812 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2014 Alpha Cephei Inc. + * Copyright (c) 2007 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + * Author: David Huggins-Daines + */ + +/** + * SECTION:element-pocketsphinx + * + * The element runs speech recognition on incoming audio buffers and + * generates element messages named "pocketsphinx" + * for each hypothesis and one for the final result. The message's structure + * contains these fields: + * + * + * + * + * #GstClockTime + * "timestamp": + * the timestamp of the buffer that triggered the message. + * + * + * + * + * #gboolean + * "final": + * %FALSE for intermediate messages. + * + * + * + * + * #glong + * "confidence": + * posterior probability (confidence) of the result in log domain + * + * + * + * + * #gchar + * "hypothesis": + * the recognized text + * + * + * + * + * + * Example pipeline + * |[ + * gst-launch-1.0 -m autoaudiosrc ! audioconvert ! audioresample ! pocketsphinx ! fakesink + * ]| + * + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include + +#include +#include "util/strfuncs.h" +#include "util/ckd_alloc.h" + +#include "gstpocketsphinx.h" + +GST_DEBUG_CATEGORY_STATIC(pocketsphinx_debug); +#define GST_CAT_DEFAULT pocketsphinx_debug + + +static void +gst_pocketsphinx_set_property(GObject * object, guint prop_id, + const GValue * value, GParamSpec * pspec); + +static void +gst_pocketsphinx_get_property(GObject * object, guint prop_id, + GValue * value, GParamSpec * pspec); + +static GstStateChangeReturn +gst_pocketsphinx_change_state(GstElement *element, GstStateChange transition); + +static GstFlowReturn +gst_pocketsphinx_chain(GstPad * pad, GstObject *parent, GstBuffer * buffer); + +static gboolean +gst_pocketsphinx_event(GstPad *pad, GstObject *parent, GstEvent *event); + +static void +gst_pocketsphinx_finalize_utt(GstPocketSphinx *ps); + +static void +gst_pocketsphinx_finalize(GObject * gobject); + +enum +{ + PROP_0, + PROP_HMM_DIR, + PROP_LM_FILE, + PROP_LMCTL_FILE, + PROP_DICT_FILE, + PROP_MLLR_FILE, + PROP_FSG_FILE, 
PROP_ALLPHONE_FILE, + PROP_KWS_FILE, + PROP_JSGF_FILE, + PROP_FWDFLAT, + PROP_BESTPATH, + PROP_MAXHMMPF, + PROP_MAXWPF, + PROP_BEAM, + PROP_WBEAM, + PROP_PBEAM, + PROP_DSRATIO, + + PROP_LATDIR, + PROP_LM_NAME, + PROP_DECODER +}; + +/* + * Static data. + */ + +static GstStaticPadTemplate sink_factory = + GST_STATIC_PAD_TEMPLATE("sink", + GST_PAD_SINK, + GST_PAD_ALWAYS, + GST_STATIC_CAPS("audio/x-raw, " + "format = (string) { S16LE }, " + "channels = (int) 1, " + "rate = (int) 16000") + ); + +static GstStaticPadTemplate src_factory = + GST_STATIC_PAD_TEMPLATE("src", + GST_PAD_SRC, + GST_PAD_ALWAYS, + GST_STATIC_CAPS("text/plain") + ); + +static void +wrap_ps_free(void *ps) +{ + (void)ps_free((ps_decoder_t *)ps); +} + +/* + * Boxing of ps_decoder_t. + */ +GType +ps_decoder_get_type(void) +{ + static GType ps_decoder_type = 0; + + if (G_UNLIKELY(ps_decoder_type == 0)) { + ps_decoder_type = g_boxed_type_register_static + ("PSDecoder", + /* Conveniently, these should just work. */ + (GBoxedCopyFunc) ps_retain, + (GBoxedFreeFunc) wrap_ps_free); + } + + return ps_decoder_type; +} + + +G_DEFINE_TYPE(GstPocketSphinx, gst_pocketsphinx, GST_TYPE_ELEMENT); + +static void +gst_pocketsphinx_class_init(GstPocketSphinxClass * klass) +{ + GObjectClass *gobject_class; + GstElementClass *element_class;; + + gobject_class =(GObjectClass *) klass; + element_class = (GstElementClass *)klass; + + gobject_class->set_property = gst_pocketsphinx_set_property; + gobject_class->get_property = gst_pocketsphinx_get_property; + gobject_class->finalize = gst_pocketsphinx_finalize; + + /* TODO: We will bridge cmd_ln.h properties to GObject + * properties here somehow eventually. 
*/ + g_object_class_install_property + (gobject_class, PROP_HMM_DIR, + g_param_spec_string("hmm", "HMM Directory", + "Directory containing acoustic model parameters", + NULL, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_LM_FILE, + g_param_spec_string("lm", "LM File", + "Language model file", + NULL, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_LMCTL_FILE, + g_param_spec_string("lmctl", "LM Control File", + "Language model control file (for class LMs)", + NULL, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_FSG_FILE, + g_param_spec_string("fsg", "FSG File", + "Finite state grammar file", + NULL, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_ALLPHONE_FILE, + g_param_spec_string("allphone", "Allphone File", + "Phonetic language model file", + NULL, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_KWS_FILE, + g_param_spec_string("kws", "Keyphrases File", + "List of keyphrases for spotting", + NULL, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_JSGF_FILE, + g_param_spec_string("jsgf", "Grammer file", + "File with grammer in Java Speech Grammar Format (JSGF)", + NULL, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_DICT_FILE, + g_param_spec_string("dict", "Dictionary File", + "Dictionary File", + NULL, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_MLLR_FILE, + g_param_spec_string("mllr", "MLLR transformation file", + "Transformation to apply to means and variances", + NULL, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_FWDFLAT, + g_param_spec_boolean("fwdflat", "Flat Lexicon Search", + "Enable Flat Lexicon Search", + FALSE, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_BESTPATH, + g_param_spec_boolean("bestpath", "Graph Search", + "Enable 
Graph Search", + FALSE, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_MAXHMMPF, + g_param_spec_int("maxhmmpf", "Maximum HMMs per frame", + "Maximum number of HMMs searched per frame", + 1, 100000, 1000, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_MAXWPF, + g_param_spec_int("maxwpf", "Maximum words per frame", + "Maximum number of words searched per frame", + 1, 100000, 10, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_BEAM, + g_param_spec_double("beam", "Beam width applied to every frame in Viterbi search", + "Beam width applied to every frame in Viterbi search", + -1, 1, 1e-48, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_PBEAM, + g_param_spec_double("pbeam", "Beam width applied to phone transitions", + "Beam width applied to phone transitions", + -1, 1, 1e-48, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_WBEAM, + g_param_spec_double("wbeam", "Beam width applied to word exits", + "Beam width applied to phone transitions", + -1, 1, 7e-29, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_DSRATIO, + g_param_spec_int("dsratio", "Frame downsampling ratio", + "Evaluate acoustic model every N frames", + 1, 10, 1, + G_PARAM_READWRITE)); + + /* Could be changed on runtime when ps is already initialized */ + g_object_class_install_property + (gobject_class, PROP_LM_NAME, + g_param_spec_string("lmname", "LM Name", + "Language model name (to select LMs from lmctl)", + NULL, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_LATDIR, + g_param_spec_string("latdir", "Lattice Directory", + "Output Directory for Lattices", + NULL, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_DECODER, + g_param_spec_boxed("decoder", "Decoder object", + "The underlying decoder", + PS_DECODER_TYPE, + G_PARAM_READABLE)); + + + 
GST_DEBUG_CATEGORY_INIT(pocketsphinx_debug, "pocketsphinx", 0, + "Automatic Speech Recognition"); + + + element_class->change_state = gst_pocketsphinx_change_state; + + gst_element_class_add_pad_template(element_class, + gst_static_pad_template_get(&sink_factory)); + gst_element_class_add_pad_template(element_class, + gst_static_pad_template_get(&src_factory)); + + gst_element_class_set_static_metadata(element_class, "PocketSphinx", "Filter/Audio", "Convert speech to text", "CMUSphinx-devel "); + +} + +static void +gst_pocketsphinx_set_string(GstPocketSphinx *ps, + const gchar *key, const GValue *value) +{ + if (value != NULL) { + ps_config_set_str(ps->config, key, g_value_get_string(value)); + } else { + ps_config_set_str(ps->config, key, NULL); + } +} + +static void +gst_pocketsphinx_set_int(GstPocketSphinx *ps, + const gchar *key, const GValue *value) +{ + ps_config_set_int(ps->config, key, g_value_get_int(value)); +} + +static void +gst_pocketsphinx_set_boolean(GstPocketSphinx *ps, + const gchar *key, const GValue *value) +{ + ps_config_set_bool(ps->config, key, g_value_get_boolean(value)); +} + +static void +gst_pocketsphinx_set_double(GstPocketSphinx *ps, + const gchar *key, const GValue *value) +{ + ps_config_set_float(ps->config, key, g_value_get_double(value)); +} + +static void +gst_pocketsphinx_set_property(GObject * object, guint prop_id, + const GValue * value, GParamSpec * pspec) +{ + GstPocketSphinx *ps = GST_POCKETSPHINX(object); + + switch (prop_id) { + + case PROP_HMM_DIR: + gst_pocketsphinx_set_string(ps, "hmm", value); + break; + case PROP_LM_FILE: + /* FSG and LM are mutually exclusive. 
*/ + gst_pocketsphinx_set_string(ps, "lm", value); + gst_pocketsphinx_set_string(ps, "lmctl", NULL); + gst_pocketsphinx_set_string(ps, "fsg", NULL); + gst_pocketsphinx_set_string(ps, "allphone", NULL); + gst_pocketsphinx_set_string(ps, "kws", NULL); + gst_pocketsphinx_set_string(ps, "jsgf", NULL); + break; + case PROP_LMCTL_FILE: + /* FSG and LM are mutually exclusive. */ + gst_pocketsphinx_set_string(ps, "lm", NULL); + gst_pocketsphinx_set_string(ps, "lmctl", value); + gst_pocketsphinx_set_string(ps, "fsg", NULL); + gst_pocketsphinx_set_string(ps, "allphone", NULL); + gst_pocketsphinx_set_string(ps, "kws", NULL); + gst_pocketsphinx_set_string(ps, "jsgf", NULL); + break; + case PROP_DICT_FILE: + gst_pocketsphinx_set_string(ps, "dict", value); + break; + case PROP_MLLR_FILE: + gst_pocketsphinx_set_string(ps, "mllr", value); + break; + case PROP_FSG_FILE: + /* FSG and LM are mutually exclusive */ + gst_pocketsphinx_set_string(ps, "lm", NULL); + gst_pocketsphinx_set_string(ps, "lmctl", NULL); + gst_pocketsphinx_set_string(ps, "fsg", value); + gst_pocketsphinx_set_string(ps, "allphone", NULL); + gst_pocketsphinx_set_string(ps, "kws", NULL); + gst_pocketsphinx_set_string(ps, "jsgf", NULL); + break; + case PROP_ALLPHONE_FILE: + /* FSG and LM are mutually exclusive. */ + gst_pocketsphinx_set_string(ps, "lm", NULL); + gst_pocketsphinx_set_string(ps, "lmctl", NULL); + gst_pocketsphinx_set_string(ps, "fsg", NULL); + gst_pocketsphinx_set_string(ps, "allphone", value); + gst_pocketsphinx_set_string(ps, "kws", NULL); + gst_pocketsphinx_set_string(ps, "jsgf", NULL); + break; + case PROP_KWS_FILE: + /* FSG and LM are mutually exclusive. 
*/ + gst_pocketsphinx_set_string(ps, "lm", NULL); + gst_pocketsphinx_set_string(ps, "lmctl", NULL); + gst_pocketsphinx_set_string(ps, "fsg", NULL); + gst_pocketsphinx_set_string(ps, "allphone", NULL); + gst_pocketsphinx_set_string(ps, "jsgf", NULL); + gst_pocketsphinx_set_string(ps, "kws", value); + break; + case PROP_JSGF_FILE: + /* FSG and LM are mutually exclusive. */ + gst_pocketsphinx_set_string(ps, "lm", NULL); + gst_pocketsphinx_set_string(ps, "lmctl", NULL); + gst_pocketsphinx_set_string(ps, "fsg", NULL); + gst_pocketsphinx_set_string(ps, "allphone", NULL); + gst_pocketsphinx_set_string(ps, "kws", NULL); + gst_pocketsphinx_set_string(ps, "jsgf", value); + break; + case PROP_FWDFLAT: + gst_pocketsphinx_set_boolean(ps, "fwdflat", value); + break; + case PROP_BESTPATH: + gst_pocketsphinx_set_boolean(ps, "bestpath", value); + break; + case PROP_MAXHMMPF: + gst_pocketsphinx_set_int(ps, "maxhmmpf", value); + break; + case PROP_MAXWPF: + gst_pocketsphinx_set_int(ps, "maxwpf", value); + break; + case PROP_BEAM: + gst_pocketsphinx_set_double(ps, "beam", value); + break; + case PROP_PBEAM: + gst_pocketsphinx_set_double(ps, "pbeam", value); + break; + case PROP_WBEAM: + gst_pocketsphinx_set_double(ps, "wbeam", value); + break; + case PROP_DSRATIO: + gst_pocketsphinx_set_int(ps, "ds", value); + break; + + + case PROP_LATDIR: + if (ps->latdir) + g_free(ps->latdir); + ps->latdir = g_strdup(g_value_get_string(value)); + break; + case PROP_LM_NAME: + gst_pocketsphinx_set_string(ps, "fsg", NULL); + gst_pocketsphinx_set_string(ps, "lm", NULL); + gst_pocketsphinx_set_string(ps, "allphone", NULL); + gst_pocketsphinx_set_string(ps, "kws", NULL); + gst_pocketsphinx_set_string(ps, "jsgf", NULL); + gst_pocketsphinx_set_string(ps, "lmname", value); + + /** + * Chances are that lmctl is already loaded and all + * corresponding searches are configured, so we simply + * try to set the search + */ + + if (value != NULL && ps->ps) { + ps_activate_search(ps->ps, 
g_value_get_string(value)); + } + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + return; + } + + /* If decoder was already initialized, reinit */ + if (ps->ps && prop_id != PROP_LATDIR && prop_id != PROP_LM_NAME) + ps_reinit(ps->ps, ps->config); +} + +static void +gst_pocketsphinx_get_property(GObject * object, guint prop_id, + GValue * value, GParamSpec * pspec) +{ + GstPocketSphinx *ps = GST_POCKETSPHINX(object); + + switch (prop_id) { + case PROP_DECODER: + g_value_set_boxed(value, ps->ps); + break; + case PROP_HMM_DIR: + g_value_set_string(value, ps_config_str(ps->config, "hmm")); + break; + case PROP_LM_FILE: + g_value_set_string(value, ps_config_str(ps->config, "lm")); + break; + case PROP_LMCTL_FILE: + g_value_set_string(value, ps_config_str(ps->config, "lmctl")); + break; + case PROP_LM_NAME: + g_value_set_string(value, ps_config_str(ps->config, "lmname")); + break; + case PROP_DICT_FILE: + g_value_set_string(value, ps_config_str(ps->config, "dict")); + break; + case PROP_MLLR_FILE: + g_value_set_string(value, ps_config_str(ps->config, "mllr")); + break; + case PROP_FSG_FILE: + g_value_set_string(value, ps_config_str(ps->config, "fsg")); + break; + case PROP_ALLPHONE_FILE: + g_value_set_string(value, ps_config_str(ps->config, "allphone")); + break; + case PROP_KWS_FILE: + g_value_set_string(value, ps_config_str(ps->config, "kws")); + break; + case PROP_JSGF_FILE: + g_value_set_string(value, ps_config_str(ps->config, "jsgf")); + break; + case PROP_FWDFLAT: + g_value_set_boolean(value, ps_config_bool(ps->config, "fwdflat")); + break; + case PROP_BESTPATH: + g_value_set_boolean(value, ps_config_bool(ps->config, "bestpath")); + break; + case PROP_LATDIR: + g_value_set_string(value, ps->latdir); + break; + case PROP_MAXHMMPF: + g_value_set_int(value, ps_config_int(ps->config, "maxhmmpf")); + break; + case PROP_MAXWPF: + g_value_set_int(value, ps_config_int(ps->config, "maxwpf")); + break; + case PROP_BEAM: + 
g_value_set_double(value, ps_config_float(ps->config, "beam")); + break; + case PROP_PBEAM: + g_value_set_double(value, ps_config_float(ps->config, "pbeam")); + break; + case PROP_WBEAM: + g_value_set_double(value, ps_config_float(ps->config, "wbeam")); + break; + case PROP_DSRATIO: + g_value_set_int(value, ps_config_int(ps->config, "ds")); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gst_pocketsphinx_finalize(GObject * gobject) +{ + GstPocketSphinx *ps = GST_POCKETSPHINX(gobject); + + ps_free(ps->ps); + ps_config_free(ps->config); + g_free(ps->last_result); + g_free(ps->latdir); + + G_OBJECT_CLASS(gst_pocketsphinx_parent_class)->finalize(gobject); +} + +static void +gst_pocketsphinx_init(GstPocketSphinx * ps) +{ + ps->sinkpad = + gst_pad_new_from_static_template(&sink_factory, "sink"); + ps->srcpad = + gst_pad_new_from_static_template(&src_factory, "src"); + ps->adapter = gst_adapter_new(); + + /* Parse default command-line options. */ + ps->config = ps_config_init(NULL); + ps_default_search_args(ps->config); + + /* Set up pads. */ + gst_element_add_pad(GST_ELEMENT(ps), ps->sinkpad); + gst_pad_set_chain_function(ps->sinkpad, gst_pocketsphinx_chain); + gst_pad_set_event_function(ps->sinkpad, gst_pocketsphinx_event); + gst_pad_use_fixed_caps(ps->sinkpad); + + gst_element_add_pad(GST_ELEMENT(ps), ps->srcpad); + gst_pad_use_fixed_caps(ps->srcpad); + + /* Initialize time. 
*/ + ps->last_result_time = 0; + ps->last_result = NULL; +} + +static GstStateChangeReturn +gst_pocketsphinx_change_state(GstElement *element, GstStateChange transition) +{ + GstPocketSphinx *ps = GST_POCKETSPHINX(element); + + switch (transition) { + case GST_STATE_CHANGE_NULL_TO_READY: + ps->ps = ps_init(ps->config); + if (ps->ps == NULL) { + GST_ELEMENT_ERROR(GST_ELEMENT(ps), LIBRARY, INIT, + ("Failed to initialize PocketSphinx"), + ("Failed to initialize PocketSphinx")); + return GST_STATE_CHANGE_FAILURE; + } + ps->ep = ps_endpointer_init(0, 0.0, 0, + ps_config_int(ps->config, "samprate"), 0); + if (ps->ep == NULL) { + GST_ELEMENT_ERROR(GST_ELEMENT(ps), LIBRARY, INIT, + ("Failed to initialize PocketSphinx endpointer"), + ("Failed to initialize PocketSphinx endpointer")); + return GST_STATE_CHANGE_FAILURE; + } + ps->frame_size = ps_endpointer_frame_size(ps->ep) * 2; + break; + case GST_STATE_CHANGE_READY_TO_NULL: + ps_free(ps->ps); + ps->ps = NULL; + default: + break; + } + + return GST_ELEMENT_CLASS(gst_pocketsphinx_parent_class)->change_state(element, transition); +} + +static void +gst_pocketsphinx_post_message(GstPocketSphinx *ps, gboolean final, + GstClockTime timestamp, gint32 prob, const gchar *hyp) +{ + GstStructure *s = gst_structure_new ("pocketsphinx", + "timestamp", G_TYPE_UINT64, timestamp, + "final", G_TYPE_BOOLEAN, final, + "confidence", G_TYPE_LONG, prob, + "hypothesis", G_TYPE_STRING, hyp, NULL); + + gst_element_post_message (GST_ELEMENT (ps), gst_message_new_element (GST_OBJECT (ps), s)); +} + +static GstFlowReturn +gst_pocketsphinx_chain(GstPad * pad, GstObject *parent, GstBuffer * buffer) +{ + GstPocketSphinx *ps; + + (void)pad; + ps = GST_POCKETSPHINX(parent); + + gst_adapter_push(ps->adapter, buffer); + while (gst_adapter_available(ps->adapter) >= ps->frame_size) { + const guint *data = gst_adapter_map(ps->adapter, ps->frame_size); + int prev_in_speech = ps_endpointer_in_speech(ps->ep); + const int16 *speech = ps_endpointer_process(ps->ep, 
(int16 *)data); + if (speech != NULL) { + if (!prev_in_speech) + ps_start_utt(ps->ps); + ps_process_raw(ps->ps, + speech, ps->frame_size / 2, + FALSE, FALSE); + if (!ps_endpointer_in_speech(ps->ep)) { + gst_pocketsphinx_finalize_utt(ps); + } else if (ps->last_result_time == 0 + /* Get a partial result every now and then, see if it is different. */ + /* Check every 100 milliseconds. */ + || (GST_BUFFER_TIMESTAMP(buffer) - ps->last_result_time) > 100*10*1000) { + int32 score; + char const *hyp; + + hyp = ps_get_hyp(ps->ps, &score); + ps->last_result_time = GST_BUFFER_TIMESTAMP(buffer); + if (hyp && strlen(hyp) > 0) { + if (ps->last_result == NULL || 0 != strcmp(ps->last_result, hyp)) { + g_free(ps->last_result); + ps->last_result = g_strdup(hyp); + gst_pocketsphinx_post_message(ps, FALSE, ps->last_result_time, + ps_get_prob(ps->ps), hyp); + } + } + } + } + gst_adapter_unmap(ps->adapter); + gst_adapter_flush(ps->adapter, ps->frame_size); + } + return GST_FLOW_OK; +} + + +static void +gst_pocketsphinx_finalize_utt(GstPocketSphinx *ps) +{ + GstBuffer *buffer; + char const *hyp; + int32 score; + + hyp = NULL; + + ps_end_utt(ps->ps); + hyp = ps_get_hyp(ps->ps, &score); + + if (hyp) { + gst_pocketsphinx_post_message(ps, TRUE, GST_CLOCK_TIME_NONE, + ps_get_prob(ps->ps), hyp); + buffer = gst_buffer_new_and_alloc(strlen(hyp) + 1); + gst_buffer_fill(buffer, 0, hyp, strlen(hyp)); + gst_buffer_fill(buffer, strlen(hyp), "\n", 1); + gst_pad_push(ps->srcpad, buffer); + } + + if (ps->latdir) { + char *latfile; + char uttid[16]; + + sprintf(uttid, "%09u", ps->uttno); + ps->uttno++; + latfile = string_join(ps->latdir, "/", uttid, ".lat", NULL); + ps_lattice_t *dag; + if ((dag = ps_get_lattice(ps->ps))) + ps_lattice_write(dag, latfile); + ckd_free(latfile); + } +} + +static gboolean +gst_pocketsphinx_event(GstPad *pad, GstObject *parent, GstEvent *event) +{ + GstPocketSphinx *ps; + + ps = GST_POCKETSPHINX(parent); + + switch (event->type) { + case GST_EVENT_EOS: + { + 
gst_pocketsphinx_finalize_utt(ps); + return gst_pad_event_default(pad, parent, event); + } + default: + return gst_pad_event_default(pad, parent, event); + } +} + +static void +gst_pocketsphinx_log(void *user_data, err_lvl_t lvl, const char *fmt, ...) +{ + static const int gst_level[ERR_MAX] = {GST_LEVEL_DEBUG, GST_LEVEL_INFO, + GST_LEVEL_WARNING, GST_LEVEL_ERROR, GST_LEVEL_ERROR}; + + (void)user_data; + va_list ap; + va_start(ap, fmt); + gst_debug_log_valist(pocketsphinx_debug, gst_level[lvl], "", "", 0, NULL, fmt, ap); + va_end(ap); +} + + +static gboolean +plugin_init(GstPlugin * plugin) +{ + + err_set_callback(gst_pocketsphinx_log, NULL); + err_set_loglevel(ERR_INFO); + + if (!gst_element_register(plugin, "pocketsphinx", + GST_RANK_NONE, GST_TYPE_POCKETSPHINX)) + return FALSE; + return TRUE; +} + +#define PACKAGE PACKAGE_NAME +GST_PLUGIN_DEFINE(GST_VERSION_MAJOR, + GST_VERSION_MINOR, + pocketsphinx, + "PocketSphinx plugin", + plugin_init, PACKAGE_VERSION, + "BSD", + "PocketSphinx", "http://cmusphinx.sourceforge.net/") diff --git a/gst/gstpocketsphinx.h b/gst/gstpocketsphinx.h new file mode 100644 index 000000000..cb2a664ae --- /dev/null +++ b/gst/gstpocketsphinx.h @@ -0,0 +1,99 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2007 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + * Author: David Huggins-Daines + */ + +#ifndef __GST_POCKETSPHINX_H__ +#define __GST_POCKETSPHINX_H__ + +#include +#include +#include + +G_BEGIN_DECLS + +#define GST_TYPE_POCKETSPHINX \ + (gst_pocketsphinx_get_type()) +#define GST_POCKETSPHINX(obj) \ + (G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_POCKETSPHINX,GstPocketSphinx)) +#define GST_POCKETSPHINX_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_CAST((klass),GST_TYPE_POCKETSPHINX,GstPocketSphinxClass)) +#define GST_IS_POCKETSPHINX(obj) \ + (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_POCKETSPHINX)) +#define GST_IS_POCKETSPHINX_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_TYPE((klass),GST_TYPE_POCKETSPHINX)) + +typedef struct _GstPocketSphinx GstPocketSphinx; +typedef struct _GstPocketSphinxClass GstPocketSphinxClass; + +struct _GstPocketSphinx +{ + GstElement element; + GstAdapter *adapter; + GstPad *sinkpad, *srcpad; + + ps_decoder_t *ps; + ps_endpointer_t *ep; + ps_config_t *config; + + size_t frame_size; + + gchar *latdir; /**< Output directory for word lattices. */ + gint uttno; + + GstClockTime last_result_time; /**< Timestamp of last partial result. */ + char *last_result; /**< String of last partial result. */ +}; + +struct _GstPocketSphinxClass +{ + GstElementClass parent_class; + + void (*partial_result) (GstElement *element, const gchar *hyp_str); + void (*result) (GstElement *element, const gchar *hyp_str); +}; + +GType gst_pocketsphinx_get_type(void); + +/* + * Boxing of decoder. 
+ */ +#define PS_DECODER_TYPE (ps_decoder_get_type()) +GType ps_decoder_get_type(void); + +G_END_DECLS + +#endif /* __GST_POCKETSPHINX_H__ */ diff --git a/gst/livedemo.c b/gst/livedemo.c new file mode 100644 index 000000000..44290f19d --- /dev/null +++ b/gst/livedemo.c @@ -0,0 +1,108 @@ +#include +#include + +static gboolean +bus_call(GstBus * bus, GstMessage * msg, gpointer data) +{ + GMainLoop *loop = (GMainLoop *) data; + + switch (GST_MESSAGE_TYPE(msg)) { + + case GST_MESSAGE_EOS: + g_print("End of stream\n"); + g_main_loop_quit(loop); + break; + + case GST_MESSAGE_ERROR:{ + gchar *debug; + GError *error; + + gst_message_parse_error(msg, &error, &debug); + g_free(debug); + + g_printerr("Error: %s\n", error->message); + g_error_free(error); + + g_main_loop_quit(loop); + break; + } + default: + break; + } + + const GstStructure *st = gst_message_get_structure(msg); + if (st && strcmp(gst_structure_get_name(st), "pocketsphinx") == 0) { + if (g_value_get_boolean(gst_structure_get_value(st, "final"))) + g_print("Got result %s\n", g_value_get_string(gst_structure_get_value(st, "hypothesis"))); + } + + return TRUE; +} + + +int +main(int argc, char *argv[]) +{ + GMainLoop *loop; + + GstElement *pipeline, *source, *decoder, *sink; + GstBus *bus; + guint bus_watch_id; + + /* Initialisation */ + gst_init(&argc, &argv); + + loop = g_main_loop_new(NULL, FALSE); + + /* Check input arguments */ + if (argc != 2) { + g_printerr("Usage: %s \n", argv[0]); + return -1; + } + + /* Create gstreamer elements */ + pipeline = gst_pipeline_new("pipeline"); + source = gst_element_factory_make("filesrc", "file-source"); + decoder = gst_element_factory_make("pocketsphinx", "asr"); + sink = gst_element_factory_make("fakesink", "output"); + + if (!pipeline || !source || !decoder || !sink) { + g_printerr("One element could not be created. 
Exiting.\n"); + return -1; + } + + /* Set up the pipeline */ + /* we set the input filename to the source element */ + g_object_set(G_OBJECT(source), "location", argv[1], NULL); + + g_object_set(G_OBJECT(decoder), "lmctl", "test.lmctl", NULL); + g_object_set(G_OBJECT(decoder), "lmname", "tidigits", NULL); + + /* we add a message handler */ + bus = gst_pipeline_get_bus(GST_PIPELINE(pipeline)); + bus_watch_id = gst_bus_add_watch(bus, bus_call, loop); + gst_object_unref(bus); + + /* we add all elements into the pipeline */ + gst_bin_add_many(GST_BIN(pipeline), source, decoder, sink, NULL); + + /* we link the elements together */ + gst_element_link_many(source, decoder, sink, NULL); + + gst_element_set_state(pipeline, GST_STATE_PLAYING); + + /* Iterate */ + g_print("Running...\n"); + g_main_loop_run(loop); + + /* Out of the main loop, clean up nicely */ + g_print("Returned, stopping playback\n"); + gst_element_set_state(pipeline, GST_STATE_NULL); + + g_print("Deleting pipeline\n"); + gst_object_unref(GST_OBJECT(pipeline)); + g_source_remove(bus_watch_id); + g_main_loop_unref(loop); + + return 0; +} diff --git a/gst/livedemo.py b/gst/livedemo.py new file mode 100644 index 000000000..872800998 --- /dev/null +++ b/gst/livedemo.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python + +# Copyright (c) 2008 Carnegie Mellon University. +# +# You may modify and redistribute this file under the same terms as +# the CMU Sphinx system. See LICENSE for more information. 
+
+
+from gi import pygtkcompat
+import gi
+
+gi.require_version('Gst', '1.0')
+from gi.repository import GObject, Gst
+GObject.threads_init()
+Gst.init(None)
+
+gst = Gst
+
+print("Using pygtkcompat and Gst from gi")
+
+pygtkcompat.enable() 
+pygtkcompat.enable_gtk(version='3.0')
+
+import gtk
+
+class DemoApp(object):
+    """GTK demo: a text view plus a Speak/Stop toggle driving a pocketsphinx pipeline."""
+    def __init__(self):
+        """Build the window first, then the GStreamer pipeline."""
+        self.init_gui()
+        self.init_gst()
+
+    def init_gui(self):
+        """Create the window holding the text view and the "Speak" toggle button."""
+        self.window = gtk.Window()
+        self.window.connect("delete-event", gtk.main_quit)
+        self.window.set_default_size(400,200)
+        self.window.set_border_width(10)
+        vbox = gtk.VBox()
+        self.textbuf = gtk.TextBuffer()
+        self.text = gtk.TextView(buffer=self.textbuf)
+        self.text.set_wrap_mode(gtk.WRAP_WORD)
+        vbox.pack_start(self.text)
+        self.button = gtk.ToggleButton("Speak")
+        self.button.connect('clicked', self.button_clicked)
+        vbox.pack_start(self.button, False, False, 5)
+        self.window.add(vbox)
+        self.window.show_all()
+
+    def init_gst(self):
+        """Create the capture/recognition pipeline, watch its bus, and leave it PAUSED."""
+        self.pipeline = gst.parse_launch('autoaudiosrc ! audioconvert ! audioresample '
+                                         '! pocketsphinx ! fakesink')
+#                                         '! pocketsphinx hmm=../model/en-us/en-us lm=../model/en-us/en-us.lm.bin dict=../model/en-us/cmudict-en-us.dict ! fakesink')
+        bus = self.pipeline.get_bus()
+        bus.add_signal_watch()
+        bus.connect('message::element', self.element_message)
+
+        self.pipeline.set_state(gst.State.PAUSED)
+
+    def element_message(self, bus, msg):
+        """Dispatch 'pocketsphinx' element messages to final_result() or partial_result()."""
+        msgtype = msg.get_structure().get_name()
+        if msgtype != 'pocketsphinx':
+            return
+
+        if msg.get_structure().get_value('final'):
+            self.final_result(msg.get_structure().get_value('hypothesis'), msg.get_structure().get_value('confidence'))
+            self.pipeline.set_state(gst.State.PAUSED)
+            self.button.set_active(False)
+        elif msg.get_structure().get_value('hypothesis'):
+            self.partial_result(msg.get_structure().get_value('hypothesis'))
+
+    def partial_result(self, hyp):
+        """Replace the current selection with hyp and move the insert mark back to its start."""
+        # All this stuff appears as one single action
+        self.textbuf.begin_user_action()
+        self.textbuf.delete_selection(True, self.text.get_editable())
+        self.textbuf.insert_at_cursor(hyp)
+        ins = self.textbuf.get_insert()
+        iter = self.textbuf.get_iter_at_mark(ins)
+        iter.backward_chars(len(hyp))
+        self.textbuf.move_mark(ins, iter)
+        self.textbuf.end_user_action()
+
+    def final_result(self, hyp, confidence):
+        """Replace the current selection with the final hypothesis; confidence is unused."""
+        # All this stuff appears as one single action
+        self.textbuf.begin_user_action()
+        self.textbuf.delete_selection(True, self.text.get_editable())
+        self.textbuf.insert_at_cursor(hyp)
+        self.textbuf.end_user_action()
+
+    def button_clicked(self, button):
+        """Toggle handler: active sets the pipeline PLAYING, inactive sets it PAUSED."""
+        if button.get_active():
+            button.set_label("Stop")
+            self.pipeline.set_state(gst.State.PLAYING)
+        else:
+            button.set_label("Speak")
+            self.pipeline.set_state(gst.State.PAUSED)
+
+app = DemoApp()
+gtk.main()