Skip to content

Commit

Permalink
Say goodbye to utterance ids in the API, redundant thing
Browse files Browse the repository at this point in the history
git-svn-id: svn+ssh://svn.code.sf.net/p/cmusphinx/code/trunk/pocketsphinx@12805 94700074-3cef-4d97-a70e-9c8c206c02f5
  • Loading branch information
nshmyrev committed Jan 22, 2015
1 parent 87a4f04 commit b638fab
Show file tree
Hide file tree
Showing 18 changed files with 117 additions and 165 deletions.
28 changes: 5 additions & 23 deletions include/pocketsphinx.h
Original file line number Diff line number Diff line change
Expand Up @@ -290,26 +290,23 @@ char *ps_lookup_word(ps_decoder_t *ps,
*
* @param ps Decoder.
* @param rawfh Previously opened file stream.
* @param uttid Utterance ID (or NULL to generate automatically).
* @param maxsamps Maximum number of samples to read from rawfh, or -1
* to read until end-of-file.
* @return Number of samples of audio.
*/
POCKETSPHINX_EXPORT
long ps_decode_raw(ps_decoder_t *ps, FILE *rawfh,
char const *uttid, long maxsamps);
long maxsamps);

/**
* Decode a senone score dump file.
*
* @param ps Decoder
* @param senfh Previously opened file handle positioned at start of file.
* @param uttid Utterance ID (or NULL to generate automatically).
* @return Number of frames read.
*/
POCKETSPHINX_EXPORT
int ps_decode_senscr(ps_decoder_t *ps, FILE *senfh,
char const *uttid);
int ps_decode_senscr(ps_decoder_t *ps, FILE *senfh);

/**
* Start processing of the stream of speech. Channel parameters like
Expand All @@ -329,22 +326,10 @@ int ps_start_stream(ps_decoder_t *ps);
* reinitializes internal data structures.
*
* @param ps Decoder to be started.
* @param uttid String uniquely identifying this utterance. If NULL,
* one will be created.
* @return 0 for success, <0 on error.
*/
POCKETSPHINX_EXPORT
int ps_start_utt(ps_decoder_t *ps, char const *uttid);

/**
* Get current utterance ID.
*
* @param ps Decoder to query.
* @return Read-only string of the current utterance ID. This is
* valid only until the beginning of the next utterance.
*/
POCKETSPHINX_EXPORT
char const *ps_get_uttid(ps_decoder_t *ps);
int ps_start_utt(ps_decoder_t *ps);

/**
* Decode raw audio data.
Expand Down Expand Up @@ -416,13 +401,11 @@ int ps_end_utt(ps_decoder_t *ps);
*
* @param ps Decoder.
* @param out_best_score Output: path score corresponding to returned string.
* @param out_uttid Output: utterance ID for this utterance.
* @return String containing best hypothesis at this point in
* decoding. NULL if no hypothesis is available.
*/
POCKETSPHINX_EXPORT
char const *ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score,
char const **out_uttid);
char const *ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score);

/**
* Get hypothesis string and final flag.
Expand All @@ -446,11 +429,10 @@ char const *ps_get_hyp_final(ps_decoder_t *ps, int32 *out_is_final);
* restrictions being lifted in future versions.
*
* @param ps Decoder.
* @param out_uttid Output: utterance ID for this utterance.
* @return Posterior probability of the best hypothesis.
*/
POCKETSPHINX_EXPORT
int32 ps_get_prob(ps_decoder_t *ps, char const **out_uttid);
int32 ps_get_prob(ps_decoder_t *ps);

/**
* Get word lattice.
Expand Down
22 changes: 12 additions & 10 deletions src/gst-plugin/gstpocketsphinx.c
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,6 @@ enum
/* Default command line (will go away soon; it will be constructed from properties instead). */
static char *default_argv[] = {
"gst-pocketsphinx",
"-samprate", "8000",
};
static const int default_argc = sizeof(default_argv)/sizeof(default_argv[0]);

Expand All @@ -117,7 +116,7 @@ static GstStaticPadTemplate sink_factory =
GST_STATIC_CAPS("audio/x-raw, "
"format = (string) { S16LE }, "
"channels = (int) 1, "
"rate = (int) 8000")
"rate = (int) 16000")
);

static GstStaticPadTemplate src_factory =
Expand Down Expand Up @@ -558,7 +557,7 @@ gst_pocketsphinx_chain(GstPad * pad, GstObject *parent, GstBuffer * buffer)
if (!ps->listening_started) {
ps->listening_started = TRUE;
ps->utt_started = FALSE;
ps_start_utt(ps->ps, NULL);
ps_start_utt(ps->ps);
}

gst_buffer_map (buffer, &info, GST_MAP_READ);
Expand All @@ -580,17 +579,16 @@ gst_pocketsphinx_chain(GstPad * pad, GstObject *parent, GstBuffer * buffer)
|| (GST_BUFFER_TIMESTAMP(buffer) - ps->last_result_time) > 100*10*1000) {
int32 score;
char const *hyp;
char const *uttid;

hyp = ps_get_hyp(ps->ps, &score, &uttid);
hyp = ps_get_hyp(ps->ps, &score);
ps->last_result_time = GST_BUFFER_TIMESTAMP(buffer);
if (hyp && strlen(hyp) > 0) {
if (ps->last_result == NULL || 0 != strcmp(ps->last_result, hyp)) {
g_free(ps->last_result);
ps->last_result = g_strdup(hyp);
/* Emit a signal for applications. */
g_signal_emit(ps, gst_pocketsphinx_signals[SIGNAL_PARTIAL_RESULT],
0, hyp, uttid);
0, hyp);
}
}
}
Expand All @@ -605,7 +603,6 @@ gst_pocketsphinx_finalize_utt(GstPocketSphinx *ps)
{
GstBuffer *buffer;
char const *hyp;
char const *uttid;
int32 score;

hyp = NULL;
Expand All @@ -614,19 +611,24 @@ gst_pocketsphinx_finalize_utt(GstPocketSphinx *ps)

ps_end_utt(ps->ps);
ps->listening_started = FALSE;
hyp = ps_get_hyp(ps->ps, &score, &uttid);
hyp = ps_get_hyp(ps->ps, &score);

/* Dump the lattice if requested. */
if (ps->latdir) {
char *latfile = string_join(ps->latdir, "/", uttid, ".lat", NULL);
char *latfile;
char uttid[16];

sprintf(uttid, "%09u", ps->uttno);
ps->uttno++;
latfile = string_join(ps->latdir, "/", uttid, ".lat", NULL);
ps_lattice_t *dag;
if ((dag = ps_get_lattice(ps->ps)))
ps_lattice_write(dag, latfile);
ckd_free(latfile);
}
if (hyp) {
g_signal_emit(ps, gst_pocketsphinx_signals[SIGNAL_RESULT],
0, hyp, uttid);
0, hyp);
buffer = gst_buffer_new_and_alloc(strlen(hyp) + 1);
gst_buffer_fill(buffer, 0, hyp, strlen(hyp));
gst_buffer_fill(buffer, strlen(hyp), "\n", 1);
Expand Down
1 change: 1 addition & 0 deletions src/gst-plugin/gstpocketsphinx.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ struct _GstPocketSphinx

gboolean utt_started;
gboolean listening_started;
gint uttno;

GstClockTime last_result_time; /**< Timestamp of last partial result. */
char *last_result; /**< String of last partial result. */
Expand Down
54 changes: 18 additions & 36 deletions src/libpocketsphinx/pocketsphinx.c
Original file line number Diff line number Diff line change
Expand Up @@ -410,17 +410,10 @@ ps_free(ps_decoder_t *ps)
acmod_free(ps->acmod);
logmath_free(ps->lmath);
cmd_ln_free_r(ps->config);
ckd_free(ps->uttid);
ckd_free(ps);
return 0;
}

char const *
ps_get_uttid(ps_decoder_t *ps)
{
return ps->uttid;
}

cmd_ln_t *
ps_get_config(ps_decoder_t *ps)
{
Expand Down Expand Up @@ -861,13 +854,13 @@ ps_lookup_word(ps_decoder_t *ps, const char *word)

long
ps_decode_raw(ps_decoder_t *ps, FILE *rawfh,
char const *uttid, long maxsamps)
long maxsamps)
{
int16 *data;
long total, pos, endpos;

ps_start_stream(ps);
ps_start_utt(ps, uttid);
ps_start_utt(ps);

/* If this file is seekable or maxsamps is specified, then decode
* the whole thing at once. */
Expand Down Expand Up @@ -910,9 +903,11 @@ ps_start_stream(ps_decoder_t *ps)
}

int
ps_start_utt(ps_decoder_t *ps, char const *uttid)
ps_start_utt(ps_decoder_t *ps)
{
int rv;
char uttid[16];

if (ps->search == NULL) {
E_ERROR("No search module is selected, did you forget to "
"specify a language model or grammar?\n");
Expand All @@ -922,17 +917,9 @@ ps_start_utt(ps_decoder_t *ps, char const *uttid)
ptmr_reset(&ps->perf);
ptmr_start(&ps->perf);

if (uttid) {
ckd_free(ps->uttid);
ps->uttid = ckd_salloc(uttid);
}
else {
char nuttid[16];
ckd_free(ps->uttid);
sprintf(nuttid, "%09u", ps->uttno);
ps->uttid = ckd_salloc(nuttid);
++ps->uttno;
}
sprintf(uttid, "%09u", ps->uttno);
++ps->uttno;

/* Remove any residual word lattice and hypothesis. */
ps_lattice_free(ps->search->dag);
ps->search->dag = NULL;
Expand All @@ -947,7 +934,7 @@ ps_start_utt(ps_decoder_t *ps, char const *uttid)
/* Start logging features and audio if requested. */
if (ps->mfclogdir) {
char *logfn = string_join(ps->mfclogdir, "/",
ps->uttid, ".mfc", NULL);
uttid, ".mfc", NULL);
FILE *mfcfh;
E_INFO("Writing MFCC log file: %s\n", logfn);
if ((mfcfh = fopen(logfn, "wb")) == NULL) {
Expand All @@ -960,7 +947,7 @@ ps_start_utt(ps_decoder_t *ps, char const *uttid)
}
if (ps->rawlogdir) {
char *logfn = string_join(ps->rawlogdir, "/",
ps->uttid, ".raw", NULL);
uttid, ".raw", NULL);
FILE *rawfh;
E_INFO("Writing raw audio log file: %s\n", logfn);
if ((rawfh = fopen(logfn, "wb")) == NULL) {
Expand All @@ -973,7 +960,7 @@ ps_start_utt(ps_decoder_t *ps, char const *uttid)
}
if (ps->senlogdir) {
char *logfn = string_join(ps->senlogdir, "/",
ps->uttid, ".sen", NULL);
uttid, ".sen", NULL);
FILE *senfh;
E_INFO("Writing senone score log file: %s\n", logfn);
if ((senfh = fopen(logfn, "wb")) == NULL) {
Expand Down Expand Up @@ -1015,12 +1002,11 @@ ps_search_forward(ps_decoder_t *ps)
}

int
ps_decode_senscr(ps_decoder_t *ps, FILE *senfh,
char const *uttid)
ps_decode_senscr(ps_decoder_t *ps, FILE *senfh)
{
int nfr, n_searchfr;

ps_start_utt(ps, uttid);
ps_start_utt(ps);
n_searchfr = 0;
acmod_set_insenfh(ps->acmod, senfh);
while ((nfr = acmod_read_scores(ps->acmod)) > 0) {
Expand Down Expand Up @@ -1137,14 +1123,14 @@ ps_end_utt(ps_decoder_t *ps)

/* Log a backtrace if requested. */
if (cmd_ln_boolean_r(ps->config, "-backtrace")) {
char const *uttid, *hyp;
const char* hyp;
ps_seg_t *seg;
int32 score;

hyp = ps_get_hyp(ps, &score, &uttid);
hyp = ps_get_hyp(ps, &score);

if (hyp != NULL) {
E_INFO("%s: %s (%d)\n", uttid, hyp, score);
E_INFO("%s (%d)\n", hyp, score);
E_INFO_NOFN("%-20s %-5s %-5s %-5s %-10s %-10s %-3s\n",
"word", "start", "end", "pprob", "ascr", "lscr", "lback");
for (seg = ps_seg_iter(ps, &score); seg;
Expand All @@ -1166,14 +1152,12 @@ ps_end_utt(ps_decoder_t *ps)
}

char const *
ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score, char const **out_uttid)
ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score)
{
char const *hyp;

ptmr_start(&ps->perf);
hyp = ps_search_hyp(ps->search, out_best_score, NULL);
if (out_uttid)
*out_uttid = ps->uttid;
ptmr_stop(&ps->perf);
return hyp;
}
Expand All @@ -1191,14 +1175,12 @@ ps_get_hyp_final(ps_decoder_t *ps, int32 *out_is_final)


int32
ps_get_prob(ps_decoder_t *ps, char const **out_uttid)
ps_get_prob(ps_decoder_t *ps)
{
int32 prob;

ptmr_start(&ps->perf);
prob = ps_search_prob(ps->search);
if (out_uttid)
*out_uttid = ps->uttid;
ptmr_stop(&ps->perf);
return prob;
}
Expand Down
1 change: 0 additions & 1 deletion src/libpocketsphinx/pocketsphinx_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,6 @@ struct ps_decoder_s {

/* Utterance-processing related stuff. */
uint32 uttno; /**< Utterance counter. */
char *uttid; /**< Utterance ID for current utterance. */
ptmr_t perf; /**< Performance counter for all of decoding. */
uint32 n_frame; /**< Total number of frames processed. */
char const *mfclogdir; /**< Log directory for MFCC files. */
Expand Down
2 changes: 0 additions & 2 deletions src/libpocketsphinx/ps_lattice.c
Original file line number Diff line number Diff line change
Expand Up @@ -1446,15 +1446,13 @@ int32
ps_lattice_posterior(ps_lattice_t *dag, ngram_model_t *lmset,
float32 ascale)
{
ps_search_t *search;
logmath_t *lmath;
ps_latnode_t *node;
ps_latlink_t *link;
latlink_list_t *x;
ps_latlink_t *bestend;
int32 bestescr;

search = dag->search;
lmath = dag->lmath;

/* Reset all betas to zero. */
Expand Down
Loading

0 comments on commit b638fab

Please sign in to comment.