Skip to content

Commit

Permalink
Merge pull request #305 from cmusphinx/53-jsgf-grammer-not-working-as…
Browse files Browse the repository at this point in the history
…-expected

Revert incorrect optimizations to JSGF compiler (fixes #53)
  • Loading branch information
dhdaines committed Sep 29, 2022
2 parents 613ada8 + 271a91e commit 8c7d785
Show file tree
Hide file tree
Showing 16 changed files with 553 additions and 219 deletions.
15 changes: 14 additions & 1 deletion programs/pocketsphinx_jsgf2fsg.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,11 @@ static const arg_t defn[] = {
"no",
"Compute grammar closure to speedup loading"},

{ "loglevel",
ARG_STRING,
"WARN",
"Minimum level of log messages (DEBUG, INFO, WARN, ERROR)" },

{ NULL, 0, NULL, NULL }
};

Expand Down Expand Up @@ -131,7 +136,7 @@ main(int argc, char *argv[])
jsgf_t *jsgf;
fsg_model_t *fsg;
cmd_ln_t *config;
const char *rule;
const char *rule, *loglevel;

if ((config = cmd_ln_parse_r(NULL, defn, argc, argv, TRUE)) == NULL) {
/* This probably just means that we got no arguments. */
Expand All @@ -144,6 +149,14 @@ main(int argc, char *argv[])
usagemsg(argv[0]);
}

loglevel = ps_config_str(config, "loglevel");
if (loglevel) {
if (err_set_loglevel_str(loglevel) == NULL) {
E_ERROR("Invalid log level: %s\n", loglevel);
return -1;
}
}

jsgf = jsgf_parse_file(ps_config_str(config, "jsgf"), NULL);
if (jsgf == NULL) {
return 1;
Expand Down
123 changes: 102 additions & 21 deletions src/lm/fsg_model.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,26 +46,6 @@

#include "lm/fsg_model.h"

/**
* Adjacency list (opaque) for a state in an FSG.
*
* Actually we use hash tables so that random access is a bit faster.
* Plus it allows us to make the lookup code a bit less ugly.
*/

struct trans_list_s {
    /* Null transitions keyed by state. */
    hash_table_t *null_trans;
    /* Lists of non-null transitions keyed by state. */
    hash_table_t *trans;
};

/**
* Implementation of arc iterator.
*/
struct fsg_arciter_s {
    /* Hash table iterator (presumably over non-null transitions;
     * confirm against the iterator implementation). */
    hash_iter_t *itor;
    /* Hash table iterator (presumably over null transitions; confirm
     * against the iterator implementation). */
    hash_iter_t *null_itor;
    /* List node (presumably the current position inside a list of
     * non-null transitions; confirm against the implementation). */
    gnode_t *gn;
};

#define FSG_MODEL_BEGIN_DECL "FSG_BEGIN"
#define FSG_MODEL_END_DECL "FSG_END"
#define FSG_MODEL_N_DECL "N"
Expand Down Expand Up @@ -713,7 +693,11 @@ fsg_model_read(FILE * fp, logmath_t * lmath, float32 lw)
}
hash_table_free(vocab);

/* Do transitive closure on null transitions */
/* Do transitive closure on null transitions. FIXME: This is
* actually quite inefficient as it *creates* a lot of new links
* as opposed to just *calculating* the epsilon-closure for each
* state. Ideally we would epsilon-remove or determinize the FSG
* (but note that tag transitions are not really epsilons...) */
nulls = fsg_model_null_trans_closure(fsg, nulls);
glist_free(nulls);

Expand Down Expand Up @@ -938,3 +922,100 @@ fsg_model_writefile_symtab(fsg_model_t * fsg, char const *file)

fclose(fp);
}

/**
 * Expand a set of active states with every state reachable from it
 * through null (epsilon) transitions alone.
 *
 * The expansion is run to a fixed point, so the result is the full
 * epsilon-closure whether or not the transitive closure of null
 * transitions has already been materialized in the FSG.  When it has,
 * the result is the same as following each direct null link once.
 *
 * @param fsg FSG whose per-state null transition tables are followed.
 * @param active In/out bit vector with one bit per FSG state; bits for
 *               newly reachable states are set in place.
 */
static void
apply_closure(fsg_model_t *fsg, bitvec_t *active)
{
    int n_state = fsg_model_n_state(fsg);
    int rescan;

    /* This is a bit slow, sorry. */
    do {
        int state;

        rescan = 0;
        for (state = 0; state < n_state; ++state) {
            hash_table_t *null_trans;
            hash_iter_t *itor;

            if (!bitvec_is_set(active, state))
                continue;
            null_trans = fsg->trans[state].null_trans;
            if (null_trans == NULL)
                continue;
            for (itor = hash_table_iter(null_trans);
                 itor != NULL; itor = hash_table_iter_next(itor)) {
                fsg_link_t *link = (fsg_link_t *)hash_entry_val(itor->ent);

                /* Only newly activated states matter (and get logged). */
                if (bitvec_is_set(active, link->to_state))
                    continue;
                bitvec_set(active, link->to_state);
                E_INFO("epsilon %d -> %d\n", state, link->to_state);
                /* A target ahead of us is visited later in this same
                 * pass; a target behind us has already been passed
                 * over, so its own epsilons need another pass. */
                if (link->to_state < state)
                    rescan = 1;
            }
        }
    } while (rescan);
}

/**
 * Decide whether an FSG accepts a whitespace-separated word sequence.
 *
 * The set of states that can be occupied is tracked in a bit vector.
 * For each word the set is first expanded along null transitions and
 * then advanced along every transition labelled with that word.  The
 * sequence is accepted if, after the last word and one more
 * expansion, the final state is in the set.
 *
 * @param fsg FSG to test; NULL yields 0.
 * @param words Word sequence to test; NULL yields 0.  The string is
 *              copied before tokenizing, so the caller's buffer is
 *              not modified.
 * @return 1 if the sequence is accepted, 0 otherwise (including when
 *         a word has no ID in the FSG).
 */
int
fsg_model_accept(fsg_model_t *fsg, char const *words)
{
    bitvec_t *frontier, *reached;
    char *buffer, *cursor, *token, saved_delim;
    int token_len;
    int accepted = 0;

    if (fsg == NULL || words == NULL)
        return 0;

    frontier = bitvec_alloc(fsg_model_n_state(fsg));
    reached = bitvec_alloc(fsg_model_n_state(fsg));
    bitvec_set(frontier, fsg_model_start_state(fsg));

    /* Tokenize a private copy, one word per iteration. */
    buffer = ckd_salloc(words);
    cursor = buffer;
    for (;;) {
        bitvec_t *swap;
        int wid, src;

        token_len = nextword(cursor, " \t\r\n\v\f", &token, &saved_delim);
        if (token_len < 0)
            break;

        wid = fsg_model_word_id(fsg, token);
        E_INFO("word: %s\n", token);

        /* Follow null transitions out of the current state set. */
        apply_closure(fsg, frontier);

        /* A word with no ID can never be consumed: reject now. */
        if (wid < 0) {
            E_INFO("word %s not found!\n", token);
            goto cleanup;
        }

        /* Again, a linear scan over all states; a bit slow. */
        for (src = 0; src < fsg_model_n_state(fsg); ++src) {
            fsg_arciter_t *arc;

            if (!bitvec_is_set(frontier, src))
                continue;
            arc = fsg_model_arcs(fsg, src);
            while (arc != NULL) {
                fsg_link_t *link = fsg_arciter_get(arc);

                /* Only arcs labelled with this word are taken. */
                if (link->wid == wid) {
                    bitvec_set(reached, link->to_state);
                    E_INFO("%s %d -> %d\n", token, src, link->to_state);
                }
                arc = fsg_arciter_next(arc);
            }
        }

        /* The states just reached become the new state set; the old
         * set is wiped and reused for the next word. */
        swap = frontier;
        frontier = reached;
        reached = swap;
        bitvec_clear_all(reached, fsg_model_n_state(fsg));

        /* Put the delimiter back and step past the word. */
        token[token_len] = saved_delim;
        cursor = token + token_len;
    }

    /* Expand null transitions once more before testing the final
     * state. */
    apply_closure(fsg, frontier);
    accepted = bitvec_is_set(frontier, fsg_model_final_state(fsg));

cleanup:
    bitvec_free(frontier);
    bitvec_free(reached);
    ckd_free(buffer);
    return accepted != 0;
}
25 changes: 22 additions & 3 deletions src/lm/fsg_model.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,15 @@ typedef struct fsg_link_s {

/**
* @struct trans_list_t
* @brief Adjacency list (opaque) for a state in an FSG.
* @brief Adjacency list for a state in an FSG.
*
* Actually we use hash tables so that random access is a bit faster.
* Plus it allows us to make the lookup code a bit less ugly.
*/
typedef struct trans_list_s trans_list_t;
typedef struct trans_list_s {
    /* Null transitions keyed by state. */
    hash_table_t *null_trans;
    /* Lists of non-null transitions keyed by state. */
    hash_table_t *trans;
} trans_list_t;

/**
* @struct fsg_model_t
Expand Down Expand Up @@ -129,8 +135,12 @@ typedef struct fsg_model_s {

/**
* Iterator over arcs.
* Implementation of arc iterator.
*/
typedef struct fsg_arciter_s fsg_arciter_t;
typedef struct fsg_arciter_s {
    /* Hash table iterator (presumably over non-null transitions;
     * confirm against the iterator implementation). */
    hash_iter_t *itor;
    /* Hash table iterator (presumably over null transitions; confirm
     * against the iterator implementation). */
    hash_iter_t *null_itor;
    /* List node (presumably the current position inside a list of
     * non-null transitions; confirm against the implementation). */
    gnode_t *gn;
} fsg_arciter_t;

/**
* Have silence transitions been added?
Expand Down Expand Up @@ -341,6 +351,15 @@ void fsg_model_write_symtab(fsg_model_t *fsg, FILE *file);
POCKETSPHINX_EXPORT
void fsg_model_writefile_symtab(fsg_model_t *fsg, char const *file);

/**
 * Check that an FSG accepts a word sequence.
 *
 * @param fsg FSG to test.  If NULL, 0 is returned.
 * @param words Whitespace-separated word sequence.  If NULL, 0 is
 *              returned.
 * @return 1 if accept, 0 if not accept (including when a word is not
 *         in the FSG's vocabulary).  NOTE(review): this comment
 *         previously also listed -1 for "U.D.O.", but the
 *         implementation in fsg_model.c never returns a negative
 *         value; confirm whether a -1 result was ever intended.
 */
POCKETSPHINX_EXPORT
int fsg_model_accept(fsg_model_t *fsg, char const *words);

#ifdef __cplusplus
}
#endif
Expand Down
Loading

0 comments on commit 8c7d785

Please sign in to comment.