Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with the HTTPS clone URL or the Subversion checkout URL above.
Download ZIP
Browse files

Initial implementation of discrete HMM recognizer.

This implementation does not do any pruning.  It still manages to
do 21,923 recognitions in 137.958s (about 6.3ms each).
  • Loading branch information...
commit bd0f2fa72be8de0f1e1fa1388b6b779b6dca7786 1 parent 8af5397
@cscott authored
Showing with 248 additions and 23 deletions.
  1. +13 −0 Makefile
  2. +186 −5 hmm.js
  3. +49 −18 recog.js
View
13 Makefile
@@ -142,6 +142,7 @@ accuracy: $(foreach n,$(ALLSTEPS),hmm$(n)/accuracy.txt)
qual: $(foreach n,$(ALLSTEPS),hmm$(n)/accuracy-qual.txt)
final-accuracy: hmm$(FINALSTEP)/accuracy.txt hmm$(FINALSTEP)/accuracy-qual.txt
json: $(JSONOUT)
+ echo $(JSONOUT) up to date.
# ta-da
$(JSONOUT): hmm$(FINALSTEP)/hmmdefs
@@ -159,6 +160,18 @@ $(JSONOUT): hmm$(FINALSTEP)/hmmdefs
HResults -p -I parm/all2.mlf parm/words $*/recout-qual.mlf | \
tee $@ | head -7
+# evaluate javascript implementation of recognizer
+js-recout.mlf: $(JSONOUT) parm/train.scr
+ ./recog.js -A -o $@ -S parm/train.scr $(JSONOUT)
+js-recout-qual.mlf: $(JSONOUT) parm/qual.scr
+ ./recog.js -A -o $@ -S parm/qual.scr $(JSONOUT)
+js-accuracy.txt: js-recout.mlf parm/all2.mlf parm/words
+ HResults -p -I parm/all2.mlf parm/words js-recout.mlf | \
+ tee $@ | head -7
+js-accuracy-qual.txt: js-recout-qual.mlf parm/all2.mlf parm/words
+ HResults -p -I parm/all2.mlf parm/words js-recout-qual.mlf | \
+ tee $@ | head -7
+
very-clean: clean
$(RM) -rf html parm codebook htk-config
clean:
View
191 hmm.js
@@ -1,16 +1,197 @@
if (typeof define !== 'function') {
var define = require('amdefine')(module);
}
-define([], function() {
+define(['./features'], function(Features) {
+ var tolog = function(x) {
+ return -Math.log(x)*2371.8;
+ };
+ var fromlog = function(x) {
+ return Math.exp(-x/2371.8);
+ };
- var make_recog = function(hmmdef) {
- return function(data_set) {
- // XXX IMPLEMENT ME XXX
- return "A1";
    // A (model, state) pair for the recognition search.
    // NOTE(review): Token is not referenced anywhere else in this file —
    // apparently reserved for a future token-passing implementation;
    // confirm before removing.
    var Token = function(model, state) {
        this.model = model;
        this.state = state;
    };
+
+ var omerge = function() {
+ var r = arguments.length ? arguments[0] : {}, o;
+ for (var i=1; i<arguments.length; i++) {
+ o = arguments[i];
+ for (name in o) {
+ if (o.hasOwnProperty(name)) {
+ r[name] = o[name];
+ }
+ }
+ }
+ return r;
+ };
+
+ var extract_models = function(hmmdef, mkmodel, process_codebook) {
+ var globals = {};
+ var models = [];
+
+ for (var i=0; i<hmmdef.length; i++) {
+ switch(hmmdef[i].type) {
+ case '<codebook>':
+ process_codebook(hmmdef[i].value);
+ break;
+ case '~o':
+ globals = omerge(globals, hmmdef[i].value);
+ break;
+ case '~h':
+ models.push(mkmodel(hmmdef[i].name, globals, hmmdef[i].value));
+ break;
+ default:
+ // ignore other definitions for now.
+ // XXX in the future we might want to expand macro references
+ break;
+ }
+ }
+ return models;
+ };
+
    /**
     * Build a recognizer for discrete-output (vector-quantized) HMMs.
     *
     * `hmmdef` is a parsed HTK hmmdefs file: an array of typed entries
     * (see extract_models).  The returned function takes a data_set,
     * vector-quantizes its features, and returns [modelName, score] for
     * the model whose best (Viterbi) state path has the highest
     * probability.
     *
     * Log probabilities are NEGATED throughout (see tolog), so "maximum
     * probability" corresponds to the MINIMUM stored value; the final
     * score is negated again so that larger is better for the caller.
     */
    var make_discrete_recog = function(hmmdef) {
        var vq_features;
        // The <codebook> entry supplies the VQ codebook; Features.make_vq
        // wraps it in a function applied to each data_set below.
        // (Presumably it attaches the symbol sequence as data_set.vq,
        // which is what we read later — confirm in features.js.)
        var process_codebook = function(codebook) {
            vq_features = Features.make_vq(codebook);
        };
        // Expand a run-length-encoded weight list: each element of `a` is
        // a [value, repeatCount] pair; the result lists `value`
        // `repeatCount` times, one slot per VQ symbol.
        var expand_weightlist = function(a) {
            var r = [];
            for (var i=0; i<a.length; i++) {
                for (var j=0; j<a[i][1]; j++) {
                    r.push(a[i][0]);
                }
            }
            return r;
        };
        // Convert one ~h definition into our internal model form:
        // { name, states }, where each state records its output log-prob
        // table per stream and the list of [predecessor, -log a_ij]
        // transition pairs.  HTK numbers states 1..NumStates with 1 and
        // NumStates non-emitting; our states[k] is HTK state k+1.
        var mkmodel = function(name, globals, def) {
            var states = [], i, j;
            // process output probabilities
            states.push({ id: 0, start: true, pred: [] }); /* entry state */
            for (i=2; i < def.NumStates; i++) {
                states.push({
                    id: states.length,
                    // One expanded DProb table per stream.
                    output: def.States[i].Streams.map(function(d) {
                        return expand_weightlist(d.DProb);
                    }),
                    // XXX we ignore stream weights
                    weights: def.States[i].SWeights,
                    pred: []
                });
                // Sanity check: the expanded table for each stream must
                // match the declared codebook size for that stream.
                // (states[i-1] is the state just pushed: HTK state i.)
                def.States[i].NumMixes.forEach(function(len, j) {
                    console.assert(states[i-1].output[j].length===len);
                });
            }
            states.push({ id: states.length, pred: [] }); /* exit state */
            // process transition matrix
            // TransP is a row-major NumStates x NumStates matrix of
            // probabilities a_ij; record only the nonzero arcs, as
            // negated-log costs, indexed by *destination* state.
            console.assert(def.TransP.type==='square');
            console.assert(def.TransP.rows===def.NumStates);
            for (i=0; i < def.NumStates-1; i++) { /* from state */
                for (j=0; j < def.NumStates; j++) { /* to state */
                    var aij = def.TransP.entries[(i*def.NumStates)+j];
                    if (aij > 0)
                        states[j].pred.push([states[i], tolog(aij)]);
                }
            }
            return { name: name, states: states };
        };
        var models = extract_models(hmmdef, mkmodel, process_codebook);
        console.assert(models.length);

        // Given one input (an array of per-frame VQ symbol vectors, one
        // entry per stream), build a function scoring any single model
        // against it via the Viterbi recursion.
        var make_maxp = function(input) {
            // phi(state, t) = minimum total negated-log probability of
            // any path that starts in the entry state, consumes the first
            // t observations, and ends in `state` having just emitted
            // observation t.  `phi` is passed in as a parameter so the
            // recursive calls can be routed through the memoized wrapper
            // below.
            var phi = function(phi, state, t) {
                var j;
                if (state.start) {
                    return (t===0) ? 0 : Infinity; /* base case */
                }
                if (t===0) return Infinity;

                // compute probability of emitting signal o_t in this state
                // (sum of per-stream negated-log probs == product of probs)
                var o_t = input[t-1];
                var b_j = 0;
                for (j = 0; j<o_t.length; j++) {
                    /* XXX ignoring stream weights here */
                    b_j += state.output[j][o_t[j]];
                }

                // maximized prob of reaching this state
                // (log probs are negated, so max prob == Math.min)
                console.assert(state.pred.length);
                var bestp = phi(phi, state.pred[0][0], t-1) + state.pred[0][1];
                for (j = 1; j < state.pred.length; j++) {
                    var p = phi(phi, state.pred[j][0], t-1) + state.pred[j][1];
                    if (p < bestp) { bestp = p; }
                }
                return bestp + b_j;
            };
            // Cost of finishing in the (non-emitting) exit state after
            // the whole input, entering from pred_state via arc cost aiN.
            var phiN = function(phi, pred_state, aiN) {
                /*
                console.log('-- phi_N('+input.length+')',
                            phi(phi, pred_state, input.length),
                            '+', aiN);
                */
                return phi(phi, pred_state, input.length) + aiN;
            };
            // Score one model: best path cost to the exit state, negated
            // so that a larger return value means "more probable".
            var maxp = function(model) {
                //console.log("Considering "+model.name);

                // need to memoize the computation of phi
                // (memo_table[state.id][t] caches phi values; note the
                // recursion is one level per (state, t), so very long
                // inputs could recurse deeply — no pruning yet)
                var memo_table = model.states.map(function(){ return [] });
                var memoized_phi = function(_, state, t) {
                    if (!(t in memo_table[state.id])) {
                        memo_table[state.id][t] = phi(memoized_phi, state, t);
                        /*
                        console.log('phi_'+state.id+'('+t+')',
                                    memo_table[state.id][t]);
                        */
                    }
                    return memo_table[state.id][t];
                };

                // our log probs are negated, so max prob == min phi
                // (negate result so that 'maxp' makes sense to caller)
                var pred = model.states[model.states.length-1].pred;
                console.assert(pred.length > 0);

                var bestp = phiN(memoized_phi, pred[0][0], pred[0][1]);
                for (var j=1; j<pred.length; j++) {
                    var p = phiN(memoized_phi, pred[j][0], pred[j][1]);
                    if (p < bestp) bestp = p;
                }
                return -bestp;
            };
            return maxp;
        };

        // The recognizer proper: VQ the input, score every model, return
        // the name and score of the best one.
        return function(data_set) {
            vq_features(data_set);
            if (false) return ["A1", 0]; // DEBUGGING: time VQ in isolation

            var maxp = make_maxp(data_set.vq);

            var best=0, bestp = maxp(models[0]), p;
            for (var i=1; i<models.length; i++) {
                p = maxp(models[i]);
                if (p > bestp) {
                    best = i;
                    bestp = p;
                }
            }
            return [models[best].name, bestp];
        }
    }
+
    // Create a recognizer from a parsed HMM definition file.  The
    // returned function maps a data_set to a [modelName, score] pair.
    // Only discrete-output (VQ) HMMs are supported so far.
    var make_recog = function(hmmdef) {
        // XXX handle other types of HMM
        return make_discrete_recog(hmmdef);
    };
return {
+ // utility functions
+ tolog: tolog,
+ fromlog: fromlog,
+ // main recognizer
make_recog: make_recog
};
});
View
67 recog.js
@@ -10,13 +10,15 @@ requirejs(['commander', 'fs', 'q', './features', './hmm', './version'], function
.option('-o, --output <outfile>',
'Output to the specified file (default stdout)',
null)
+ .option('-A, --strip_allograph', "Strip allograph suffix from result")
.option('-S, --script <script file>',
'File with additional command-line arguments',
null)
+ .option('-T, --time', "Don't emit output, just time the recognition.")
.parse(process.argv);
if (program.script) {
- var extra = fs.readFileSync(program.script, 'utf-8').split(/\s+/);
+ var extra= fs.readFileSync(program.script, 'utf-8').trim().split(/\s+/);
program.args.push.apply(program.args, extra);
}
if (program.args.length===0) {
@@ -25,14 +27,14 @@ requirejs(['commander', 'fs', 'q', './features', './hmm', './version'], function
}
var output = process.stdout;
if (program.output) {
- output = fs.createWriteStream(program.output, { encoding: 'utf-8' });
+ if (program.time) {
+ console.error("Timing recognition; skipping output.");
+ } else {
+ output=fs.createWriteStream(program.output, { encoding: 'utf-8' });
+ }
}
var hmmdef = JSON.parse(fs.readFileSync(program.args.shift(), 'utf-8'));
- var vq_features = null;
- if (hmmdef[0].type==='<codebook>') {
- vq_features = Features.make_vq(hmmdef[0].value);
- }
var recognizer = HMM.make_recog(hmmdef);
var readHTK = function(filename) {
@@ -55,20 +57,49 @@ requirejs(['commander', 'fs', 'q', './features', './hmm', './version'], function
};
// read the rest of the files.
- output.write('#!MLF!#\n');
- program.args.forEach(function(filename) {
- output.write(JSON.stringify(filename)+'\n');
+ var do_read = function(filename) {
// read HTK file.
- var data_set = readHTK(filename);
+ return [filename, readHTK(filename)];
+ };
+ var do_delta = function(args) {
+ var filename = args[0], data_set = args[1];
// add missing features
Features.delta_and_accel(data_set);
- if (vq_features) {
- vq_features(data_set);
- }
+ return args;
+ };
+ var do_recog = function(args) {
+ var filename = args[0], data_set = args[1];
// recognize!
- var result = recognizer(data_set);
- output.write(result);
- output.write('\n.\n');
- });
- if (program.output) output.end();
+ return [filename, recognizer(data_set)];
+ };
+ if (program.time) {
+ console.log(program.args.length+" input files.");
+
+ console.time('HTK file input');
+ var input = program.args.map(do_read);
+ console.timeEnd('HTK file input');
+
+ console.time('Delta computation');
+ input = input.map(do_delta);
+ console.timeEnd('Delta computation');
+
+ console.time('Recognition time');
+ var results = input.map(do_recog);
+ console.timeEnd('Recognition time');
+
+ // skip output step.
+ } else {
+ output.write('#!MLF!#\n');
+ program.args.forEach(function(filename) {
+ var result = do_recog(do_delta(do_read(filename)))[1];
+ output.write(JSON.stringify(filename)+'\n');
+ var model = result[0], score = result[1];
+ if (program.strip_allograph) {
+ model = model.replace(/[0-9]+$/, '');
+ }
+ output.write(model+"\t"+score);
+ output.write('\n.\n');
+ });
+ }
+ if (program.output && !program.time) output.end();
});
Please sign in to comment.
Something went wrong with that request. Please try again.