Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Create initial monophone ("single letter") HMMs.

  • Loading branch information...
commit d377e5aae36e53561f6c20a871a86d4036fb01d8 1 parent 7b0c912
@cscott authored
Showing with 90 additions and 38 deletions.
  1. +2 −0  .gitignore
  2. +52 −7 Makefile
  3. +36 −31 read.js
View
2  .gitignore
@@ -1,2 +1,4 @@
html
parm
+hmm?
+train.scr
View
59 Makefile
@@ -2,18 +2,63 @@ UPPER_LETTERS=A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
LOWER_LETTERS=a b c d e f g h i j k l m n o p q r s t u v w x y z
DIGITS=0 1 2 3 4 5 6 7 8 9
-ALL_PARMS=$(foreach l,$(UPPER_LETTERS) $(LOWER_LETTERS) $(DIGITS),parm/$(l).mlf)
-ALL_HTML=$(foreach l,$(UPPER_LETTERS) $(LOWER_LETTERS) $(DIGITS),html/$(l).html)
+# hold out 1 in every TRAINAMT examples for evaluation (TRAINAMT=5 is 20%).
+TRAINAMT=5
-all: global
+#SYMBOLS=$(UPPER_LETTERS) $(LOWER_LETTERS) $(DIGITS)
+SYMBOLS=$(UPPER_LETTERS)
+
+ALL_SCRIPT=$(foreach l,$(SYMBOLS),parm/$(l).scr)
+ALL_LABEL=$(foreach l,$(SYMBOLS),parm/$(l).mlf)
+ALL_HTML=$(foreach l,$(SYMBOLS),html/$(l).html)
+
+all: hmm3/hmmdefs
# ALL_PARMS is no longer defined (the per-symbol parm/*.mlf list is now
# ALL_LABEL); depending on the old name would make this target a silent no-op.
parms: $(ALL_LABEL)
html: $(ALL_HTML)
-html/%.html parm/%.mlf: json/%.json read.js
+html/%.html parm/%.mlf parm/%.scr: json/%.json read.js
@mkdir -p html parm/$*
- ./read.js -H html/$*.html -M parm/$*.mlf -P parm/$* $<
+ ./read.js -T $(TRAINAMT) -H html/$*.html \
+ -M parm/$*.mlf -P parm/$* -S parm/$*.scr \
+ $<
+
+parm/train.scr: $(ALL_SCRIPT)
+ cat $(ALL_SCRIPT) > $@
-global: $(ALL_PARMS)
+parm/all.mlf: $(ALL_LABEL)
+ echo "#!MLF!#" > $@
+ cat $^ | grep -v -F '#!MLF!#' >> $@
+
+# global mean/variance computation
+hmm0/proto hmm0/vFloors: htk-config proto parm/train.scr
mkdir -p hmm0
- HCompV -C htk-config -f 0.01 -m -M hmm0 proto parm/a/0000.htk parm/a/0001.htk
+ HCompV -C htk-config -f 0.01 -m -S parm/train.scr -M hmm0 proto
+
+# create flat-start monophone models
+hmm0/macros: hmm0/vFloors
+ echo "~o <VecSize> 27 <USER_D_A>" > $@
+ cat $< >> $@
+hmm0/hmmdefs hmm0/symbols: hmm0/proto
+ $(RM) -f hmm0/hmmdefs hmm0/symbols
+ touch hmm0/hmmdefs hmm0/symbols
+ for s in $(SYMBOLS); do \
+ echo $$s >> hmm0/symbols ; \
+ echo '~h "'$$s'"' >> hmm0/hmmdefs ; \
+ sed -e '0,/^~h/d' < hmm0/proto >> hmm0/hmmdefs ; \
+ done
+# First re-estimation pass over the flat-start models in hmm0.
+# hmm1/macros is declared as a target alongside hmm1/hmmdefs because the hmm2
+# stage lists it as a prerequisite and loads it with -H; without a rule for it
+# a clean build stops with "No rule to make target 'hmm1/macros'".
+# NOTE(review): assumes HERest writes both macros and hmmdefs into the -M
+# directory -- confirm against the HTK documentation.
+hmm1/hmmdefs hmm1/macros: htk-config hmm0/macros hmm0/hmmdefs hmm0/symbols parm/all.mlf
+ mkdir -p hmm1
+ HERest -C htk-config -I parm/all.mlf -t 250 150 1000 \
+ -S parm/train.scr -H hmm0/macros -H hmm0/hmmdefs -M hmm1 hmm0/symbols
+# Second re-estimation pass, starting from the hmm1 models.
+# hmm2/macros is declared as a target alongside hmm2/hmmdefs because the hmm3
+# stage lists it as a prerequisite and loads it with -H; without a rule for it
+# a clean build stops with "No rule to make target 'hmm2/macros'".
+# NOTE(review): assumes HERest writes both macros and hmmdefs into the -M
+# directory -- confirm against the HTK documentation.
+hmm2/hmmdefs hmm2/macros: htk-config hmm1/macros hmm1/hmmdefs hmm0/symbols parm/all.mlf
+ mkdir -p hmm2
+ HERest -C htk-config -I parm/all.mlf -t 250 150 1000 \
+ -S parm/train.scr -H hmm1/macros -H hmm1/hmmdefs -M hmm2 hmm0/symbols
+hmm3/hmmdefs: htk-config hmm2/macros hmm2/hmmdefs hmm0/symbols parm/all.mlf
+ mkdir -p hmm3
+ HERest -C htk-config -I parm/all.mlf -t 250 150 1000 \
+ -S parm/train.scr -H hmm2/macros -H hmm2/hmmdefs -M hmm3 hmm0/symbols
+
+clean:
+ $(RM) -rf html parm hmm0 hmm1 hmm2 hmm3
View
67 read.js
@@ -22,21 +22,33 @@ program
null)
.option('-H, --html <filename>', 'html file output for previewing data',
null)
+ .option('-T, --train <number>', 'omit 1 in <number> examples from training set', Number, 0)
+ .option('-S, --script <filename>', 'list of parameter files for training', null)
.parse(process.argv);
var input_file = program.args[0];
var data = JSON.parse(fs.readFileSync(input_file, 'utf-8'));
-var html_fd = -1;
-if (program.html) {
- html_fd = fs.openSync(program.html, 'w');
-}
-var p = function(s) {
- if (html_fd >= 0) {
- fs.writeSync(html_fd, s+"\n", null, 'utf8');
+var mklogfunc = function(what, opt_filename) {
+ var fd = -1;
+ if (opt_filename) {
+ fd = fs.openSync(opt_filename, 'w');
}
+ var f = function(s) {
+ if (fd >= 0) {
+ fs.writeSync(fd, s+"\n", null, 'utf8');
+ }
+ };
+ f.close = function() {
+ if (fd >= 0) {
+ fs.closeSync(fd);
+ console.log(what+": "+opt_filename);
+ }
+ };
+ return f;
};
+var p = mklogfunc('HTML output', program.html);
p("<!DOCTYPE HTML>");
p("<html><head><title>Character Visualization</title>");
p("<style type=\"text/css\">");
@@ -47,17 +59,11 @@ p("<body><h1>Character Visualization</h1>");
p("<p>" + data.set.length + " characters, ");
p("avg <span id='avglen'></span> samples.</p>");
-var mlf_fd = -1;
-if (program.mlf) {
- mlf_fd = fs.openSync(program.mlf, 'w');
-}
-var m = function(s) {
- if (mlf_fd >= 0) {
- fs.writeSync(mlf_fd, s+"\n", null, 'utf8');
- }
-};
+var m = mklogfunc('Label file', program.mlf);
m("#!MLF!#");
+var s = mklogfunc('Script file', program.script);
+
var Point = function(x, y, isUp) {
this.x = x; this.y = y; this.isUp = isUp || false;
};
@@ -324,7 +330,7 @@ var avg_len = 0;
var bar = new ProgressBar('Writing features: [:bar] :percent :etas',
{ total: data.set.length, width: 30 });
var featmin, featmax;
-for (var i=0; i<data.set.length; i++, bar.tick()) {
+for (var i=0, n=0; i<data.set.length; i++, bar.tick()) {
var label = data.set[i].name;
normalize(data.set[i]);
draw_letter(data.set[i], "Unipen");
@@ -375,9 +381,9 @@ for (var i=0; i<data.set.length; i++, bar.tick()) {
});
// convert to node-native buffer type and write file
- var filename = i + ".htk";
- while (filename.length < 8) { filename = "0" + filename; }
- var parm_fd = fs.openSync(program.parmdir+"/"+filename, 'w');
+ var filename = "" + i;
+ while (filename.length < 4) { filename = "0" + filename; }
+ var parm_fd = fs.openSync(program.parmdir+"/"+filename+".htk", 'w');
var w = function(arraybuf) {
var b = new Buffer(new Uint8Array(arraybuf));
fs.writeSync(parm_fd, b, 0, b.length, null);
@@ -386,28 +392,27 @@ for (var i=0; i<data.set.length; i++, bar.tick()) {
w(fbuf);
fs.closeSync(parm_fd);
- m('"*/'+program.parmdir+'/'+filename+'"');
+ m('"'+program.parmdir+'/'+filename+'.lab"');
m(label);
m('.');
+
+ if (program.train === 0 || (n % program.train !== 0)) {
+ s(program.parmdir+'/'+filename+'.htk');
+ }
+ n++; // keep separate count just in case we disqualify particular files
}
avg_len /= data.set.length;
p("<script type=\"text/javascript\">");
p("document.getElementById('avglen').innerHTML='"+avg_len+"';");
p("</script>");
-if (html_fd >= 0) {
- fs.closeSync(html_fd);
-}
-if (mlf_fd >= 0) {
- fs.closeSync(mlf_fd);
-}
-
// done w/ progress bar.
console.log("\r ");
// some stats
-if (html_fd >= 0) {
- console.log("HTML output: "+program.html);
-}
+p.close();
+m.close();
+s.close();
+
if (program.parmdir) {
console.log("Parameter files in: "+program.parmdir);
}
Please sign in to comment.
Something went wrong with that request. Please try again.