Skip to content

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
...
  • 9 commits
  • 6 files changed
  • 0 commit comments
  • 1 contributor
Showing with 352 additions and 53 deletions.
  1. +2 −0 .gitignore
  2. +2 −0 AUTHORS
  3. +9 −0 Makefile
  4. +0 −9 mkpages.sh
  5. +22 −0 package.json
  6. +317 −44 read.js
View
2 .gitignore
@@ -0,0 +1,2 @@
+html
+parm
View
2 AUTHORS
@@ -0,0 +1,2 @@
+C. Scott Ananian <cscott@laptop.org> (http://cscott.net)
+# Add yourself here!
View
9 Makefile
@@ -0,0 +1,9 @@
+# Character classes: one JSON input, one HTML preview and one parameter
+# directory exist per entry.
+UPPER_LETTERS=A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
+LOWER_LETTERS=a b c d e f g h i j k l m n o p q r s t u v w x y z
+DIGITS=0 1 2 3 4 5 6 7 8 9
+
+# `parms` names a goal, not a file. Declaring it phony keeps a stray file or
+# directory called "parms" from making the goal look up to date.
+.PHONY: parms
+parms: $(foreach l,$(UPPER_LETTERS) $(LOWER_LETTERS) $(DIGITS),html/$(l).html)
+
+# html/X.html is regenerated when json/X.json or read.js changes; the same
+# read.js run also writes the parameter files into parm/X.
+html/%.html: json/%.json read.js
+	@mkdir -p html parm/$*
+	./read.js -H $@ -d parm/$* $<
View
9 mkpages.sh
@@ -1,9 +0,0 @@
-#!/bin/bash
-mkdir -p html
-for l in \
- a b c d e f g h i j k l m n o p q r s t u v w x y z \
- A B C D E F G H I J K L M N O P Q R S T U V W X Y Z \
- 0 1 2 3 4 5 6 7 8 9 ; do
- echo $l
- ./read.js ../uptools3/json/${l}.json > html/${l}.html
-done
View
22 package.json
@@ -0,0 +1,22 @@
+{
+ "name": "nell-hand",
+ "version": "0.0.1",
+ "description": "handwriting recognition using HMMs",
+ "keywords": [ "nell", "handwriting" ],
+ "homepage": "https://github.com/cscott/nell-hand",
+ "bugs": {
+ "url": "https://github.com/cscott/nell-hand/issues"
+ },
+ "repository": {
+ "type": "git",
+ "url": "git://github.com/cscott/nell-hand.git"
+ },
+ "dependencies": {
+ "requirejs": "1.0.x"
+ },
+ "devDependencies": {},
+ "optionalDependencies": {},
+ "engines": {
+ "node": "~0.7.5"
+ }
+}
View
361 read.js
@@ -1,23 +1,41 @@
#!/usr/bin/node
// running with node 0.7.5, installed 'commander' and 'progress'
+// uses typed arrays, which landed in node 0.5.5.
var CANVAS_RESOLUTION = 100; // edge length of each preview canvas, used for its CSS width/height in px
var RETINA_FACTOR = 1; // multiplier applied to CANVAS_RESOLUTION for the canvas backing size and drawn coordinates
+var DOT_SIZE = 1/40; // radius of the dot drawn at each sample point, as a fraction of the canvas edge
+var SMOOTH_N = 3, SMOOTH_ALPHA = .25; // smooth(): max neighbours used on each side, and weight of the centre sample
+var RESAMPLE_INTERVAL = 1/10; // spacing passed to equidist(), in the normalized coordinates produced by normalize()
+var RESAMPLE_HERTZ = 30; // sample rate written into parameter file
var program = require('commander');
var fs = require('fs');
+var ProgressBar = require('progress');
program // command-line definition (commander)
.version('0.1')
+ .usage('[options] <json input file>')
+ .option('-d, --parmdir <output dir>', 'directory for parameter file output',
+ null) // default null: the main loop skips parameter-file output when this is unset
+ .option('-H, --html <filename>', 'html file output for previewing data',
+ null) // default null: no HTML preview file is opened when this is unset
.parse(process.argv);
var input_file = program.args[0]; // first positional argument: path of the JSON input
var data = JSON.parse(fs.readFileSync(input_file, 'utf-8')); // read and parsed synchronously; the script iterates data.set
-function p(s) {
- console.log(s);
+var html_fd = -1; // file descriptor of the HTML preview; -1 means no HTML output was requested
+if (program.html) {
+ html_fd = fs.openSync(program.html, 'w'); // opened synchronously for writing
}
+var p = function(s) { // emit one line of HTML; silently does nothing when no preview file is open
+ if (html_fd >= 0) {
+ fs.writeSync(html_fd, s+"\n", null, 'utf8'); // a newline is appended after s
+ }
+};
+
p("<!DOCTYPE HTML>");
p("<html><head><title>Character Visualization</title>");
p("<style type=\"text/css\">");
@@ -25,18 +43,67 @@ p("canvas { width: "+CANVAS_RESOLUTION+"px; height: "+CANVAS_RESOLUTION+"px; bor
p("</style></head>");
p("<body><h1>Character Visualization</h1>");
//p('Version', data.version);
-p("<p>" + data.set.length + "samples</p>");
+p("<p>" + data.set.length + " characters, ");
+p("avg <span id='avglen'></span> samples.</p>");
-function normalize(data_set) {
- function mkpt(p) { return { x:p[0], y:p[1] }; }
+var Point = function(x, y, isUp) { // 2-D sample; isUp flags a pen-up point (draw_letter moves to it instead of drawing a line)
+ this.x = x; this.y = y; this.isUp = isUp || false; // isUp becomes false when omitted or falsy
+};
+Point.prototype = { // instance methods forward to the static helpers defined below
+ clone: function() { return new Point(this.x, this.y, this.isUp); }, // independent copy, isUp included
+ equals: function(p) { return Point.equals(this, p); },
+ dist: function(p) { return Point.dist(this, p); },
+ interp: function(p, amt) { return Point.interp(this, p, amt); }
+};
+Point.equals = function(a, b) { // strict equality of x, y and the pen-up flag
+ return (a.x === b.x) && (a.y === b.y) && (a.isUp === b.isUp);
+};
+Point.dist = function(a, b) { // Euclidean distance; isUp is ignored
+ var dx = a.x - b.x, dy = a.y - b.y;
+ return Math.sqrt(dx*dx + dy*dy);
+};
+Point.interp = function(p1, p2, amt) { // linear interpolation: amt=0 gives p1's position, amt=1 gives p2's
+ var x = p1.x + amt*(p2.x - p1.x);
+ var y = p1.y + amt*(p2.y - p1.y);
+ return new Point(x, y, p2.isUp); // the result always takes p2's pen-up flag, whatever amt is
+};
+
+var Box = function(tl, br) { // axis-aligned bounding box; tl holds the minimum x/y, br the maximum x/y
+ this.tl = tl; // stored by reference, and mutated in place by unionPt
+ this.br = br;
+};
+Box.prototype = {
+ unionPt: function(pt) { // grow this box in place until it contains pt
+ if (pt.x < this.tl.x) { this.tl.x = pt.x; }
+ if (pt.y < this.tl.y) { this.tl.y = pt.y; }
+ if (pt.x > this.br.x) { this.br.x = pt.x; }
+ if (pt.y > this.br.y) { this.br.y = pt.y; }
+ },
+ union: function(box) { // grow this box in place until it contains both corners of box
+ this.unionPt(box.tl);
+ this.unionPt(box.br);
+ },
+ size: function() { // returns a plain { width, height } object
+ return { width: this.br.x - this.tl.x,
+ height: this.br.y - this.tl.y };
+ }
+};
+Box.fromPts = function(pts) { // bounding box of an array of points
+ // pts must have at least one element
+ var b = new Box(pts[0].clone(), pts[0].clone()); // corners are clones, so growing the box never modifies pts
+ pts.forEach(function(p) { b.unionPt(p); });
+ return b;
+};
+
+var normalize = function(data_set) {
+ var mkpt = function(p) { return new Point(p[0], p[1]); };
// remove dups
data_set.strokes = data_set.strokes.map(function(stroke) {
stroke = stroke.map(mkpt);
var nstrokes = [stroke[0]];
for (var i=1; i<stroke.length; i++) {
- if (stroke[i].x == stroke[i-1].x &&
- stroke[i].y == stroke[i-1].y)
+ if (stroke[i].equals(stroke[i-1]))
continue;
nstrokes.push(stroke[i]);
}
@@ -44,40 +111,156 @@ function normalize(data_set) {
});
// find bounding box
- // Ugh: this should be a one-liner...
- function minpt(pta, ptb) {
- return {x:Math.min(pta.x, ptb.x), y:Math.min(pta.y, ptb.y)};
- }
- function maxpt(pta, ptb) {
- return {x:Math.max(pta.x, ptb.x), y:Math.max(pta.y, ptb.y)};
- }
- var max = data_set.strokes.map(function(stroke) {
- return stroke.reduce(maxpt);
- }).reduce(maxpt);
- var min = data_set.strokes.map(function(stroke) {
- return stroke.reduce(minpt);
- }).reduce(minpt);
+ var strokeBBs = data_set.strokes.map(function(stroke) {
+ return Box.fromPts(stroke);
+ });
+ var bbox = strokeBBs[0];
+ strokeBBs.forEach(function(bb) { bbox.union(bb); });
// use correct aspect ratio
- var x_size = (max.x - min.x), y_size = (max.y - min.y);
- var size = Math.max(x_size, y_size);
- function norm(pt) {
+ var size = bbox.size();
+ size = Math.max(size.width, size.height);
+ var norm = function(pt) {
// map to [0-1], y=0 at bottom (math style)
- var x = (pt.x - min.x) / size;
- var y = (pt.y - min.y) / size;
- return {x:x, y:y};
- }
+ var x = (pt.x - bbox.tl.x) / size;
+ var y = (pt.y - bbox.tl.y) / size;
+ return new Point(x, y);
+ };
// remove dups
data_set.strokes = data_set.strokes.map(function(stroke) {
return stroke.map(norm);
});
-}
+};
-function smooth(data_set) {
-}
+var smooth = function(data_set) { // replace every stroke of data_set with a weighted moving average of its points
+ data_set.strokes = data_set.strokes.map(function(stroke) {
+ var nstroke = [];
+ for (var i=0; i<stroke.length; i++) {
+ var acc = new Point(stroke[i].x * SMOOTH_ALPHA, // the centre sample is weighted by SMOOTH_ALPHA
+ stroke[i].y * SMOOTH_ALPHA ); // no isUp is passed, so smoothed points have isUp === false
+ var n = SMOOTH_N;
+ // [0, 1, 2, 3, 4 ] .. N = 2, length=5
+ while (n>0 && (i<n || i>=(stroke.length-n))) // shrink the window until it fits inside the stroke
+ n--;
+ for (var j=1; j<=n; j++) { // each of the n neighbours on either side is added with weight 1
+ acc.x += stroke[i-j].x + stroke[i+j].x;
+ acc.y += stroke[i-j].y + stroke[i+j].y;
+ }
+ acc.x /= (2*n + SMOOTH_ALPHA); // divide by the total weight; with n === 0 (stroke ends) this restores the original coordinate
+ acc.y /= (2*n + SMOOTH_ALPHA);
+ nstroke.push(acc);
+ }
+ return nstroke;
+ });
+};
+
+var singleStroke = function(data_set) { // concatenate all strokes of data_set into one stroke, marking where the pen was lifted
+ var nstroke = [];
+ data_set.strokes.forEach(function(stroke) {
+ // add "pen up" stroke.
+ var first = stroke[0];
+ nstroke.push(new Point(first.x, first.y, true/*up!*/)); // a copy of the stroke's first point, flagged pen-up
+ for (var j = 1; j < stroke.length; j++) { // the remaining points are appended as-is (same objects)
+ nstroke.push(stroke[j]);
+ }
+ });
+ data_set.strokes = [ nstroke ]; // strokes now holds exactly one stroke, as equidist() asserts
+};
+var equidist = function(data_set, dist) {
+ console.assert(data_set.strokes.length===1);
+ var stroke = data_set.strokes[0];
+ var nstroke = [];
+ var last = stroke[0];
+ var d2next = 0;
+ stroke.forEach(function(pt) {
+ var d = Point.dist(last, pt);
+
+ while (d2next <= d) {
+ var amt = (d===0)?0:(d2next/d);
+ nstroke.push(Point.interp(last, pt, amt));
+ d2next += dist;
+ }
+ d2next -= d;
+ last = pt;
+ });
+ // XXX: what should we do with the last point?
+ data_set.strokes = [ nstroke ];
+};
+
+var features = function(data_set) {
+ var points = data_set.strokes[0];
+ var features = points.map(function() { return []; });
+ for (var i=0; i<points.length; i++) {
+ var m2 = points[(i<2) ? 0 : (i-2)];
+ var m1 = points[(i<1) ? 0 : (i-1)];
+ var pt = points[i];
+ var p1 = points[((i+1)<points.length) ? (i+1) : (points.length-1)];
+ var p2 = points[((i+2)<points.length) ? (i+2) : (points.length-1)];
+
+ var dx1 = p1.x - m1.x, dy1 = p1.y - m1.y;
+ var ds1 = Math.sqrt(dx1*dx1 + dy1*dy1);
+
+ var dx2 = p2.x - m2.x, dy2 = p2.y - m2.y;
+ var ds2 = Math.sqrt(dx2*dx2 + dy2*dy2);
+
+ var bb = Box.fromPts([ m2, m1, pt, p1, p2 ]).size();
+ var L = m2.dist(m1) + m1.dist(pt) + pt.dist(p1) + p1.dist(p2);
+
+ // http://mathworld.wolfram.com/Point-LineDistance2-Dimensional.html
+ var dist2line = function(pp) {
+ // x0 = pp.x ; x1 = m2.x ; x2 = p2.x
+ // y0 = pp.y ; y1 = m2.y ; y2 = p2.y
+ // |(x2-x1)(y1-y0) - (x1-x0)(y2-y1)| / ds2
+ // | dx2 * (m2.y - pp.y) - (m2.x - pp.x)*dy2 | / ds2
+ return Math.abs(dx2*(m2.y-pp.y) - dy2*(m2.x-pp.x)) / ds2;
+ };
+ var d0 = dist2line(m1), d1 = dist2line(pt), d2 = dist2line(p1);
+ var dN = 3;
+ if (m1.equals(m2)) dN--;
+ if (p1.equals(p2)) dN--;
+
+ features[i] = [
+ // curvature (fill in in next pass)
+ 0,
+ 0,
+ // writing direction
+ dx1/ds1,
+ dy1/ds1,
+ // vertical position.
+ pt.y,
+ // aspect
+ (bb.height - bb.width) / (bb.height + bb.width),
+ // curliness
+ (L / Math.max(bb.height, bb.width)) - 2,
+ // linearity
+ (d0*d0 + d1*d1 + d2*d2) / dN,
+ // slope
+ dx2/ds2,
+ ];
+ }
+ // fill in curvature features
+ for (var i=0; i<features.length; i++) {
+ var m1 = features[(i<1) ? 0 : (i-1)];
+ var ft = features[i];
+ var p1 = features[((i+1)<features.length)? (i+1) : (features.length-1)];
+
+ var cosm1 = m1[2], sinm1 = m1[3];
+ var cosp1 = p1[2], sinp1 = p1[3];
+ ft[0] = (cosm1*cosp1) + (sinm1*sinp1);
+ ft[1] = (cosm1*sinp1) - (sinm1*cosp1);
+ }
+ // rescale to normalize to (approximately) [-1,1]
+ for (var i=0; i<features.length; i++) {
+ features[i][4] = (2 * features[i][4]) - 1;
+ features[i][6] = (((features[i][6] + 1) / 3.2) * 2) - 1;
+ features[i][7] = (features[i][7] * 100) - 1;
+ }
+ // save features
+ data_set.features = features;
+};
var canvas_id = 0;
-function draw_letter(data_set, caption) {
+var draw_letter = function(data_set, caption) {
// data_set should be normalized (range [0,1], dups removed)
var id = "c" + (canvas_id++);
p("<canvas id="+JSON.stringify(id)+" title="+JSON.stringify(caption)+"></canvas>");
@@ -87,35 +270,125 @@ function draw_letter(data_set, caption) {
p(" canvas.width = canvas.height = "+CANVAS_RESOLUTION*RETINA_FACTOR+";");
p(" var ctx = canvas.getContext(\"2d\");");
var norm = function(pt) {
- return { x: pt.x*CANVAS_RESOLUTION*RETINA_FACTOR,
- y: (1-pt.y)*CANVAS_RESOLUTION*RETINA_FACTOR };
+ return new Point( pt.x *CANVAS_RESOLUTION*RETINA_FACTOR,
+ (1-pt.y)*CANVAS_RESOLUTION*RETINA_FACTOR,
+ pt.isUp);
};
// set ctx.stroke style, whatever that property is.
data_set.strokes.forEach(function(stroke) {
p("ctx.beginPath();");
- var start = norm(stroke[0]);
- p("ctx.moveTo("+start.x+","+start.y+");");
- for (var i=1; i<stroke.length; i++) {
- var pt = norm(stroke[i]);
- p("ctx.lineTo("+pt.x+","+pt.y+");");
- }
+ stroke.map(norm).forEach(function(pt, i) {
+ if (i==0 || pt.isUp) {
+ p("ctx.moveTo("+pt.x+","+pt.y+");");
+ } else {
+ p("ctx.lineTo("+pt.x+","+pt.y+");");
+ }
+ });
p("ctx.stroke();");
});
+
+ // points
+ p("ctx.beginPath();");
+ data_set.strokes.forEach(function(stroke) {
+ stroke.map(norm).forEach(function(pt) {
+ p("ctx.arc("+pt.x+","+pt.y+","+
+ (DOT_SIZE*CANVAS_RESOLUTION*RETINA_FACTOR)+","+
+ "0,2*Math.PI,true);");
+ });
+ });
+ p("ctx.fill();");
+
// label data
p(" ctx.fillStyle=\"#008\";");
p(" ctx.font=\""+(10*RETINA_FACTOR)+"px sans-serif\";");
p(" ctx.fillText("+JSON.stringify(data_set.source+" "+data_set.start)+", 0, 10, "+CANVAS_RESOLUTION*RETINA_FACTOR+");");
p("})();");
p("</script>");
-}
+};
// okay, draw the letters!
-for (var i=0; i<data.set.length; i++) {
+var avg_len = 0; // running total of resampled points; divided by the character count after the loop
+var bar = new ProgressBar('Writing features: [:bar] :percent :etas',
+ { total: data.set.length, width: 30 });
+var featmin, featmax; // per-index minimum and maximum over every feature vector seen
+for (var i=0; i<data.set.length; i++, bar.tick()) { // tick lives in the update clause so 'continue' still advances the bar
normalize(data.set[i]);
draw_letter(data.set[i], "Unipen");
-/*
smooth(data.set[i]);
- draw_letter(data.set[i], "Smoothed");
-*/
+ singleStroke(data.set[i]);
+ //draw_letter(data.set[i], "Smoothed");
+
+ equidist(data.set[i], RESAMPLE_INTERVAL);
+ draw_letter(data.set[i], "Resampled");
+
+ avg_len += data.set[i].strokes[0].length;
+
+ features(data.set[i]);
+ if (i==0) { // seed the extremes with copies of the first character's first vector
+ featmax = data.set[i].features[0].slice(0); // NOTE(review): features[0] is undefined if the first character yields no points
+ featmin = data.set[i].features[0].slice(0);
+ }
+ data.set[i].features.forEach(function(featvect) {
+ featvect.forEach(function(f, j) {
+ if (f > featmax[j]) { featmax[j] = f; }
+ if (f < featmin[j]) { featmin[j] = f; }
+ });
+ });
+
+ if (!program.parmdir) continue; // no -d option: nothing more to write for this character
+
+ // Make 12-byte header
+ var nfeat = data.set[i].features.length;
+ if (nfeat === 0) continue; // hm, strange.
+ var featvlen = data.set[i].features[0].length;
+
+ var hbuf = new ArrayBuffer(12); // NOTE(review): the typed-array views below store values in host byte order; confirm the reader of these files expects that
+ // nSamples - number of samples in file (4-byte integer)
+ new Uint32Array(hbuf, 0)[0] = nfeat;
+ // sampPeriod - sample period in 100ns units (4-byte integer)
+ new Uint32Array(hbuf, 4)[0] = Math.round(10000000/RESAMPLE_HERTZ);
+ // sampSize - number of bytes per sample (2-byte integer)
+ new Uint16Array(hbuf, 8)[0] = featvlen * 4; // 4 bytes per Float32 feature value
+ // parmKind - a code indicating the sample kind (2-byte integer)
+ new Uint16Array(hbuf,10)[0] = 9; // USER: user defined sample kind
+
+ // Make a Float32 array w/ the feature vector.
+ var fbuf = new ArrayBuffer(4*nfeat*featvlen);
+ var featv = new Float32Array(fbuf);
+ data.set[i].features.forEach(function (fv, j) {
+ featv.set(fv, j*featvlen); // vectors are laid out back to back, one per sample
+ });
+
+ // convert to node-native buffer type and write file
+ var filename = i + ".htk";
+ while (filename.length < 8) { filename = "0" + filename; } // left-pad with zeros to 8 characters
+ var parm_fd = fs.openSync(program.parmdir+"/"+filename, 'w');
+ var w = function(arraybuf) { // write the whole ArrayBuffer to the open parameter file
+ var b = new Buffer(new Uint8Array(arraybuf));
+ fs.writeSync(parm_fd, b, 0, b.length, null);
+ };
+ w(hbuf); // header first, then the feature data
+ w(fbuf);
+ fs.closeSync(parm_fd);
+}
+avg_len /= data.set.length; // mean number of resampled points per character (NaN when the set is empty)
+p("<script type=\"text/javascript\">");
+p("document.getElementById('avglen').innerHTML='"+avg_len+"';"); // fills the 'avglen' span emitted in the page header
+p("</script>");
+
+if (html_fd >= 0) {
+ fs.closeSync(html_fd); // html_fd keeps its value, so the check further down still reports the HTML file
+}
+ // done w/ progress bar.
+console.log("\r ");
+// some stats
+if (html_fd >= 0) {
+ console.log("HTML output: "+program.html);
+}
+if (program.parmdir) {
+ console.log("Parameter files in: "+program.parmdir);
}
+console.log("Average # features: "+avg_len); // the same per-character point average as written to the HTML page
+console.log("Max feat: "+featmax);
+console.log("Min feat: "+featmin);

No commit comments for this range

Something went wrong with that request. Please try again.