Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

refactor training and collection scripts and add readme detailing training process
  • Loading branch information...
commit 9de1239826951ce276cfbb7a9c8ea6959110eb1d 1 parent 778d6fb
@harthur authored
View
19 features.js
@@ -0,0 +1,19 @@
+var hog = require("hog-descriptor"),
+ utils = require("./utils");
+
+var defaultParams = {
+ "cellSize": 6,
+ "blockSize": 2,
+ "blockStride": 1,
+ "bins": 6,
+ "norm": "L2"
+}
+
+var size = 48;
+
+exports.extractFeatures = function(canvas, params) {
+ canvas = utils.resizeCanvas(canvas, size, size);
+
+ var descriptor = hog.extractHOG(canvas, params || defaultParams);
+ return descriptor;
+}
View
72 kittydar.js
@@ -2,58 +2,41 @@ var brain = require("brain"),
hog = require("hog-descriptor"),
nms = require("./nms");
-var network = require("./network.js");
-
-var net = new brain.NeuralNetwork().fromJSON(network);
-
-if (process.arch) {
- // in node
+if (process.arch) { // in node
var Canvas = (require)('canvas');
}
-function createCanvas (width, height) {
- if (typeof Canvas !== 'undefined') {
- // have node-canvas
- return new Canvas(width, height);
- }
- else {
- // in browser
- var canvas = document.createElement('canvas');
- canvas.setAttribute('width', width);
- canvas.setAttribute('height', height);
- return canvas;
- }
-}
+var network = require("./network.json");
+var net = new brain.NeuralNetwork().fromJSON(network);
-var kittydar = {
+var params = {
patchSize: 48, // size of training images in px
-
minSize: 48, // starting window size
-
resize: 360, // initial image resize size in px
-
- threshold: 0.9999, // probablity threshold for classifying
-
+ threshold: 0.995, // probablity threshold for classifying
scaleStep: 6, // scaling step size in px
-
shiftBy: 6, // px to slide window by
-
- overlapThresh: 0.3, // min overlap ratio to classify as an overlap
-
+ overlapThresh: 0.5, // min overlap ratio to classify as an overlap
minOverlaps: 2, // minumum overlapping rects to classify as a head
-
HOGparams: { // parameters for HOG descriptor
cellSize: 6,
blockSize: 2,
blockStride: 1,
bins: 6,
norm: "L2"
- },
+ }
+}
+var kittydar = {
detectCats: function(canvas, options) {
- this.setOptions(options || {});
+ if (options) {
+ for (var opt in options) {
+ params[opt] = options[opt];
+ }
+ }
- var resizes = this.getAllSizes(canvas, this.minSize);
+ // get canvases of the image at different scales
+ var resizes = this.getAllSizes(canvas, params.minSize);
var cats = [];
resizes.forEach(function(resize) {
@@ -65,25 +48,18 @@ var kittydar = {
return cats;
},
- setOptions: function(options) {
- for (var opt in options) {
- this[opt] = options[opt];
- }
- },
-
getAllSizes: function(canvas, minSize) {
// For use with Worker threads, return canvas ImageDatas
// resized to accomodate various window sizes
- minSize = minSize || this.minSize;
+ minSize = minSize || params.minSize;
// resize canvas to cut down on number of windows to check
- var resize = this.resize;
var max = Math.max(canvas.width, canvas.height)
- var scale = Math.min(max, resize) / max;
+ var scale = Math.min(max, params.resize) / max;
var resizes = [];
- for (var size = minSize; size < max; size += this.scaleStep) {
+ for (var size = minSize; size < max; size += params.scaleStep) {
var winScale = (minSize / size) * scale;
var imagedata = this.scaleCanvas(canvas, winScale);
@@ -108,7 +84,7 @@ var kittydar = {
},
isCat: function(vectors) {
- var features = hog.extractHOGFromVectors(vectors, this.HOGparams);
+ var features = hog.extractHOGFromVectors(vectors, params.HOGparams);
var prob = net.runInput(features)[0];
return prob;
@@ -122,14 +98,14 @@ var kittydar = {
var width = imagedata.width,
height = imagedata.height;
- var size = this.patchSize;
+ var size = params.patchSize;
- for (var y = 0; y + size < height; y += this.shiftBy) {
- for (var x = 0; x + size < width; x += this.shiftBy) {
+ for (var y = 0; y + size < height; y += params.shiftBy) {
+ for (var x = 0; x + size < width; x += params.shiftBy) {
var win = getRect(vectors, x, y, size, size);
var prob = this.isCat(win);
- if (prob > this.threshold) {
+ if (prob > params.threshold) {
cats.push({
x: Math.floor(x / scale),
y: Math.floor(y / scale),
View
1  network.js
0 additions, 1 deletion not shown
View
6 testing/test.js
@@ -20,9 +20,13 @@ function runTest() {
if (err) throw err;
var images = files.filter(function(file) {
- return path.extname(file) == ".png";
+ return path.extname(file) == ".jpg";
})
+ images = images.slice(0, 3);
+
+ console.log(images);
+
async.forEach(images, function(file, done) {
file = dir + file;
View
93 training/collect.js
@@ -0,0 +1,93 @@
+var fs = require("fs"),
+ path = require("path"),
+ Canvas = require("canvas"),
+ utils = require("../utils");
+
+exports.collectImages = collectImages;
+exports.getDir = getDir;
+
+/*
+ * Collect the canvas representations of the images in the positive and
+ * negative directories and return
+ * an array of objects that look like:
+ * {
+ * canvas: <Canvas object>,
+ * file: 'test.jpg',
+ * iscat: true
+ * }
+ */
+function collectImages(posDir, negDir, samples, limit) {
+ // number of samples to extract from each negative, 0 for whole image
+ samples = samples || 0;
+
+ // max number of images to collect per directory
+ limit = limit || 1000;
+
+ var pos = getDir(posDir, true, 0, limit);
+ var neg = getDir(negDir, false, samples, limit);
+ return pos.concat(neg);
+}
+
+function getDir(dir, isCat, samples, limit) {
+ var files = fs.readdirSync(dir);
+
+ var images = files.filter(function(file) {
+ return (path.extname(file) == ".png"
+ || path.extname(file) == ".jpg");
+ });
+
+ images = images.slice(0, limit);
+
+ var data = [];
+ for (var i = 0; i < images.length; i++) {
+ var file = dir + "/" + images[i];
+ try {
+ var canvas = utils.drawImgToCanvasSync(file);
+ }
+ catch(e) {
+ console.log(e, file);
+ continue;
+ }
+
+ var canvases = extractSamples(canvas, samples);
+
+ for (var j = 0; j < canvases.length; j++) {
+ data.push({
+ canvas: canvases[j],
+ file: file,
+ isCat: isCat ? 1 : 0
+ });
+ }
+ }
+
+ return data;
+}
+
+
+function extractSamples(canvas, num) {
+ if (num == 0) {
+ // 0 means "don't sample"
+ return [canvas];
+ }
+
+ var min = 48;
+ var max = Math.min(canvas.width, canvas.height);
+
+ var canvases = [];
+ for (var i = 0; i < num; i++) {
+ var length = Math.max(min, Math.ceil(Math.random() * max));
+
+ var x = Math.floor(Math.random() * (max - length));
+ var y = Math.floor(Math.random() * (max - length));
+
+ canvases.push(cropCanvas(canvas, x, y, length, length));
+ }
+ return canvases;
+}
+
+function cropCanvas(canvas, x, y, width, height) {
+ var cropCanvas = new Canvas(width, height);
+ var context = cropCanvas.getContext("2d");
+ context.drawImage(canvas, x, y, width, height, 0, 0, width, height);
+ return cropCanvas;
+}
View
22 training/collection/README.md
@@ -0,0 +1,22 @@
+## collection
+
+The goal of collection is to get a folder of positive (cat head) images and a folder of negative (non-cat) images to train the classifier with.
+
+### creating the positives
+
+To get the positives, first download this [dataset of cat pictures](http://137.189.35.203/WebUI/CatDatabase/catData.html). There should be folders called CAT_00, CAT_01, etc. Take the images from all of these and combine into one directory. Also remove the file "00000003_019.jpg.cat" and add [00000003_015.jpg.cat](http://137.189.35.203/WebUI/CatDatabase/Data/00000003_015.jpg.cat).
+
+Run the script to rotate and then crop out the cat head from each image. If you put the cat dataset in a folder called "CATS" and you want to put the cropped images in a folder called "POSITIVES":
+
+`node make-positives.js CATS POSITIVES`
+
+### creating the negatives
+
+If you don't already have a bunch of non-cat pictures, you can fetch recent images from Flickr and save them in a folder called "NEGATIVES" by running:
+
+`ruby fetch-negatives.rb NEGATIVES`
+
+You'll need at least 10,000 images.
+
+If you're getting images from Flickr, some will contain cats for sure, so you'll need to weed those out by taking a close look at your hard negatives.
+
View
10 training/collection/flickr.rb → training/collection/fetch-negatives.rb
@@ -5,22 +5,22 @@
FlickRaw.api_key="0cc11cffc8a238efef4dfa6dca255a44"
FlickRaw.shared_secret="5f76a97053f99673"
-$count = 0
-
$fetched = Hash.new
+$dir = ARGV[0]
+
def getPage(page)
list = flickr.photos.getRecent :per_page => 500, :page => page
list.each do |photo|
- url = "http://farm#{photo.farm}.staticflickr.com/#{photo.server}/#{photo.id}_#{photo.secret}_c.jpg"
+ url = "http://farm#{photo.farm}.staticflickr.com/#{photo.server}/#{photo.id}_#{photo.secret}_-.jpg"
if $fetched[url] != 1
$fetched[url] = 1
name = rand(100000000000)
- file = "NEGS_FLICKR/#{name}.jpg"
+ file = "#{$dir}/#{name}.jpg"
puts file
@@ -28,11 +28,11 @@ def getPage(page)
file << open(url).read
end
puts "saved to #{file}"
- $count+=1
end
end
end
+# gets 120 x 500 = 60,000 images
120.times do |i|
getPage(i)
end
View
50 training/collection/make-negs.js → training/collection/make-negatives.js
@@ -1,61 +1,65 @@
-var http = require("http"),
- url = require("url"),
- fs = require("fs"),
+var fs = require("fs"),
path = require("path"),
+ nomnom = require("nomnom"),
Canvas = require("canvas"),
- _ = require("underscore"),
utils = require("../../utils");
-var dir = __dirname + "/NEGS_FLICKR/";
-var outdir = __dirname + "/NEGS_SAMPLED3/";
+var opts = nomnom.options({
+ indir: {
+ position: 0,
+ default: __dirname + "/NEGS_FLICKR/",
+ help: "Directory of full-sizes negative images"
+ },
+ outdir: {
+ position: 1,
+ default: __dirname + "/NEGATIVES/",
+ help: "Directory to save cropped image sections"
+ }
+}).colors().parse();
-var part = parseInt(process.argv[2]);
-var perFile = 1;
-
-fs.readdir(dir, function(err, files) {
+fs.readdir(opts.indir, function(err, files) {
if (err) throw err;
var images = files.filter(function(file) {
return path.extname(file) == ".jpg";
});
- images = images.slice(9500 * part, 9500 * (part + 1));
- console.log(images.length);
+ console.log(images.length, "images to process");
images.forEach(function(image) {
+ var file = opts.indir + "/" + image;
try {
- var canvas = utils.drawImgToCanvasSync(dir + image);
+ var canvas = utils.drawImgToCanvasSync(file);
}
catch(e) {
- console.log(e, dir + image);
+ console.log(e, file);
return;
}
- var canvases = generateFromRaw(canvas);
+ var canvases = extractSamples(canvas);
canvases.forEach(function(canvas) {
var name = Math.floor(Math.random() * 10000000000);
- var file = outdir + name + ".jpg";
+ var file = opts.outdir + "/" + name + ".jpg";
- utils.writeCanvasToFile(canvas, file, function() {
- console.log("wrote to", file)
- });
+ utils.writeCanvasToFileSync(canvas, file);
});
});
})
-function generateFromRaw(canvas) {
+function extractSamples(canvas, num) {
var min = 48;
var max = Math.min(canvas.width, canvas.height);
- var canvases = _.range(0, perFile).map(function() {
+ var canvases = [];
+ for (var i = 0; i < num; i++) {
var length = Math.max(48, Math.ceil(Math.random() * max));
var x = Math.floor(Math.random() * (max - length));
var y = Math.floor(Math.random() * (max - length));
- return cropCanvas(canvas, x, y, length, length);
- })
+ canvases.push(cropCanvas(canvas, x, y, length, length));
+ }
return canvases;
}
View
2  training/collection/make-positives.js
@@ -1,9 +1,7 @@
var fs = require("fs"),
path = require("path"),
- async = require("async"),
nomnom = require("nomnom"),
Canvas = require("canvas"),
- cropper = require("./cropper"),
utils = require("../../utils");
var opts = nomnom.options({
View
95 training/collection/mine-negatives.js
@@ -1,95 +0,0 @@
-var fs = require("fs"),
- path = require("path"),
- brain = require("brain"),
- async = require("async"),
- _ = require("underscore"),
- features = require("../../features"),
- utils = require("../../utils");
-
-var trained = require("../network-6-random.json");
-
-var dir = __dirname + "/NEGS_SAMPLED/";
-var minedDir = __dirname + "/NEGS_HARD1_RAND/";
-
-var params = {
- cellSize: 6
-}
-
-var part = parseInt(process.argv[2]);
-
-console.log("mining hard negatives from part", part);
-
-fs.readdir(dir, function(err, files) {
- if (err) throw err;
-
- var images = files.filter(function(file) {
- return path.extname(file) == ".jpg";
- });
-
- // to get around open fd limit
- images = images.slice(9500 * part, 9500 * (part + 1));
-
- console.log(images.length)
-
- async.map(images, function(file, done) {
- file = dir + file;
-
- utils.drawImgToCanvas(file, function(err, canvas) {
- done(null, {
- canvas: canvas,
- file: file,
- isCat: false,
- err: err
- });
- });
- },
- function(err, canvases) {
- saveFalsePos(canvases);
- });
-})
-
-function saveFalsePos(canvases) {
- canvases = canvases.filter(function(canvas) {
- return !canvas.err;
- });
-
- var data = canvases.map(function(canvas) {
- var fts = features.extractFeatures(canvas.canvas, params);
- return {
- file: canvas.file,
- input: fts,
- output: [canvas.isCat]
- };
- });
-
- data = _(data).sortBy(function() {
- return Math.random();
- });
-
- var network = new brain.NeuralNetwork().fromJSON(trained);
-
- var stats = network.test(data);
-
- console.log(stats.misclasses.length, "misclasses")
- console.log(stats.falsePos, "false positives");
- console.log(stats.trueNeg, "true negatives");
- console.log(stats.total, "total");
-
- stats.misclasses.forEach(function(misclass) {
- if (misclass.expected == 0) {
- var file = minedDir + path.basename(misclass.file);
- copyFile(misclass.file, file);
- }
- });
-}
-
-
-function copyFile(source, dest, callback) {
- newFile = fs.createWriteStream(dest);
- oldFile = fs.createReadStream(source);
- oldFile.pipe(newFile);
-
- oldFile.on('end', function() {
- if (callback) callback();
- })
-}
View
197 training/cross-validate.js
@@ -1,110 +1,131 @@
var fs = require("fs"),
- brain = require("brain"),
path = require("path"),
- async = require("async"),
_ = require("underscore"),
+ nomnom = require("nomnom"),
+ brain = require("brain"),
+ features = require("../features"),
utils = require("../utils"),
- features = require("../features");
-
+ collect = require("./collect");
-testParams({
- cellSize: 4
-});
+var opts = nomnom.options({
+ posDir: {
+ position: 0,
+ default: __dirname + "/collection/POSITIVES/",
+ help: "Directory of cat head images"
+ },
+ negDir: {
+ position: 1,
+ default: __dirname + "/collection/NEGATIVES/",
+ help: "Directory of negative images"
+ },
+ limit: {
+ default: 10000,
+ help: "Max images to collect from each directory"
+ }
+}).colors().parse();
+
+
+var combos = [{
+ HOG: {
+ cellSize: 6,
+ blockSize: 2,
+ blockStride: 1,
+ bins: 6,
+ norm: "L2"
+ },
+ nn: {
+ hiddenLayers: [10, 10]
+ },
+ train: {
+ errorThresh: 0.007
+ }
+}];
+
+/*
+{
+ HOG: {
+ cellSize: 6,
+ blockSize: 2,
+ blockStride: 1,
+ bins: 6,
+ norm: "L2"
+ },
+ nn: {
+ hiddenLayers: [10, 10]
+ },
+ train: {
+ errorThresh: 0.007
+ }
+}
+];
+*/
-function testParams(params) {
- getCanvases(function(canvases) {
- canvases = canvases.filter(function(canvas) {
- return canvas.err === null;
- });
+console.log("testing", combos.length, "combinations");
- var data = canvases.map(function(canvas) {
- var fts = features.extractFeatures(canvas.canvas, params);
- return {
- file: canvas.file,
- input: fts,
- output: [canvas.isCat]
- };
- });
+testAll(combos);
- console.log("training on", data.length)
+function testAll(combos) {
+ var canvases = collect.collectImages(opts.posDir, opts.negDir, 1);
- var opts = {
- hiddenLayers: [2]
- };
- var trainOpts = {
- errorThresh: 0.006,
- log: true
- };
+ console.log("collected", canvases.length);
- var stats = brain.crossValidate(brain.NeuralNetwork, data, opts, trainOpts);
- stats.featureSize = data[0].input.length;
-
- console.log("params", stats.params);
- console.log("stats", stats.stats);
- console.log("avgs", stats.avgs);
-
- fs.writeFile('misclasses.json', JSON.stringify(stats.misclasses, 4), function (err) {
- if (err) throw err;
- console.log('saved misclasses to misclasses.json');
- });
-
- var minError = 1;
- var network;
-
- stats.sets.forEach(function(set) {
- if (set.error < minError) {
- minError = set.error;
- network = set.network;
- }
- })
-
- var json = JSON.stringify(network, 4)
- fs.writeFile('cv-network.json', json, function (err) {
- if (err) throw err;
- console.log('saved network to cv-network.json');
- });
- })
-}
+ var tests = [];
-function getCanvases(callback) {
- var posDir = __dirname + "/POSITIVES_TRAIN/";
+ for (var i = 0; i < combos.length; i++) {
+ var params = combos[i];
+ console.log("testing", i + 1, params)
- fs.readdir(posDir, function(err, files) {
- if (err) throw err;
+ var stats = testParams(canvases, params);
+ var test = {
+ params: params,
+ featureSize: stats.featureSize,
+ avgs: stats.avgs,
+ stats: stats.stats
+ };
- getDir(posDir, files, 1, function(posData) {
- var negsDir = __dirname + "/NEGATIVES_MIXED/";
- fs.readdir(negsDir, function(err, files) {
- if (err) throw err;
+ tests.push(test);
+ console.log(test);
- getDir(negsDir, files, 0, function(negData) {
- var data = posData.concat(negData);
+ if (i == combos.length - 1) {
+ console.log("\n" + getPrintout(tests));
- callback(data);
- })
- })
- })
- });
+ fs.writeFile('tests.json', JSON.stringify(tests, 4), function (err) {
+ if (err) throw err;
+ console.log('saved tests to tests.json');
+ });
+ }
+ }
}
-function getDir(dir, files, isCat, callback) {
- var limit = 5000;
- var images = files.filter(function(file) {
- return path.extname(file) == ".jpg";
- });
- images = images.slice(0, limit);
+function testParams(canvases, params) {
+ var data = canvases.map(function(canvas) {
+ var fts = features.extractFeatures(canvas.canvas, params.HOG);
+ return {
+ input: fts,
+ output: [canvas.isCat]
+ };
+ })
- var data = [];
+ var trainOpts = params.train;
+ trainOpts.log = true;
- async.map(images, function(file, done) {
- file = dir + file;
+ var stats = brain.crossValidate(brain.NeuralNetwork, data,
+ params.nn, trainOpts);
+ stats.featureSize = data[0].input.length;
+ return stats;
+}
- utils.drawImgToCanvas(file, function(err, canvas) {
- done(null, {canvas: canvas, file: file, isCat: isCat, err: err});
- });
- },
- function(err, canvases) {
- console.log("got one directory of images")
- callback(canvases);
+function getPrintout(tests) {
+ var sorted = _(tests).sortBy(function(test) {
+ return test.stats.falsePos;
+ });
+ var lines = sorted.map(function(test) {
+ return JSON.stringify(test.params) + " "
+ + "size: " + test.featureSize + " "
+ + "p: " + test.stats.precision.toFixed(3) + " "
+ + "r: " + test.stats.recall.toFixed(3) + " "
+ + "a: " + test.stats.accuracy.toFixed(3) + " "
+ + "fp: " + test.stats.falsePos;
});
+ return lines.join("\n");
}
View
167 training/find-params.js
@@ -1,167 +0,0 @@
-var fs = require("fs"),
- brain = require("brain"),
- path = require("path"),
- async = require("async"),
- _ = require("underscore"),
- utils = require("../utils"),
- features = require("../features");
-
-var limit = 4000;
-
-
-function getCombos() {
- var cellSizes = [4, 6];
- var bins = [6];
- var strides = [1]; //, 0.5];
- var norms = ["L2"];
- var blockSize = 2;
-
- var combos = [];
- cellSizes.forEach(function(cellSize) {
- strides.forEach(function(stride) {
- bins.forEach(function(bin) {
- norms.forEach(function(norm) {
- combos.push({
- cellSize: cellSize,
- blockSize: blockSize,
- blockStride: blockSize * stride,
- bins: bin,
- norm: norm
- })
- })
- })
- })
- })
-
- return combos;
-}
-
-var combos = [
-{
- cellSize: 4,
- blockSize: 2,
- blockStride: 1,
- bins: 6,
- norm: "L2"
-},
-{
- cellSize: 4,
- blockSize: 2,
- blockStride: 1,
- bins: 7,
- norm: "L2"
-}
-];
-
-console.log("testing", combos.length, "combinations");
-
-testAll(combos)
-
-function testAll(combos) {
- getCanvases(function(canvases) {
- var tests = [];
-
- for (var i = 0; i < combos.length; i++) {
- var params = combos[i];
- console.log("testing", i + 1, params)
-
- var stats = testParams(canvases, params);
- var test = {
- params: params,
- featureSize: stats.featureSize,
- avgs: stats.avgs,
- stats: stats.stats
- };
-
- tests.push(test);
- console.log(test);
-
- if (i == combos.length - 1) {
- console.log("\n" + getPrintout(tests));
-
- fs.writeFile('tests.json', JSON.stringify(tests, 4), function (err) {
- if (err) throw err;
- console.log('saved tests to tests.json');
- });
- }
- }
- })
-}
-
-
-function getPrintout(tests) {
- var sorted = _(tests).sortBy(function(test) {
- return test.stats.falsePos;
- });
- var lines = sorted.map(function(test) {
- return JSON.stringify(test.params) + " "
- + test.featureSize + " "
- + test.stats.precision + " "
- + test.stats.falsePos;
- });
- return lines.join("\n");
-}
-
-function testParams(canvases, params) {
- var data = canvases.map(function(canvas) {
- var fts = features.extractFeatures(canvas.canvas, params);
- return {
- input: fts,
- output: [canvas.isCat]
- };
- })
-
- var opts = {
- hiddenLayers: [30]
- };
- var trainOpts = {
- errorThresh: 0.006,
- log: true
- };
-
- var stats = brain.crossValidate(brain.NeuralNetwork, data, opts, trainOpts);
- stats.featureSize = data[0].input.length;
- return stats;
-}
-
-
-function getCanvases(callback) {
- var posDir = __dirname + "/POSITIVES/";
-
- fs.readdir(posDir, function(err, files) {
- if (err) throw err;
-
- getDir(posDir, files, 1, function(posData) {
- var negsDir = __dirname + "/NEGATIVES/";
- fs.readdir(negsDir, function(err, files) {
- if (err) throw err;
-
- getDir(negsDir, files, 0, function(negData) {
- var data = posData.concat(negData);
-
- callback(data);
- })
- })
- })
- });
-}
-
-function getDir(dir, files, isCat, callback) {
- var images = files.filter(function(file) {
- return path.extname(file) == ".jpg";
- });
- images = images.slice(0, limit);
-
- var data = [];
-
- async.map(images, function(file, done) {
- file = dir + file;
-
- utils.drawImgToCanvas(file, function(err, canvas) {
- done(null, {canvas: canvas, file: file, isCat: isCat});
- });
- },
- function(err, canvases) {
- callback(canvases);
- });
-}
View
111 training/make-network.js
@@ -1,111 +0,0 @@
-var fs = require("fs"),
- brain = require("brain"),
- path = require("path"),
- async = require("async"),
- _ = require("underscore"),
- utils = require("../utils"),
- features = require("../features");
-
-var networkFile = __dirname + "/network-june13-6.json";
-
-console.log("training with 6 pixels per cell mainly hard");
-trainNetwork({
- cellSize: 6
-})
-
-function trainNetwork(params) {
- getCanvases(function(canvases) {
- canvases = canvases.filter(function(canvas) {
- return !canvas.err;
- })
-
- var data = canvases.map(function(canvas) {
- try {
- var fts = features.extractFeatures(canvas.canvas, params);
- } catch(e) {
- console.log("err getting features", e, canvas.file);
- }
- return {
- input: fts,
- output: [canvas.isCat]
- };
- });
-
- data = _(data).sortBy(function() {
- return Math.random();
- });
-
- console.log(data[0].input.length)
-
- console.log("training with", data.length);
-
- var opts = {
- hiddenLayers: [30]
- };
- var trainOpts = {
- errorThresh: 0.005,
- log: true,
- logPeriod: 1
- };
-
- var network = new brain.NeuralNetwork(opts);
-
- var stats = network.train(data, trainOpts);
-
- console.log("stats:", stats);
- console.log("parameters:", opts);
-
- var json = JSON.stringify(network.toJSON(), 4)
-
- fs.writeFile(networkFile, json, function (err) {
- if (err) throw err;
- console.log('saved network to', networkFile);
- });
- })
-}
-
-function getCanvases(callback) {
- var posDir = __dirname + "/POSITIVES/";
-
- fs.readdir(posDir, function(err, files) {
- if (err) throw "pos" + err;
-
- getDir(posDir, files, 1, 0, 9500, function(posData) {
- var negsDir = __dirname + "/NEGS_ALL/";
- fs.readdir(negsDir, function(err, files) {
- if (err) throw "neg" + err;
-
- getDir(negsDir, files, 0, 0, 9500, function(negData) {
- var data = posData.concat(negData);
-
- callback(data);
- })
- })
- })
- });
-}
-
-function getDir(dir, files, isCat, min, limit, callback) {
- var images = files.filter(function(file) {
- return path.extname(file) == ".jpg";
- });
- images = images.slice(min, limit);
-
- console.log(images.length)
-
- var data = [];
-
- async.map(images, function(file, done) {
- file = dir + file;
-
- utils.drawImgToCanvas(file, function(err, canvas) {
- if (err) {
- console.log(err, file);
- }
- done(null, {canvas: canvas, file: file, isCat: isCat, err: err});
- });
- },
- function(err, canvases) {
- callback(canvases);
- });
-}
View
69 training/mine-negatives.js
@@ -0,0 +1,69 @@
+var fs = require("fs"),
+ path = require("path"),
+ brain = require("brain"),
+ nomnom = require("nomnom"),
+ features = require("../features"),
+ utils = require("../utils")
+ collect = require("./collect");
+
+// Command-line options, parsed with nomnom. negDir and minedDir are declared
+// with positions 0 and 1; the remaining entries are named options.
+// NOTE(review): negDir and minedDir set both `default` and `required: true` --
+// confirm which one nomnom honours when the argument is omitted.
+var opts = nomnom.options({
+ negDir: {
+ position: 0,
+ default: __dirname + "/collection/NEGATIVES/",
+ required: true,
+ help: "Directory of negatives"
+ },
+ minedDir: {
+ position: 1,
+ default: __dirname + "/collection/MINED_NEGATIVES/",
+ required: true,
+ help: "Directory to put mined hard negatives in"
+ },
+ network: {
+ // passed to require() by mineNegatives
+ default: __dirname + "/network.json",
+ help: "Neural network JSON file"
+ },
+ samples: {
+ // forwarded to collect.getDir as its `samples` argument
+ default: 1,
+ help: "How many times to sub-sample full negative image"
+ },
+ limit: {
+ // forwarded to collect.getDir as its `limit` argument
+ default: undefined,
+ help: "Max number of negative images to process from directory"
+ },
+ threshold: {
+ // samples whose network output is >= this value are saved as false positives
+ default: 0.9,
+ help: "How wrong the classification is, from 0.5+ to 1.0"
+ }
+}).colors().parse();
+
+// script entry point
+mineNegatives();
+
+function mineNegatives() {
+ var samples = collect.getDir(opts.negDir, false, opts.samples, opts.limit);
+
+ console.log("mining negatives from " + samples.length);
+
+ var trained = require(opts.network);
+ var network = new brain.NeuralNetwork().fromJSON(trained);
+
+ var falsePositives = 0;
+ for (var i = 0; i < samples.length; i++) {
+ var sample = samples[i];
+ var fts = features.extractFeatures(sample.canvas);
+ var result = network.run(fts);
+
+ if (result >= opts.threshold) {
+ console.log(result);
+ falsePositives++;
+
+ var rand = Math.floor(Math.random() * 1000);
+ var file = opts.minedDir + "/" + rand + "_" + path.basename(sample.file);
+
+ utils.writeCanvasToFile(sample.canvas, file, function(err) {
+ if (err) throw err;
+ })
+ }
+ }
+ console.log(falsePositives + " false positives found");
+}
View
138 training/test-network.js
@@ -3,93 +3,69 @@ var fs = require("fs"),
brain = require("brain"),
async = require("async"),
_ = require("underscore"),
+ nomnom = require("nomnom"),
features = require("../features"),
- utils = require("../utils");
-
-testNetwork({
- cellSize: 4
-})
-
-function testNetwork(params) {
- getCanvases(function(canvases) {
- canvases = canvases.filter(function(canvas) {
- return !canvas.err;
- })
-
- var data = canvases.map(function(canvas) {
- var fts = features.extractFeatures(canvas.canvas, params);
- return {
- input: fts,
- output: [canvas.isCat]
- };
- });
-
- data = _(data).sortBy(function() {
- return Math.random();
- });
-
- console.log("testing with", data.length);
-
- var json = require("./network-4-big.json")
- var network = new brain.NeuralNetwork().fromJSON(json);
- var stats = network.test(data);
-
- console.log(stats.error, "error");
- console.log(stats.precision, "precision")
- console.log(stats.recall, "recall")
- console.log(stats.accuracy, "accuracy")
-
- console.log(stats.truePos, "true positives");
- console.log(stats.trueNeg, "true negatives");
- console.log(stats.falsePos, "false positives");
- console.log(stats.falseNeg, "false negatives");
- console.log(stats.total, "total");
- })
-}
-
-function getCanvases(callback) {
- var posDir = __dirname + "/POSITIVES_TEST/";
-
- fs.readdir(posDir, function(err, files) {
- if (err) throw err;
-
- getDir(posDir, files, 1, 0, 8000, function(posData) {
- var negsDir = __dirname + "/NEGATIVES_TEST/";
- fs.readdir(negsDir, function(err, files) {
- if (err) throw err;
-
- getDir(negsDir, files, 0, 0, 6000, function(negData) {
- var data = posData.concat(negData);
-
- callback(data);
- })
- })
- })
+ utils = require("../utils"),
+ collect = require("./collect");
+
+var opts = nomnom.options({
+ posDir: {
+ position: 0,
+ default: __dirname + "/collection/POSITIVES_TEST/",
+ help: "Directory of test positives"
+ },
+ negDir: {
+ position: 1,
+ default: __dirname + "/collection/NEGATIVES_TEST/",
+ help: "Directory of test negatives"
+ },
+ network: {
+ default: __dirname + "/network.json",
+ help: "Neural network JSON file"
+ },
+ sample: {
+ default: true,
+ help: "sub-sample the negative images"
+ }
+}).colors().parse();
+
+testNetwork();
+
+function testNetwork() {
+ var canvases = collect.collectImages(opts.posDir, opts.negDir,
+ opts.sample ? 1 : 0);
+ console.log("testing on", canvases.length);
+
+ var data = canvases.map(function(canvas) {
+ try {
+ var fts = features.extractFeatures(canvas.canvas);
+ } catch(e) {
+ console.log("err getting features", e, canvas.file);
+ }
+ return {
+ input: fts,
+ output: [canvas.isCat]
+ };
});
-}
-function getDir(dir, files, isCat, min, limit, callback) {
- var images = files.filter(function(file) {
- return path.extname(file) == ".jpg";
+ data = _(data).sortBy(function() {
+ return Math.random();
});
- images = images.slice(min, limit);
- console.log(images.length)
+ console.log("testing with", data.length);
- var data = [];
+ var json = require(opts.network)
+ var network = new brain.NeuralNetwork().fromJSON(json);
+ var stats = network.test(data);
- async.map(images, function(file, done) {
- file = dir + file;
+ console.log(stats.error + " error");
+ console.log(stats.precision + " precision")
+ console.log(stats.recall + " recall")
+ console.log(stats.accuracy + " accuracy")
- utils.drawImgToCanvas(file, function(err, canvas) {
- if (err) {
- console.log(err);
- }
- done(null, {canvas: canvas, file: file, isCat: isCat, err: err});
- });
- },
- function(err, canvases) {
- console.log("got one directory of images");
- callback(canvases);
- });
+ console.log(stats.truePos + " true positives");
+ console.log(stats.trueNeg + " true negatives");
+ console.log(stats.falsePos + " false positives");
+ console.log(stats.falseNeg + " false negatives");
+ console.log(stats.total + " total");
}
View
97 training/train-network.js
@@ -0,0 +1,97 @@
+var fs = require("fs"),
+ brain = require("brain"),
+ path = require("path"),
+ async = require("async"),
+ _ = require("underscore"),
+ nomnom = require("nomnom"),
+ utils = require("../utils"),
+ features = require("../features"),
+ collect = require("./collect");
+
+// Command-line options, parsed with nomnom. posDir and negDir are declared
+// with positions 0 and 1; the remaining entries are named options.
+var opts = nomnom.options({
+ posDir: {
+ position: 0,
+ default: __dirname + "/collection/POSITIVES/",
+ help: "Directory of cat head images"
+ },
+ negDir: {
+ position: 1,
+ default: __dirname + "/collection/NEGATIVES/",
+ help: "Directory of negative images"
+ },
+ outfile: {
+ // trainNetwork writes the serialized network to this path
+ default: __dirname + "/network.json",
+ help: "file to save network JSON to"
+ },
+ sample: {
+ // when set, trainNetwork asks collectImages for 1 sample per negative, else 0
+ flag: true,
+ help: "whether to sub-sample the negative images"
+ },
+ limit: {
+ // forwarded to collect.collectImages as its `limit` argument
+ default: 10000,
+ help: "maximum number of images to use from each directory"
+ }
+}).colors().parse();
+
+// Settings for one training run: HOG goes to features.extractFeatures, nn to
+// the brain.NeuralNetwork constructor and train to network.train().
+var params = {
+ HOG: {
+ cellSize: 6,
+ blockSize: 2,
+ blockStride: 1,
+ bins: 6,
+ norm: "L2"
+ },
+ nn: {
+ hiddenLayers: [10, 10]
+ },
+ train: {
+ errorThresh: 0.007,
+ log: true,
+ logPeriod: 1
+ }
+};
+
+// script entry point
+trainNetwork(params)
+
+function trainNetwork(params) {
+ var samples = opts.sample ? 1 : 0;
+ var canvases = collect.collectImages(opts.posDir, opts.negDir,
+ samples, opts.limit);
+
+ console.log("training on", canvases.length);
+
+ var data = canvases.map(function(canvas) {
+ try {
+ var fts = features.extractFeatures(canvas.canvas, params.HOG);
+ } catch(e) {
+ console.log("err getting features", e, canvas.file);
+ }
+ return {
+ input: fts,
+ output: [canvas.isCat]
+ };
+ });
+
+ console.log(data[0]);
+
+ data = _(data).sortBy(function() {
+ return Math.random();
+ });
+
+ console.log("feature size:", data[0].input.length)
+ console.log("training with:", data.length, "samples");
+
+ var network = new brain.NeuralNetwork(params.nn);
+
+ var stats = network.train(data, params.train);
+
+ console.log("stats:", stats);
+ console.log("parameters:", params);
+
+ var json = JSON.stringify(network.toJSON(), 4)
+
+ fs.writeFile(opts.outfile, json, function (err) {
+ if (err) throw err;
+ console.log('saved network to', opts.outfile);
+ });
+}
View
54 training/upload.js
@@ -1,54 +0,0 @@
-var fs = require("fs"),
- path = require("path"),
- async = require("async"),
- cradle = require("cradle"),
- utils = require("../utils"),
- features = require("../features");
-
-var negsDir = __dirname + "/NEGATIVES/";
-var posDir = __dirname + "/POSITIVES/";
-
-var db = new(cradle.Connection)().database('cats-hog-c6-b9');
-
-var count = 0;
-
-//uploadDir(posDir, 1, 5000);
-uploadDir(negsDir, 0, 5000);
-
-function uploadDir(dir, isCat, limit) {
- var docs = [];
-
- fs.readdir(dir, function(err, files) {
- if (err) throw err;
-
- var images = files.filter(function(file) {
- return path.extname(file) == ".jpg";
- });
-
- images = images.slice(0, limit);
-
- async.forEach(images, function(file, done) {
- file = dir + file;
-
- utils.drawImgToCanvas(file, function(canvas) {
- var fts = features.extractFeatures(canvas);
-
- docs.push({
- file: file,
- input: fts,
- output: [isCat]
- });
-
- if (++count % 1000 == 0) {
- console.log("processed", count)
- }
- done();
- });
- },
- function() {
- db.save(docs, function(err) {
- if (err) throw err;
- });
- });
- });
-}
Please sign in to comment.
Something went wrong with that request. Please try again.