Permalink
Browse files

refactor training and collection scripts

  • Loading branch information...
1 parent e1723d2 commit bf378543a1d5d877ca37236418be2b377130850c @harthur committed Oct 15, 2012
View
2 kittydar.js
@@ -6,7 +6,7 @@ if (process.arch) { // in node
var Canvas = (require)('canvas');
}
-var network = require("./network.json");
+var network = require("./network.js");
var net = new brain.NeuralNetwork().fromJSON(network);
var params = {
View
1 nms.js
@@ -18,7 +18,6 @@ exports.combineOverlaps = combineOverlaps;
* to be included in the final returned set.
*/
function combineOverlaps(rects, minRatio, minOverlaps) {
- console.log(minRatio, minOverlaps);
minRatio = minRatio || 0.5;
minOverlaps = minOverlaps || 1;
View
31 package.json
@@ -1,16 +1,19 @@
{
- "name": "kittydar",
- "description": "Cat detection",
- "version": "0.1.2",
- "author": "Heather Arthur <fayearthur@gmail.com>",
- "repository": {
- "type": "git",
- "url": "http://github.com/harthur/kittydar.git"
- },
- "dependencies" : {
- "canvas" : "~0.10.0",
- "brain" : "~0.6.0",
- "hog-descriptor" : "~0.4.0"
- },
- "main": "./kittydar"
+ "name": "kittydar",
+ "description": "Cat detection",
+ "version": "0.1.2",
+ "author": "Heather Arthur <fayearthur@gmail.com>",
+ "repository": {
+ "type": "git",
+ "url": "http://github.com/harthur/kittydar.git"
+ },
+ "dependencies" : {
+ "canvas" : "~0.13.1",
+ "brain" : "~0.6.0",
+ "hog-descriptor" : "~0.4.0"
+ },
+ "devDependencies" : {
+ "nomnom" : "~1.5.2"
+ },
+ "main": "./kittydar"
}
View
60 training/README.md
@@ -0,0 +1,60 @@
+# Training
+
+The goal of training is to create a classifier (in this case a neural network) that can be used to classify cat head images.
+
+After a final round of training you should have the JSON state of a neural network in the file "network.json", which can be imported and used by kittydar.
+
+## collection
+
+First you need to collect positive and negative images to train the network with. See the `collection` directory for more information.
+
+## train the classifier
+
+You can train a network with:
+
+```
+node train-network.js POSITIVES NEGATIVES
+```
+
+where POSITIVES is the directory of positive images (cat head crops), and NEGATIVES is a directory of samples from non-cat images.
+
+This will write the network to "network.json".
+
+## test the classifier
+
+After training the network you can test the network on a set of test positive and negative images (different from the ones that trained it):
+
+```
+node test-network.js POSITIVES_TEST NEGATIVES_TEST --network ./network.json
+```
+
+This will report the neural network error, as well as binary classification statistics like precision and recall.
+
+## optional: finding optimal parameters
+
+Find the best parameters for the feature extraction and classifier with cross-validation. Edit the `combos` object to add a combination and run with:
+
+```
+node cross-validate.js POSITIVES NEGATIVES
+```
+
+This will cross-validate on each combination of parameters and report statistics on each combination, including the precision, recall, accuracy, and error of the test set.
+
+## optional: mining hard negatives
+
+After you've trained a classifier, you can test the classifier on a different set of negative images and save any false positives as "hard negatives". You can take the hard negatives and the positives and train a new (more precise) classifier.
+
+```
+node mine-negatives.js NEGATIVES_EXTRA HARD --samples 1 --network ./network.json
+```
+
+where `HARD` is a new directory to hold the mined negatives. The `threshold` param determines when a negative is classified as hard. It's a number from 0.5 to 1.0 (from "leaning positive" to very false positive).
+
+`samples` is the number of times to sample each negative image. It can take a lot of images to find a few hard negatives if your classifier is good enough, so specifying a higher value will mine more hard negatives in the end.
+
+You can then train a new classifier with:
+
+```
+node train-network.js POSITIVES HARD
+```
+
View
107 training/collect.js
@@ -0,0 +1,107 @@
+var fs = require("fs"),
+ path = require("path"),
+ Canvas = require("canvas"),
+ utils = require("../utils")
+ features = require("../features");
+
+exports.collectData = collectData;
+exports.getDir = getDir;
+exports.extractSamples = extractSamples;
+
+/*
+ * Collect the canvas representations of the images in the positive and
+ * negative directories and return
+ * an array of objects that look like:
+ * {
+ * input: <Array of floats> from image features
+ * output: [0,1] (depending if it's a cat or not)
+ * file: 'test.jpg'
+ * }
+ */
+function collectData(posDir, negDir, samples, limit, params) {
+ // number of samples to extract from each negative, 0 for whole image
+ samples = samples || 0;
+ params = params || {};
+
+ var pos = getDir(posDir, true, 0, limit, params);
+ var neg = getDir(negDir, false, samples, limit, params);
+
+ var data = pos.concat(neg);
+
+ // randomize so neural network doesn't get biased toward one set
+ data.sort(function() {
+ return 1 - 2 * Math.round(Math.random());
+ });
+ return data;
+}
+
+function getDir(dir, isCat, samples, limit, params) {
+ var files = fs.readdirSync(dir);
+
+ var images = files.filter(function(file) {
+ return (path.extname(file) == ".png"
+ || path.extname(file) == ".jpg");
+ });
+
+ images = images.slice(0, limit);
+
+ var data = [];
+ for (var i = 0; i < images.length; i++) {
+ var file = dir + "/" + images[i];
+ try {
+ var canvas = utils.drawImgToCanvasSync(file);
+ }
+ catch(e) {
+ console.log(e, file);
+ continue;
+ }
+
+ var canvases = extractSamples(canvas, samples);
+
+ for (var j = 0; j < canvases.length; j++) {
+ var fts;
+ try {
+ fts = features.extractFeatures(canvases[j], params.HOG);
+ } catch(e) {
+ console.log("error extracting features", e, file);
+ continue;
+ }
+ data.push({
+ input: fts,
+ output: [isCat ? 1 : 0],
+ file: file,
+ });
+ }
+ }
+
+ return data;
+}
+
+
+function extractSamples(canvas, num) {
+ if (num == 0) {
+ // 0 means "don't sample"
+ return [canvas];
+ }
+
+ var min = 48;
+ var max = Math.min(canvas.width, canvas.height);
+
+ var canvases = [];
+ for (var i = 0; i < num; i++) {
+ var length = Math.max(min, Math.ceil(Math.random() * max));
+
+ var x = Math.floor(Math.random() * (max - length));
+ var y = Math.floor(Math.random() * (max - length));
+
+ canvases.push(cropCanvas(canvas, x, y, length, length));
+ }
+ return canvases;
+}
+
+function cropCanvas(canvas, x, y, width, height) {
+ var cropCanvas = new Canvas(width, height);
+ var context = cropCanvas.getContext("2d");
+ context.drawImage(canvas, x, y, width, height, 0, 0, width, height);
+ return cropCanvas;
+}
View
30 training/collection/README.md
@@ -0,0 +1,30 @@
+## collection
+
+the goal of collection is to get a folder of positive (cat head) images and a folder of negative (non-cat) images to train the classifier with.
+
+### creating the positives
+
+To get the positives, first download this [dataset of cat pictures](http://137.189.35.203/WebUI/CatDatabase/catData.html). There should be folders called CAT_00, CAT_01, etc. Take the images from all of these and combine into one directory. Also remove the file "00000003_019.jpg.cat" and add [00000003_015.jpg.cat](http://137.189.35.203/WebUI/CatDatabase/Data/00000003_015.jpg.cat).
+
+Run the script to rotate and then crop out the cat head from each image. If you put the cat dataset in a folder called "CATS" and you want to put the cropped images in a folder called "POSITIVES":
+
+`node make-positives.js CATS POSITIVES`
+
+### creating the negatives
+
+If you don't already have a bunch of non-cat pictures you can fetch recent images from Flickr and save them in a folder called "NEGATIVES" by running:
+
+`ruby fetch-negatives.rb NEGATIVES`
+
+You'll need at least 10,000 images.
+
+To turn the full-sized images into negatives that can be used directly for training or testing, sample them with:
+
+`node make-negatives NEGATIVES NEGATIVES_SAMPLED`
+
+Where `NEGATIVES_SAMPLED` is the directory to contain the sampled images.
+
+If you're getting images from Flickr, some will contain cats for sure, so you'll need to weed those out by taking a close look at your hard negatives (see `training` directory above).
+
+
+
View
13 training/collection/flickr.rb → training/collection/fetch-negatives.rb
@@ -5,34 +5,31 @@
FlickRaw.api_key="0cc11cffc8a238efef4dfa6dca255a44"
FlickRaw.shared_secret="5f76a97053f99673"
-$count = 0
-
$fetched = Hash.new
+$dir = ARGV[0]
+
def getPage(page)
list = flickr.photos.getRecent :per_page => 500, :page => page
list.each do |photo|
- url = "http://farm#{photo.farm}.staticflickr.com/#{photo.server}/#{photo.id}_#{photo.secret}_c.jpg"
+ url = "http://farm#{photo.farm}.staticflickr.com/#{photo.server}/#{photo.id}_#{photo.secret}.jpg"
if $fetched[url] != 1
$fetched[url] = 1
name = rand(100000000000)
- file = "NEGS_FLICKR/#{name}.jpg"
-
- puts file
+ file = "#{$dir}/#{name}.jpg"
open(file, 'wb') do |file|
file << open(url).read
end
- puts "saved to #{file}"
- $count+=1
end
end
end
+# gets 120 x 500 = 60,000 images
120.times do |i|
getPage(i)
end
View
75 training/collection/make-negatives.js
@@ -0,0 +1,75 @@
+var fs = require("fs"),
+ path = require("path"),
+ nomnom = require("nomnom"),
+ Canvas = require("canvas"),
+ utils = require("../../utils");
+
+var opts = nomnom.options({
+ indir: {
+ position: 0,
+ default: __dirname + "/FLICKR/",
+ help: "Directory of full-sizes negative images"
+ },
+ outdir: {
+ position: 1,
+ default: __dirname + "/NEGATIVES/",
+ help: "Directory to save cropped image sections"
+ },
+ samples: {
+ default: 1,
+ help: "How many times to sub-sample each image"
+ }
+}).colors().parse();
+
+
+fs.readdir(opts.indir, function(err, files) {
+ if (err) throw err;
+
+ var images = files.filter(function(file) {
+ return path.extname(file) == ".jpg";
+ });
+
+ console.log(images.length, "images to process");
+
+ images.forEach(function(image) {
+ var file = opts.indir + "/" + image;
+ try {
+ var canvas = utils.drawImgToCanvasSync(file);
+ }
+ catch(e) {
+ console.log(e, file);
+ return;
+ }
+ var canvases = extractSamples(canvas, opts.samples);
+
+ canvases.forEach(function(canvas) {
+ var name = Math.floor(Math.random() * 10000000000);
+ var file = opts.outdir + "/" + name + ".jpg";
+
+ utils.writeCanvasToFileSync(canvas, file);
+ });
+ });
+})
+
+function extractSamples(canvas, num) {
+ var min = 48;
+ var max = Math.min(canvas.width, canvas.height);
+
+ var canvases = [];
+ for (var i = 0; i < num; i++) {
+ var length = Math.max(48, Math.ceil(Math.random() * max));
+
+ var x = Math.floor(Math.random() * (max - length));
+ var y = Math.floor(Math.random() * (max - length));
+
+ canvases.push(cropCanvas(canvas, x, y, length, length));
+ }
+ return canvases;
+}
+
+function cropCanvas(canvas, x, y, width, height) {
+ var cropCanvas = new Canvas(width, height);
+ var context = cropCanvas.getContext("2d");
+ context.drawImage(canvas, x, y, width, height, 0, 0, width, height);
+ return cropCanvas;
+}
View
67 training/collection/make-negs.js
@@ -1,67 +0,0 @@
-var http = require("http"),
- url = require("url"),
- fs = require("fs"),
- path = require("path"),
- Canvas = require("canvas"),
- _ = require("underscore"),
- utils = require("../../utils");
-
-var dir = __dirname + "/NEGS_FLICKR/";
-var outdir = __dirname + "/NEGS_SAMPLED3/";
-
-var part = parseInt(process.argv[2]);
-
-var perFile = 1;
-
-fs.readdir(dir, function(err, files) {
- if (err) throw err;
-
- var images = files.filter(function(file) {
- return path.extname(file) == ".jpg";
- });
-
- images = images.slice(9500 * part, 9500 * (part + 1));
- console.log(images.length);
-
- images.forEach(function(image) {
- try {
- var canvas = utils.drawImgToCanvasSync(dir + image);
- }
- catch(e) {
- console.log(e, dir + image);
- return;
- }
- var canvases = generateFromRaw(canvas);
-
- canvases.forEach(function(canvas) {
- var name = Math.floor(Math.random() * 10000000000);
- var file = outdir + name + ".jpg";
-
- utils.writeCanvasToFile(canvas, file, function() {
- console.log("wrote to", file)
- });
- });
- });
-})
-
-function generateFromRaw(canvas) {
- var min = 48;
- var max = Math.min(canvas.width, canvas.height);
-
- var canvases = _.range(0, perFile).map(function() {
- var length = Math.max(48, Math.ceil(Math.random() * max));
-
- var x = Math.floor(Math.random() * (max - length));
- var y = Math.floor(Math.random() * (max - length));
-
- return cropCanvas(canvas, x, y, length, length);
- })
- return canvases;
-}
-
-function cropCanvas(canvas, x, y, width, height) {
- var cropCanvas = new Canvas(width, height);
- var context = cropCanvas.getContext("2d");
- context.drawImage(canvas, x, y, width, height, 0, 0, width, height);
- return cropCanvas;
-}
View
2 training/collection/make-positives.js
@@ -1,9 +1,7 @@
var fs = require("fs"),
path = require("path"),
- async = require("async"),
nomnom = require("nomnom"),
Canvas = require("canvas"),
- cropper = require("./cropper"),
utils = require("../../utils");
var opts = nomnom.options({
View
95 training/collection/mine-negatives.js
@@ -1,95 +0,0 @@
-var fs = require("fs"),
- path = require("path"),
- brain = require("brain"),
- async = require("async"),
- _ = require("underscore"),
- features = require("../../features"),
- utils = require("../../utils");
-
-var trained = require("../network-6-random.json");
-
-var dir = __dirname + "/NEGS_SAMPLED/";
-var minedDir = __dirname + "/NEGS_HARD1_RAND/";
-
-var params = {
- cellSize: 6
-}
-
-var part = parseInt(process.argv[2]);
-
-console.log("mining hard negatives from part", part);
-
-fs.readdir(dir, function(err, files) {
- if (err) throw err;
-
- var images = files.filter(function(file) {
- return path.extname(file) == ".jpg";
- });
-
- // to get around open fd limit
- images = images.slice(9500 * part, 9500 * (part + 1));
-
- console.log(images.length)
-
- async.map(images, function(file, done) {
- file = dir + file;
-
- utils.drawImgToCanvas(file, function(err, canvas) {
- done(null, {
- canvas: canvas,
- file: file,
- isCat: false,
- err: err
- });
- });
- },
- function(err, canvases) {
- saveFalsePos(canvases);
- });
-})
-
-function saveFalsePos(canvases) {
- canvases = canvases.filter(function(canvas) {
- return !canvas.err;
- });
-
- var data = canvases.map(function(canvas) {
- var fts = features.extractFeatures(canvas.canvas, params);
- return {
- file: canvas.file,
- input: fts,
- output: [canvas.isCat]
- };
- });
-
- data = _(data).sortBy(function() {
- return Math.random();
- });
-
- var network = new brain.NeuralNetwork().fromJSON(trained);
-
- var stats = network.test(data);
-
- console.log(stats.misclasses.length, "misclasses")
- console.log(stats.falsePos, "false positives");
- console.log(stats.trueNeg, "true negatives");
- console.log(stats.total, "total");
-
- stats.misclasses.forEach(function(misclass) {
- if (misclass.expected == 0) {
- var file = minedDir + path.basename(misclass.file);
- copyFile(misclass.file, file);
- }
- });
-}
-
-
-function copyFile(source, dest, callback) {
- newFile = fs.createWriteStream(dest);
- oldFile = fs.createReadStream(source);
- oldFile.pipe(newFile);
-
- oldFile.on('end', function() {
- if (callback) callback();
- })
-}
View
175 training/cross-validate.js
@@ -1,110 +1,107 @@
var fs = require("fs"),
- brain = require("brain"),
path = require("path"),
- async = require("async"),
- _ = require("underscore"),
+ nomnom = require("nomnom"),
+ brain = require("brain"),
+ features = require("../features"),
utils = require("../utils"),
- features = require("../features");
-
-
-testParams({
- cellSize: 4
-});
-
-function testParams(params) {
- getCanvases(function(canvases) {
- canvases = canvases.filter(function(canvas) {
- return canvas.err === null;
- });
-
- var data = canvases.map(function(canvas) {
- var fts = features.extractFeatures(canvas.canvas, params);
- return {
- file: canvas.file,
- input: fts,
- output: [canvas.isCat]
- };
- });
+ collect = require("./collect");
- console.log("training on", data.length)
-
- var opts = {
- hiddenLayers: [2]
- };
- var trainOpts = {
- errorThresh: 0.006,
- log: true
- };
+var opts = nomnom.options({
+ posDir: {
+ position: 0,
+ default: __dirname + "/collection/POSITIVES/",
+ help: "Directory of cat head images"
+ },
+ negDir: {
+ position: 1,
+ default: __dirname + "/collection/NEGATIVES/",
+ help: "Directory of negative images"
+ },
+ sample: {
+ flag: true,
+ help: "Sub-sample negative images"
+ },
+ limit: {
+ default: 10000,
+ help: "Max images to collect from each directory"
+ }
+}).colors().parse();
+
+
+var combos = [{
+ HOG: {
+ cellSize: 6,
+ blockSize: 2,
+ blockStride: 1,
+ bins: 6,
+ norm: "L2"
+ },
+ nn: {
+ hiddenLayers: [10, 10]
+ },
+ train: {
+ errorThresh: 0.007
+ }
+}];
- var stats = brain.crossValidate(brain.NeuralNetwork, data, opts, trainOpts);
- stats.featureSize = data[0].input.length;
+console.log("testing", combos.length, "combinations");
- console.log("params", stats.params);
- console.log("stats", stats.stats);
- console.log("avgs", stats.avgs);
+testAll(combos);
- fs.writeFile('misclasses.json', JSON.stringify(stats.misclasses, 4), function (err) {
- if (err) throw err;
- console.log('saved misclasses to misclasses.json');
- });
+function testAll(combos) {
+ var tests = [];
- var minError = 1;
- var network;
+ for (var i = 0; i < combos.length; i++) {
+ var params = combos[i];
+ var samples = opts.sample ? 1 : 0;
+ var data = collect.collectData(opts.posDir, opts.negDir, samples,
+ opts.limit, params);
- stats.sets.forEach(function(set) {
- if (set.error < minError) {
- minError = set.error;
- network = set.network;
- }
- })
+ console.log("testing", i + 1 + ": " + params, "on " + data.length)
- var json = JSON.stringify(network, 4)
- fs.writeFile('cv-network.json', json, function (err) {
- if (err) throw err;
- console.log('saved network to cv-network.json');
- });
- })
-}
+ var stats = testParams(data, params);
+ var test = {
+ params: params,
+ featureSize: stats.featureSize,
+ avgs: stats.avgs,
+ stats: stats.stats
+ };
+ tests.push(test);
-function getCanvases(callback) {
- var posDir = __dirname + "/POSITIVES_TRAIN/";
+ console.log(test);
- fs.readdir(posDir, function(err, files) {
- if (err) throw err;
+ if (i == combos.length - 1) {
+ console.log("\n" + getPrintout(tests));
- getDir(posDir, files, 1, function(posData) {
- var negsDir = __dirname + "/NEGATIVES_MIXED/";
- fs.readdir(negsDir, function(err, files) {
+ fs.writeFile('tests.json', JSON.stringify(tests, 4), function (err) {
if (err) throw err;
+ console.log('saved tests to tests.json');
+ });
+ }
+ }
+}
- getDir(negsDir, files, 0, function(negData) {
- var data = posData.concat(negData);
+function testParams(data, params) {
+ var trainOpts = params.train;
+ trainOpts.log = true;
- callback(data);
- })
- })
- })
- });
+ var stats = brain.crossValidate(brain.NeuralNetwork, data,
+ params.nn, trainOpts);
+ stats.featureSize = data[0].input.length;
+ return stats;
}
-function getDir(dir, files, isCat, callback) {
- var limit = 5000;
- var images = files.filter(function(file) {
- return path.extname(file) == ".jpg";
+function getPrintout(tests) {
+ tests.sort(function(test1, test2) {
+ return test1.stats.falsePos > test2.stats.falsePos;
});
- images = images.slice(0, limit);
-
- var data = [];
-
- async.map(images, function(file, done) {
- file = dir + file;
-
- utils.drawImgToCanvas(file, function(err, canvas) {
- done(null, {canvas: canvas, file: file, isCat: isCat, err: err});
- });
- },
- function(err, canvases) {
- console.log("got one directory of images")
- callback(canvases);
+ var lines = tests.map(function(test) {
+ return JSON.stringify(test.params) + " "
+ + "size: " + test.featureSize + " "
+ + "p: " + test.stats.precision.toFixed(3) + " "
+ + "r: " + test.stats.recall.toFixed(3) + " "
+ + "a: " + test.stats.accuracy.toFixed(3) + " "
+ + "fp: " + test.stats.falsePos;
});
+ return lines.join("\n");
}
View
167 training/find-params.js
@@ -1,167 +0,0 @@
-var fs = require("fs"),
- brain = require("brain"),
- path = require("path"),
- async = require("async"),
- _ = require("underscore"),
- utils = require("../utils"),
- features = require("../features");
-
-var limit = 4000;
-
-
-function getCombos() {
- var cellSizes = [4, 6];
- var bins = [6];
- var strides = [1]; //, 0.5];
- var norms = ["L2"];
- var blockSize = 2;
-
- var combos = [];
- cellSizes.forEach(function(cellSize) {
- strides.forEach(function(stride) {
- bins.forEach(function(bin) {
- norms.forEach(function(norm) {
- combos.push({
- cellSize: cellSize,
- blockSize: blockSize,
- blockStride: blockSize * stride,
- bins: bin,
- norm: norm
- })
- })
- })
- })
- })
-
- return combos;
-}
-
-var combos = [
-{
- cellSize: 4,
- blockSize: 2,
- blockStride: 1,
- bins: 6,
- norm: "L2"
-},
-{
- cellSize: 4,
- blockSize: 2,
- blockStride: 1,
- bins: 7,
- norm: "L2"
-}
-];
-
-console.log("testing", combos.length, "combinations");
-
-testAll(combos)
-
-function testAll(combos) {
- getCanvases(function(canvases) {
- var tests = [];
-
- for (var i = 0; i < combos.length; i++) {
- var params = combos[i];
- console.log("testing", i + 1, params)
-
- var stats = testParams(canvases, params);
- var test = {
- params: params,
- featureSize: stats.featureSize,
- avgs: stats.avgs,
- stats: stats.stats
- };
-
- tests.push(test);
- console.log(test);
-
- if (i == combos.length - 1) {
- console.log("\n" + getPrintout(tests));
-
- fs.writeFile('tests.json', JSON.stringify(tests, 4), function (err) {
- if (err) throw err;
- console.log('saved tests to tests.json');
- });
- }
- }
- })
-}
-
-
-function getPrintout(tests) {
- var sorted = _(tests).sortBy(function(test) {
- return test.stats.falsePos;
- });
- var lines = sorted.map(function(test) {
- return JSON.stringify(test.params) + " "
- + test.featureSize + " "
- + test.stats.precision + " "
- + test.stats.falsePos;
- });
- return lines.join("\n");
-}
-
-function testParams(canvases, params) {
- var data = canvases.map(function(canvas) {
- var fts = features.extractFeatures(canvas.canvas, params);
- return {
- input: fts,
- output: [canvas.isCat]
- };
- })
-
- var opts = {
- hiddenLayers: [30]
- };
- var trainOpts = {
- errorThresh: 0.006,
- log: true
- };
-
- var stats = brain.crossValidate(brain.NeuralNetwork, data, opts, trainOpts);
- stats.featureSize = data[0].input.length;
- return stats;
-}
-
-
-function getCanvases(callback) {
- var posDir = __dirname + "/POSITIVES/";
-
- fs.readdir(posDir, function(err, files) {
- if (err) throw err;
-
- getDir(posDir, files, 1, function(posData) {
- var negsDir = __dirname + "/NEGATIVES/";
- fs.readdir(negsDir, function(err, files) {
- if (err) throw err;
-
- getDir(negsDir, files, 0, function(negData) {
- var data = posData.concat(negData);
-
- callback(data);
- })
- })
- })
- });
-}
-
-function getDir(dir, files, isCat, callback) {
- var images = files.filter(function(file) {
- return path.extname(file) == ".jpg";
- });
- images = images.slice(0, limit);
-
- var data = [];
-
- async.map(images, function(file, done) {
- file = dir + file;
-
- utils.drawImgToCanvas(file, function(err, canvas) {
- done(null, {canvas: canvas, file: file, isCat: isCat});
- });
- },
- function(err, canvases) {
- callback(canvases);
- });
-}
View
111 training/make-network.js
@@ -1,111 +0,0 @@
-var fs = require("fs"),
- brain = require("brain"),
- path = require("path"),
- async = require("async"),
- _ = require("underscore"),
- utils = require("../utils"),
- features = require("../features");
-
-var networkFile = __dirname + "/network-june13-6.json";
-
-console.log("training with 6 pixels per cell mainly hard");
-trainNetwork({
- cellSize: 6
-})
-
-function trainNetwork(params) {
- getCanvases(function(canvases) {
- canvases = canvases.filter(function(canvas) {
- return !canvas.err;
- })
-
- var data = canvases.map(function(canvas) {
- try {
- var fts = features.extractFeatures(canvas.canvas, params);
- } catch(e) {
- console.log("err getting features", e, canvas.file);
- }
- return {
- input: fts,
- output: [canvas.isCat]
- };
- });
-
- data = _(data).sortBy(function() {
- return Math.random();
- });
-
- console.log(data[0].input.length)
-
- console.log("training with", data.length);
-
- var opts = {
- hiddenLayers: [30]
- };
- var trainOpts = {
- errorThresh: 0.005,
- log: true,
- logPeriod: 1
- };
-
- var network = new brain.NeuralNetwork(opts);
-
- var stats = network.train(data, trainOpts);
-
- console.log("stats:", stats);
- console.log("parameters:", opts);
-
- var json = JSON.stringify(network.toJSON(), 4)
-
- fs.writeFile(networkFile, json, function (err) {
- if (err) throw err;
- console.log('saved network to', networkFile);
- });
- })
-}
-
-function getCanvases(callback) {
- var posDir = __dirname + "/POSITIVES/";
-
- fs.readdir(posDir, function(err, files) {
- if (err) throw "pos" + err;
-
- getDir(posDir, files, 1, 0, 9500, function(posData) {
- var negsDir = __dirname + "/NEGS_ALL/";
- fs.readdir(negsDir, function(err, files) {
- if (err) throw "neg" + err;
-
- getDir(negsDir, files, 0, 0, 9500, function(negData) {
- var data = posData.concat(negData);
-
- callback(data);
- })
- })
- })
- });
-}
-
-function getDir(dir, files, isCat, min, limit, callback) {
- var images = files.filter(function(file) {
- return path.extname(file) == ".jpg";
- });
- images = images.slice(min, limit);
-
- console.log(images.length)
-
- var data = [];
-
- async.map(images, function(file, done) {
- file = dir + file;
-
- utils.drawImgToCanvas(file, function(err, canvas) {
- if (err) {
- console.log(err, file);
- }
- done(null, {canvas: canvas, file: file, isCat: isCat, err: err});
- });
- },
- function(err, canvases) {
- callback(canvases);
- });
-}
View
90 training/mine-negatives.js
@@ -0,0 +1,90 @@
+var fs = require("fs"),
+ path = require("path"),
+ brain = require("brain"),
+ nomnom = require("nomnom"),
+ features = require("../features"),
+ utils = require("../utils")
+ collect = require("./collect");
+
+var opts = nomnom.options({
+ negDir: {
+ position: 0,
+ default: __dirname + "/collection/NEGATIVES/",
+ required: true,
+ help: "Directory of negatives"
+ },
+ minedDir: {
+ position: 1,
+ default: __dirname + "/collection/MINED_NEGATIVES/",
+ required: true,
+ help: "Directory to put mined hard negatives in"
+ },
+ network: {
+ default: __dirname + "/network.json",
+ help: "Neural network JSON file"
+ },
+ samples: {
+ default: 1,
+ help: "How many times to sub-sample full negative image"
+ },
+ limit: {
+ default: undefined,
+ help: "Max number of negative images to process from directory"
+ },
+ threshold: {
+ default: 0.9,
+ help: "How wrong the classification is, from 0.5+ to 1.0"
+ }
+}).colors().parse();
+
+var trained = require(opts.network);
+var network = new brain.NeuralNetwork().fromJSON(trained);
+
+mineNegatives();
+
+function mineNegatives() {
+ var files = fs.readdirSync(opts.negDir);
+
+ var images = files.filter(function(file) {
+ return (path.extname(file) == ".png"
+ || path.extname(file) == ".jpg");
+ });
+ images = images.slice(0, opts.limit);
+
+ console.time("mined in");
+ console.log("mining negatives from " + images.length);
+
+ var falsePositives = 0;
+ for (var i = 0; i < images.length; i++) {
+ var image = images[i];
+ var file = opts.negDir + "/" + image;
+
+ try {
+ var canvas = utils.drawImgToCanvasSync(file);
+ }
+ catch (e) {
+ console.log(e, file);
+ }
+ var samples = collect.extractSamples(canvas, opts.samples);
+
+ for (var j = 0; j < samples.length; j++) {
+ var fp = testSample(image, samples[j]);
+ falsePositives += fp ? 1 : 0;
+ }
+ }
+ console.log(falsePositives + " hard negatives mined");
+ console.timeEnd("mined in");
+}
+
+function testSample(file, canvas) {
+ var fts = features.extractFeatures(canvas);
+ var result = network.run(fts);
+
+ if (result >= opts.threshold) {
+ var rand = Math.floor(Math.random() * 1000);
+ var file = opts.minedDir + "/" + rand + "_" + path.basename(file);
+ utils.writeCanvasToFileSync(canvas, file);
+ return true;
+ }
+ return false;
+}
View
135 training/test-network.js
@@ -1,95 +1,52 @@
var fs = require("fs"),
path = require("path"),
brain = require("brain"),
- async = require("async"),
- _ = require("underscore"),
+ nomnom = require("nomnom"),
features = require("../features"),
- utils = require("../utils");
-
-testNetwork({
- cellSize: 4
-})
-
-function testNetwork(params) {
- getCanvases(function(canvases) {
- canvases = canvases.filter(function(canvas) {
- return !canvas.err;
- })
-
- var data = canvases.map(function(canvas) {
- var fts = features.extractFeatures(canvas.canvas, params);
- return {
- input: fts,
- output: [canvas.isCat]
- };
- });
-
- data = _(data).sortBy(function() {
- return Math.random();
- });
-
- console.log("testing with", data.length);
-
- var json = require("./network-4-big.json")
- var network = new brain.NeuralNetwork().fromJSON(json);
- var stats = network.test(data);
-
- console.log(stats.error, "error");
- console.log(stats.precision, "precision")
- console.log(stats.recall, "recall")
- console.log(stats.accuracy, "accuracy")
-
- console.log(stats.truePos, "true positives");
- console.log(stats.trueNeg, "true negatives");
- console.log(stats.falsePos, "false positives");
- console.log(stats.falseNeg, "false negatives");
- console.log(stats.total, "total");
- })
-}
-
-function getCanvases(callback) {
- var posDir = __dirname + "/POSITIVES_TEST/";
-
- fs.readdir(posDir, function(err, files) {
- if (err) throw err;
-
- getDir(posDir, files, 1, 0, 8000, function(posData) {
- var negsDir = __dirname + "/NEGATIVES_TEST/";
- fs.readdir(negsDir, function(err, files) {
- if (err) throw err;
-
- getDir(negsDir, files, 0, 0, 6000, function(negData) {
- var data = posData.concat(negData);
-
- callback(data);
- })
- })
- })
- });
-}
-
-function getDir(dir, files, isCat, min, limit, callback) {
- var images = files.filter(function(file) {
- return path.extname(file) == ".jpg";
- });
- images = images.slice(min, limit);
-
- console.log(images.length)
-
- var data = [];
-
- async.map(images, function(file, done) {
- file = dir + file;
-
- utils.drawImgToCanvas(file, function(err, canvas) {
- if (err) {
- console.log(err);
- }
- done(null, {canvas: canvas, file: file, isCat: isCat, err: err});
- });
+ utils = require("../utils"),
+ collect = require("./collect");
+
+var opts = nomnom.options({
+ posDir: {
+ position: 0,
+ default: __dirname + "/collection/POSITIVES_TEST/",
+ help: "Directory of test positives"
+ },
+ negDir: {
+ position: 1,
+ default: __dirname + "/collection/NEGATIVES_TEST/",
+ help: "Directory of test negatives"
+ },
+ network: {
+ default: __dirname + "/network.json",
+ help: "Neural network JSON file"
},
- function(err, canvases) {
- console.log("got one directory of images");
- callback(canvases);
- });
+ sample: {
+ flag: true,
+ help: "sub-sample the negative images"
+ }
+}).colors().parse();
+
+testNetwork();
+
+function testNetwork() {
+ var data = collect.collectData(opts.posDir, opts.negDir, opts.sample ? 1 : 0);
+ console.log("testing on", data.length);
+
+ console.log("feature size", data[0].input.length);
+
+ var json = require(opts.network)
+ var network = new brain.NeuralNetwork().fromJSON(json);
+ var stats = network.test(data);
+
+ console.log("error: " + stats.error);
+ console.log("precision: " + stats.precision)
+ console.log("recall: " + stats.recall)
+ console.log("accuracy: " + stats.accuracy)
+
+ console.log(stats.truePos + " true positives");
+ console.log(stats.trueNeg + " true negatives");
+ console.log(stats.falsePos + " false positives");
+ console.log(stats.falseNeg + " false negatives");
+ console.log(stats.total + " total");
}
View
76 training/train-network.js
@@ -0,0 +1,76 @@
+var fs = require("fs"),
+ brain = require("brain"),
+ path = require("path"),
+ nomnom = require("nomnom"),
+ utils = require("../utils"),
+ features = require("../features"),
+ collect = require("./collect");
+
+var opts = nomnom.options({
+ posDir: {
+ position: 0,
+ default: __dirname + "/collection/POSITIVES/",
+ help: "Directory of cat head images"
+ },
+ negDir: {
+ position: 1,
+ default: __dirname + "/collection/NEGATIVES/",
+ help: "Directory of negative images"
+ },
+ outfile: {
+ default: __dirname + "/network.json",
+ help: "file to save network JSON to"
+ },
+ sample: {
+ flag: true,
+ help: "whether to sub-sample the negative images"
+ },
+ limit: {
+ default: 10000,
+ help: "maximum number of images to use from each directory"
+ }
+}).colors().parse();
+
+var params = {
+ HOG: {
+ cellSize: 6,
+ blockSize: 2,
+ blockStride: 1,
+ bins: 6,
+ norm: "L2"
+ },
+ nn: {
+ hiddenLayers: [10, 10],
+ learningRate: 0.2
+ },
+ train: {
+ errorThresh: 0.05,
+ log: true,
+ logPeriod: 1
+ }
+};
+
+trainNetwork(params)
+
+function trainNetwork(params) {
+ var samples = opts.sample ? 1 : 0;
+ var data = collect.collectData(opts.posDir, opts.negDir, samples,
+ opts.limit, params);
+
+ console.log("training on", data.length);
+ console.log("feature size:", data[0].input.length)
+
+ var network = new brain.NeuralNetwork(params.nn);
+
+ var stats = network.train(data, params.train);
+
+ console.log("stats:", stats);
+ console.log("parameters:", params);
+
+ var json = JSON.stringify(network.toJSON(), 4)
+
+ fs.writeFile(opts.outfile, json, function (err) {
+ if (err) throw err;
+ console.log('saved network to', opts.outfile);
+ });
+}
View
54 training/upload.js
@@ -1,54 +0,0 @@
-var fs = require("fs"),
- path = require("path"),
- async = require("async"),
- cradle = require("cradle"),
- utils = require("../utils"),
- features = require("../features");
-
-var negsDir = __dirname + "/NEGATIVES/";
-var posDir = __dirname + "/POSITIVES/";
-
-var db = new(cradle.Connection)().database('cats-hog-c6-b9');
-
-var count = 0;
-
-//uploadDir(posDir, 1, 5000);
-uploadDir(negsDir, 0, 5000);
-
-function uploadDir(dir, isCat, limit) {
- var docs = [];
-
- fs.readdir(dir, function(err, files) {
- if (err) throw err;
-
- var images = files.filter(function(file) {
- return path.extname(file) == ".jpg";
- });
-
- images = images.slice(0, limit);
-
- async.forEach(images, function(file, done) {
- file = dir + file;
-
- utils.drawImgToCanvas(file, function(canvas) {
- var fts = features.extractFeatures(canvas);
-
- docs.push({
- file: file,
- input: fts,
- output: [isCat]
- });
-
- if (++count % 1000 == 0) {
- console.log("processed", count)
- }
- done();
- });
- },
- function() {
- db.save(docs, function(err) {
- if (err) throw err;
- });
- });
- });
-}

0 comments on commit bf37854

Please sign in to comment.