Skip to content

Commit

Permalink
De-lints package js files
Browse files Browse the repository at this point in the history
  • Loading branch information
desmondmorris committed Jan 1, 2014
1 parent 9d4b4e0 commit 9fc8d13
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 74 deletions.
16 changes: 10 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@

A simple wrapper for the Tesseract OCR package for node.js

## Requirements

* Tesseract 3.01 or higher is needed for this to work

## Installation
npm install node-tesseract

Expand All @@ -17,7 +21,7 @@ npm install node-tesseract
var tesseract = require('node-tesseract');

// Recognize text of any language in any format
nodecr.process(__dirname + '/path/to/image.jpg',function(err, text) {
tesseract.process(__dirname + '/path/to/image.jpg',function(err, text) {
if(err) {
console.error(err);
} else {
Expand All @@ -26,24 +30,24 @@ nodecr.process(__dirname + '/path/to/image.jpg',function(err, text) {
});

// Recognize German text in a single uniform block of text
nodecr.process(__dirname + '/path/to/image.jpg',function(err, text) {
tesseract.process(__dirname + '/path/to/image.jpg',function(err, text) {
if(err) {
console.error(err);
} else {
console.log(text);
}
}, 'deu', 6);

// Recognise text of any language in any format but preprocess the image
// Recognize text of any language in any format but preprocess the image
// with ImageMagick 'convert' (This requires ImageMagick to be installed)

// You can write and use your own preprocessors easily, just have a look at lib/nodecr.js
nodecr.process(__dirname + '/path/to/image.jpg',function(err, text) {
// You can write and use your own preprocessors easily, just have a look at lib/tesseract.js
tesseract.process(__dirname + '/path/to/image.jpg',function(err, text) {
if(err) {
console.error(err);
} else {
console.log(text);
}
console.log(text);
}, null, null, null, nodecr.preprocessors.convert);
}, null, null, null, tesseract.preprocessors.convert);
```
3 changes: 3 additions & 0 deletions index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
'use strict';

module.exports = require('./lib/tesseract');
129 changes: 62 additions & 67 deletions lib/tesseract.js
Original file line number Diff line number Diff line change
@@ -1,27 +1,21 @@
var exec = require('child_process').exec,
fs = require('fs'),
tmp = require('tmp');
/*jslint indent:2 */
'use strict';

var exec = require('child_process').exec;
var fs = require('fs');
var tmp = require('tmp');

/**
* Attention: Tesseract 3.01 or higher is needed for this to work
*/
var tesseract = {

/**
*
* @param image Can be any format that your installed Leptonica library can process
* (additional libraries might be required by Leptonica)
* @param image
*
* @param callback A function pointer
* this function is called after the recognition has taken place
* with a possible error as first and the resulting recognized text as second parameter
* @param callback
*
* @param languageCode (Optional) a language code for the language to recognise
* see http://code.google.com/p/tesseract-ocr/downloads/list for available languages (xxx.traineddata.gz)
* any language you pass as an argument here must be unzipped into the tessdata directory beforehand
* @param languageCode (Optional) a language code for the language to recognize
*
* @param pageSegMode (Optional) The page segmentation mode.
* As of March 4, 2012 tesseract supports the following options:
*
* 0 = Orientation and script detection (OSD) only.
* 1 = Automatic page segmentation with OSD.
Expand All @@ -35,125 +29,126 @@ var tesseract = {
* 9 = Treat the image as a single word in a circle.
* 10 = Treat the image as a single character.
*
* See http://code.google.com/p/tesseract-ocr/source/browse/trunk/api/tesseractmain.cpp#95 for current state of options
*
* @param config (Optional) A config file name
*/
process: function process(image, callback, languageCode, pageSegMode, config, preprocessor) {
(preprocessor || tesseract.preprocessor)(image, function(err, processedImage, cleanup) {
if(err) {
(preprocessor || tesseract.preprocessor)(image, function (err, processedImage, cleanup) {
if (err) {
// error in preprocessor
callback(err, null);
return;
}
tesseract._runTesseract(processedImage, function(err, text) {
if(typeof cleanup == 'function') {
console.log("node-tesseract: Preprocessor cleanup");
tesseract.run(processedImage, function (err, text) {
if (typeof cleanup === 'function') {
console.log('node-tesseract: Preprocessor cleanup');
cleanup();
}
callback(err, text);
}, languageCode, pageSegMode, config);
});
},

_runTesseract: function(image, callback, languageCode, pageSegMode, config) {
run: function (image, callback, languageCode, pageSegMode, config) {
// generate output file name
tmp.tmpName(function(err, output) {
if(err) {
tmp.tmpName(function (err, output) {
if (err) {
// Something went wrong when generating the temporary filename
callback(err, null);
return;
}

// assemble tesseract command
// assemble tesseract command
var command = [tesseract.binary, image, output];

if(languageCode) {
if (languageCode) {
command.push('-l');
command.push(languageCode);
}
if(typeof pageSegMode != 'undefined' && pageSegMode !== null) {
if (pageSegMode !== undefined && pageSegMode !== null) {
command.push('-psm');
command.push(pageSegMode);
}
if(config) {
if (config) {
command.push(config);
}

command = command.join(' ');

// Run the tesseract command
console.log("node-tesseract: Running '" + command + "'");
exec(command, function(err, stdout, stderr){
if(err) {
console.log('node-tesseract: Running \'' + command + '\'');
exec(command, function (err) {
if (err) {
// Something went wrong executing the assembled command
callback(err, null);
return;
}

var outputFile = output + '.txt';
fs.readFile(outputFile, function(err, data) {
if(!err) {
fs.readFile(outputFile, function (err, data) {
if (!err) {
// There was no error, so get the text
data = data.toString(tesseract.outputEncoding);
}
console.log("node-tesseract: Deleting '"+outputFile+"'");
fs.unlink(outputFile, function (err) {
// ignore any errors here as it just means we have a temporary file left somewhere
});
console.log('node-tesseract: Deleting \'' + outputFile + '\'');
fs.unlink(outputFile);

// We got the result (or an error)
callback(err, data);
}); // end readFile

}); // end exec

}); // end output filename
},

/**
* A no-op preprocessor
* A no-op preprocessor
*
* @param inputFile The file to process
* @param callback The callback to call when the processing is done (1st argument error, 2nd the outputfile (the processed input file))
* @param callback The callback to call when the processing is done
* (1st argument error, 2nd the outputfile)
**/
preprocessor: function(inputFile, callback) {
// the default preprocessor does nothing...
var error = null,
outputFile = inputFile,
cleanup = function() {
// clean up here
// this gets called after the preprocessed image has been used
};
callback(error,outputFile,cleanup);
preprocessor: function (inputFile, callback) {
var error = null;
callback(error, inputFile);
},

binary: 'tesseract',
outputEncoding: 'UTF-8'
}
};

// OTB preprocessors

var ConvertPreprocessor = function(inputFile, callback) {
console.log("node-tesseract: preprocessor: convert: Processing '"+inputFile+"'");
tmp.tmpName({postfix: '.tif'}, function(err, outputFile) {
if(err) {
var ConvertPreprocessor = function (inputFile, callback) {
console.log('node-tesseract: preprocessor: convert: Processing \'' + inputFile + '\'');
tmp.tmpName({postfix: '.tif'}, function (err, outputFile) {
if (err) {
// Something went wrong when generating the temporary filename
callback(err, null);
return;
}

var command = ['convert', '-type','Grayscale', '-resize','200%', '-sharpen','10', inputFile, outputFile].join(' ');
console.log("node-tesseract: preprocessor: convert: Running '"+command+"'");
exec(command, function(err, stdout, stderr){
if(err) {

var command = [
'convert',
'-type',
'Grayscale',
'-resize',
'200%',
'-sharpen',
'10',
inputFile,
outputFile
].join(' ');

console.log('node-tesseract: preprocessor: convert: Running \'' + command + '\'');
exec(command, function (err) {
if (err) {
// Something went wrong executing the convert command
callback(err, null);
} else {
var cleanup = function() {
console.log("node-tesseract: preprocessor: convert: Deleting '"+outputFile+"'");
fs.unlink(outputFile, function (err) {
// ignore any errors here as it just means we have a temporary file left somewhere
});
var cleanup = function () {
console.log('node-tesseract: preprocessor: convert: Deleting \'' + outputFile + '\'');
fs.unlink(outputFile);
};
callback(null, outputFile, cleanup);
}
Expand All @@ -166,4 +161,4 @@ var ConvertPreprocessor = function(inputFile, callback) {
module.exports.process = tesseract.process;
module.exports.preprocessors = {
convert: ConvertPreprocessor
};
};
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"version": "0.0.2",
"author": "Desmond Morris <hi@desmondmorris.com>",
"description": "A simple wrapper for the Tesseract OCR package",
"main": "./lib/tesseract.js",
"main": "index.js",
"repository": {
"type": "git",
"url": "https://github.com/desmondmorris/node-tesseract/"
Expand Down

0 comments on commit 9fc8d13

Please sign in to comment.