From f1329daa1944ef3da1c69ff40eaee2ddb68b303b Mon Sep 17 00:00:00 2001 From: ashipunov Date: Mon, 8 Nov 2010 16:17:04 -0600 Subject: [PATCH] v1.11 --- NEWS | 2 ++ README | 4 ++-- TODO | 2 +- img2djvu | 12 ++++++++---- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/NEWS b/NEWS index cd5c94b..402533e 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,5 @@ +1.11 Version number in default output; option for number of OCR jobs (thanks to Kyrill Detinov) + 1.10 Bug fixes: bugs introduced in previous version :) 1.9 Bug fix: filenames with spaces (thanks to Kyrill Detinov) diff --git a/README b/README index 3d91e5c..0c64417 100644 --- a/README +++ b/README @@ -58,9 +58,9 @@ Will use four-fold downsampling for color layers; result will be extremely compa Will NOT use blur and contrast for processing color layers -> img2djvu -l 1 -r rus -e cuneiform -a 1 out +> img2djvu -l 1 -r rus -e cuneiform -j 2 -a 1 out -After creation of final DjVu, will run cuneiform with "-rus" language option via ocrodjvu and insert text layer in place +After creation of final DjVu, will run two OCR jobs of cuneiform with "-rus" language option via ocrodjvu and insert text layer in place > img2djvu -c 1 out diff --git a/TODO b/TODO index 85922e0..59eea7b 100644 --- a/TODO +++ b/TODO @@ -1,4 +1,4 @@ -Make minidjvu accept filenames with spaces +Make minidjvu accept filenames with spaces, probably sequentially rename files immediately after conversion (?) MMR and JPEG (ald also 2k?) 
chunks (with djvumake) instead of JB2 and IW44 for B&W and color pages, respectively diff --git a/img2djvu b/img2djvu index afad269..03a4c97 100755 --- a/img2djvu +++ b/img2djvu @@ -1,6 +1,5 @@ #!/bin/bash -### Version 1.10 ### Inspired by script pdf-trim-to-djvu.sh (http://gist.github.com/315791) ### PUBLIC DOMAIN @@ -16,6 +15,7 @@ ocrengine="" ocrlanguage="" verbmini=0 tmp=0 +ocrjobs=1 # internal prog="$0" dpidefault="$DPI" @@ -28,10 +28,12 @@ ocrenginedefault="$ocrengine" ocrlanguagedefault="$ocrlanguage" verbminidefault="$verbmini" tmpdefault="$tmp" +ocrjobsdefault="$ocrjobs" function usage() { me=`basename "$prog"` cat << END +img2djvu version 1.11 Usage: "$me" [options] relative_folder_name Options: @@ -39,6 +41,7 @@ Options: -c <0|1> make a choice of temporary directory, 0 for /tmp, 1 for current [default: "$tmpdefault"] -d resolution in DPI [default: "$dpidefault"] -e "str" if not empty, use OCR engine (supported by ocrodjvu) with this name [default: "$ocrenginedefault"] + -j number of OCR jobs [default: "$ocrjobsdefault"] -l if not 0, will use forced segmentation (with downsampling) [default: "$codefault"] -m if not 0, will use minidjvu (with dictionary size) instead of cjb2 [default: "$midefault"] -r "str" if not empty, use OCR engine with given language [default: "$ocrlanguagedefault"] @@ -50,7 +53,7 @@ Options: END } -opts=`getopt -l "help" "a:c:d:e:h:f:l:m:p:r:t:v:" "$@"` && eval set -- "$opts" +opts=`getopt -l "help" "a:c:d:e:h:f:j:l:m:p:r:t:v:" "$@"` && eval set -- "$opts" while true ; do case "$1" in -h|--help) usage ; exit 0 ;; @@ -58,6 +61,7 @@ while true ; do -c) tmp="$2" ; shift 2 ;; -d) DPI="$2" ; shift 2 ;; -e) ocrengine="$2"; shift 2 ;; + -j) ocrjobs="$2"; shift 2 ;; -l) usecodjvu="$2" ; shift 2 ;; -m) usemini="$2" ; shift 2 ;; -r) ocrlanguage="$2" ; shift 2 ;; @@ -278,7 +282,7 @@ function nomini { printf "\nDone.\n" && \ if [ "$useocr" -gt 0 ] ; then printf "Starting OCR...\n" - ocrodjvu --engine "$ocrengine" --language "$ocrlanguage" 
--in-place --on-error=resume "$djvu" + ocrodjvu --engine "$ocrengine" --language "$ocrlanguage" --jobs "$ocrjobs" --in-place --on-error=resume "$djvu" fi ) && rm -rf "$tmpdir" || ( printf "Failure\nTemporary directory left: %s\n" "$tmpdir" @@ -342,7 +346,7 @@ function mini { printf "\nDone.\n" && \ if [ "$useocr" -gt 0 ] ; then printf "Starting OCR...\n" - ocrodjvu --engine "$ocrengine" --language "$ocrlanguage" --in-place --on-error=resume "$djvu" + ocrodjvu --engine "$ocrengine" --language "$ocrlanguage" --jobs "$ocrjobs" --in-place --on-error=resume "$djvu" fi ) && rm -rf "$tmpdir" || ( printf "Failure\nTemporary directory left: %s\n" "$tmpdir"