forked from otiai10/gosseract
/
tesseract.go
72 lines (68 loc) · 1.59 KB
/
tesseract.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
package gosseract
import "fmt"
import "os/exec"
import "bytes"
import "regexp"
import "io/ioutil"
// tesseractCmd is the interface returned by getTesseractCmd. The
// version-specific types tesseract0302 and tesseract0303 are assigned to it
// there, so both are expected to implement these methods.
type tesseractCmd interface {
// Version returns a string; presumably the tesseract version the value was
// built with (getTesseractCmd stores it in the `version` field) — TODO confirm
// against the implementations.
Version() string
// Execute takes a list of arguments and returns a string and an error.
// NOTE(review): presumably runs the tesseract binary with args and returns
// the recognised text — confirm against the implementations.
Execute(args []string) (string, error)
}
// Fixed names used when invoking tesseract and handling its files.
const (
	// TESSERACT is the executable name that is looked up on PATH and run.
	TESSERACT = "tesseract"

	// tmpFILEPREFIX is the name prefix given to temporary files created by
	// generateTmpFile.
	tmpFILEPREFIX = "gosseract"

	// outFILEEXTENSION is presumably the extension of tesseract's text output
	// file; it is not used in this part of the file — TODO confirm.
	outFILEEXTENSION = ".txt"
)
// getTesseractCmd finds the tesseract executable on PATH, queries its
// version, and returns the tesseractCmd implementation for that version.
//
// Supported versions are those whose version string starts with "3.02" or
// "3.03". An error is returned when the executable cannot be found, when its
// version cannot be determined, or when the version is not supported; in all
// of those cases the returned tesseractCmd is nil.
func getTesseractCmd() (tess tesseractCmd, e error) {
	commandPath, e := lookPath()
	if e != nil {
		return nil, e
	}
	v, e := version()
	if e != nil {
		return nil, e
	}
	// The dot is escaped so that only a literal "3.02"/"3.03" prefix matches;
	// an unescaped "." would also accept strings such as "3002".
	switch {
	case regexp.MustCompile(`^3\.02`).MatchString(v):
		return tesseract0302{version: v, commandPath: commandPath}, nil
	case regexp.MustCompile(`^3\.03`).MatchString(v):
		return tesseract0303{version: v, commandPath: commandPath}, nil
	}
	return nil, fmt.Errorf("No tesseract version is found, supporting 3.02~ and 3.03~")
}
// lookPath resolves the TESSERACT executable via exec.LookPath and passes
// the resulting path and error through unchanged.
func lookPath() (commandPath string, e error) {
	commandPath, e = exec.LookPath(TESSERACT)
	return commandPath, e
}
// version runs `tesseract --version` and extracts the dotted version number
// from the first line of its output (for example "3.02.02" from
// "tesseract 3.02.02").
//
// It returns an error if the command fails or if the output does not start
// with "tesseract <digits-and-dots>"; in both cases the returned version is
// the empty string.
func version() (v string, e error) {
	out, e := execTesseractCommandWithStderr("--version")
	if e != nil {
		return "", e
	}
	exp := regexp.MustCompile("^tesseract ([0-9\\.]+)")
	matches := exp.FindStringSubmatch(out)
	if len(matches) < 2 {
		// Return here: indexing matches[1] on a failed match would panic.
		return "", fmt.Errorf("tesseract version not found: response is `%s`", out)
	}
	return matches[1], nil
}
// execTesseractCommandWithStderr runs the TESSERACT executable with the
// single argument opt and returns everything the process wrote to stderr.
// Standard output is not captured. If the command fails to run or exits
// with an error, that error is returned and res is empty.
func execTesseractCommandWithStderr(opt string) (res string, e error) {
	captured := new(bytes.Buffer)
	proc := exec.Command(TESSERACT, opt)
	proc.Stderr = captured
	e = proc.Run()
	if e == nil {
		res = captured.String()
	}
	return res, e
}
// generateTmpFile creates a new, empty temporary file whose name starts with
// tmpFILEPREFIX and returns its path. The directory argument is empty, so
// ioutil.TempFile picks the default temporary directory.
//
// The file is closed before returning, so no descriptor is leaked; the
// caller only receives the name and is responsible for removing the file.
// An error is returned if the file cannot be created or closed, and fname is
// then empty.
func generateTmpFile() (fname string, e error) {
	myTmpDir := "" // TODO: enable to choose optionally
	f, e := ioutil.TempFile(myTmpDir, tmpFILEPREFIX)
	if e != nil {
		return "", e
	}
	fname = f.Name()
	// Only the path is handed back, so release the open handle here.
	if e = f.Close(); e != nil {
		return "", e
	}
	return fname, nil
}