-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.go
executable file
·75 lines (58 loc) · 1.68 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
package main
import (
"flag"
"fmt"
"os"
"os/exec"
"path/filepath"
"regexp"
"strings"
)
// main parses the command-line flags, runs Tesseract OCR on the input file
// (or on every non-directory entry of the input directory) and appends the
// recognised text of each input to the output file.
//
// Flags:
//
//	-i  input file or directory (required)
//	-o  output file; created if missing, always appended to (required)
//	-l  Tesseract OCR language(s), "eng" by default
//
// Errors are passed to handle, which is defined outside this view.
func main() {
	inputFlag := flag.String("i", "", "Input file or directory")
	outputFlag := flag.String("o", "", "Output file")
	languageFlag := flag.String("l", "eng", "Tesseract OCR language(s)")
	flag.Parse()

	// Both paths are mandatory; print the flag summary when either is missing.
	if *inputFlag == "" || *outputFlag == "" {
		flag.PrintDefaults()
		return
	}

	output, exception := os.OpenFile(*outputFlag, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0666)
	handle(exception)
	defer output.Close()

	input, exception := os.Stat(*inputFlag)
	handle(exception)

	// convert OCRs the image at path into a temporary file named after the
	// image's base name and appends the result to the output file.
	convert := func(path string) {
		name := filepath.Base(path)
		temporaryFile := "/tmp/go-tesseract-" + name
		tesseract(path, temporaryFile, *languageFlag)
		write(name, output, temporaryFile)
	}

	if input.Mode().IsRegular() {
		convert(*inputFlag)
		fmt.Println("Done!")
		return
	}

	// Drop a trailing slash (not the leading one, which would turn an absolute
	// path into a relative one) so the joined path has no "//". An input of
	// "/" becomes "" and still yields "/name" below.
	directory := strings.TrimSuffix(*inputFlag, "/")
	inputs, exception := os.ReadDir(*inputFlag)
	handle(exception)
	for _, entry := range inputs {
		// Sub-directories are not images; skip them instead of handing them
		// to tesseract.
		if entry.IsDir() {
			continue
		}
		convert(directory + "/" + entry.Name())
	}
	fmt.Println("Done!")
}
// tesseract runs the external "tesseract" binary as
// `tesseract -l <l> <i> <o>`.
//
// i is the input image path, o is the output base path (the write step in
// this file later reads o + ".txt") and l is the OCR language argument.
//
// The command's combined stdout/stderr is captured so that, when the command
// fails, the error passed to handle names the input and carries tesseract's
// own diagnostics instead of only the exit status. On success the captured
// output is discarded and handle receives nil.
func tesseract(i string, o string, l string) {
	command := exec.Command("tesseract", "-l", l, i, o)
	diagnostics, exception := command.CombinedOutput()
	if exception != nil {
		// %w keeps the original error reachable through errors.Is/As.
		exception = fmt.Errorf("tesseract %q: %w: %s", i, exception, strings.TrimSpace(string(diagnostics)))
	}
	handle(exception)
}
// whitespaceRun matches a run of whitespace. It is compiled once at package
// scope rather than on every call to write.
var whitespaceRun = regexp.MustCompile(`\s+|\n+`)

// write appends the OCR text stored in t + ".txt" to o, framed by
// "=== Start <i> file ===" and "=== End <i> file ===" marker lines, with every
// run of whitespace in the text collapsed to a single space.
//
// i is the name shown in the markers and in the progress message, o is the
// already-open output file and t is the temporary file path without its
// ".txt" extension. Read and write errors are passed to handle.
func write(i string, o *os.File, t string) {
	data, exception := os.ReadFile(t + ".txt")
	handle(exception)

	// The contents are in memory now, so remove the temporary file right away;
	// cleanup then does not depend on the write below succeeding. Removal is
	// best-effort: a leftover temporary file must not abort the conversion.
	_ = os.Remove(t + ".txt")

	text := whitespaceRun.ReplaceAllString(string(data), " ")

	// Emit the whole section in one write so a failure is reported instead of
	// being silently dropped.
	_, exception = o.WriteString("=== Start " + i + " file ===\n" + text + "\n=== End " + i + " file ===\n\n")
	handle(exception)

	fmt.Println(i, "converted")
}