/
blocks.go
72 lines (63 loc) · 1.4 KB
/
blocks.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
package textractutil
import (
"strings"
"github.com/aws/aws-sdk-go/service/textract"
"github.com/grokify/awsgo/textractutil/ocrutil"
"github.com/grokify/mogo/pointer"
)
// Blocks is a slice of pointers to Textract blocks. The methods defined on it
// extract and group the text carried by those blocks.
type Blocks []*textract.Block
/*
func (b Blocks) Lines(blockTypes []string, textTypes []string) []string {
var lines []string
for _, bi := range b {
s := strings.TrimSpace(pointer.ToString(bi.Text))
if s != "" {
lines = append(lines, s)
}
}
return lines
}
*/
// LinesByBlockText groups the whitespace-trimmed text of each block under a
// composite key of the form "<BlockType>__<TextType>" (for example
// "WORD__PRINTED"), preserving slice order within each group.
//
// Both key components are trimmed before being joined. Every non-nil block
// contributes exactly one entry to its group, even when its trimmed text is
// empty. Nil entries in the slice are skipped. The returned map is never nil.
//
// NOTE(review): nil BlockType/TextType/Text fields rely on pointer.Dereference
// yielding the zero value ("") for nil pointers — confirm against mogo/pointer.
func (b Blocks) LinesByBlockText() map[string][]string {
	m := map[string][]string{}
	for _, bi := range b {
		if bi == nil {
			// A nil element has no fields to read; accessing them would panic.
			continue
		}
		key := strings.Join(
			[]string{
				strings.TrimSpace(pointer.Dereference(bi.BlockType)),
				strings.TrimSpace(pointer.Dereference(bi.TextType)),
			},
			"__")
		s := strings.TrimSpace(pointer.Dereference(bi.Text))
		m[key] = append(m[key], s)
	}
	return m
}
// TextResults builds an ocrutil.TextResults from the blocks. Lines is set to
// the result of b.Lines(), and WordsPrinted is set to the "WORD__PRINTED"
// group of b.LinesByBlockText() when that group exists; otherwise
// WordsPrinted keeps its zero value.
func (b Blocks) TextResults() ocrutil.TextResults {
	var res ocrutil.TextResults
	res.Lines = b.Lines()

	grouped := b.LinesByBlockText()
	printed, found := grouped["WORD__PRINTED"]
	if found {
		res.WordsPrinted = printed
	}
	return res
}
// Lines returns the whitespace-trimmed text of every block whose trimmed
// BlockType equals BlockTypeLine, in slice order.
//
// A matching block is included even when its trimmed text is empty. Nil
// entries in the slice are skipped. The result is always a non-nil slice,
// which is empty when no block matches.
func (b Blocks) Lines() []string {
	// Deliberately a non-nil empty slice so callers that serialize the result
	// keep seeing an empty list rather than a null value.
	lines := []string{}
	for _, bi := range b {
		if bi == nil {
			// A nil element has no fields to read; accessing them would panic.
			continue
		}
		if strings.TrimSpace(pointer.Dereference(bi.BlockType)) != BlockTypeLine {
			continue
		}
		s := strings.TrimSpace(pointer.Dereference(bi.Text))
		lines = append(lines, s)
	}
	return lines
}
/*
type TextResults struct {
OCRService string
OCRDateTime time.Time
Lines []string
WordsPrinted []string
}
*/