-
Notifications
You must be signed in to change notification settings - Fork 0
/
page.go
111 lines (86 loc) · 1.97 KB
/
page.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
package textractor
import (
"slices"
"sort"
"strings"
)
// Page groups the identity, dimensions and extracted elements (words, lines,
// key-value pairs, tables, layouts, queries and signatures) of one page.
// All fields are unexported; read access goes through the accessor methods.
type Page struct {
// id is the identifier returned by ID.
id string
// number is the page number returned by Number.
// NOTE(review): whether numbering is 0- or 1-based is not visible here — confirm.
number int
// width and height are the page dimensions returned by Width and Height.
// NOTE(review): units are not established in this file — confirm.
width float64
height float64
// childIDs lists the IDs that isChild treats as belonging to this page.
childIDs []string
// Extracted elements attached to the page, each exposed by the accessor of
// the same name.
words []*Word
lines []*Line
keyValues []*KeyValue
tables []*Table
// layouts is appended to by AddLayouts and linearized by Text.
layouts []*Layout
queries []*Query
signatures []*Signature
}
// ID returns the page's identifier.
func (p *Page) ID() string {
return p.id
}
// Number returns the page number stored on the page.
func (p *Page) Number() int {
return p.number
}
// Width returns the page's width.
// NOTE(review): the unit is not established in this file — confirm.
func (p *Page) Width() float64 {
return p.width
}
// Height returns the page's height.
// NOTE(review): the unit is not established in this file — confirm.
func (p *Page) Height() float64 {
return p.height
}
// Words returns the words attached to the page.
// The page's own slice is returned, not a copy.
func (p *Page) Words() []*Word {
return p.words
}
// Lines returns the lines attached to the page.
// The page's own slice is returned, not a copy.
func (p *Page) Lines() []*Line {
return p.lines
}
// Tables returns the tables attached to the page.
// The page's own slice is returned, not a copy.
func (p *Page) Tables() []*Table {
return p.tables
}
// KeyValues returns the key-value pairs attached to the page.
// The page's own slice is returned, not a copy.
func (p *Page) KeyValues() []*KeyValue {
return p.keyValues
}
// Layouts returns the layouts attached to the page, in the order they are
// stored. The page's own slice is returned, not a copy.
func (p *Page) Layouts() []*Layout {
return p.layouts
}
// Queries returns the queries attached to the page.
// The page's own slice is returned, not a copy.
func (p *Page) Queries() []*Query {
return p.queries
}
// Signatures returns the signatures attached to the page.
// The page's own slice is returned, not a copy.
func (p *Page) Signatures() []*Signature {
return p.signatures
}
// AddLayouts appends the given layouts to the page, after any layouts it
// already holds. Calling it with no arguments leaves the page unchanged.
func (p *Page) AddLayouts(layouts ...*Layout) {
p.layouts = append(p.layouts, layouts...)
}
// Text returns the page's text, built by calling Text on every layout and
// joining the results with a newline.
//
// Layouts are ordered by the Top value of their bounding box, smallest first.
// The sort is stable: layouts that share the same Top value keep the order in
// which they are stored on the page instead of ending up in an unspecified
// order. The page's own layout slice is never reordered.
//
// optFns are forwarded unchanged to each layout's Text method. A page without
// layouts yields the empty string.
func (p *Page) Text(optFns ...func(*TextLinearizationOptions)) string {
	// Sort a copy so the order of p.layouts is left untouched.
	sortedLayouts := make([]*Layout, len(p.layouts))
	copy(sortedLayouts, p.layouts)

	// SliceStable rather than Slice: with only the top edge as sort key, ties
	// are common (elements on the same row), and an unstable sort would place
	// them in an unspecified order.
	sort.SliceStable(sortedLayouts, func(i, j int) bool {
		return sortedLayouts[i].BoundingBox().Top() < sortedLayouts[j].BoundingBox().Top()
	})

	pageTexts := make([]string, len(sortedLayouts))
	for i, l := range sortedLayouts {
		pageTexts[i] = l.Text(optFns...)
	}
	return strings.Join(pageTexts, "\n")
}
// SearchValueByKey returns the page's key-value pairs whose key text contains
// key as a substring. Both sides are lower-cased with strings.ToLower before
// comparing, so the match ignores case.
//
// Pairs whose Key() is nil are skipped. Matches are returned in the order the
// pairs are stored on the page; the result is nil when nothing matches. An
// empty key matches every pair that has a non-nil key.
func (p *Page) SearchValueByKey(key string) []*KeyValue {
	needle := strings.ToLower(key)

	var matches []*KeyValue
	for _, pair := range p.keyValues {
		k := pair.Key()
		if k == nil {
			continue
		}
		if !strings.Contains(strings.ToLower(k.Text()), needle) {
			continue
		}
		matches = append(matches, pair)
	}
	return matches
}
// isChild reports whether id is one of the page's child IDs.
func (p *Page) isChild(id string) bool {
return slices.Contains(p.childIDs, id)
}