/
selector.go
113 lines (95 loc) · 2.57 KB
/
selector.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
package indexer
import (
	"errors"
	"fmt"
	"sort"
	"strings"

	"github.com/PuerkitoBio/goquery"
	"github.com/Sirupsen/logrus"
	"github.com/yosssi/gohtml"
)
// filterBlock is one entry of a selector's "filters" list as read from YAML.
type filterBlock struct {
	// Name identifies the filter to invoke.
	Name string `yaml:"name"`
	// Args carries the filter's arguments exactly as decoded from YAML; the
	// concrete type is whatever the decoder produced for the "args" node.
	Args interface{} `yaml:"args"`
}
// selectorBlock describes how to obtain a string value from a goquery
// selection: either a fixed text, or the text/attribute of the elements
// matched by a CSS selector, optionally post-processed by filters.
type selectorBlock struct {
	// Selector is the CSS selector passed to goquery's Find.
	Selector string `yaml:"selector"`
	// TextVal is a literal value; when non-empty it is used instead of
	// querying the document.
	TextVal string `yaml:"text"`
	// Attribute, when set, makes extraction return this attribute's value
	// rather than the element text.
	Attribute string `yaml:"attribute,omitempty"`
	// Remove is a selector whose matches are removed from the selection
	// before its text is extracted.
	Remove string `yaml:"remove,omitempty"`
	// Filters are applied to the extracted value in slice order.
	Filters []filterBlock `yaml:"filters,omitempty"`
	// Case maps selector patterns to values; the value of a pattern that
	// matches the selection becomes the extracted value.
	Case map[string]string `yaml:"case,omitempty"`
}
// Match reports whether this block can produce a value for selection.
//
// An empty block (no selector and no literal text) never matches. Otherwise
// the block matches when its selector finds at least one element under
// selection, or when a literal text value is configured.
func (s *selectorBlock) Match(selection *goquery.Selection) bool {
	if s.IsEmpty() {
		return false
	}

	// The lookup is performed before the literal-text check, mirroring the
	// evaluation order of the combined boolean expression.
	found := selection.Find(s.Selector).Length() > 0
	return found || s.TextVal != ""
}
// MatchText resolves the block against from and returns the resulting string.
//
// Resolution order:
//   - A literal text value wins and is passed through the configured
//     filters, the same way Text treats a literal value. Previously the
//     literal was returned raw here, so filters were silently skipped
//     depending on which method the caller used.
//   - Otherwise, when a selector is configured, it must match at least one
//     element under from; the matched selection is handed to Text.
//   - With neither set, Text is applied to from itself.
//
// An error is returned when the selector matches nothing, or when Text or a
// filter fails.
func (s *selectorBlock) MatchText(from *goquery.Selection) (string, error) {
	if s.TextVal != "" {
		return s.applyFilters(s.TextVal)
	}

	if s.Selector == "" {
		return s.Text(from)
	}

	result := from.Find(s.Selector)
	if result.Length() == 0 {
		return "", fmt.Errorf("Failed to match selector %q", s.Selector)
	}
	return s.Text(result)
}
// Text extracts this block's value from el and runs it through the
// configured filters.
//
// Steps, in order:
//  1. A literal text value short-circuits everything else.
//  2. Elements matching Remove are deleted from el (this mutates the
//     underlying document).
//  3. When Case is set, the value of the first pattern that matches el, or a
//     descendant of el, is used; if none matches an error is returned.
//  4. Otherwise the requested attribute is returned, or the trimmed text of
//     el when no attribute is configured.
//
// An error is returned when no case matches, when the requested attribute is
// absent, or when a filter fails.
func (s *selectorBlock) Text(el *goquery.Selection) (string, error) {
	if s.TextVal != "" {
		return s.applyFilters(s.TextVal)
	}

	if s.Remove != "" {
		el.Find(s.Remove).Remove()
	}

	if s.Case != nil {
		filterLogger.
			WithFields(logrus.Fields{"case": s.Case}).
			Debugf("Applying case to selection")

		// Go map iteration order is unspecified, so ranging over s.Case
		// directly makes the winner random whenever several patterns match.
		// Walk the patterns in a fixed order instead.
		for _, pattern := range orderedCasePatterns(s.Case) {
			if el.Is(pattern) || el.Has(pattern).Length() >= 1 {
				return s.applyFilters(s.Case[pattern])
			}
		}
		return "", errors.New("None of the cases match")
	}

	// The error is deliberately ignored: the HTML is only used for the debug
	// log line below and never influences the returned value.
	html, _ := goquery.OuterHtml(el)
	filterLogger.
		WithFields(logrus.Fields{"html": gohtml.Format(html)}).
		Debugf("Extracting text from selection")

	if s.Attribute != "" {
		val, exists := el.Attr(s.Attribute)
		if !exists {
			return "", fmt.Errorf("Requested attribute %q doesn't exist", s.Attribute)
		}
		return s.applyFilters(val)
	}

	return s.applyFilters(strings.TrimSpace(el.Text()))
}

// orderedCasePatterns returns the keys of cases in a deterministic order:
// sorted lexicographically, with the universal selector "*" moved to the end
// so that it acts as a fallback rather than shadowing more specific patterns.
func orderedCasePatterns(cases map[string]string) []string {
	patterns := make([]string, 0, len(cases))
	hasWildcard := false
	for pattern := range cases {
		if pattern == "*" {
			hasWildcard = true
			continue
		}
		patterns = append(patterns, pattern)
	}
	sort.Strings(patterns)
	if hasWildcard {
		patterns = append(patterns, "*")
	}
	return patterns
}
// applyFilters threads val through every configured filter in slice order,
// feeding each filter the output of the previous one.
//
// Each step is logged at debug level with the filter's arguments and the
// value it is about to receive. The first filter error aborts the chain and
// is returned with an empty string.
func (s *selectorBlock) applyFilters(val string) (string, error) {
	result := val
	// Declared as the error interface up front so the nil check below
	// operates on an interface value.
	var err error

	for _, filter := range s.Filters {
		filterLogger.
			WithFields(logrus.Fields{"args": filter.Args, "before": result}).
			Debugf("Applying filter %s", filter.Name)

		if result, err = invokeFilter(filter.Name, filter.Args, result); err != nil {
			return "", err
		}
	}

	return result, nil
}
// IsEmpty reports whether the block has neither a selector nor a literal
// text value configured.
func (s *selectorBlock) IsEmpty() bool {
	if s.Selector != "" {
		return false
	}
	return s.TextVal == ""
}
// String returns a short human-readable description of the block:
// "Selector(<selector>)" when a selector is set, otherwise "Text(<text>)"
// when a literal text is set, otherwise "Empty".
func (s *selectorBlock) String() string {
	if s.Selector != "" {
		return fmt.Sprintf("Selector(%s)", s.Selector)
	}
	if s.TextVal != "" {
		return fmt.Sprintf("Text(%s)", s.TextVal)
	}
	return "Empty"
}