-
Notifications
You must be signed in to change notification settings - Fork 8
/
descriptionlist_parser.go
80 lines (75 loc) · 2.37 KB
/
descriptionlist_parser.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
package htmlutil
import (
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
// TokenizerDescriptionLists gathers successive description lists read from z.
//
// It calls TokenizerDescriptionListNext repeatedly and keeps every non-empty
// list it yields. The first empty list ends the collection and the lists
// gathered so far are returned with a nil error. If a call fails, the lists
// gathered before the failure are returned together with that error.
func TokenizerDescriptionLists(z *html.Tokenizer) (DescriptionLists, error) {
	collected := DescriptionLists{}
	next, err := TokenizerDescriptionListNext(z)
	for ; err == nil && len(next) > 0; next, err = TokenizerDescriptionListNext(z) {
		collected = append(collected, next)
	}
	// err is nil here exactly when the loop stopped on an empty list.
	return collected, err
}
// TokenizerDescriptionListNext reads one description list from z.
//
// It calls NextTokens with a <dl> start tag as the start filter and a </dl>
// end tag as the end filter (InclusiveMatch and IncludeChain enabled,
// SkipErrors disabled), then hands the resulting tokens to
// ParseDescriptionListTokens.
//
// If NextTokens fails, an empty DescriptionList and the error are returned.
func TokenizerDescriptionListNext(z *html.Tokenizer) (DescriptionList, error) {
	dlStart := html.Token{Type: html.StartTagToken, DataAtom: atom.Dl}
	dlEnd := html.Token{Type: html.EndTagToken, DataAtom: atom.Dl}

	toks, err := NextTokens(z, NextTokensOpts{
		SkipErrors:     false,
		IncludeChain:   true,
		InclusiveMatch: true,
		StartFilter:    []html.Token{dlStart},
		EndFilter:      []html.Token{dlEnd},
	})
	if err != nil {
		return DescriptionList{}, err
	}
	return ParseDescriptionListTokens(toks...), nil
}
// Collection states used by ParseDescriptionListTokens to remember which
// element's tokens are currently being gathered. The empty string means
// neither a term nor a description is open.
const (
// matchingTerm is set when a <dt> start tag is seen and cleared on </dt>.
matchingTerm = "term"
// matchingDesc is set when a <dd> start tag is seen and cleared on </dd>.
matchingDesc = "desc"
)
// ParseDescriptionListTokens groups the tokens of a description list into
// term/description entries.
//
// Tokens are examined in order, and tag matches are decided by
// Tokens.MatchLeft:
//   - A <dt> start tag opens a term. It, the closing </dt>, and every token in
//     between are appended to the current entry's Term. While a term is open,
//     any token other than a <dt> start or </dt> end tag (dd tags included) is
//     added to the term.
//   - A <dd> start tag seen outside a term opens a description. It and the
//     tokens that follow are appended to the current entry's Description.
//   - A </dd> end tag seen outside a term is appended to Description, the
//     entry is added to the result, and a fresh entry is started.
//
// Tokens that arrive while nothing is open and match none of the tags above
// are dropped. An entry that never reaches a </dd> is not included in the
// returned list.
func ParseDescriptionListTokens(toks ...html.Token) DescriptionList {
	dtOpen := Tokens([]html.Token{{Type: html.StartTagToken, DataAtom: atom.Dt}})
	dtClose := Tokens([]html.Token{{Type: html.EndTagToken, DataAtom: atom.Dt}})
	ddOpen := Tokens([]html.Token{{Type: html.StartTagToken, DataAtom: atom.Dd}})
	ddClose := Tokens([]html.Token{{Type: html.EndTagToken, DataAtom: atom.Dd}})

	result := DescriptionList{}
	var entry Description
	state := ""

	for _, t := range toks {
		// Case order matters: the open-term check sits before the dd checks,
		// so dd tags inside an unclosed term are collected as term tokens.
		switch {
		case dtOpen.MatchLeft(t, nil):
			entry.Term = append(entry.Term, t)
			state = matchingTerm
		case dtClose.MatchLeft(t, nil):
			entry.Term = append(entry.Term, t)
			state = ""
		case state == matchingTerm:
			entry.Term = append(entry.Term, t)
		case ddOpen.MatchLeft(t, nil):
			entry.Description = append(entry.Description, t)
			state = matchingDesc
		case ddClose.MatchLeft(t, nil):
			entry.Description = append(entry.Description, t)
			result = append(result, entry)
			entry = Description{}
			state = ""
		case state == matchingDesc:
			entry.Description = append(entry.Description, t)
		}
	}
	return result
}