-
Notifications
You must be signed in to change notification settings - Fork 1
/
dump_parser.go
49 lines (41 loc) · 882 Bytes
/
dump_parser.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
package main
import (
"encoding/xml"
"io"
)
// Parser is a source of wiki pages.
type Parser interface {
	// Next returns the next page from the parser.
	Next() (*Page, error)

	// SiteInfo returns the top-level site info from the stream.
	SiteInfo() SiteInfo
}
// singleStreamParser is a Parser backed by one XML decoder.
type singleStreamParser struct {
	// siteInfo is the value handed back by SiteInfo.
	siteInfo SiteInfo
	// decoder is the XML decoder that Next decodes pages from.
	decoder *xml.Decoder
}
// NewParser returns a Parser that reads a Wikipedia XML dump from r.
//
// It consumes tokens up to and including the first start element (the
// document root), skipping any XML declaration, comments, or whitespace that
// precede it, and then decodes the next element into the parser's SiteInfo.
// Pages are subsequently read from the same decoder by Next.
//
// Decoder errors are returned as-is, so an empty stream yields io.EOF.
func NewParser(r io.Reader) (Parser, error) {
	decoder := xml.NewDecoder(r)

	// Step inside the root element. The first token is not necessarily the
	// root start tag: an XML declaration, a comment, or leading whitespace
	// may come first, and stopping on one of those would make the Decode
	// below swallow the entire root element as the site info.
	for {
		tok, err := decoder.Token()
		if err != nil {
			return nil, err
		}
		if _, isStart := tok.(xml.StartElement); isStart {
			break
		}
	}

	var siteInfo SiteInfo
	if err := decoder.Decode(&siteInfo); err != nil {
		return nil, err
	}

	return &singleStreamParser{
		siteInfo: siteInfo,
		decoder:  decoder,
	}, nil
}
// Next decodes the next element from the underlying XML decoder into a newly
// allocated Page. The Page pointer is returned together with the decoder's
// error, so it is non-nil even when err is not nil.
func (p *singleStreamParser) Next() (rv *Page, err error) {
	page := new(Page)
	decodeErr := p.decoder.Decode(page)
	return page, decodeErr
}
// SiteInfo returns a copy of the site info stored on the parser.
func (p *singleStreamParser) SiteInfo() SiteInfo {
	info := p.siteInfo
	return info
}