/
reader.go
104 lines (91 loc) · 2.27 KB
/
reader.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
package reader
import (
"encoding/xml"
"io/ioutil"
"net/http"
"time"
)
// Parse gathers the channels of every feed listed in urls and flattens
// their items into a single slice of RssItem.
//
// If the channels cannot be extracted, that error is returned unchanged
// together with a nil slice.
func Parse(urls []string) ([]RssItem, error) {
	feeds, err := extractChannels(urls)
	if err != nil {
		return nil, err
	}

	items := extractItems(feeds)
	return items, nil
}
// extractChannels downloads every URL in urls with an HTTP GET, parses each
// response body with parseRss and returns the channels of all feeds, in the
// order the URLs were given.
//
// The first failure (request, body read or XML parse) aborts the whole
// operation and is returned together with a nil slice. The HTTP status code
// is not inspected: the body is handed to the parser whatever the status.
func extractChannels(urls []string) ([]rssChannel, error) {
	// Start from an empty, non-nil slice so that an empty urls yields
	// []rssChannel{} rather than nil.
	channels := []rssChannel{}
	for _, url := range urls {
		resp, err := http.Get(url)
		if err != nil {
			return nil, err
		}

		// Close the body as soon as it has been read instead of deferring:
		// a defer inside this loop would keep every response (and its
		// connection) open until the function returns.
		body, err := ioutil.ReadAll(resp.Body)
		resp.Body.Close()
		if err != nil {
			return nil, err
		}

		rss, err := parseRss(body)
		if err != nil {
			return nil, err
		}
		channels = append(channels, rss.Channels...)
	}
	return channels, nil
}
// extractItems flattens the items of every channel into one slice, keeping
// channel order. Each item is converted with exportItem, which receives the
// owning channel's title and link as the fallback source.
//
// The result is always non-nil, even when there are no items.
func extractItems(channels []rssChannel) []RssItem {
	out := make([]RssItem, 0)
	for i := range channels {
		ch := &channels[i]
		// The fallback source is the same for every item of a channel.
		fallback := rssSource{
			Title:     ch.Title,
			SourceURL: ch.Link,
		}
		for j := range ch.Items {
			out = append(out, exportItem(ch.Items[j], fallback))
		}
	}
	return out
}
// exportItem converts an internal rssItem into the exported RssItem.
//
// The item's own source title and source URL are preferred; whichever of
// the two is empty is filled in from source, which callers normally
// populate with the channel's title and link.
func exportItem(item rssItem, source rssSource) RssItem {
	out := RssItem{
		Title:       item.Title,
		Link:        item.Link,
		PublishDate: item.PublishDate.Time,
		Description: item.Description,
	}

	// Take the item's own source data first, then fall back per field.
	if out.Source = item.Source.Title; out.Source == "" {
		out.Source = source.Title
	}
	if out.SourceURL = item.Source.SourceURL; out.SourceURL == "" {
		out.SourceURL = source.SourceURL
	}
	return out
}
// UnmarshalXML implements xml.Unmarshaler for customTime. It decodes the
// element's character data and parses it as an RSS pubDate.
//
// RSS dates follow RFC 822, usually with a four-digit year. Besides
// time.RFC1123 and time.RFC1123Z (which both require a two-digit day) the
// same shapes are accepted with a one-digit day and without the optional
// weekday, as well as the two-digit-year time.RFC822 and time.RFC822Z forms.
//
// An empty element leaves c unchanged and is not an error. If no layout
// matches, the error from the first attempt (time.RFC1123) is returned.
func (c *customTime) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
	var v string
	if err := d.DecodeElement(&v, &start); err != nil {
		return err
	}

	// An empty <pubDate/> carries no date; treat it like a missing element
	// instead of failing the whole document.
	if v == "" {
		return nil
	}

	// Ordered from most to least common; the first two are the layouts this
	// method has always accepted, so previously valid input parses the same.
	layouts := [...]string{
		time.RFC1123,
		time.RFC1123Z,
		"Mon, 2 Jan 2006 15:04:05 MST",
		"Mon, 2 Jan 2006 15:04:05 -0700",
		"2 Jan 2006 15:04:05 MST",
		"2 Jan 2006 15:04:05 -0700",
		time.RFC822,
		time.RFC822Z,
	}

	var firstErr error
	for _, layout := range layouts {
		parsed, err := time.Parse(layout, v)
		if err == nil {
			*c = customTime{parsed}
			return nil
		}
		if firstErr == nil {
			firstErr = err
		}
	}
	return firstErr
}
// parseRss unmarshals the XML document in resp into an rssBody.
//
// The value and the error from xml.Unmarshal are both returned as-is, so on
// failure the rssBody may be partially populated and should be ignored.
func parseRss(resp []byte) (rssBody, error) {
	var feed rssBody
	// Unmarshal in its own statement: writing
	// "return feed, xml.Unmarshal(resp, &feed)" would leave the order of
	// reading feed and calling Unmarshal unspecified by the language spec.
	err := xml.Unmarshal(resp, &feed)
	return feed, err
}