/
atom.go
359 lines (296 loc) · 11.4 KB
/
atom.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
// Package atom provides a parser for Atom feeds.
//
// Commentary taken from https://tools.ietf.org/html/rfc4287
package atom
import (
"encoding/xml"
"io"
"net/url"
"hawx.me/code/riviera/feed/common"
)
// Parser reads feeds written in the Atom syndication format. It has no fields,
// so the zero value is ready to use.
type Parser struct{}

// CanRead reports whether r looks like an Atom feed: the first start element
// in the XML stream must be named "feed" and live in the Atom namespace.
// Tokens that precede the first start element are skipped. Any tokenizer
// error, including running out of input first, yields false. charset is
// installed as the decoder's CharsetReader.
func (Parser) CanRead(r io.Reader, charset func(charset string, input io.Reader) (io.Reader, error)) bool {
	dec := xml.NewDecoder(r)
	dec.CharsetReader = charset

	for {
		tok, tokErr := dec.Token()
		if tokErr != nil || tok == nil {
			return false
		}

		start, isStart := tok.(xml.StartElement)
		if !isStart {
			continue
		}

		// Only the first start element is inspected; it alone decides the answer.
		return start.Name.Local == "feed" && start.Name.Space == "http://www.w3.org/2005/Atom"
	}
}
// Read decodes one Atom feed document from r and converts it into a single
// common.Channel, returned as the only element of foundChannels. The URL
// argument is unused. charset is installed as the XML decoder's CharsetReader.
// If decoding fails, the decoder's error is returned and no channel is
// produced.
//
// Only the first author of the feed, and of each entry, is carried over. Entry
// links whose rel is "enclosure" become enclosures; every other entry link is
// kept as a link.
func (Parser) Read(r io.Reader, _ *url.URL, charset func(charset string, input io.Reader) (io.Reader, error)) (foundChannels []*common.Channel, err error) {
	dec := xml.NewDecoder(r)
	dec.CharsetReader = charset

	var doc atomFeed
	if err = dec.Decode(&doc); err != nil {
		return nil, err
	}

	channel := &common.Channel{
		ID:            doc.ID,
		Title:         doc.Title.Text,
		Rights:        doc.Rights,
		LastBuildDate: doc.Updated,
	}

	// Feed-level metadata.
	for n := range doc.Links {
		src := doc.Links[n]
		channel.Links = append(channel.Links, common.Link{
			Href:     src.Href,
			Rel:      src.Rel,
			Type:     src.Type,
			HrefLang: src.HrefLang,
		})
	}
	if sub := doc.SubTitle; sub != nil {
		channel.SubTitle = common.SubTitle{Type: sub.Type, Text: sub.Text}
	}
	if gen := doc.Generator; gen != nil {
		channel.Generator = common.Generator{URI: gen.URI, Version: gen.Version, Text: gen.Text}
	}
	if len(doc.Authors) > 0 {
		first := doc.Authors[0]
		channel.Author = common.Author{Name: first.Name, URI: first.URI, Email: first.Email}
	}

	// One item per entry, in document order.
	for n := range doc.Entries {
		entry := &doc.Entries[n]

		item := &common.Item{
			ID:          entry.ID,
			Title:       entry.Title,
			Description: entry.Summary,
			PubDate:     entry.Updated,
		}

		for k := range entry.Links {
			src := entry.Links[k]
			if src.Rel == "enclosure" {
				item.Enclosures = append(item.Enclosures, common.Enclosure{
					URL:  src.Href,
					Type: src.Type,
				})
				continue
			}
			item.Links = append(item.Links, common.Link{
				Href:     src.Href,
				Rel:      src.Rel,
				Type:     src.Type,
				HrefLang: src.HrefLang,
			})
		}

		for k := range entry.Contributors {
			item.Contributors = append(item.Contributors, entry.Contributors[k].Name)
		}

		for k := range entry.Categories {
			item.Categories = append(item.Categories, common.Category{
				Domain: "",
				Text:   entry.Categories[k].Term,
			})
		}

		if body := entry.Content; body != nil {
			item.Content = &common.Content{
				Type: body.Type,
				Lang: body.Lang,
				Base: body.Base,
				Text: body.Text,
			}
		}

		if len(entry.Authors) > 0 {
			first := entry.Authors[0]
			item.Author = common.Author{Name: first.Name, URI: first.URI, Email: first.Email}
		}

		if thumb := entry.MediaThumbnail; thumb != nil {
			img := &common.Image{URL: thumb.URL}
			// Width and height are optional; leave the zero value when absent.
			if thumb.Width != nil {
				img.Width = *thumb.Width
			}
			if thumb.Height != nil {
				img.Height = *thumb.Height
			}
			item.Thumbnail = img
		}

		channel.Items = append(channel.Items, item)
	}

	return append(foundChannels, channel), nil
}
// atomFeed is the decoding target for a whole Atom Feed Document.
//
// The "atom:feed" element is the document (i.e., top-level) element of an Atom
// Feed Document, acting as a container for metadata and data associated with
// the feed. Its element children consist of metadata elements followed by zero
// or more atom:entry child elements.
//
// The requirement notes on the fields below are quoted from RFC 4287; the
// struct itself does not enforce them.
type atomFeed struct {
// XMLName makes decoding fail unless the element being decoded is "feed" in
// the Atom namespace.
XMLName xml.Name `xml:"http://www.w3.org/2005/Atom feed"`
// atom:feed elements MUST contain one or more atom:author elements, unless
// all of the atom:feed element's child atom:entry elements contain at least
// one atom:author element.
Authors []atomAuthor `xml:"http://www.w3.org/2005/Atom author"`
// atom:feed elements MAY contain any number of atom:category elements.
Categories []atomCategory `xml:"http://www.w3.org/2005/Atom category"`
// atom:feed elements MAY contain any number of atom:contributor elements.
Contributors []atomContributor `xml:"http://www.w3.org/2005/Atom contributor"`
// atom:feed elements MUST NOT contain more than one atom:generator element.
// Nil when the feed has no generator element.
Generator *atomGenerator `xml:"http://www.w3.org/2005/Atom generator"`
// atom:feed elements MUST NOT contain more than one atom:icon element.
// Not parsed: there is no field for atom:icon.
// atom:feed elements MUST NOT contain more than one atom:logo element.
// Not parsed: there is no field for atom:logo.
// atom:feed elements MUST contain exactly one atom:id element.
ID string `xml:"http://www.w3.org/2005/Atom id"`
// atom:feed elements SHOULD contain one atom:link element with a rel
// attribute value of "self". This is the preferred URI for retrieving Atom
// Feed Documents representing this Atom feed.
//
// atom:feed elements MUST NOT contain more than one atom:link element with a
// rel attribute value of "alternate" that has the same combination of type
// and hreflang attribute values.
//
// atom:feed elements MAY contain additional atom:link elements beyond those
// described above.
Links []atomLink `xml:"http://www.w3.org/2005/Atom link"`
// atom:feed elements MUST NOT contain more than one atom:rights element.
Rights string `xml:"http://www.w3.org/2005/Atom rights"`
// atom:feed elements MUST NOT contain more than one atom:subtitle element.
// Nil when the feed has no subtitle element.
SubTitle *atomSubTitle `xml:"http://www.w3.org/2005/Atom subtitle"`
// atom:feed elements MUST contain exactly one atom:title element.
Title atomTitle `xml:"http://www.w3.org/2005/Atom title"`
// atom:feed elements MUST contain exactly one atom:updated element.
// Kept as the raw string from the document; it is not parsed into a time
// value here.
Updated string `xml:"http://www.w3.org/2005/Atom updated"`
// Entries holds the feed's atom:entry children in document order.
Entries []atomEntry `xml:"http://www.w3.org/2005/Atom entry"`
}
// atomTitle holds the contents of an atom:title element.
type atomTitle struct {
	// Type is the value of the element's "type" attribute. The attribute is
	// matched by local name only: unprefixed XML attributes belong to no
	// namespace, so a tag qualified with the Atom namespace would never match
	// an ordinary type="..." attribute and the field would stay empty.
	Type string `xml:"type,attr"`
	// Text is the character data found directly inside the element.
	Text string `xml:",chardata"`
}
// atomLink captures the attributes of an atom:link element. Each field is
// filled from the attribute named in its tag and is left empty when that
// attribute is absent.
type atomLink struct {
// Href is the value of the "href" attribute.
Href string `xml:"href,attr"`
// Rel is the value of the "rel" attribute, e.g. "self", "alternate" or
// "enclosure".
Rel string `xml:"rel,attr"`
// Type is the value of the "type" attribute.
Type string `xml:"type,attr"`
// HrefLang is the value of the "hreflang" attribute.
HrefLang string `xml:"hreflang,attr"`
}
// atomSubTitle holds the contents of an atom:subtitle element.
type atomSubTitle struct {
// Type is the value of the element's "type" attribute.
Type string `xml:"type,attr"`
// Text is the character data found directly inside the element.
Text string `xml:",chardata"`
}
// atomGenerator holds the contents of an atom:generator element.
type atomGenerator struct {
// URI is the value of the element's "uri" attribute.
URI string `xml:"uri,attr"`
// Version is the value of the element's "version" attribute.
Version string `xml:"version,attr"`
// Text is the character data found directly inside the element.
Text string `xml:",chardata"`
}
// atomAuthor holds the child elements of an atom:author element. Every child
// is matched in the Atom namespace; a missing child leaves its field empty.
type atomAuthor struct {
// Name is the text of the atom:name child.
Name string `xml:"http://www.w3.org/2005/Atom name"`
// URI is the text of the atom:uri child.
URI string `xml:"http://www.w3.org/2005/Atom uri"`
// Email is the text of the atom:email child.
Email string `xml:"http://www.w3.org/2005/Atom email"`
}
// atomEntry is the decoding target for a single atom:entry element.
//
// The "atom:entry" element represents an individual entry, acting as a
// container for metadata and data associated with the entry. This element can
// appear as a child of the atom:feed element, or it can appear as the document
// (i.e., top-level) element of a stand-alone Atom Entry Document.
//
// The requirement notes on the fields below are quoted from RFC 4287; the
// struct itself does not enforce them.
type atomEntry struct {
// atom:entry elements MUST contain one or more atom:author elements, unless
// the atom:entry contains an atom:source element that contains an atom:author
// element or, in an Atom Feed Document, the atom:feed element contains an
// atom:author element itself.
Authors []atomAuthor `xml:"http://www.w3.org/2005/Atom author"`
// atom:entry elements MAY contain any number of atom:category elements.
Categories []atomCategory `xml:"http://www.w3.org/2005/Atom category"`
// atom:entry elements MUST NOT contain more than one atom:content element.
// Nil when the entry has no content element.
Content *atomContent `xml:"http://www.w3.org/2005/Atom content"`
// atom:entry elements MAY contain any number of atom:contributor elements.
Contributors []atomContributor `xml:"http://www.w3.org/2005/Atom contributor"`
// atom:entry elements MUST contain exactly one atom:id element.
ID string `xml:"http://www.w3.org/2005/Atom id"`
// atom:entry elements that contain no child atom:content element MUST contain
// at least one atom:link element with a rel attribute value of "alternate".
//
// atom:entry elements MUST NOT contain more than one atom:link element with a
// rel attribute value of "alternate" that has the same combination of type
// and hreflang attribute values.
//
// atom:entry elements MAY contain additional atom:link elements beyond those
// described above.
Links []atomLink `xml:"http://www.w3.org/2005/Atom link"`
// atom:entry elements MUST NOT contain more than one atom:published element.
// Not parsed: there is no field for atom:published.
// atom:entry elements MUST NOT contain more than one atom:rights element.
// Not parsed: there is no field for atom:rights.
// atom:entry elements MUST NOT contain more than one atom:source element.
// Not parsed: there is no field for atom:source.
// atom:entry elements MUST contain an atom:summary element in either of the
// following cases:
//
// * the atom:entry contains an atom:content that has a "src" attribute (and
// is thus empty).
//
// * the atom:entry contains content that is encoded in Base64; i.e., the
// "type" attribute of atom:content is a MIME media type [MIMEREG], but is
// not an XML media type [RFC3023], does not begin with "text/", and does
// not end with "/xml" or "+xml".
//
// atom:entry elements MUST NOT contain more than one atom:summary element.
Summary string `xml:"http://www.w3.org/2005/Atom summary"`
// atom:entry elements MUST contain exactly one atom:title element.
// Only the element's character data is kept; its "type" attribute is not
// captured for entries.
Title string `xml:"http://www.w3.org/2005/Atom title"`
// atom:entry elements MUST contain exactly one atom:updated element.
// Kept as the raw string from the document.
Updated string `xml:"http://www.w3.org/2005/Atom updated"`
// http://www.rssboard.org/media-rss#media-thumbnails
//
// Allows particular images to be used as representative images for the media
// object. If multiple thumbnails are included, and time coding is not at
// play, it is assumed that the images are in order of importance. It has one
// required attribute and three optional attributes.
//
// The field is nil when the entry has no thumbnail element in the Media RSS
// namespace as a direct child. The tag names no parent path, so thumbnails
// nested inside other elements are not matched.
MediaThumbnail *struct {
// url specifies the url of the thumbnail. It is a required attribute.
URL string `xml:"url,attr"`
// height specifies the height of the thumbnail. It is an optional attribute.
// Nil when the attribute is absent.
Height *int `xml:"height,attr"`
// width specifies the width of the thumbnail. It is an optional attribute.
// Nil when the attribute is absent.
Width *int `xml:"width,attr"`
// time specifies the time offset in relation to the media object. Typically
// this is used when creating multiple keyframes within a single video. The
// format for this attribute should be in the DSM-CC's Normal Play Time
// (NPT) as used in RTSP [RFC 2326 3.6 Normal Play Time]. It is an optional
// attribute. Nil when the attribute is absent.
Time *string `xml:"time,attr"`
} `xml:"http://search.yahoo.com/mrss/ thumbnail"`
}
// atomContributor holds the part of an atom:contributor element that this
// package keeps: the contributor's name.
type atomContributor struct {
// Name is the text of the "name" child element. The tag carries no
// namespace, so a child called "name" is matched whatever its namespace.
Name string `xml:"name"`
}
// atomCategory captures the attributes of an atom:category element. An absent
// attribute leaves its field empty.
type atomCategory struct {
// The "term" attribute is a string that identifies the category to which the
// entry or feed belongs. Category elements MUST have a "term" attribute.
Term string `xml:"term,attr"`
// The "scheme" attribute is an IRI that identifies a categorization scheme.
// Category elements MAY have a "scheme" attribute.
Scheme string `xml:"scheme,attr"`
// The "label" attribute provides a human-readable label for display in
// end-user applications. Category elements MAY have a "label" attribute.
Label string `xml:"label,attr"`
}
// atomContent is the decoding target for an atom:content element.
//
// The "atom:content" element either contains or links to the content of the
// entry. The content of atom:content is Language-Sensitive.
type atomContent struct {
	// Type is the value of the element's "type" attribute.
	Type string `xml:"type,attr"`
	// Lang is the value of the xml:lang attribute. encoding/xml reports
	// attributes written with the reserved "xml:" prefix under the namespace
	// http://www.w3.org/XML/1998/namespace, so the tag has to name that URL; a
	// tag using the literal prefix "xml" as the namespace never matches.
	Lang string `xml:"http://www.w3.org/XML/1998/namespace lang,attr"`
	// Base is the value of the xml:base attribute, matched the same way as
	// Lang.
	Base string `xml:"http://www.w3.org/XML/1998/namespace base,attr"`
	// Text is the character data found directly inside the element. Markup
	// nested as child elements is not included.
	Text string `xml:",chardata"`
}