-
Notifications
You must be signed in to change notification settings - Fork 20
/
rss.py
63 lines (55 loc) · 1.86 KB
/
rss.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import datetime
import urllib.parse
from typing import List, Optional

import feedparser
import requests
from readability import Document

from ..story import Story
from .storyprovider import StoryProvider
class RSSFeedStoryProvider(StoryProvider):
    """Build stories from the entries of an RSS/Atom feed.

    Each feed entry's linked page is downloaded and run through
    ``readability`` to extract the title and main content. When the page
    cannot be fetched, the title and summary carried by the feed entry are
    used instead.
    """

    def __init__(
        self,
        rss_path: str,
        limit: int = 5,
        since_days_ago: Optional[int] = None,
        request_timeout: float = 10.0,
    ) -> None:
        """Configure the provider.

        Args:
            rss_path: Location of the feed, passed as-is to
                ``feedparser.parse``.
            limit: Upper bound on the number of stories returned per call.
            since_days_ago: When truthy, entries dated more than this many
                days ago are skipped. ``None`` (or ``0``) disables the
                date filter.
            request_timeout: Seconds to wait for each article download
                before falling back to the feed's own summary.
        """
        self.limit = limit
        self.feed_url = rss_path
        self.request_timeout = request_timeout
        # feedparser exposes parsed entry dates as UTC time tuples, so the
        # cutoff is kept as a naive UTC datetime to compare like with like.
        self._since = (
            datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
            - datetime.timedelta(days=since_days_ago)
            if since_days_ago
            else None
        )

    def get_stories(self, limit: int = 5, **kwargs) -> List[Story]:
        """Return up to ``limit`` stories from the feed.

        The effective limit is the smallest of the ``limit`` argument, the
        limit given at construction time, and the number of feed entries.

        Args:
            limit: Maximum number of stories wanted for this call.
            **kwargs: Accepted and ignored.

        Returns:
            Stories in feed order; an empty list when the effective limit
            is zero or negative.
        """
        feed = feedparser.parse(self.feed_url)
        limit = min(limit, self.limit, len(feed.entries))
        if limit <= 0:
            print(f"Sad honk :/ No entries found for feed {self.feed_url}...")
            # Returning here matters: the loop below appends before it
            # checks the limit, so it would otherwise yield one story.
            return []

        stories = []
        for entry in feed.entries:
            date = self._entry_date(entry)
            if date is None:
                # No usable timestamp: the entry can neither be filtered
                # nor dated, so it is skipped.
                continue
            if self._since is not None and date < self._since:
                continue
            stories.append(self._build_story(entry, date))
            if len(stories) >= limit:
                break
        # Falsy stories are dropped, as before.
        return list(filter(None, stories))

    @staticmethod
    def _entry_date(entry) -> Optional[datetime.datetime]:
        """Return the entry's updated (else published) date, or ``None``."""
        parsed = entry.get("updated_parsed") or entry.get("published_parsed")
        if not parsed:
            return None
        return datetime.datetime(*parsed[:6])

    def _build_story(self, entry, date: datetime.datetime) -> Story:
        """Create a story from the linked article, or from the feed summary."""
        link = entry.get("link", "")
        # Source is the URL root:
        source = urllib.parse.urlparse(link).netloc

        response = None
        if link:
            try:
                response = requests.get(link, timeout=self.request_timeout)
            except requests.RequestException:
                # Network failure is treated like a non-OK response.
                response = None

        if response is None or not response.ok:
            # Just return the headline content:
            return Story(
                entry.get("title", ""),
                body_html=entry.get("summary", ""),
                byline=source,
                date=date,
            )

        doc = Document(response.content)
        return Story(
            doc.title(),
            body_html=doc.summary(),
            byline=source,
            date=date,
        )