Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RC] Implementation of news functionality from archlinux.org #191

Merged
merged 23 commits into from Jun 3, 2018
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -3,3 +3,4 @@ __pycache__/
Dockerfile
locale/*.mo
locale/pikaur.pot
.idea/
151 changes: 151 additions & 0 deletions pikaur/news.py
@@ -0,0 +1,151 @@
import datetime
import urllib.request
import urllib.error
import xml.etree.ElementTree
import os
from http.client import HTTPResponse
from html.parser import HTMLParser

from typing import TextIO

from pikaur.config import CACHE_ROOT
import pikaur.pprint
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

from pikaur.pprint import print_stdout, format_paragraph

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. 3407edd

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oops, from .pprint import....

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

btw you can run the check locally, ./maintenance_scripts/lint.sh

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The imports will be fixed in the last step, have some issues with my IDE.

from pikaur.pacman import


# TODO: internationalization
# TODO: if the dat file is not present, take the initial date from the last installed local package from the repo

class News(object):
    """Show archlinux.org news entries that are newer than the last seen one.

    The date of the last seen entry is read from ``last_seen_news.dat`` inside
    ``CACHE_ROOT``; when that file is missing or empty it is initialised with
    the current UTC time, so only entries published from then on are shown.

    NOTE(review): nothing in this class advances the stored date after entries
    have been printed, so the same entries are printed again on the next run.
    """

    _last_seen_news: str
    URL = 'https://www.archlinux.org'
    DIR = '/feeds/news/'
    # Date layout used both by the feed's ``pubDate`` tags and by the dat file.
    _DATE_FORMAT = '%a, %d %b %Y %H:%M:%S %z'

    def __init__(self) -> None:
        self._last_seen_news = self._get_last_seen_news()

    def check_news(self) -> None:
        """Fetch the feed and print every entry newer than the last seen one.

        Fetching and parsing problems are reported on stdout instead of being
        raised, so a broken feed does not abort the caller.
        """
        rss_feed = self._get_rss_feed()
        if not rss_feed:  # could not get data
            return
        try:
            xml_feed: xml.etree.ElementTree.Element = \
                xml.etree.ElementTree.fromstring(rss_feed)
        except xml.etree.ElementTree.ParseError:
            pikaur.pprint.print_stdout('Could not parse archlinux.org news')
            return
        last_online_news = self._last_online_news(xml_feed)
        if not last_online_news:
            # no dated entry at all: nothing that could be compared or shown
            pikaur.pprint.print_stdout('Could not parse archlinux.org news')
            return
        if self._is_new(last_online_news):
            self._print_news(xml_feed)

    def _get_rss_feed(self) -> str:
        """Download the feed and return it as text, or '' when it is unreachable."""
        try:
            # the context manager closes the connection even if reading fails
            with urllib.request.urlopen(self.URL + self.DIR) as http_response:
                stripped_lines = [
                    line.decode('UTF-8').strip() for line in http_response
                ]
        except OSError:
            # URLError is an OSError subclass; this also covers socket-level
            # failures that happen while the body is being read
            pikaur.pprint.print_stdout('Could not fetch archlinux.org news')
            return ''
        # join with a space so that words which were separated only by a line
        # break are not glued together
        return ' '.join(line for line in stripped_lines if line)

    @staticmethod
    def _last_online_news(xml_feed: xml.etree.ElementTree.Element) -> str:
        """Return the text of the first ``pubDate`` tag found, or '' if none.

        The first dated item is taken to be the most recent entry.
        """
        news_entry: xml.etree.ElementTree.Element
        for news_entry in xml_feed.iter('item'):
            child: xml.etree.ElementTree.Element
            for child in news_entry:
                if 'pubDate' in child.tag:
                    # an empty tag has text None; keep the declared str type
                    return child.text or ''
        # if we get to here, then something went really wrong
        # no valid news found
        return ''

    @staticmethod
    def _get_last_seen_news() -> str:
        """Return the stored last-seen date, initialising the dat file if needed."""
        filename: str = os.path.join(CACHE_ROOT, 'last_seen_news.dat')
        try:
            with open(filename) as last_seen_fd:
                last_seen: str = last_seen_fd.readline().strip()
        except IOError:
            last_seen = ''
        if last_seen:
            return last_seen
        # if file doesn't exist (or is empty), this feature was run the first
        # time, then we want to see all news from this moment on
        now: datetime.datetime = datetime.datetime.now(datetime.timezone.utc)
        time_formatted: str = now.strftime('%a, %d %b %Y %H:%M:%S +0000')
        try:
            with open(filename, 'w') as last_seen_fd:
                last_seen_fd.write(time_formatted)
        except IOError:
            msg: str = 'Could not initialize {}'.format(filename)
            pikaur.pprint.print_stdout(msg)
        return time_formatted

    def _is_new(self, last_online_news: str) -> bool:
        """Tell whether the given feed date is later than the last seen date.

        Raises:
            ValueError: if ``last_online_news`` is empty, or if either date
                does not match the expected date layout.
        """
        if not last_online_news:
            raise ValueError('The news feed could not be received or parsed.')
        last_seen_news_date: datetime.datetime = datetime.datetime.strptime(
            self._last_seen_news, self._DATE_FORMAT
        )
        # surrounding whitespace inside the tag must not break the parsing
        last_online_news_date: datetime.datetime = datetime.datetime.strptime(
            last_online_news.strip(), self._DATE_FORMAT
        )
        return last_online_news_date > last_seen_news_date

    def _print_news(self, xml_feed: xml.etree.ElementTree.Element) -> None:
        """Print the entries of the feed until the first one that is not new."""
        news_entry: xml.etree.ElementTree.Element
        for news_entry in xml_feed.iter('item'):
            child: xml.etree.ElementTree.Element
            for child in news_entry:
                if 'pubDate' in child.tag:
                    if self._is_new(child.text or ''):
                        self._print_one_entry(news_entry)
                    else:
                        # no more news
                        return

    @staticmethod
    def _print_one_entry(news_entry: xml.etree.ElementTree.Element) -> None:
        """Print the title, publication date and tag-stripped text of one entry."""
        # defaults keep the output working when a tag is missing or empty
        title: str = ''
        pub_date: str = ''
        description: str = ''
        child: xml.etree.ElementTree.Element
        for child in news_entry:
            text: str = child.text or ''
            if 'title' in child.tag:
                title = text
            if 'pubDate' in child.tag:
                pub_date = text
            if 'description' in child.tag:
                description = text
        pikaur.pprint.print_stdout(
            pikaur.pprint.color_line(title, 11) + ' (' + pub_date + ')'
        )
        pikaur.pprint.print_stdout(
            pikaur.pprint.format_paragraph(strip_tags(description))
        )


class MLStripper(HTMLParser):
    """HTML parser that drops all markup and collects only the text data."""

    def __init__(self) -> None:
        # HTMLParser.__init__ already calls reset(); character references are
        # converted so that e.g. '&amp;' reaches handle_data() as '&'
        super().__init__(convert_charrefs=True)
        self.fed: list = []

    def error(self, message: object) -> None:
        """Ignore parser errors; older base classes require this override."""

    def handle_data(self, data: str) -> None:
        """Collect one chunk of text found between tags."""
        self.fed.append(data)

    def get_data(self) -> str:
        """Return all text collected so far as a single string."""
        return ''.join(self.fed)


def strip_tags(html: str) -> str:
    """Return ``html`` with all tags removed and character references decoded.

    Empty input (``''`` or ``None``) yields ``''``.
    """
    if not html:
        return ''
    mlstripper = MLStripper()
    mlstripper.feed(html)
    # with convert_charrefs the parser may hold back trailing text that
    # contains an unterminated '&'; close() forces it to be delivered
    mlstripper.close()
    return mlstripper.get_data()


if __name__ == '__main__':
    # Running this module directly performs a single news check.
    news_checker = News()
    news_checker.check_news()