Skip to content
Browse files

Add min width option on titles, so short titles are clickable in feedviewer.

Fix another case where lxml gets confused by an encoding declaration.
  • Loading branch information...
1 parent 21a9003 commit 4c39846c088aa013aa56a4660965dfc9a7283d86 @akkana committed Mar 18, 2013
Showing with 24 additions and 5 deletions.
  1. +9 −2 feedme
  2. +15 −3 feedmeparser.py
View
11 feedme
@@ -388,13 +388,16 @@ def get_feed(feedname, config, cache, cachefile, last_time, msglog) :
# feedparser.parse() can throw unexplained errors like
# "xml.sax._exceptions.SAXException: Read failed (no details available)"
- # which will kill our whole process, so guard against that:
+ # which will kill our whole process, so guard against that.
+ # Sadly, feedparser usually doesn't give any details about what went wrong.
try :
+ print "Running: feedparser.parse(", sitefeedurl, ")"
feed = feedparser.parse(sitefeedurl)
+ # except xml.sax._exceptions.SAXException, e :
except Exception, e :
print "Couldn't parse feed: URL:", sitefeedurl
print str(e)
- #traceback.print_stack()
+ traceback.print_stack()
return
# feedparser has no error return! One way is to check len(feed.feed).
@@ -625,6 +628,10 @@ Which (default = s): """)
# http://www.mail-archive.com/plucker-list@rubberchicken.org/msg07314.html
# and the previous message.
indexstr += "<p><a name=\"%d\">&nbsp;</a>" % itemnum
+ minwidth = config.getint(feedname, 'min_width')
+ if len(item.title) < minwidth :
+ #item.title += '&nbsp;' * (minwidth - len(item.title) - 2) + '__'
+ item.title += '. ' * (minwidth - len(item.title)) + '__'
if levels > 1 :
itemlink = '<a href=\"' + fnam + anchor + '\">'
View
18 feedmeparser.py
@@ -279,8 +279,20 @@ def feed(self, uhtml) :
# something like this inserted at the beginning:
# <?xml version="1.0" encoding="utf-8"?>
# So if we've hit the error, try to remove it:
- tree = lxml.html.fromstring(re.sub(
- '<\?xml .*encoding=[\'"].*?[\'"]\?>', '', uhtml))
+ print >>sys.stderr, "Stupid lxml encoding error on:"
+ print >>sys.stderr, uhtml[:512].encode('utf-8',
+ 'xmlcharrefreplace'),
+ print '...'
+
+ # Some sample strings that screw up lxml and must be removed:
+ # <?xml version="1.0" encoding="ascii" ?>
+ uhtml = re.sub('<\?xml .*?encoding=[\'\"].*?[\'\"].*?\?>',
+ '', uhtml)
+ tree = lxml.html.fromstring(uhtml)
+ print "Tried to remove encoding: now"
+ print >>sys.stderr, uhtml[:512].encode('utf-8',
+ 'xmlcharrefreplace'),
+ print '...'
else :
raise ValueError
@@ -602,6 +614,7 @@ def read_config_file() :
'nonlocal_images' : 'false',
'skip_links' : 'false',
'when' : '', # Day, like tue, or month day, like 14
+ 'min_width' : '25', # min # chars in an item link
'ascii' : 'false'})
config.read(conffile)
return config
@@ -611,5 +624,4 @@ def read_config_file() :
parser = FeedmeHTMLParser(config, 'Freaktest')
parser.fetch_url('http://www.freakonomics.com/2011/12/21/what-to-do-with-cheating-students/', '/home/akkana/feeds/Freaktest/', 'test.html', "Freak Test")
- #parser.fetch_url('file:///home/akkana/what-to-do-with-cheating-students.htm', '/home/akkana/feeds/Freaktest/', 'test.html')

0 comments on commit 4c39846

Please sign in to comment.
Something went wrong with that request. Please try again.