Skip to content
Permalink
Browse files

event.py: refactoring and optimization

Several refactorings and optimizations have taken place:
 - changed parser from BeautifulSoup to lxml's HTML parser
 - replaced accent stripping with UTF-8 encoding
 - use tiki calendar instead of front page
  • Loading branch information...
dnet committed May 31, 2011
1 parent c01b9e0 commit 578ce3908b60ad33e7e287dcafb288be2ecbfed6
Showing with 8 additions and 17 deletions.
  1. +8 −17 event.py
@@ -24,25 +24,16 @@
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.

from BeautifulSoup import BeautifulSoup
import urllib2
import re
import unicodedata
from lxml import html
from urllib2 import urlopen
from datetime import datetime
import sys

def strip_accents(s):
    """Return *s* with its combining accent marks removed.

    The text is first normalized to Unicode NFD, which splits each
    accented character into a base character followed by combining
    marks; every character whose Unicode category is ``Mn`` (Mark,
    nonspacing) is then dropped.

    Args:
        s: Unicode text to process.

    Returns:
        The NFD-normalized text without any ``Mn`` characters.
    """
    decomposed = unicodedata.normalize('NFD', s)
    return ''.join(c for c in decomposed if unicodedata.category(c) != 'Mn')

# Best-effort lookup of an event from hsbp.org, written to stdout as
# "<date> <time> <summary>" with no trailing newline.
# NOTE(review): this listing appears to interleave two revisions of the same
# logic from the commit diff (the BeautifulSoup version and its lxml
# replacement) -- confirm which statements belong in the real file.
try:
# BeautifulSoup variant: download the wiki front page and parse it.
req = urllib2.Request("http://hsbp.org/HomePage")
response = urllib2.urlopen(req)
soup = BeautifulSoup(response.read())
# First element whose class attribute contains the whole word "vevent".
event = soup.find(attrs={'class': re.compile(r'\bvevent\b')})

# Within that element, locate the "dtstart" and "summary" children.
start = event.find(attrs={'class': re.compile(r'\bdtstart\b')})
summary = event.find(attrs={'class': re.compile(r'\bsummary\b')})
title = start['title']
# NOTE(review): `sum` shadows the builtin of the same name.
sum = strip_accents(unicode(summary.string).strip())
# Characters 0-9 and 11-15 of the title attribute are printed as two fields;
# presumably an ISO 8601 timestamp (date, then HH:MM) -- TODO confirm.
sys.stdout.write(title[0:10] + ' ' + title[11:16] + ' ' + sum)
# lxml variant: parse the tiki calendar list view straight from its URL.
events = html.parse('http://hsbp.org/tiki-calendar.php?viewlist=list').getroot()
# Keep only the digit characters of the href on the first child of the first
# "dtstart" element, read them as a Unix timestamp and format it.
# Relies on Python 2 filter() returning a string for string input.
start = datetime.fromtimestamp(int(filter(str.isdigit,
events.find_class('dtstart')[0][0].get('href')))).strftime('%Y-%m-%d %H:%M')
summary = events.find_class('summary')[0].text
# The summary is encoded to UTF-8 bytes before being written.
sys.stdout.write('%s %s' % (start, summary.encode('utf-8')))
# NOTE(review): a bare except swallows every exception (including
# KeyboardInterrupt and SystemExit), so any failure yields no output at all.
except:
pass

0 comments on commit 578ce39

Please sign in to comment.
You can’t perform that action at this time.