Skip to content

Commit

Permalink
Update what_crawler/what_crawler/spiders/request-spider.py
Browse files: browse the repository at this point in the history
added regex escaping of special characters
  • Loading branch information
Volvox committed Dec 22, 2012
1 parent 1f443b1 commit 20ea133
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions what_crawler/what_crawler/spiders/request-spider.py
Expand Up @@ -46,10 +46,11 @@ def parse_requests(self, response):

for album in albums:
item = WhatItem()

# Regular expression for identifying links that include a bracketed date.
p1 = re.compile('\[[^\]]*\]')
# p2 = re.compile('\[[2012\]]*\]')


# find links with [date]
date = p1.search(album)
Expand All @@ -61,8 +62,9 @@ def parse_requests(self, response):
if date != None:
item['name'] = re.sub('\[[^\]]*\]','',album) #get rid of [date]
item['name'] = item['name'].strip()
item['name'] = re.escape(item['name'])
items.append(item)


for item in items:
yield item
yield item

0 comments on commit 20ea133

Please sign in to comment.