Permalink
Browse files

Update what_crawler/what_crawler/spiders/request-spider.py

Added regex escaping of special characters in the extracted item names.
  • Loading branch information...
Volvox
Volvox committed Dec 22, 2012
1 parent 1f443b1 commit 20ea1337e83de650901462f5f90af9cdfd9954f2
Showing with 4 additions and 2 deletions.
  1. +4 −2 what_crawler/what_crawler/spiders/request-spider.py
@@ -46,10 +46,11 @@ def parse_requests(self, response):
for album in albums:
item = WhatItem()
-
+
#regular expression for identifying links which incl. bracketed dates.
p1 = re.compile('\[[^\]]*\]')
# p2 = re.compile('\[[2012\]]*\]')
+
# find links with [date]
date = p1.search(album)
@@ -61,8 +62,9 @@ def parse_requests(self, response):
if date != None:
item['name'] = re.sub('\[[^\]]*\]','',album) #get rid of [date]
item['name'] = item['name'].strip()
+ item['name'] = re.escape(item['name'])
items.append(item)
for item in items:
- yield item
+ yield item

0 comments on commit 20ea133

Please sign in to comment.