From 20ea1337e83de650901462f5f90af9cdfd9954f2 Mon Sep 17 00:00:00 2001 From: Volvox Date: Sat, 22 Dec 2012 00:33:25 -0500 Subject: [PATCH] Update what_crawler/what_crawler/spiders/request-spider.py added regex escaping of special characters --- what_crawler/what_crawler/spiders/request-spider.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/what_crawler/what_crawler/spiders/request-spider.py b/what_crawler/what_crawler/spiders/request-spider.py index cfabd86..85fb3f8 100644 --- a/what_crawler/what_crawler/spiders/request-spider.py +++ b/what_crawler/what_crawler/spiders/request-spider.py @@ -46,10 +46,11 @@ def parse_requests(self, response): for album in albums: item = WhatItem() - + #regular expression for identifying links which incl. bracketed dates. p1 = re.compile('\[[^\]]*\]') # p2 = re.compile('\[[2012\]]*\]') + # find links with [date] date = p1.search(album) @@ -61,8 +62,9 @@ def parse_requests(self, response): if date != None: item['name'] = re.sub('\[[^\]]*\]','',album) #get rid of [date] item['name'] = item['name'].strip() + item['name'] = re.escape(item['name']) items.append(item) for item in items: - yield item \ No newline at end of file + yield item