Skip to content

Commit

Permalink
scraper: save original url
Browse files (browse the repository at this point in the history)
  • Loading branch information
andreslucena committed Jan 16, 2016
1 parent f3aba11 commit 4200e33
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions scraper.py
Original file line number | Diff line number | Diff line change
[Expand Up] @@ -71,7 +71,7 @@ def start(self):
ident = s['id']
print ident, url
xmlroot = self.get_content(url)
self.parse_and_save(xmlroot, ident)
self.parse_and_save(url, xmlroot, ident)
print 'FIN'

def get_content(self, url):
[Expand All] @@ -84,8 +84,9 @@ def get_content(self, url):
def convert_to_unicode(self, string):
return unicode(string.encode('latin-1'), "utf-8")

def parse_and_save(self, root, ident):
def parse_and_save(self, url, root, ident):
datos = {}
datos['url'] = url
datos['id'] = ident
# --- div datos_diputado ---
ext = root.xpath('//div[@id="datos_diputado"]/p[@class="logo_grupo"][1]/img/@src')
Expand Down

0 comments on commit 4200e33

Please sign in to comment.