scraper: save original URL

andreslucena · Jan 16, 2016 · 4200e33
1 parent f3aba11
commit 4200e33
Showing 1 changed file with 3 additions and 2 deletions.
diff --git a/scraper.py b/scraper.py
@@ -71,7 +71,7 @@ def start(self):
             ident = s['id']
             print ident, url
             xmlroot = self.get_content(url)
-            self.parse_and_save(xmlroot, ident)
+            self.parse_and_save(url, xmlroot, ident)
         print 'FIN'
 
     def get_content(self, url):
@@ -84,8 +84,9 @@ def get_content(self, url):
     def convert_to_unicode(self, string): 
         return unicode(string.encode('latin-1'), "utf-8")
 
-    def parse_and_save(self, root, ident):
+    def parse_and_save(self, url, root, ident):
         datos = {}
+        datos['url'] = url
         datos['id'] = ident
         # --- div datos_diputado ---
         ext = root.xpath('//div[@id="datos_diputado"]/p[@class="logo_grupo"][1]/img/@src')