
Commit

Update scraper.py
hanzalot committed Aug 12, 2016
1 parent 20fb67b commit 85410fa
Showing 1 changed file with 13 additions and 17 deletions.
scraper.py (13 additions, 17 deletions)
@@ -26,25 +26,21 @@
 from scrapy.crawler import CrawlerProcess
 import scraperwiki
 
-class ToiletSpider(scrapy.Spider):
-    name = "toilet"
+class NYTBSSpider(scrapy.Spider):
+    name = "nytbs"
     def start_requests(self):
-        yield scrapy.Request('http://www.fehd.gov.hk/english/pleasant_environment/cleansing/list_of_public_toilets.php?district=NTIs')
-        yield scrapy.Request('http://www.fehd.gov.hk/english/pleasant_environment/cleansing/list_of_public_toilets.php?district=HK')
-        yield scrapy.Request('http://www.fehd.gov.hk/english/pleasant_environment/cleansing/list_of_public_toilets.php?district=KLN')
+        yield scrapy.Request('http://www.nytimes.com/books/best-sellers/')
 
     def parse(self, response):
-        tables = response.xpath("//table")
-        print len(tables)
-        for table in tables:
-            rows = table.xpath("tr")
-            district = rows[0].xpath("td/text()").extract()[0]
-            for row in rows[2:]:
-                texts = row.xpath("td/text()")
-                name = texts[1].extract().replace("*", "")
-                address = texts[2].extract()
-                scraperwiki.sqlite.save(unique_keys=[], data={"district": district, "name": name, "address": address})
-        scraperwiki.sqlite.execute("DROP table data")
+        headers = response.xpath("//*[@id='subnavigation']/form/div")
+        print len(headers)
+        for header in headers:
+            links = header.xpath("//*/option")
+            for link in links:
+                value = link.xpath("//@value").extract()
+                if value:
+                    print value
 
 process = CrawlerProcess()
-process.crawl(ToiletSpider)
+process.crawl(NYTBSSpider)
 process.start()
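
A note on the new parse logic: the XPaths inside the loops start with //, which Scrapy evaluates against the whole document rather than relative to header or link, so every iteration selects the same nodes again; the print statements are also Python 2 syntax, and unlike the old spider this one no longer persists anything via scraperwiki.sqlite. A minimal sketch of the same traversal with relative XPaths (assuming Scrapy >= 1.0, where extract_first() is available; this is not the committed code):

def parse(self, response):
    # Each <div> under the #subnavigation form wraps a <select> of options.
    headers = response.xpath("//*[@id='subnavigation']/form/div")
    for header in headers:
        # A leading ".//" keeps the query relative to this header node;
        # a bare "//" would re-scan the entire document on every pass.
        for link in header.xpath(".//option"):
            # "./@value" likewise reads only this <option>'s value attribute.
            value = link.xpath("./@value").extract_first()
            if value:
                print(value)

extract_first() returns None when nothing matches, so the if value: guard behaves as before without indexing into a list.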
