-
Notifications
You must be signed in to change notification settings - Fork 206
/
ford_dealers_us.py
35 lines (30 loc) · 1.13 KB
/
ford_dealers_us.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
from locations.categories import Categories, apply_category
from locations.structured_data_spider import StructuredDataSpider
class FordDealersUSSpider(CrawlSpider, StructuredDataSpider):
    """Crawl Ford's US dealer directory and tag each dealer item as a car shop.

    The crawl starts from the "browse all" directory page. Links into the
    dealer directory are extracted without a callback, while links to
    dealer-detail pages are handed to ``parse_sd``. ``parse_sd`` is not
    defined in this class; presumably it is inherited from
    ``StructuredDataSpider`` -- confirm against that base class.
    """

    name = "ford_dealers_us"
    item_attributes = {
        "brand": "Ford",
        "brand_wikidata": "Q44294",
    }
    start_urls = ["https://www.ford.com/dealerships/dealer-directory/browse-all/"]
    # Neither attribute is read in this class; presumably both are consumed by
    # StructuredDataSpider -- verify against the base class.
    wanted_types = ["AutoDealer"]
    json_parser = "chompjs"
    # Dots in the patterns below are escaped so that "." matches only a literal
    # dot in the host name and the ".html" suffix, not an arbitrary character.
    rules = [
        # No callback on this rule: directory pages are not parsed for items
        # here; per Scrapy's Rule defaults their links are followed instead.
        Rule(
            LinkExtractor(allow=r"^https://www\.ford\.com/dealerships/dealer-directory/"),
        ),
        # Dealer-detail pages, under either of the two URL layouts listed.
        Rule(
            LinkExtractor(
                allow=(
                    r"^https://www\.ford\.com/content/brand_ford/en_us/brand/dealerships/dealer-details/.*\.html$",
                    r"^https://www\.ford\.com/dealerships/dealer-details/.*$",
                ),
            ),
            callback="parse_sd",
        ),
    ]

    def post_process_item(self, item, response, ld_data, **kwargs):
        """Apply the car-shop category to ``item`` and yield it.

        ``response``, ``ld_data`` and ``**kwargs`` are accepted but not used
        here; the signature presumably mirrors a hook on the base spider --
        confirm against ``StructuredDataSpider``.
        """
        apply_category(Categories.SHOP_CAR, item)
        yield item