In [188]:
import requests
import time
from bs4 import BeautifulSoup
import re
import pandas as pd

def parse_text(unparsed):
    parsed = unparsed.get_text().lstrip('\n\r\n').lstrip('\n\r\n ').rstrip('\n\r\n').rstrip('\n\r\n ')
    return parsed

def extract_uuid(string):
    uuid_regex = re.compile("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}")
    return re.search(uuid_regex, string).group(0)
    
def remove_non_numeric(string):
    try: 
        string = string.strip().replace(",", "")
        return re.sub("[^0-9]", "", string)
    except AttributeError:
        pass

def extract_autotrader_year(string):
    try:
        return remove_non_numeric(string)[:4]
    except TypeError:
        pass


#URL Headers
headers = {"User-Agent":"Mozilla/5.0"}

In [180]:
#Listings Dicts

cazoo_listings = []

autotrader_listings = []

In [194]:
#Cazoo Listings

cazoo_listings.clear() # delete all values in the cazoo listings dict

cazoo_page = requests.get("https://www.cazoo.co.uk/cars/mercedes-benz/a-class/?colour=Grey&fuelType=Petrol", headers=headers)

cazoo_soup = BeautifulSoup(cazoo_page.content, 'html.parser')
cazoo_search_results = cazoo_soup.find_all("div", {"class" : re.compile('.*vehicle-cardstyles__Card-sc.*')})

for s in cazoo_search_results:
    
    car_id = extract_uuid(s.find('a', href=True)['href'])

    details = s.findAll("div", {"class" : re.compile('.*vehicle-cardstyles__DetailWrap-sc.*')})
    
    for d in details:
        tags = d.find_next('ul').find_all('li')

        title = parse_text(d.find("p", {"class": re.compile('.*vehicle-cardstyles__Title-sc.*')}))
        description = parse_text(d.find("p", {"class" : re.compile('.*vehicle-cardstyles__DisplayVariant-sc.*')}))
        mileage = remove_non_numeric(parse_text(tags[0]))
        year = remove_non_numeric(parse_text(tags[1]))
        transmission = parse_text(tags[2])
        fuel = parse_text(tags[3])

    price_gbp = remove_non_numeric(parse_text(s.find("p", {"data-test-id": "card-pricing-full-price-gb"})))

    listing_details = { 'car_id': car_id,
                        'car_title': title,
                        'car_decsription': description,
                        'mileage': mileage,
                        'year': year,
                        'transmission': transmission,
                        'fuel': fuel,
                        'price_gbp': price_gbp,
                        'source': 'Cazoo',
                        'url': f"https://www.cazoo.co.uk/car-details/{car_id}/"
                        }
    
    cazoo_listings.append(listing_details)

print(f"found {len(cazoo_listings)} car listings on Cazoo")


found 22 car listings on Cazoo


In [195]:
#Autotrader Listings

autotrader_listings.clear() # delete all values in the cazoo listings dict

for page in range(1, 40): #checks the first 40 pages of paginated results

    autotrader_page = requests.get(f"https://www.autotrader.co.uk/car-search?sort=price-asc&postcode=WC2N5DU&radius=1500&make=Mercedes-Benz&model=A%20Class&include-delivery-option=on&year-from=2020&fuel-type=Petrol&exclude-writeoff-categories=on&page={page}", headers=headers)
    time.sleep(2) # space requests 2 seconds apart to avoid being rate limited
    autotrader_soup = BeautifulSoup(autotrader_page.content, 'html.parser')
    autotrader_search_results = autotrader_soup.find_all("li", {"class" : "search-page__result"})

    for r in autotrader_search_results: 

        car_id = r.get('id')
        title = parse_text(r.find("h3", {"class": "product-card-details__title"}))
        description = parse_text(r.find("p", {"class": "product-card-details__subtitle"}))
        price_gbp = remove_non_numeric(parse_text(r.find("div", {"class": "product-card-pricing__price"})))

        key_specs = r.find_all("li", {"class": "atc-type-picanto--medium"})
        key_specs_list = [parse_text(key_spec) for key_spec in key_specs]
        
        mileage = remove_non_numeric(next((x for x in key_specs_list if 'miles' in x), None))
        year = extract_autotrader_year(next((x for x in key_specs_list if 'reg)' in x), None))
        fuel = ('Petrol' if 'Petrol' in key_specs_list else 'Diesel')
        transmission = ('Manual' if 'Manual' in key_specs_list else 'Automatic')


        listing_details = { 'car_id': car_id,
                            'car_title': title,
                            'car_decsription': description,
                            'mileage': mileage,
                            'year': year,
                            'transmission': transmission,
                            'fuel': fuel,
                            'price_gbp': price_gbp,
                            'source': 'Autotrader',
                            'url': f"https://www.autotrader.co.uk/car-details/{car_id}"
                            }

        autotrader_listings.append(listing_details)

print(f"found {len(autotrader_listings)} car listings on Autotrader")

found 429 car listings on Autotrader
