In [119]:
import requests
from bs4 import BeautifulSoup as bs
import csv
import re

In [155]:
def get_page(url, timeout=30):
    """Download ``url`` and parse the response body with BeautifulSoup.

    Parameters
    ----------
    url : str
        Address of the page to fetch.
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so a stalled server cannot block the
        scrape forever. Defaults to 30 seconds.

    Returns
    -------
    BeautifulSoup or None
        The document parsed with the ``lxml`` parser when the server answers
        with a success status. ``None`` when it does not; the status code is
        printed in that case.

    Raises
    ------
    requests.RequestException
        Network-level failures (including timeouts) are not caught here.
    """
    response = requests.get(url, timeout=timeout)

    if not response.ok:
        # The failure path used to fall through to ``return soup`` with
        # ``soup`` never assigned, which raised UnboundLocalError.
        print('Server Responded: ', response.status_code)
        return None

    return bs(response.text, 'lxml')


def get_data(soup):
    """Extract the title, price and spec values from a parsed listing page.

    Parameters
    ----------
    soup : BeautifulSoup or None
        Parsed listing page. ``None`` is accepted and yields blank fields.

    Returns
    -------
    dict
        Always contains ``'Title'`` and ``'Price'`` (empty strings when the
        corresponding tag is missing). Spec keys are added for as many
        ``div.col-xs-8`` elements as were found, up to the nine names in
        ``spec_names``.
    """
    spec_names = ['Mileage', 'Drive Type', 'Engine', 'Transmission', 'Fuel Type',
                  'MPG', 'Exterior', 'Interior', 'VIN']

    title = ''
    price = ''
    # Must exist on every path: the old code only bound it inside the ``try``,
    # so a failed spec lookup ended in NameError at ``data_dict.update``.
    specs_dict = {}

    if soup is not None:
        title_tag = soup.find('h1', class_='text-bold text-size-400 text-size-sm-700')
        if title_tag is not None:
            title = title_tag.get_text().strip()

        price_tag = soup.find('span', class_='first-price')
        if price_tag is not None:
            price = price_tag.get_text()

        spec_values = [item.get_text() for item in soup.find_all('div', class_='col-xs-8')]
        # NOTE(review): values are paired with names purely by position. A page
        # with an extra spec row shifts every later value by one column (seen
        # in scraped output where 'MPG' holds '5\'10"' and 'VIN' holds an
        # interior colour). Pairing by the on-page label would be more robust;
        # the markup needed for that is not visible here.
        specs_dict = dict(zip(spec_names, spec_values))

    data_dict = {
        'Title': title,
        'Price': price,
    }
    data_dict.update(specs_dict)

    return data_dict


def get_index_data(soup):
    """Collect absolute listing URLs from a parsed search-results page.

    Parameters
    ----------
    soup : BeautifulSoup or None
        Parsed search-results page. ``None`` yields an empty list.

    Returns
    -------
    list of str
        ``https://www.kbb.com`` + href for every second ``rel="nofollow"``
        anchor whose href does not start with ``tel``.
    """
    try:
        links = soup.find_all('a', attrs={'rel': 'nofollow'})
    except AttributeError:
        # ``soup`` is None (or otherwise not a parsed document).
        links = []

    # ``.get`` instead of ``['href']``: an anchor without an href used to
    # raise KeyError here, outside the try block.
    hrefs = (link.get('href') for link in links)
    actual = [href for href in hrefs if href is not None and not href.startswith('tel')]

    # Keep every other entry. Presumably each listing is linked twice in a row
    # on the results page - verify against the live markup.
    return [f'https://www.kbb.com{href}' for href in actual[::2]]

In [183]:
def write_csv(data):
    """Append one listing to ``kbb_scraper.csv``.

    The row holds the first 10 columns of ``columns`` when all of them are
    present in ``data``. Otherwise it falls back to the first 9, then the
    first 8, and finally to the title alone.

    Parameters
    ----------
    data : dict
        Listing fields as produced by ``get_data``. Must contain ``'Title'``.

    Raises
    ------
    KeyError
        If ``data`` has no ``'Title'`` entry.
    """
    columns = ['Title', 'Price', 'Mileage', 'Drive Type', 'Engine',
               'Transmission', 'Fuel Type', 'MPG', 'Exterior', 'Interior']

    # Same fallback order as the former nested try/except blocks, but only a
    # missing key triggers the fallback rather than any exception.
    for width in (10, 9, 8):
        try:
            row = [data[name] for name in columns[:width]]
            break
        except KeyError:
            continue
    else:
        row = [data['Title']]

    # newline='' lets the csv module control line endings; without it, rows
    # are separated by blank lines on platforms that translate '\n'.
    with open('kbb_scraper.csv', 'a', newline='') as csvfile:
        csv.writer(csvfile).writerow(row)

In [171]:
# Run the index-page helpers once against the default search URL.
# The return value of this call is not stored.
url = 'https://www.kbb.com/cars-for-sale/all/?distance=75'
get_index_data(get_page(url))

# One search-results URL per page of 25 records: firstRecord = 0, 25, ..., 975.
url_records = [
    f'https://www.kbb.com/cars-for-sale/all/columbia-mo-65201?distance=75&dma=&channel=KBB&searchRadius=75&isNewSearch=false&marketExtension=include&showAccelerateBanner=false&sortBy=relevance&numRecords=25&firstRecord={x}'
    for x in range(0, 1000, 25)
]

In [185]:
# Every 50th search-results URL. Presumably a reduced set for trial runs;
# the loop below walks the full ``url_records`` list, not this sample.
url_records_test = url_records[::50]

for url in url_records:
    # Fetch each search-results page exactly once. The earlier version also
    # ran get_data() on this page and threw the result away, then downloaded
    # the same page a second time for get_index_data().
    car_urls = get_index_data(get_page(url))

    for idx, link in enumerate(car_urls):
        data = get_data(get_page(link))
        write_csv(data)

        # ``idx`` restarts at 0 for every search-results page.
        if idx % 25 == 0:
            print(f'{idx} iteration complete')
            print(data)
            print('-----------------------------------------------------------')

{'Title': 'Certified 2019 Ford Escape FWD S', 'Price': '17,243', 'Mileage': '9,848', 'Drive Type': '2 wheel drive - front', 'Engine': '4-Cylinder', 'Transmission': '6-Speed Automatic', 'Fuel Type': 'Flexible Fuel', 'MPG': '21 City / 29 Highway', 'Exterior': 'Agate Black Metallic', 'Interior': 'Chromite Gray/Charcoal Black', 'VIN': '1FMCU0F76KUB60168'}
{'Title': 'Used 2014 Chevrolet Corvette Stingray Coupe', 'Price': '38,590', 'Mileage': '34,047', 'Drive Type': '2 wheel drive - rear', 'Engine': '8-Cylinder', 'Transmission': '6-Speed Automatic', 'Fuel Type': 'Gasoline', 'MPG': '16 City / 27 Highway', 'Exterior': 'Blue', 'Interior': 'Black', 'VIN': '1G1YD2D74E5125290'}
{'Title': 'Used 2019 BMW 530i xDrive', 'Price': '37,987', 'Mileage': '24,491', 'Drive Type': 'All wheel drive', 'Engine': '4-Cylinder Turbo', 'Transmission': '8-Speed Automatic', 'Fuel Type': 'Gasoline', 'MPG': '23 City / 33 Highway', 'Exterior': 'Mineral White Metallic', 'Interior': 'Ivory White', 'VIN': 'WBAJA7C5XKWC77854

{'Title': 'Used 2010 Ford Flex 2WD Limited', 'Price': '9,591', 'Mileage': '136,920', 'Drive Type': '2 wheel drive - front', 'Engine': '6-Cylinder', 'Transmission': '6-Speed Automatic', 'Fuel Type': 'Gasoline', 'MPG': '17 City / 24 Highway', 'Exterior': 'Cinnamon Metallic', 'Interior': 'Charcoal Black', 'VIN': '2FMGK5DC8ABB13989'}
{'Title': 'New 2021 Chevrolet Silverado 1500 4x4 Crew Cab LT', 'Price': '39,283', 'Mileage': '0', 'Drive Type': '4 wheel drive', 'Engine': '8-Cylinder', 'Transmission': '8-Speed Automatic', 'Fuel Type': 'Gasoline', 'MPG': '5\'10"', 'Exterior': '19 City / 22 Highway', 'Interior': 'Satin Steel Metallic - Gray', 'VIN': 'Jet Black Cloth'}
{'Title': 'Certified 2017 Ford Escape FWD S', 'Price': '16,570', 'Mileage': '14,365', 'Drive Type': '2 wheel drive - front', 'Engine': '4-Cylinder', 'Transmission': '6-Speed Automatic', 'Fuel Type': 'Gasoline', 'MPG': '21 City / 29 Highway', 'Exterior': 'Shadow Black', 'Interior': 'Charcoal Black', 'VIN': '1FMCU0F75HUC77359'}
{'T

{'Title': 'Used 2016 Nissan Rogue SL', 'Price': '18,895', 'Mileage': '35,281', 'Drive Type': 'All wheel drive', 'Engine': '4-Cylinder', 'Transmission': 'Continuously Variable Automatic', 'Fuel Type': 'Gasoline', 'MPG': '25 City / 31 Highway', 'Exterior': 'Magnetic Black', 'Interior': 'Charcoal', 'VIN': '5N1AT2MV7GC762338'}
{'Title': 'Used 2019 Nissan Sentra SV', 'Price': '13,939', 'Mileage': '31,705', 'Drive Type': '2 wheel drive - front', 'Engine': '4-Cylinder', 'Transmission': 'Continuously Variable Automatic', 'Fuel Type': 'Gasoline', 'MPG': '29 City / 37 Highway', 'Exterior': 'Brilliant Silver Metallic', 'Interior': 'Charcoal', 'VIN': '3N1AB7AP8KY299218'}
{'Title': 'Used 2017 Chevrolet Silverado 1500 LT', 'Price': '35,950', 'Mileage': '30,973', 'Drive Type': '4 wheel drive', 'Engine': '8-Cylinder', 'Transmission': '6-Speed Automatic', 'Fuel Type': 'Gasoline', 'MPG': '5\'10"', 'Exterior': '16 City / 22 Highway', 'Interior': 'Black', 'VIN': 'Jet Black'}
{'Title': 'Used 2017 GMC Acadi

{'Title': 'Used 2014 Toyota 4Runner Limited', 'Price': '27,590', 'Mileage': '88,403', 'Drive Type': '4 wheel drive', 'Engine': '6-Cylinder', 'Transmission': '5-Speed Automatic', 'Fuel Type': 'Gasoline', 'MPG': '17 City / 21 Highway', 'Exterior': 'Black', 'Interior': 'Beige', 'VIN': 'JTEBU5JR9E5151531'}
{'Title': 'Used 2017 Jeep Grand Cherokee 4WD Laredo', 'Price': '25,700', 'Mileage': '24,798', 'Drive Type': '4 wheel drive', 'Engine': '6-Cylinder', 'Transmission': '8-Speed Automatic', 'Fuel Type': 'Gasoline', 'MPG': '18 City / 25 Highway', 'Exterior': 'Diamond Black Crystal Pearlcoat', 'Interior': 'Black', 'VIN': '1C4RJFAG8HC749741'}
{'Title': 'Used 2016 Ford Fusion SE', 'Price': '15,391', 'Mileage': '47,703', 'Drive Type': '2 wheel drive - front', 'Engine': '4-Cylinder Turbo', 'Transmission': '6-Speed Automatic', 'Fuel Type': 'Gasoline', 'MPG': '22 City / 34 Highway', 'Exterior': 'Shadow Black', 'Interior': 'Ebony', 'VIN': '3FA6P0HD3GR169166'}
{'Title': 'Used 2020 Chevrolet Blazer AWD

In [186]:
# Export this notebook as a plain Python script (kbb-scraper.py).
!jupyter nbconvert --to script kbb-scraper.ipynb

[NbConvertApp] Converting notebook kbb-scraper.ipynb to script
[NbConvertApp] Writing 3521 bytes to kbb-scraper.py
