In [4]:
import requests
import pandas as pd
import re
import time
from bs4 import BeautifulSoup
from selenium import webdriver
import lxml
import cchardet
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

In [5]:
def get_urls():
    '''
    Get the url of every model directory linked from the BAT models index page
    
    :return final_urls list: List of model directory urls; empty list when the
                             index page could not be downloaded
    '''
    url = 'https://bringatrailer.com/models/'
    
    try:
        # A timeout keeps the notebook from hanging forever on a stalled connection
        html = requests.get(url, timeout=30)
    except requests.exceptions.RequestException as e:
        # No response means nothing to parse; report and return an empty result
        # instead of falling through to an undefined `html`
        print(e)
        return []
    
    # Model links look like:
    # <a class="previous-listing-image-link" href="https://bringatrailer.com/acoma/">
    soup = BeautifulSoup(html.content, 'lxml')
    found_urls = soup.find_all("a", href=True, class_ = "previous-listing-image-link")
    final_urls = [link["href"] for link in found_urls]
    return final_urls

In [6]:
def get_vehicle_urls(model_html):
    '''
    Get all urls of previously auctioned vehicles pertaining to a specific model
    
    :input model_html BeautifulSoup: Parsed html of a vehicle model overview page
    
    :return urls list: List containing the unique urls of previously auctioned
                       vehicles, in the order they first appear on the page
    
    :raises IndexError: If the page contains no <div class="blocks"> container
    '''
    
    containers = model_html.find_all("div", class_ = "blocks")
    if not containers:
        # Same exception type as indexing an empty result, but with a readable message
        raise IndexError('No <div class="blocks"> container found in model page')
    
    # Only the first "blocks" container is searched for links.
    # dict keys keep first-seen order and make the duplicate check O(1) per link.
    hrefs = (a["href"] for a in containers[0].find_all("a", href=True))
    urls = list(dict.fromkeys(hrefs))
    print(f"Found {len(urls)} vehicle auction URL's.")
    return urls

In [7]:
def get_model_html(model_url_dir):
    '''
    Get html of overlying vehicle model webpage
    
    Opens the page in a Safari webdriver session, clicks the "show more" button
    until it is no longer available, and parses the resulting page source.
    
    :input model_url_dir str: Subdirectory url of vehicle model
    
    :return model_html BeautifulSoup: Parsed html content of vehicle model overview page
    
    :raises WebDriverException: If the browser session cannot be started or the
                                page cannot be loaded
    '''
    # Function-scope import so this cell does not depend on an extra import cell
    from selenium.common.exceptions import WebDriverException
    
    # Used for main auctions page:
    # 'body > main > div:nth-child(6) > div > div > div > div.overlayable > div.auctions-footer.auctions-footer-previous > button'
    button_selector = 'body > main > div.container > div > div > div.filter-group > div.overlayable > div.auctions-footer.auctions-footer-previous > button'
    
    # Let a failed start surface here rather than as a NameError on `driver` below.
    # To use another browser, swap this line, e.g. with webdriver_manager:
    #   webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))
    #   webdriver.Edge(service=EdgeService(EdgeChromiumDriverManager().install()))
    #   webdriver.Ie(service=IEService(IEDriverManager().install()))
    driver = webdriver.Safari()
    
    try:
        driver.get(model_url_dir)
        
        # Click 'show more' repeatedly to get all previously auctioned vehicle URL's
        try:
            while True:
                # Raises as soon as the button is gone, which ends the loop
                driver.find_element(By.CSS_SELECTOR, button_selector)
                WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, button_selector))).click()
                time.sleep(1) # Needed for page to load and "show more" button clicked
        except WebDriverException:
            # Best effort: button missing, not clickable in time, or click failed.
            # Whatever has been loaded so far is still returned.
            pass
        
        html = driver.page_source
    finally:
        # Always close the browser, even when loading the page failed
        driver.quit()
    
    model_html = BeautifulSoup(html, 'lxml')
    
    return model_html

## Get List of Model URLs on BAT and Save to CSV

In [9]:
# Scrape the models index page once; later cells read the saved CSV instead.
model_urls = get_urls()
print(f'Found {len(model_urls)} total model directories')

# Output directory for the model url list (trailing slash required, the
# file name is appended by plain string concatenation below).
model_xls_fp = r'/Users/andyczeropski/Desktop/BAT_Analytics/v2/urls/models_urls/'
model_urls_frame = pd.DataFrame({'url': model_urls})
model_urls_frame.to_csv(
    model_xls_fp + "model_urls_final.csv",
    index=True,
)
print('Models saved to .csv')

Found 935 total model directories
Models saved to .csv


## Using Selenium, Go To Each Model Page and Get All Vehicle URLs

In [10]:
# Vehicle urls already on disk; the collection loop appends newly found ones to this list.
vehicle_urls_seed = pd.read_csv(
    r"/Users/andyczeropski/Desktop/BAT_Analytics/v2/urls/makes_urls/vehicle_urls.csv"
)
vehicle_urls = vehicle_urls_seed['URL'].to_list()

# Model directory urls written by the previous section.
model_urls_saved = pd.read_csv(
    r"/Users/andyczeropski/Desktop/BAT_Analytics/v2/urls/models_urls/model_urls_final.csv"
)
model_urls = model_urls_saved['url'].to_list()

# Model pages that could not be scraped are recorded here.
model_url_errors = []

In [12]:
# Iterate through every model's directory page and collect all vehicle urls
# Iterate through every model's directory page and collect all vehicle urls.
# A set mirrors `vehicle_urls` so the duplicate check stays O(1) as the list
# grows; the list itself keeps the insertion order.
seen_vehicle_urls = set(vehicle_urls)

for model_url in model_urls:
    try:
        print(f'Searching for {model_url}..')
        soup = get_model_html(model_url)
        url_list = get_vehicle_urls(soup)
    except Exception as exc:
        # `Exception` (not a bare except) so interrupting the kernel still
        # stops this long-running loop instead of being logged as a failure.
        model_url_errors.append(model_url)
        print(f'{model_url} not collected due to error')
        print(f'  reason: {exc!r}')
        continue

    for url in url_list:
        if url not in seen_vehicle_urls:
            seen_vehicle_urls.add(url)
            vehicle_urls.append(url)

print(vehicle_urls[:5])
print(model_url_errors[:5])

Searching for https://bringatrailer.com/ac/..
Found 29 vehicle auction URL's.
Searching for https://bringatrailer.com/acoma/..
Found 3 vehicle auction URL's.
Searching for https://bringatrailer.com/acura/integra/..
Found 82 vehicle auction URL's.
Searching for https://bringatrailer.com/acura/integra-type-r/..
Found 116 vehicle auction URL's.
Searching for https://bringatrailer.com/acura/legend/..
Found 29 vehicle auction URL's.
Searching for https://bringatrailer.com/acura/nsx-na1-na2/..
Found 402 vehicle auction URL's.
Searching for https://bringatrailer.com/acura/nsx-nc1/..
Found 21 vehicle auction URL's.
Searching for https://bringatrailer.com/acura/rsx/..
Found 26 vehicle auction URL's.
Searching for https://bringatrailer.com/alfa-romeo/105-sedan/..
Found 90 vehicle auction URL's.
Searching for https://bringatrailer.com/alfa-romeo/gtv/..
Found 386 vehicle auction URL's.
Searching for https://bringatrailer.com/alfa-romeo/spider/..
Found 520 vehicle auction URL's.
Searching for https

In [17]:
# Report the final count, then persist every collected vehicle url.
print(f'Found {len(vehicle_urls)} total vehicle urls.')

# Output directory (trailing slash required, the file name is appended by
# plain string concatenation below).
vehicle_xls_fp = r'/Users/andyczeropski/Desktop/BAT_Analytics/v2/urls/makes_urls/'
vehicle_urls_frame = pd.DataFrame({'url': vehicle_urls})
vehicle_urls_frame.to_csv(
    vehicle_xls_fp + "vehicle_urls_final.csv",
    index=True,
)
print('Vehicle urls saved to vehicle_urls_final.csv')

Found 90753 total vehicle urls.
Vehicle urls saved to vehicle_urls_final.csv


## Testing Gathering Model Info by Reading Title Info

In [36]:
def get_listing_post_title(vehicle_data_soup, make, model):
    '''
    Get the title of the listing, to extract extra model details
    
    The title is cut after its first all-digit word (the model year), lower
    cased, and every word that belongs to the make or the model is removed.
    
    :input vehicle_data_soup str: Vehicle html data loaded by BeautifulSoup
    :input make str: Vehicle make ie. "ford"; may contain several words
    :input model str: Vehicle model ie. "mustang s550"; may contain several words

    :return post_title str: Remaining title words ie. "shelby gt350r"; empty
                            string when the page has no title heading or the
                            title has no all-digit word
    '''
    title_tag = vehicle_data_soup.find("h1", class_ = 'post-title')
    if title_tag is None:
        # Page without a listing title (ie. a bad url)
        return ""

    words = title_tag.text.split()

    # Everything after the first all-digit word is the descriptive part
    year_idx = next((idx for idx, word in enumerate(words) if word.isdigit()), None)
    if year_idx is None:
        return ""

    # Remove ALL make/model words, matched as whole words so that short words
    # (ie. "r", "ac") do not eat letters out of unrelated title words
    known_words = set((make or "").lower().split()) | set((model or "").lower().split())
    remaining = [word.lower() for word in words[year_idx + 1:]]

    return " ".join(word for word in remaining if word not in known_words)

In [21]:
def get_make_and_model(vehicle_data_soup):
    '''
    Get make and model from vehicle html data
    
    Reads the text of the first two <button class="group-title"> elements,
    treating the first as the make and the second as the model.
    
    :input vehicle_data_soup str: Vehicle html data loaded by BeautifulSoup

    :return vehicle_make str: Lower-cased vehicle make ie. honda
    :return vehicle_model str: Lower-cased vehicle model ie. accord
                               ("bad", "url" is returned when the page does not
                               provide both buttons)
    '''
    try:
        group_titles = [s.text for s in vehicle_data_soup.find_all("button", class_ = 'group-title')]
        # Unpacking fails (ValueError) when fewer than two buttons are present
        vehicle_make, vehicle_model = group_titles[0:2]
        vehicle_make = vehicle_make.replace('Make', '').strip(' ')
        # The model button repeats the make; drop it so only the model remains
        vehicle_model = vehicle_model.replace('Model', '').replace(vehicle_make, '').strip(' ')
    except Exception:
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate while scraping many pages
        vehicle_make = "BAD"
        vehicle_model = "URL"

    return vehicle_make.lower(), vehicle_model.lower()

In [37]:
# Try the make/model extraction on one known listing page.
# `vehicle_data_soup`, `make` and `model` are reused by the next cell.
url = r"https://bringatrailer.com/listing/2019-ford-mustang-shelby-gt350r-39/"
html = requests.get(url)
vehicle_data_soup = BeautifulSoup(html.content, 'lxml')
make, model = get_make_and_model(vehicle_data_soup)
print(make, model)

ford mustang s550


In [38]:
# Extract the extra model details from the same listing's title, using the
# soup, make and model produced by the previous cell; the bare name displays it.
title = get_listing_post_title(vehicle_data_soup, make, model)
title

'shelby gt350r'

In [39]:
# Load the 'url' column of the saved vehicle url list (kept as a pandas Series).
urls = pd.read_csv(r'/Users/andyczeropski/Desktop/BAT_Analytics/v2/urls/makes_urls/vehicle_urls_final.csv')['url']

In [40]:
# Spot-check the make/model/title extraction on a small fixed slice of the saved urls.
for url in urls[32100:32110]:
    html = requests.get(url)
    vehicle_data_soup = BeautifulSoup(html.content, 'lxml')
    make, model = get_make_and_model(vehicle_data_soup)
    title = get_listing_post_title(vehicle_data_soup, make, model)

    # One labelled line per extracted field, followed by the listing url itself
    for label, value in (('Make:', make), ('Model:', model), ('Model2:', title)):
        print(label, value)
    print(url)


Make: ford
Model: mustang sn95 1994-2004
Model2: cobra r
https://bringatrailer.com/listing/2000-ford-mustang-cobra-r-3/
Make: ford
Model: mustang sn95 1994-2004
Model2: svt cobra convertible
https://bringatrailer.com/listing/2003-ford-mustang-svt-cobra-26/
Make: ford
Model: mustang sn95 1994-2004
Model2: svt cobra
https://bringatrailer.com/listing/2003-ford-mustang-svt-cobra-24/
Make: ford
Model: mustang sn95 1994-2004
Model2: svt cobra convertible
https://bringatrailer.com/listing/2004-ford-mustang-20/
Make: ford
Model: mustang sn95 1994-2004
Model2: svt cobra convertible
https://bringatrailer.com/listing/2003-ford-mustang-svt-cobra-23/
Make: ford
Model: mustang sn95 1994-2004
Model2: svt cobra
https://bringatrailer.com/listing/2004-ford-mustang-23/
Make: ford
Model: mustang sn95 1994-2004
Model2: gt race car
https://bringatrailer.com/listing/1999-ford-mustang-8/
Make: ford
Model: mustang sn95 1994-2004
Model2: svt cobra
https://bringatrailer.com/listing/2003-ford-mustang-svt-cobra-22