## To Do
1. Create a Load from csv function
2. Create algo that will pull any data newer than latest entry in csv
3. Deal with errored data
4. Figure out how to do pull new data either daily or weekly - using flask perhaps?
5. Use Plotly Dash to create web app to analyze data

In [46]:
'''
Program to pull auction data from BringaTrailer.com

Created by Tropskee on 10/29/2022
Est. Completion Time: 8 hours
'''

import requests
import re
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager

# base_url = "https://bringatrailer.com/"
subaru_models = ["subaru/wrx-sti/"]

def get_make_and_model(make_and_model):
    '''
    Accepts URL subdirectory and returns make and model of vehicle
    '''
    split_make_and_model = make_and_model.split('/')
    return split_make_and_model[0], split_make_and_model[1]
    
def get_url_of_all_models():
    url_all_models = 'https://bringatrailer.com/models/'
    
    try:
        html = requests.get(url_all_models)
    except requests.exceptions.RequestException as e:  # This is the correct syntax
        print(e)
        print('Models html data not acquired')
        pass
    
    # <a class="previous-listing-image-link" href="https://bringatrailer.com/acoma/">
    found_model_urls = html.find_all("href", class_ = "previous-listing-image-link")
    print(found_model_urls)
    return found_model_urls

    
def get_model_html(model_subd_url):
    '''
    Get html of overlying vehicle model webpage
    
    :input model_subd_url str: Subdirectory url of vehicle model
    
    :return model_html str: Html content of vehicle model overview page
    '''
    base_url = "https://bringatrailer.com/"
    try:
        driver = webdriver.Safari()
    except:
        # Using Chrome
        # from selenium import webdriver
        # from selenium.webdriver.chrome.service import Service as ChromeService
        # from webdriver_manager.chrome import ChromeDriverManager
        # driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))
        
        # Using IE
        # from selenium import webdriver
        # from selenium.webdriver.ie.service import Service as IEService
        # from webdriver_manager.microsoft import IEDriverManager
        # driver = webdriver.Ie(service=IEService(IEDriverManager().install()))
        
        # Using Edge
        from selenium import webdriver
        from selenium.webdriver.edge.service import Service as EdgeService
        from webdriver_manager.microsoft import EdgeChromiumDriverManager
        driver = webdriver.Edge(service=EdgeService(EdgeChromiumDriverManager().install()))
    
    driver.get(base_url + model_subd_url)
    
    # Scroll to click 'show more' button to get all previously auctioned vehicle URL's
    try:
        button_selector = 'body > main > div:nth-child(6) > div > div > div > div.overlayable > div.auctions-footer.auctions-footer-previous > button'
        # button_selector = 'body > main > div.container > div > div > div.filter-group > div.overlayable > div.auctions-footer.auctions-footer-previous > button'
        while(driver.find_element(By.CSS_SELECTOR, button_selector)):
            try:
                WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, button_selector)))#.click()
                time.sleep(1) # Needed for page to load and "show more" button clicked
                WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, button_selector))).click()
            except:
                WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, button_selector)))#.click()
                time.sleep(1) # Needed for page to load and "show more" button clicked
                WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, button_selector))).click()
                time.sleep(1)
            
    except Exception as e:
        print(e)
        print('No more vehicles to show')
    
    html = driver.page_source
    driver.quit()
    model_html = BeautifulSoup(html)
    # print(model_html)
        
    return model_html

def get_vehicle_html(model_url):
    '''
    Get page html of (1) vehicle's auction data
    '''
    try:
        html = requests.get(model_url)
    except requests.exceptions.RequestException as e:  # This is the correct syntax
        print(e)
        print('Model HTML data not acquired')
        pass
        
    # Pull html data
    soup = BeautifulSoup(html.content, 'html.parser')
    
    return soup

def get_model_urls(model_html):
    '''
    Get all urls of previously auction vehicles pertaining to a specific model
    
    :input soup str: Beautiful soup string of html data
    
    :return urls list: List containing urls of previoulsy auction vehicles
    '''
    
    get_models = model_html.find_all("div", class_ = "blocks")
    
    urls = []
    for a in get_models[0].find_all("a", href=True):
        if a["href"] not in urls:
            urls.append(a["href"])
        else:
            pass
    print(f"Found {len(urls)} vehicle auction URL's.")
    return urls


# def get_sale_date(soup):
#     '''
#     Get vehicle auction date
#     '''
#     # date_html = soup.find_all('span', class_ = "data-value")
#     date_html = soup.find_all('span', class_ = "date")
#     # print(date_html[0].text.split()[1])
#     sold_date = date_html[0].text.split()[1]
    
#     return sold_date

def get_auction_result(soup):
    '''
    Get auction result data ex. "Sold for $22,250 on 10/27/22"
    '''
    # for auction_result in soup.find("span", class_ = "data-label"):
    auction_result = soup.find("span", class_ = 'info-value')
    # print(auction_result.text)    
    return auction_result.text
    

# def get_price(soup, sold_bool):
#     '''
#     Get vehicle sale price or max bid if not sold
#     '''
#     if sold_bool:
#         for price in soup.find(class_ = "data-value price"):
#             car_price_str = re.findall('[0-9]+,[0-9]+', price)
#             return int(car_price_str[0].replace(",",""))
#     else:
#         for price in soup.find(class_ = "data-value"):
#             car_price_str = re.findall('[0-9]+,[0-9]+', price)
#             return int(car_price_str[0].replace(",",""))


def get_listing_details(soup):
    '''
    Get vehicle listing details - vin, miles, etc.
    '''
    
    listing_details = soup.find_all("div", class_="item")
    return listing_details

def get_model_year(soup):
    '''
    Get vehicle model year from html
    '''
    model_year_text = soup.find("h1", class_ = 'post-title').text
    model_year = re.findall("(\d{4})", model_year_text)
    return model_year
    

# def get_vin(soup):
#     '''
#     Get vehicle VIN
#     '''
#     # Get VIN
#     for vin in soup.find("ul", class_="listing-essentials-items").find_all("li")[3]:
#         return vin
     
    
# def get_miles(soup):
#     '''
#     Get vehicle miles
#     '''
#     for miles in soup.find("ul",class_="listing-essentials-items").find_all("li")[4]:
#         return miles
            

# def main():
#     soup = get_html(models[0])
#     model_urls = get_model_urls(soup)
#     print(model_urls)
    
# main()

In [47]:
get_url_of_all_models()

HTTPSConnectionPool(host='bringatrailer.com', port=443): Max retries exceeded with url: /models/ (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1131)')))
Models html data not acquired


UnboundLocalError: local variable 'html' referenced before assignment

In [40]:
auction_results_url = "auctions/results/"
soup = get_model_html(auction_results_url)
model_urls = get_model_urls(soup)


# # Scroll to click 'show more' button to get all previously auctioned vehicle URL's
# btn = dr.find_element_by_css_selector("button.load-more-button")
# btn.click()

Message: 
Stacktrace:
Backtrace:
	Microsoft::Applications::Events::EventProperties::unpack [0x00F5E7A3+19811]
	Microsoft::Applications::Events::ISemanticContext::SetTicket [0x00E50E61+890497]
	Microsoft::Applications::Events::ILogConfiguration::operator* [0x00CADAAE+4190]
	Microsoft::Applications::Events::GUID_t::GUID_t [0x00CD87DF+99727]
	Microsoft::Applications::Events::GUID_t::GUID_t [0x00CD8981+100145]
	Microsoft::Applications::Events::GUID_t::GUID_t [0x00CFF792+259394]
	Microsoft::Applications::Events::GUID_t::GUID_t [0x00CED514+185028]
	Microsoft::Applications::Events::GUID_t::GUID_t [0x00CD11DB+69515]
	Microsoft::Applications::Events::GUID_t::GUID_t [0x00CFE051+253441]
	Microsoft::Applications::Events::GUID_t::GUID_t [0x00CED246+184310]
	Microsoft::Applications::Events::GUID_t::GUID_t [0x00CD0594+66372]
	Microsoft::Applications::Events::GUID_t::GUID_t [0x00CCFE3C+64492]
	Microsoft::Applications::Events::GUID_t::GUID_t [0x00CD0BEF+67999]
	Microsoft::Applications::Events::ILogMana

Found 164 vehicle auction URL's.


In [114]:
import pandas as pd

pd_db = pd.DataFrame()
# Pandas DF column names
COLUMNS = ['Make', 'Model', 'Year', 'Kilometers', 'Miles', 'Mileage Notes', 'Sale_Status', 'Final Bid Price', 'Auction Date', 'VIN', 'Details', 'URL']

In [115]:
# For logging url's that resulted in an error
error_urls = []

# Loop through all urls for a specific model, get listing data, and update db
for idx, model_url in enumerate(model_urls[:]):
    try:
        # Initiliaze pandas dataframe variables to None
        vehicle_make, vehicle_model, model_year, vehicle_kilometers, vehicle_mileage, vehicle_mileage_notes, sale_status, sale_price, sale_date, vehicle_vin, joined_results = [None] * 11

        # Initialize auction result variables to None
        vehicle_data_soup, auction_result_str, listing_details_html, listing_details = [None] * 4

        # Get vehicle html
        vehicle_data_soup = get_vehicle_html(model_url)

        # Parse html and get auction data
        auction_result_str = get_auction_result(vehicle_data_soup)

        # Get Make & Model
        vehicle_make, vehicle_model = [s.text for s in vehicle_data_soup.find_all("button", class_ = 'group-title')][0:2]
        vehicle_make = vehicle_make.replace('Make', '').strip(' ')
        vehicle_model = vehicle_model.replace('Model', '').replace(vehicle_make, '').strip(' ')

        # Get listing details - vin, miles, etc. - second entry
        listing_details_html = get_listing_details(vehicle_data_soup)
        # Find the listing_details using keyword "Listing Details"
        for detail in listing_details_html:
            if detail.find("strong") and "Listing Details" in detail.find("strong"):
                listing_details = detail
                break

        # Extract details from html "li"
        results = None
        results = [detail.text for detail in listing_details.find_all('li')]
        # Replace any hyphens in listing details with spaces
        # results = [detail.replace("-"," ") for detail in results]

        # Get vehicle mileage, normally second entry in "results"
        is_mileage_units = True
        mileage_words = ['Miles', 'miles', 'Mile', 'mile'] 
        kilometer_words = ['Kilometers', 'kilometers', 'Kilometer', 'kilometer', 'KM', 'km']
        for result in results:
            result = result.replace("-"," ")
            if any(word in result for word in kilometer_words):
                vehicle_mileage_notes = result
                is_mileage_units = False
                break
            elif any(word in result for word in mileage_words):
                vehicle_mileage_notes = result
                break

        # Extract Mileage figure from vehicle_mileage_notes
        if vehicle_mileage_notes is None:
            vehicle_mileage = None
        else:
            vehicle_mileage = re.findall('[0-9]+,[0-9]+', vehicle_mileage_notes)
            if len(vehicle_mileage) < 1: # If no match is found, mileage must contain 'k' at end i.e., 47k miles
                vehicle_mileage = re.findall('[0-9]+[kK]', vehicle_mileage_notes)
                if len(vehicle_mileage) >= 1: # If match is found, strip k from end
                    vehicle_mileage = [vehicle_mileage[0].strip('k').strip('K') + ',000']
            if len(vehicle_mileage) < 1: # Still no match found, try mileage < 1,000
                vehicle_mileage = re.findall('[0-9]+', vehicle_mileage_notes)
            if vehicle_mileage == []:
                vehicle_mileage = None
        vehicle_mileage = vehicle_mileage if type(vehicle_mileage) is not list else vehicle_mileage[0]

        # Check if units are in miles or km and make adjustments if needed
        if vehicle_mileage is not None:
            vehicle_mileage = int(vehicle_mileage.replace(',',''))
            if is_mileage_units: # units are mileage
                vehicle_kilometers = int(1.60934 * vehicle_mileage)
            else: # units are km
                vehicle_kilometers = vehicle_mileage
                vehicle_mileage = int(0.621371 * vehicle_mileage)


        # print(mileage_value)
        # vehicle_mileage = None if len(mileage_value) < 1 else mileage_value


        # Get vehicle model year
        model_year = get_model_year(vehicle_data_soup)

        # Check sale status - i.e., sold or not
        sale_status = "Not Sold"
        if "Sold" in auction_result_str or "sold" in auction_result_str:
            sale_status = "Sold"

        # Get vehicle sale date
        sale_date = auction_result_str.split()[-1]

        # Get vehicle sale price
        sale_price = int(re.findall('[0-9]+,[0-9]+', auction_result_str)[0].replace(",",""))

        # Get vehicle vin, first entry in "results"
        vehicle_vin = results[0].split()[-1]

        # Combine results into 1 string
        joined_results = " ,".join(results)

        # Create pd series for ingestion into pd_db
        pd_series = pd.Series([vehicle_make.upper(), vehicle_model.upper(), model_year[0], vehicle_kilometers, vehicle_mileage, vehicle_mileage_notes, sale_status, sale_price, sale_date, vehicle_vin, joined_results, model_url])

        # Append DataFrame - make, model, year, mileage, sale_status, price, date, vin, other_details
        pd_db = pd_db.append(pd_series, ignore_index=True)
        print(f'---Completed {idx+1} out of {len(model_urls)} vehicles---')
    
    except Exception as e:
        error_urls.append(model_url)

        
pd_db.reset_index()
pd_db.columns = COLUMNS
pd_db.set_index('Auction Date', inplace=True)
pd_db.head()



---Completed 1 out of 192 vehicles---
---Completed 2 out of 192 vehicles---
---Completed 3 out of 192 vehicles---
---Completed 4 out of 192 vehicles---
---Completed 5 out of 192 vehicles---
---Completed 6 out of 192 vehicles---
---Completed 7 out of 192 vehicles---
---Completed 8 out of 192 vehicles---
---Completed 9 out of 192 vehicles---
---Completed 10 out of 192 vehicles---
---Completed 11 out of 192 vehicles---
---Completed 12 out of 192 vehicles---
---Completed 13 out of 192 vehicles---
---Completed 14 out of 192 vehicles---
---Completed 15 out of 192 vehicles---
---Completed 16 out of 192 vehicles---
---Completed 17 out of 192 vehicles---
---Completed 18 out of 192 vehicles---
---Completed 19 out of 192 vehicles---
---Completed 20 out of 192 vehicles---
---Completed 22 out of 192 vehicles---
---Completed 23 out of 192 vehicles---
---Completed 24 out of 192 vehicles---
---Completed 25 out of 192 vehicles---
---Completed 26 out of 192 vehicles---
---Completed 27 out of 192 vehicle

Unnamed: 0_level_0,Make,Model,Year,Kilometers,Miles,Mileage Notes,Sale_Status,Final Bid Price,VIN,Details,URL
Auction Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
11/1/22,PORSCHE,911SC,1979,99779.0,62000.0,"62k Miles Shown, TMU",Sold,86000.0,9119200119,"Chassis: 9119200119 ,62k Miles Shown, TMU ,3.2...",https://bringatrailer.com/listing/1979-porsche...
11/1/22,MERCEDES-BENZ,W123,1978,292899.0,182000.0,182k Miles Shown,Sold,8000.0,12313012084731,"Chassis: 12313012084731 ,182k Miles Shown ,3.0...",https://bringatrailer.com/listing/1978-mercede...
11/1/22,CHEVROLET,C/K GMT400 (1988-2002),1995,74029.0,46000.0,46k Miles Shown,Sold,28000.0,2GCEK19K8S1287430,"Chassis: 2GCEK19K8S1287430 ,46k Miles Shown ,...",https://bringatrailer.com/listing/1995-chevrol...
11/1/22,BMW,Z3 M COUPE,2000,102997.0,64000.0,64k Miles,Sold,39800.0,WBSCM934XYLC61680,"Chassis: WBSCM934XYLC61680 ,One Owner ,64k Mil...",https://bringatrailer.com/listing/2000-bmw-m-c...
11/1/22,ROLLS-ROYCE,SY SILVER SHADOW & SILVER WRAITH II,1971,148059.0,92000.0,92k Miles Shown,Sold,9500.0,LRX11392,"Chassis: LRX11392 ,92k Miles Shown ,6.75-Liter...",https://bringatrailer.com/listing/1971-rolls-r...


In [116]:
len(pd_db)

180

In [117]:
pd_db.tail(20)

Unnamed: 0_level_0,Make,Model,Year,Kilometers,Miles,Mileage Notes,Sale_Status,Final Bid Price,VIN,Details,URL
Auction Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
10/31/22,FERRARI,328,1987,33796,21000,21k Miles Shown,Not Sold,95000.0,ZFFXA20A5H0071899,"Chassis: ZFFXA20A5H0071899 ,21k Miles Shown ,3...",https://bringatrailer.com/listing/1987-ferrari...
10/31/22,FORD,F-SERIES 1992-1997,1996,133575,83000,83k Miles,Sold,25500.0,1FTEF14H1TNA15270,"Chassis: 1FTEF14H1TNA15270 ,83k Miles ,5.8-Lit...",https://bringatrailer.com/listing/1996-ford-f-...
10/31/22,PORSCHE,997 911,2006,59545,37000,37k Miles,Sold,44000.0,WP0AA29966S715747,"Chassis: WP0AA29966S715747 ,37k Miles ,3.6-Lit...",https://bringatrailer.com/listing/2006-porsche...
10/31/22,MERCEDES-BENZ,R230 SL,2003,46670,29000,29k Miles,Not Sold,20750.0,WDBSK75FX3F016828,"Chassis: WDBSK75FX3F016828 ,29k Miles ,5.0-Lit...",https://bringatrailer.com/listing/2003-mercede...
10/31/22,VOLVO,ERA2000S,2006,35405,22000,22k Miles,Sold,17000.0,YV1MC68296J004692,"Chassis: YV1MC68296J004692 ,22k Miles ,Turboch...",https://bringatrailer.com/listing/2006-volvo-c...
10/31/22,PORSCHE,991 TURBO,2014,40233,25000,25k Miles,Sold,124500.0,WP0AD2A96ES166486,"Chassis: WP0AD2A96ES166486 ,25k Miles ,Twin-Tu...",https://bringatrailer.com/listing/2014-porsche...
10/31/22,MERCEDES-BENZ,SLS AMG,2011,5632,3500,"3,500 Miles",Sold,236000.0,WDDRJ7HA0BA005048,"Chassis: WDDRJ7HA0BA005048 ,3,500 Miles ,6.2-L...",https://bringatrailer.com/listing/2011-mercede...
10/31/22,PORSCHE,LONGHOOD 911,1970,136793,85000,"85k Miles Shown, TMU",Sold,97000.0,9110121800,"Chassis: 9110121800 ,85k Miles Shown, TMU ,Rep...",https://bringatrailer.com/listing/1970-porsche...
10/31/22,BMW,E63/E64 M6,2009,38624,24000,24k Miles,Sold,63500.0,WBSEK93549CY80197,"Chassis: WBSEK93549CY80197 ,24k Miles ,5.0-Lit...",https://bringatrailer.com/listing/2009-bmw-m6-...
10/31/22,PORSCHE,911 CARRERA 3.2,1985,106216,66000,66k Miles Shown,Sold,55000.0,WP0EB0914FS170967,"Chassis: WP0EB0914FS170967 ,66k Miles Shown ,3...",https://bringatrailer.com/listing/1985-porsche...


In [120]:
from datetime import datetime
now = datetime.now()
# Month abbreviation, day and year
day = now.strftime("%b_%d_%Y_%H_%M_%S")

filepath = f'./Desktop/BAT_Analytics/BAT_Data/BAT_auction_prices_{day}.csv'
pd_db.to_csv(filepath, index=True)