# Amazon price scraper

In [1]:
import csv
from urllib.parse import quote_plus

from bs4 import BeautifulSoup
from msedge.selenium_tools import Edge, EdgeOptions

In [2]:
def get_url(search_text):
    """Build an Amazon search URL template for ``search_text``.

    The search text is percent-encoded (spaces become ``+``), so characters
    such as ``&``, ``+``, ``#`` or braces cannot corrupt the query string or
    interfere with later formatting.

    Returns a string that still contains one ``{}`` placeholder for the page
    number; callers fill it in with ``url.format(page)``.
    """
    search_term = quote_plus(search_text)

    # add term query to url
    url = 'https://www.amazon.com/s?k={}&ref=nb_sb_noss_1'.format(search_term)

    # add page query placeholder; appended after formatting so the
    # placeholder survives for the caller to fill in
    url += '&page={}'

    return url

In [3]:
def extract_record(item):
    """Extract and return data from a single search result.

    ``item`` is one parsed search-result element (the caller passes the
    ``div`` elements returned by BeautifulSoup).

    Returns a tuple ``(description, price, rating, review_count, url)``, or
    ``None`` when the result has no title link or no price, so the caller
    can skip it. Rating and review count fall back to empty strings when
    either one is missing.
    """
    # description and url: skip results without a usable title link instead
    # of letting one odd result abort the whole scrape
    heading = item.h2
    atag = heading.a if heading is not None else None
    if atag is None:
        return None
    href = atag.get('href')
    if not href:
        return None
    description = atag.text.strip()
    url = 'https://www.amazon.com' + href

    # product price: results without a price are skipped
    price_parent = item.find('span', 'a-price')
    if price_parent is None:
        return None
    price_tag = price_parent.find('span', 'a-offscreen')
    if price_tag is None:
        return None
    price = price_tag.text

    try:
        # rating and review count
        rating = item.i.text
        review_count = item.find('span', {'class': 'a-size-base', 'dir': 'auto'}).text
    except AttributeError:
        # either element is absent: record both as blank
        rating = ''
        review_count = ''

    return (description, price, rating, review_count, url)

In [None]:
def main(search_term, max_pages=20, output_path='results.csv'):
    """Run main program routine.

    Opens an Edge (Chromium) browser, loads search result pages 1 through
    ``max_pages`` for ``search_term``, extracts one record per result and
    writes them all to a CSV file.

    Args:
        search_term: Text to search for.
        max_pages: Number of result pages to visit (default 20).
        output_path: Destination CSV file (default ``'results.csv'``).
    """
    # startup the webdriver
    options = EdgeOptions()
    options.use_chromium = True
    driver = Edge(options=options)

    records = []
    url = get_url(search_term)

    try:
        for page in range(1, max_pages + 1):
            driver.get(url.format(page))
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            results = soup.find_all('div', {'data-component-type': 's-search-result'})
            for item in results:
                record = extract_record(item)
                if record:
                    records.append(record)
    finally:
        # quit() ends the whole WebDriver session (browser and driver
        # process), and runs even if a page load or parse fails
        driver.quit()

    # save data to csv file
    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Description', 'Price', 'Rating', 'ReviewCount', 'Url'])
        writer.writerows(records)

In [None]:
# Kick off the scraper for the example query.
main(search_term='ultrawide monitor')

In [4]:
# Start a standalone Edge browser session for interactive experiments.
options = EdgeOptions()
# Select the Chromium-based Edge rather than the legacy one.
options.use_chromium = True
driver = Edge(options=options)

In [5]:
# Navigate the browser to a single Amazon product page (a GIGABYTE Radeon
# graphics card listing, per the URL) to inspect its buy box.
driver.get('https://www.amazon.com/GIGABYTE-Radeon-Graphics-128-Bit-GV-R55XTOC-8GD/dp/B082BXG6Z5/ref=sr_1_4?dchild=1&keywords=amd+video+card&qid=1603932104&sr=8-4')

In [21]:
# Get the buy box table (element with class 'buybox-tabular-container')
# and all of its <tr> rows from the page currently loaded in the driver.
table = driver.find_element_by_class_name('buybox-tabular-container')
table_rows = table.find_elements_by_tag_name('tr')

# Ships from: text of the second <td> in the first row.
# NOTE(review): assumes the first row is the "ships from" row; this depends
# on the page layout and raises IndexError if rows or cells are missing.
ships_from = table_rows[0].find_elements_by_tag_name('td')[1].text

# Sold by: text of the second <td> in the second row (same layout assumption).
sold_by = table_rows[1].find_elements_by_tag_name('td')[1].text

'Amazon.com'