# Amazon price scraper

In [1]:
import csv
from bs4 import BeautifulSoup
from msedge.selenium_tools import Edge, EdgeOptions

In [2]:
def get_url(search_text):
    """Generate a paginated Amazon search url template from search text.

    Args:
        search_text: The free-form search phrase, e.g. ``'widescreen monitor'``.

    Returns:
        A url string that still contains one ``{}`` placeholder for the page
        number; call ``.format(page)`` on it to get a concrete page url.
    """
    from urllib.parse import quote_plus

    template = "https://www.amazon.com/s?k={}&ref=nb_sb_noss_2"
    # Percent-encode the term (spaces become '+'). This keeps characters such
    # as '&', '+' or '#' from corrupting the query string, and encodes any
    # literal braces so the later ``.format(page)`` call cannot trip on them.
    search_term = quote_plus(search_text)
    # add term query
    url = template.format(search_term)
    # add page query placeholder
    url += '&page={}'
    return url

In [3]:
def extract_record(item):
    """Extract and return data from a single search-result record.

    Args:
        item: One parsed search-result element. The caller passes the
            elements returned by ``soup.find_all(...)``; only ``.h2``, ``.i``
            and ``.find(...)`` are used here.

    Returns:
        A ``(description, price, rating, review_count, url)`` tuple of
        strings, or ``None`` when the record has no title link or no price.
        ``rating`` and ``review_count`` are each ``''`` when absent.
    """
    # description and url -- a result without a title link cannot be
    # identified, so skip it instead of letting the error abort the scrape
    try:
        atag = item.h2.a
        description = atag.text.strip()
        href = atag.get('href')
    except AttributeError:
        return None
    if not href:
        return None
    url = 'https://www.amazon.com' + href

    # product price -- records without a price are skipped
    try:
        price_parent = item.find('span', 'a-price')
        price = price_parent.find('span', 'a-offscreen').text
    except AttributeError:
        return None

    # rating and review are looked up independently so that a missing review
    # count does not throw away a rating that was found (and vice versa)
    try:
        rating = item.i.text
    except AttributeError:
        rating = ''

    try:
        review_count = item.find(
            'span', {'class': 'a-size-base', 'dir': 'auto'}
        ).text
    except AttributeError:
        review_count = ''

    return (description, price, rating, review_count, url)

In [4]:
# Scrape the first 20 result pages for one search term and save every record
# that has a price to results.csv.

# setup driver
options = EdgeOptions()
options.use_chromium = True
driver = Edge(options=options)

# list to capture results
records = []

# general url template (contains a '{}' placeholder for the page number)
url = get_url('widescreen monitor')

try:
    for page in range(1, 21):
        driver.get(url.format(page))
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        results = soup.find_all('div', {'data-component-type': 's-search-result'})
        for item in results:
            record = extract_record(item)
            # extract_record returns None for records it cannot use
            if record:
                records.append(record)
finally:
    # Always shut the browser down, even if a page load or parse fails.
    # quit() ends the whole WebDriver session; close() would only close the
    # current window and can leave the driver process running.
    driver.quit()

# save the results to file
with open('results.csv', 'w', newline='', encoding='utf-8') as f:
    header = ['Description', 'Price', 'Rating', 'ReviewCount', 'Url']
    writer = csv.writer(f)
    writer.writerow(header)
    writer.writerows(records)