# Amazon Web Scraper: Product Search

In [2]:
import csv # csv file reading and writing
from urllib.parse import quote_plus # url-encoding of the search keyword

import numpy as np
import pandas as pd
from bs4 import BeautifulSoup # web scraping
from selenium import webdriver # web browser automation

## Building the functions

### Function that generates a URL to search for a given keyword

In [2]:
def get_url(search_keyword):
    '''
    Generates an Amazon URL that performs a search of the given keyword.
    Arguments:
        search_keyword: name of the item to be searched
    Returns:
        URL with the url-encoded search keyword and a '{}' placeholder for the
        page number (to be filled in with str.format)
    '''

    template = 'https://www.amazon.com/s?k={}' # amazon url w/ search query placeholder

    # quote_plus turns spaces into '+' and percent-encodes every reserved
    # character ('&', '+', '#', '{', '}', ...), so the keyword can neither
    # alter the query string nor clash with the page placeholder added below
    search_keyword = quote_plus(search_keyword)

    url = template.format(search_keyword) # adding the search keyword to the search query

    url += '&page={}' # adding page query placeholder

    return url

### Function that extracts data of a single product record

In [53]:
def extract_record(item):
    '''
    Extracts data from an Amazon product listing from a search result.
    Arguments:
        item: product listing
    Returns:
        Tuple containing the product listing details: description, price,
        star rating, review count, and url.
        None if the listing has no title link or no price.
        Star rating and review count are both np.nan if either one is missing.
    '''

    # atag of the product (contains description + url of product listing);
    # a result block may lack an <h2>, a link inside it, or the link's href,
    # and one such block must not abort the whole scrape, so it is skipped
    heading = item.h2
    atag = heading.a if heading is not None else None
    href = atag.get('href') if atag is not None else None
    if not href:
        return None

    description = atag.text.strip() # product description
    url = 'https://www.amazon.com' + href # product listing url

    # if the item has no price, don't include it
    price_parent = item.find('span', 'a-price') # price parent span
    price_tag = price_parent.find('span', 'a-offscreen') if price_parent is not None else None
    if price_tag is None:
        return None
    price = price_tag.text # price value

    # rating and review count are kept or dropped together: when the review
    # count is absent, the first <i> tag is not trusted to be a rating
    rating_tag = item.i
    count_tag = item.find('span', 'a-size-base s-underline-text')
    if rating_tag is None or count_tag is None:
        star_rating = np.nan
        review_count = np.nan
    else:
        star_rating = rating_tag.text # star rating: 'x out of 5 stars'
        review_count = count_tag.text # number of reviews

    return (description, price, star_rating, review_count, url)

### Function that extracts data of all the available product records

In [70]:
def extract_all_records(search_keyword, path='results.csv', max_pages=20):
    '''
    Extracts data from all the available Amazon product listings from a search result
    and saves it to a csv file.
    Arguments:
        search_keyword: name of the item to be searched
        path: location of the csv file to write (default: 'results.csv' in the
            current working directory, which is where the analysis cells read it from)
        max_pages: number of search result pages to visit, starting at page 1
    '''

    url = get_url(search_keyword)

    records = []

    driver = webdriver.Chrome() # starting up the webdriver
    try:
        for page in range(1, max_pages + 1):
            driver.get(url.format(page))
            scraper = BeautifulSoup(driver.page_source, 'html.parser') # BeautifulSoup object
            results = scraper.find_all('div', {'data-component-type' : 's-search-result'})

            # listings without a price come back as None and are left out
            records.extend(record for record in map(extract_record, results) if record)
    finally:
        # quit() ends the whole browser session (close() only closes the window);
        # doing it in 'finally' releases the browser even when a page fails
        driver.quit()

    # saving the data to a csv file
    # newline='' leaves line endings to the csv writer; without it every row
    # is followed by an empty line on Windows
    with open(path, 'w', encoding='utf-8', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['description', 'price', 'star_rating', 'review_count', 'url'])
        writer.writerows(records)

## Running the scraper

In [71]:
# Opens a Chrome session, scrapes the search result pages for this keyword
# and writes the extracted listings to a csv file.
extract_all_records('mechanical keyboard')

## Analyzing the data

To analyze the data, first I'll load the csv file into a pandas dataframe:

In [3]:
# thousands=',' lets pandas read comma-grouped numbers (e.g. '2,005') as numeric
# values, so review_count is loaded as a number instead of text
df = pd.read_csv('results.csv', thousands=',')

Dimensions of the dataframe: (412 rows and 5 columns)

In [4]:
df.shape

(412, 5)

Taking a look at the dataframe:

In [5]:
df.head(5)

Unnamed: 0,description,price,star_rating,review_count,url
0,JJTechGiant Touchscreen Gaming Mechanical Keyb...,$330.65,4.4 out of 5 stars,9.0,https://www.amazon.com/gp/slredirect/picassoRe...
1,"Mechanical Gaming Keyboard,RGB 104 Keys Ultra-...",$32.99,4.6 out of 5 stars,291.0,https://www.amazon.com/gp/slredirect/picassoRe...
2,"Mechanical Gaming Keyboard,RGB 104 Keys Ultra-...",$32.99,4.6 out of 5 stars,291.0,https://www.amazon.com/Mechanical-Keyboard-Ult...
3,"Portable 60% Mechanical Gaming Keyboard, MageG...",$29.99,4.4 out of 5 stars,2005.0,https://www.amazon.com/Portable-Mechanical-Key...
4,Mechanical Keyboard RGB 104 Keys LED Backlit W...,$31.99,4.3 out of 5 stars,27.0,https://www.amazon.com/Mechanical-Customizable...


### Cleaning

In [6]:
df.dtypes

description      object
price            object
star_rating      object
review_count    float64
url              object
dtype: object

Formatting and converting `price`, `star_rating`, and `review_count` into numerical values: 

* `price`:

In [7]:
# removing the '$' sign and the thousands separator ('$1,299.99' -> '1299.99');
# a leftover ',' would make the numeric conversion that follows fail
df['price'] = df['price'].str.lstrip('$').str.replace(',', '', regex=False)

In [8]:
# converting the price strings to a numeric type; pd.to_numeric raises a
# ValueError if any value still contains non-numeric characters
df['price'] = pd.to_numeric(df['price'])

* `star_rating`:

In [9]:
# ratings are stored as 'x out of 5 stars': split on whitespace and keep only
# the leading 'x' token (listings without a rating stay NaN)
df['star_rating'] = df['star_rating'].str.split().str[0]

In [10]:
# converting the rating strings to a numeric type; missing ratings remain NaN
df['star_rating'] = pd.to_numeric(df['star_rating'])

* `review_count`:

In [11]:
# review_count was already loaded as float64 by read_csv (see df.dtypes above),
# so this conversion leaves the column unchanged; kept for symmetry with the
# other two columns
df['review_count'] = pd.to_numeric(df['review_count'])

Current data types:

In [12]:
df.dtypes

description      object
price           float64
star_rating     float64
review_count    float64
url              object
dtype: object

### Exploring and Visualizing 

Viewing some basic statistical details:

In [13]:
df.describe()

Unnamed: 0,price,star_rating,review_count
count,412.0,395.0,395.0
mean,62.885752,4.491646,1441.724051
std,45.309689,0.208466,4059.326107
min,12.99,3.5,1.0
25%,32.9,4.4,53.0
50%,49.99,4.5,207.0
75%,79.99,4.6,1031.5
max,330.65,5.0,42631.0


* What is the average price of the product?
* What is the best performing listing?
* Price vs star rating
* Price distribution

In [28]:
# listings whose star rating equals the highest rating in the data
df.loc[df['star_rating'].eq(df['star_rating'].max())]

Unnamed: 0,description,price,star_rating,review_count,url
51,"JIDOHUN KB-113 Mechanical Gaming Keyboard, Wir...",27.99,5.0,3.0,https://www.amazon.com/gp/slredirect/picassoRe...
159,"Wireless Typewriter Keyboard Mechanical TKL, 2...",79.99,5.0,1.0,https://www.amazon.com/Wireless-Typewriter-Key...
178,i-rocks K73M Mechanical Gaming Keyboard with M...,94.9,5.0,1.0,https://www.amazon.com/gp/slredirect/picassoRe...
181,Typewriter Style Mechanical Keyboard 87 Key Re...,29.99,5.0,11.0,https://www.amazon.com/Typewriter-Mechanical-K...
260,Newmen GM1000 96% Mechanical Gaming Keyboard W...,69.99,5.0,6.0,https://www.amazon.com/Newmen-Mechanical-Keybo...
271,"60% Mechanical Gaming Keyboard, 68 Keys Hot-Sw...",39.99,5.0,13.0,https://www.amazon.com/Mechanical-Keyboard-Hot...
274,Newmen GM335 Hot Swappable Mechanical Keyboard...,55.99,5.0,2.0,https://www.amazon.com/gp/slredirect/picassoRe...
306,Newmen GM1000 96% Mechanical Gaming Keyboard W...,59.99,5.0,6.0,https://www.amazon.com/gp/slredirect/picassoRe...
339,"WIZMAX, Keyboard WK2 Professional Wired Mechan...",130.99,5.0,2.0,https://www.amazon.com/gp/slredirect/picassoRe...
393,Erasem EK1001-SL Silent Mechanical Keyboard (S...,79.99,5.0,3.0,https://www.amazon.com/gp/slredirect/picassoRe...


In [29]:
# listings whose star rating equals the lowest rating in the data
df.loc[df['star_rating'].eq(df['star_rating'].min())]

Unnamed: 0,description,price,star_rating,review_count,url
166,i-rocks K72MN Artisan Mechanical Keyboard - 10...,79.9,3.5,6.0,https://www.amazon.com/gp/slredirect/picassoRe...


In [30]:
# listing(s) with the largest number of reviews
df.loc[df['review_count'].eq(df['review_count'].max())]

Unnamed: 0,description,price,star_rating,review_count,url
132,Redragon S101 Wired Gaming Keyboard and Mouse ...,46.99,4.6,42631.0,https://www.amazon.com/Redragon-S101-Keyboard-...


In [32]:
# listings whose review count lies above the 95th percentile
df.loc[df['review_count'].gt(df['review_count'].quantile(0.95))]

Unnamed: 0,description,price,star_rating,review_count,url
5,SteelSeries Apex 5 Hybrid Mechanical Gaming Ke...,79.99,4.7,8491.0,https://www.amazon.com/SteelSeries-Apex-Hybrid...
6,Redragon K552 Mechanical Gaming Keyboard RGB L...,38.99,4.5,37080.0,https://www.amazon.com/Redragon-K552-Mechanica...
14,havit Mechanical Keyboard and Mouse Combo RGB ...,59.99,4.5,11067.0,https://www.amazon.com/Mechanical-Keyboard-Key...
31,Logitech G915 Wireless Mechanical Gaming Keybo...,212.99,4.6,8564.0,https://www.amazon.com/Logitech-Wireless-Mecha...
35,RK ROYAL KLUDGE RK61 Wired 60% Mechanical Gami...,56.99,4.6,10890.0,https://www.amazon.com/RK-ROYAL-KLUDGE-Mechani...
41,"Gaming Keyboard and Mouse Combo, K1 LED Rainbo...",32.99,4.6,11709.0,https://www.amazon.com/gp/slredirect/picassoRe...
43,RK ROYAL KLUDGE RK61 Wireless 60% Triple Mode ...,52.99,4.5,9877.0,https://www.amazon.com/RK-ROYAL-KLUDGE-Ultra-C...
53,RK ROYAL KLUDGE RK61 2.4Ghz Wireless/Bluetooth...,54.99,4.5,7790.0,https://www.amazon.com/Mechanical-Keyboard-Wir...
54,Redragon K552 Mechanical Gaming Keyboard RGB L...,33.99,4.7,14036.0,https://www.amazon.com/Redragon-K552-RGB-Mecha...
69,Corsair K70 RGB PRO Wired Mechanical Gaming Ke...,169.99,4.7,9428.0,https://www.amazon.com/Corsair-Mechanical-Gami...


In [39]:
# first 20 listings when ordered from highest to lowest star rating
df.sort_values('star_rating', ascending=False).iloc[:20]

Unnamed: 0,description,price,star_rating,review_count,url
181,Typewriter Style Mechanical Keyboard 87 Key Re...,29.99,5.0,11.0,https://www.amazon.com/Typewriter-Mechanical-K...
260,Newmen GM1000 96% Mechanical Gaming Keyboard W...,69.99,5.0,6.0,https://www.amazon.com/Newmen-Mechanical-Keybo...
159,"Wireless Typewriter Keyboard Mechanical TKL, 2...",79.99,5.0,1.0,https://www.amazon.com/Wireless-Typewriter-Key...
51,"JIDOHUN KB-113 Mechanical Gaming Keyboard, Wir...",27.99,5.0,3.0,https://www.amazon.com/gp/slredirect/picassoRe...
393,Erasem EK1001-SL Silent Mechanical Keyboard (S...,79.99,5.0,3.0,https://www.amazon.com/gp/slredirect/picassoRe...
339,"WIZMAX, Keyboard WK2 Professional Wired Mechan...",130.99,5.0,2.0,https://www.amazon.com/gp/slredirect/picassoRe...
306,Newmen GM1000 96% Mechanical Gaming Keyboard W...,59.99,5.0,6.0,https://www.amazon.com/gp/slredirect/picassoRe...
178,i-rocks K73M Mechanical Gaming Keyboard with M...,94.9,5.0,1.0,https://www.amazon.com/gp/slredirect/picassoRe...
271,"60% Mechanical Gaming Keyboard, 68 Keys Hot-Sw...",39.99,5.0,13.0,https://www.amazon.com/Mechanical-Keyboard-Hot...
407,"Akko Hot Swappable Mechanical Keyboard, 5108S ...",104.98,5.0,8.0,https://www.amazon.com/Akko-Swappable-Mechanic...


In [48]:
# first 30 listings when ordered from most to fewest reviews
df.sort_values('review_count', ascending=False).iloc[:30]

Unnamed: 0,description,price,star_rating,review_count,url
132,Redragon S101 Wired Gaming Keyboard and Mouse ...,46.99,4.6,42631.0,https://www.amazon.com/Redragon-S101-Keyboard-...
6,Redragon K552 Mechanical Gaming Keyboard RGB L...,38.99,4.5,37080.0,https://www.amazon.com/Redragon-K552-Mechanica...
303,"havit Gaming Keyboard and Mouse Combo, Backlit...",37.99,4.4,27113.0,https://www.amazon.com/HAVIT-Rainbow-Backlit-G...
148,Arteck 2.4G Wireless Keyboard Stainless Steel ...,27.99,4.5,25045.0,https://www.amazon.com/gp/slredirect/picassoRe...
297,NPET K10 Gaming Keyboard USB Wired Floating Ke...,19.99,4.5,16620.0,https://www.amazon.com/NPET-Floating-Keyboard-...
291,NPET K10 Gaming Keyboard USB Wired Floating Ke...,19.99,4.5,16620.0,https://www.amazon.com/NPET-Floating-Keyboard-...
54,Redragon K552 Mechanical Gaming Keyboard RGB L...,33.99,4.7,14036.0,https://www.amazon.com/Redragon-K552-RGB-Mecha...
140,HK GAMING GK61 Mechanical Gaming Keyboard - 61...,79.99,4.6,13170.0,https://www.amazon.com/GK61-Mechanical-Gaming-...
73,"Gaming Keyboard and Mouse Combo, K1 LED Rainbo...",29.99,4.6,11709.0,https://www.amazon.com/gp/slredirect/picassoRe...
41,"Gaming Keyboard and Mouse Combo, K1 LED Rainbo...",32.99,4.6,11709.0,https://www.amazon.com/gp/slredirect/picassoRe...


In [54]:
# listings rated 4.5 stars or more, ordered from most to fewest reviews (first 30)
df.loc[df['star_rating'].ge(4.5)].sort_values('review_count', ascending=False).iloc[:30]

Unnamed: 0,description,price,star_rating,review_count,url
132,Redragon S101 Wired Gaming Keyboard and Mouse ...,46.99,4.6,42631.0,https://www.amazon.com/Redragon-S101-Keyboard-...
6,Redragon K552 Mechanical Gaming Keyboard RGB L...,38.99,4.5,37080.0,https://www.amazon.com/Redragon-K552-Mechanica...
148,Arteck 2.4G Wireless Keyboard Stainless Steel ...,27.99,4.5,25045.0,https://www.amazon.com/gp/slredirect/picassoRe...
297,NPET K10 Gaming Keyboard USB Wired Floating Ke...,19.99,4.5,16620.0,https://www.amazon.com/NPET-Floating-Keyboard-...
291,NPET K10 Gaming Keyboard USB Wired Floating Ke...,19.99,4.5,16620.0,https://www.amazon.com/NPET-Floating-Keyboard-...
54,Redragon K552 Mechanical Gaming Keyboard RGB L...,33.99,4.7,14036.0,https://www.amazon.com/Redragon-K552-RGB-Mecha...
140,HK GAMING GK61 Mechanical Gaming Keyboard - 61...,79.99,4.6,13170.0,https://www.amazon.com/GK61-Mechanical-Gaming-...
73,"Gaming Keyboard and Mouse Combo, K1 LED Rainbo...",29.99,4.6,11709.0,https://www.amazon.com/gp/slredirect/picassoRe...
41,"Gaming Keyboard and Mouse Combo, K1 LED Rainbo...",32.99,4.6,11709.0,https://www.amazon.com/gp/slredirect/picassoRe...
14,havit Mechanical Keyboard and Mouse Combo RGB ...,59.99,4.5,11067.0,https://www.amazon.com/Mechanical-Keyboard-Key...
