# Selenium Tutorial -- Dev Sharma, Applied Analytics Club

Import libraries 

In [1]:
import os

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select

In [2]:
# Start a Chrome session using the chromedriver binary at this relative path
chromedriver_path = 'driver/chromedriver'
driver = webdriver.Chrome(chromedriver_path)

Define the URL and use Selenium's built-in `get` method to open the desired site

In [3]:
# Address of the site to scrape; driver.get() navigates the browser to it
url = "http://www.amazon.com"
driver.get(url)

### Selecting an option out of a dropdown menu

In [4]:
# Locate the drop-down with id "searchDropdownBox".
# find_element(By.CSS_SELECTOR, ...) is used instead of
# find_element_by_css_selector, which newer Selenium releases no longer provide.
options = driver.find_element(By.CSS_SELECTOR, '#searchDropdownBox')

# .text of the drop-down lists the label of every option it contains
print(options.text)

All Departments
Audible Books & Originals
Alexa Skills
Amazon Devices
Amazon Fresh
Amazon Warehouse
Appliances
Apps & Games
Arts, Crafts & Sewing
Automotive Parts & Accessories
Baby
Beauty & Personal Care
Books
CDs & Vinyl
Cell Phones & Accessories
Clothing, Shoes & Jewelry
   Women
   Men
   Girls
   Boys
   Baby
 
Under $10
Collectibles & Fine Art
Computers
Courses
Credit and Payment Cards
Digital Music
Electronics
Garden & Outdoor
Gift Cards
Grocery & Gourmet Food
Handmade
Health, Household & Baby Care
Home & Business Services
Home & Kitchen
Industrial & Scientific
Just for Prime
Kindle Store
Luggage & Travel Gear
Luxury Beauty
Magazine Subscriptions
Movies & TV
Musical Instruments
Office Products
Pet Supplies
Prime Pantry
Prime Video
Software
Sports & Outdoors
Subscription Boxes
Tools & Home Improvement
Toys & Games
Vehicles
Video Games


In [5]:
# Wrap the drop-down element in Select so an option can be chosen by its label
selection = Select(options)
selection.select_by_visible_text('Home & Kitchen')

Search for any product type, e.g. "photo frames"

In [6]:
# Find the search box by its id, type the query, then press Enter to submit it.
# find_element(By.CSS_SELECTOR, ...) is used instead of
# find_element_by_css_selector, which newer Selenium releases no longer provide.
SearchBar = driver.find_element(By.CSS_SELECTOR, "#twotabsearchtextbox")
SearchBar.send_keys("Photo Frames")
SearchBar.send_keys(Keys.ENTER)

### Selecting multiple elements 

In [7]:
# Note the "s" in find_elements: it returns a list of every element matching
# the selector instead of only the first one.
frames_desc_temp = driver.find_elements(By.CSS_SELECTOR, ".s-line-clamp-4 .a-text-normal")

# Accounting for duplicates: keep only the odd-indexed matches (1, 3, 5, ...)
frames_desc = frames_desc_temp[1::2]

print(len(frames_desc))
frames_desc[10].text

38


'AmazonBasics Photo Picture Frame - 4" x 6", Black, 2-Pack'

In [8]:
# Elements holding the whole-number part of each price.
# find_elements(By.CSS_SELECTOR, ...) is used instead of
# find_elements_by_css_selector, which newer Selenium releases no longer provide.
frames_whole_price = driver.find_elements(By.CSS_SELECTOR, ".a-spacing-top-small .a-price:nth-child(1) .a-price-whole")
frames_whole_price[10].text

'9'

In [9]:
# Elements holding the fractional part of each price.
# find_elements(By.CSS_SELECTOR, ...) is used instead of
# find_elements_by_css_selector, which newer Selenium releases no longer provide.
frames_frac_price = driver.find_elements(By.CSS_SELECTOR, ".a-spacing-top-small .a-price:nth-child(1) .a-price-fraction")
frames_frac_price[10].text


'99'

In [10]:
type(frames_frac_price[10].text) # the element's .text is a str, so it has to be converted to int before the final price can be computed

str

#### Summing the whole and fraction prices

In [11]:
def price_compiler(whole, frac):
    """Combine the whole and fractional parts of a price into one number.

    Parameters
    ----------
    whole : str or number
        Whole-number part of the price, e.g. ``'9'``. Thousands separators
        in strings (``'1,299'``) are ignored.
    frac : str or number
        Fractional part expressed in hundredths, e.g. ``'99'``.

    Returns
    -------
    float or None
        ``int(whole) + int(frac) / 100``, or ``None`` when either part
        cannot be converted to an integer (empty text, ``None``, letters...).
    """
    if isinstance(whole, str):
        # int() rejects "1,299", so strip the separators first
        whole = whole.replace(',', '')
    try:
        final_price = int(whole) + (int(frac) / 100)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "no usable price"; anything else
        # (e.g. KeyboardInterrupt) should not be silenced.
        final_price = None
    return final_price

In [12]:
# Build one price per (whole, fraction) element pair.
# zip stops at the shorter list, so a length mismatch between the two
# scraped lists cannot raise IndexError.
# NOTE(review): the two lists come from separate queries and are paired purely
# by position - confirm that every listing yields both parts.
frames_price = [
    price_compiler(whole.text, frac.text)
    for whole, frac in zip(frames_whole_price, frames_frac_price)
]

print(frames_price)

[9.84, 19.99, 19.95, 7.99, 9.99, 9.84, 19.99, None, None, 25.49, 9.99, 29.99, 13.99, 14.95, 30.99, 15.99, 38.99, 21.99, 13.99, 18.99, 33.99, 21.95, 32.99, 10.99, 24.95, 31.95, 16.95, 19.99, 35.99, 14.99, 37.87, 45.49, 27.49, 9.84, 27.95, 25.95, 7.99, 25.99, 38.99, 16.99, 29.99, None, None, None]


### Creating a dictionary for each item

In [13]:
# Map the first 20 characters of each description to its price.
# zip stops at the shorter list, so having fewer prices than descriptions
# cannot raise IndexError.
# NOTE(review): descriptions sharing the same 20-character prefix overwrite one
# another, and descriptions and prices are matched only by position - verify
# that the two lists line up before relying on these pairs.
frames = {
    desc.text[:20]: price
    for desc, price in zip(frames_desc, frames_price)
}

print(frames)

{'AmazonBasics 8" x 10': 9.84, 'Hap Tim 5x7 Picture ': 19.99, 'Scholartree Wooden P': 19.95, 'RPJC 5x7 Picture Fra': 7.99, 'AmazonBasics Photo P': 9.99, 'Yome 11x14 Black Pic': 19.99, '': None, 'Giftgarden 8x10 Pict': 25.49, 'SONGMICS Picture Fra': 29.99, 'Emfogo 4x6 Picture F': 13.99, 'Americanflat 11x14 B': 14.95, '8x10 Black Picture F': 30.99, 'Love-KANKEI Wood Pic': 15.99, 'Golden State Art, Go': 38.99, 'BOICHEN 6 Pack 4x6 P': 21.99, 'RPJC 5x7 Soild Wood ': 13.99, 'The Display Guys LUX': 18.99, 'upsimples 5x7 Pictur': 33.99, 'Scholartree Wooden G': 21.95, 'Giftgarden Multi Pic': 32.99, 'Umbra Prisma Picture': 10.99, 'Collage Picture Fram': 24.95, 'Americanflat 10-Piec': 31.95, 'Americanflat 11x14 C': 16.95, 'MELANNCO Customizabl': 19.99, 'Homemaxs 12 Pack Pic': 35.99, 'Giftgarden 5 by 7 In': 14.99, 'Umbra Luna 9-Opening': 37.87, 'Gallery Perfect 7 Pi': 45.49, 'Egofine 8x10 Picture': 27.49, 'LaVie Home 5x7 Pictu': 27.95, 'LaVie Home 4x6 Pictu': 25.95, 'RPJC 4x6 Picture Fra': 7.99, '

In [14]:
# Number of descriptions kept after the duplicate filtering
len(frames_desc)

38

## Challenge: Use Selenium to scrape eBay.com for a product of your choice

In [15]:
# Hint: Don't worry about selecting the right product category

# Answer
#
#
#
#
#
#
#
#
#

In [16]:
# Same workflow as before, now against eBay: open the site, search, scrape.
url = "http://www.ebay.com"
driver.get(url)

# find_element(By.CSS_SELECTOR, ...) is used instead of
# find_element_by_css_selector, which newer Selenium releases no longer provide.
SearchBar = driver.find_element(By.CSS_SELECTOR, "#gh-ac")
SearchBar.send_keys("Photo Frames")
SearchBar.send_keys(Keys.ENTER)

# NOTE: frames_desc is re-bound here, replacing the list scraped earlier.
frames_desc = driver.find_elements(By.CSS_SELECTOR, ".s-item__title")
frames_prices = driver.find_elements(By.CSS_SELECTOR, ".s-item__price")

# Map the first 20 characters of each title to its price text.
# zip stops at the shorter list, so fewer prices than titles cannot raise
# IndexError. Titles sharing the same 20-character prefix overwrite one another.
ebay_frames = {
    title.text[:20]: price.text
    for title, price in zip(frames_desc, frames_prices)
}
print(ebay_frames)

{'4 11 23 26pcs Multi ': '$19.99 to $38.99', '7 PCS, Giftgarden 8x': '$23.91', 'Fridge Magnetic Pict': '$17.50', '7 Pack of 8x10 Pictu': '$25.64', 'Photo Frame Poster P': '$11.93', 'Format Picture Frame': '$23.99', 'NEW LISTINGFrames Un': '$15.00', '11 Pcs Photo Frame S': '$23.99', 'Photo Frame Set Of 6': '$23.49', 'Photo Frame Set Form': '$22.99 to $39.99', 'Clear Magnetic Photo': '$9.05', '3 Sizes 12 Piece Pic': '$22.82', 'Photo Booth Frame 2x': '$6.99', '4PCS Picture Photo W': '$19.99', '50 Photo Insert Note': '$19.01', '100 plastic Photo Bo': '$39.00', 'Icona Bay 8x10 Pictu': '$18.90', 'Craig Frames Economy': '$8.99 to $133.00', 'Stand Up Acrylic Pic': '$14.99', 'VonHaus 2x Standing ': '$9.99', 'NEW LISTING10Pcs Nov': '$5.49 to $5.99', 'Wood Photo Frame Pos': '$15.99', 'NEW LISTINGBlack Woo': '$25.00', 'A1 A2 A3 A4 A5 Pictu': '$3.13 to $44.03', 'Special Moments Silv': '$7.79', 'New 11 x 17 picture ': '$6.00', 'StoreSMART Full-Back': '$20.28', 'Aluminum Photo Poste': '$12.50 to $24.

## Bonus: Converting data into a pandas DataFrame

In [17]:
import pandas as pd

# pandas builds a DataFrame from a dictionary shaped like
# {"column name": [list of values]}, one entry per column.

frame_names = list(frames)
data = pd.DataFrame(dict(item=frame_names, price=list(frames.values())))

print(data)

                    item  price
0   AmazonBasics 8" x 10   9.84
1   Hap Tim 5x7 Picture   19.99
2   Scholartree Wooden P  19.95
3   RPJC 5x7 Picture Fra   7.99
4   AmazonBasics Photo P   9.99
5   Yome 11x14 Black Pic  19.99
6                           NaN
7   Giftgarden 8x10 Pict  25.49
8   SONGMICS Picture Fra  29.99
9   Emfogo 4x6 Picture F  13.99
10  Americanflat 11x14 B  14.95
11  8x10 Black Picture F  30.99
12  Love-KANKEI Wood Pic  15.99
13  Golden State Art, Go  38.99
14  BOICHEN 6 Pack 4x6 P  21.99
15  RPJC 5x7 Soild Wood   13.99
16  The Display Guys LUX  18.99
17  upsimples 5x7 Pictur  33.99
18  Scholartree Wooden G  21.95
19  Giftgarden Multi Pic  32.99
20  Umbra Prisma Picture  10.99
21  Collage Picture Fram  24.95
22  Americanflat 10-Piec  31.95
23  Americanflat 11x14 C  16.95
24  MELANNCO Customizabl  19.99
25  Homemaxs 12 Pack Pic  35.99
26  Giftgarden 5 by 7 In  14.99
27  Umbra Luna 9-Opening  37.87
28  Gallery Perfect 7 Pi  45.49
29  Egofine 8x10 Picture  27.49
30  LaVi