In [1]:
# Importing libraries
import csv
from bs4 import BeautifulSoup
from selenium import webdriver

In [2]:
# Startup webdriver
driver = webdriver.Chrome()

In [3]:
# Connecting and Pulling search result
url = 'https://www.amazon.com/'
driver.get(url)

In [4]:
def get_url(search_term):
    """"Generate a url from term"""
    template = 'https://www.amazon.com/s?k={}&ref=nb_sb_noss_2'
    search_term = search_term.replace(' ','+')
    return template.format(search_term)

In [5]:
url = get_url('mouse')
print(url)

https://www.amazon.com/s?k=mouse&ref=nb_sb_noss_2


In [6]:
driver.get(url)

In [7]:
soup = BeautifulSoup(driver.page_source, 'html.parser')
results = soup.find_all('div', {'data-component-type': 's-search-result'})

In [8]:
# Initializing the records list outside the loop
record = []

In [9]:
len(results)

22

In [10]:
# Pulling description
item = results[0]
atag = item.h2.a
atag.text.strip()

'Amazon Basics Wireless Computer Mouse with USB Nano Receiver - Black'

In [11]:
description = atag.text.strip()

In [12]:
# Pulling URL
atag.get('href')

'/sspa/click?ie=UTF8&spc=MTo4NTk1MTI0OTIzMDI1NTk1OjE2OTI0NDY5MTA6c3BfYXRmOjIwMDAxMDUxMDcyNTkzMTo6MDo6&url=%2FAmazonBasics-Wireless-Computer-Mouse-Receiver%2Fdp%2FB005EJH6Z4%2Fref%3Dsr_1_1_ffob_sspa%3Fkeywords%3Dmouse%26qid%3D1692446910%26sr%3D8-1-spons%26sp_csd%3Dd2lkZ2V0TmFtZT1zcF9hdGY%26psc%3D1'

In [13]:
url = 'https://www.amazon.com' + atag.get('href')

In [14]:
# Pulling price
price_parent = item.find('span', 'a-price')
price_parent.find('span', 'a-offscreen')

<span class="a-offscreen">$9.99</span>

In [15]:
price = price_parent.find('span', 'a-offscreen').text

In [16]:
# Pulling rating
item.i.text

'4.5 out of 5 stars'

In [17]:
rating = item.i.text

In [18]:
# Timastamp to show when data was extracted
import datetime

today =datetime.date.today()

print(today)

2023-08-19


## Combining all columns and extracting multiple data

In [19]:
def get_url(search_term):
    """Generate a url from term"""
    template = 'https://www.amazon.com/s?k={}&ref=nb_sb_noss_2'
    search_term = search_term.replace(' ','+')
    
    # add term queryto url
    url = template.format(search_term)
    
    # add page query placeholder
    url += '&page{}'
    
    return url

def extract_record(item):
    """Extract and return data from a single record"""

    # description and url
    atag = item.h2.a
    description = atag.text.strip()
    url = 'https://www.amazon.com' + atag.get('href')

    # price
    price_parent = item.find('span', 'a-price')
    if price_parent:
        price = price_parent.find('span', 'a-offscreen').text
    else:
        price = 'N/A'

    # rank and timestamp
    rating = item.i.text
    
    today = datetime.date.today()
    
    
        
    result =(description, price, rating, today, url)

    return result



        

In [20]:
records = []

In [21]:
 def main(search_term):
    """Run main program routine"""
    # startup the webdriver
    driver = webdriver.Chrome()
    
 
    url = get_url(search_term)
    
    for page in range(1, 23):
        driver.get(url.format(page))
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        results = soup.find_all('div', {'data-component-type': 's-search-result'})
    for item in results:
            record = extract_record(item)
            if record:
                records.append(record)
                
    driver.close()
    
    # Saving data to csv file
    with open('Amazonwebscrapresult.csv','w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Description', 'Price', 'Rating', 'Time','Url'])
        writer.writerows(records)

In [22]:
main('mouse')

In [23]:
# Viewing Results

import pandas as pd

df = pd.read_csv(r'C:\Users\unjoku\Amazonwebscrapresult.csv')

print(df)

                                          Description   Price  \
0   E-YOOSO Wireless Mouse, Computer Mouse 18 Mont...  $11.99   
1   Trueque Wireless Mouse for Laptop, 2.4GHz Ergo...  $11.99   
2   TECKNET Wireless Mouse, 2.4G Ergonomic Optical...   $9.59   
3   Razer DeathAdder Essential Gaming Mouse: 6400 ...  $24.99   
4   Logitech M705 Marathon Wireless Mouse, 2.4 GHz...  $34.80   
5   Logitech MX Master 2S Wireless Mouse â€“ Use on ...  $66.99   
6   TedGem Wireless Mouse for Laptop, 2.4GHz Porta...   $8.99   
7   Logitech M185 Wireless Mouse, 2.4GHz with USB ...  $13.92   
8   Glorious Gaming - Model O Wireless RGB Mouse w...  $79.99   
9   Lizsword Wired Gaming Mouse, PC Gaming Mice [B...  $14.98   
10  Logitech MX Master 3S - Wireless Performance M...  $91.98   
11  Wireless Keyboard Mouse Combo, RaceGT Energy S...  $19.99   
12  Vssoplor Wireless Mouse, 2.4G Slim Portable Co...  $10.97   
13  HOTWEEMS Wireless D-09 Computer Mouse USB Cord...   $7.89   
14  TECKNET Bluetooth W