In [108]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
# Request headers sent with every page fetch.
# The User-Agent is assembled from adjacent string literals rather than
# backslash continuations inside a single literal: a backslash-newline inside
# a string keeps the next line's leading indentation, which would embed
# whitespace runs in the middle of the header value.
HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/90.0.4430.212 Safari/537.36'
    ),
    'Accept-Language': 'en-US, en;q=0.5',
}

In [109]:
def get_product_name(soup):
    """Return the product title, or '' when the page has no title element.

    Looks up the first ``<span id="productTitle">`` in ``soup`` and returns
    its text with surrounding whitespace removed.
    """
    title_span = soup.find('span', {'id': 'productTitle'})
    if not title_span:
        return ''
    return title_span.text.strip()

def get_product_price(soup):
    """Return the displayed price text, or '' when no price span is found.

    Uses the first ``<span class="a-offscreen">`` in ``soup``. An earlier
    version of this lookup targeted ``id="priceblock_ourprice"`` instead.
    """
    offscreen_span = soup.find("span", attrs={'class': 'a-offscreen'})
    if offscreen_span:
        return offscreen_span.text.strip()
    return ''

def get_product_rating(soup):
    """Return the star-rating text, or '' when the rating span is absent.

    Reads the first ``<span class="a-icon-alt">`` in ``soup`` and strips
    surrounding whitespace from its text.
    """
    icon_alt = soup.find('span', {'class': 'a-icon-alt'})
    if icon_alt:
        rating_text = icon_alt.text.strip()
    else:
        rating_text = ''
    return rating_text

def get_review_count(soup):
    """Return the review-count text, or '' when the element is missing.

    Reads ``<span id="acrCustomerReviewText">`` from ``soup`` and returns its
    whitespace-stripped text unchanged (no numeric parsing is done here).
    """
    reviews_span = soup.find('span', {'id': 'acrCustomerReviewText'})
    if not reviews_span:
        return ''
    return reviews_span.text.strip()

def get_product_description(soup):
    """Return the product description text, or '' when the page has none.

    Reads ``<div id="productDescription">`` from ``soup`` and strips
    surrounding whitespace from its text.
    """
    description_div = soup.find('div', {'id': 'productDescription'})
    if description_div:
        description_text = description_div.text.strip()
    else:
        description_text = ''
    return description_text

def get_asin(soup):
    """Return the ASIN from the product details table, or '' if unavailable.

    Finds the ``<th>`` whose string is exactly ``' ASIN '`` and reads the next
    ``<td>`` after it. Returns '' when either the header or the value cell is
    missing, rather than raising.

    The value is stripped of surrounding whitespace and of Unicode
    left-to-right / right-to-left marks (U+200E / U+200F), which
    ``str.strip()`` alone leaves in place.
    """
    asin_header = soup.find('th', string=' ASIN ')
    if not asin_header:
        return ''

    value_cell = asin_header.find_next('td')
    if value_cell is None:
        # Header present but no following cell: treat as "not available".
        return ''

    # Strip whitespace, then directional marks, then any whitespace that was
    # sitting between a mark and the actual value.
    return value_cell.text.strip().strip('\u200e\u200f').strip()

def get_manufacturer(soup):
    """Return the manufacturer from the product details table, or ''.

    Finds the ``<th>`` whose string is exactly ``' Manufacturer '`` and reads
    the next ``<td>`` after it. Returns '' when either the header or the value
    cell is missing, rather than raising.

    The value is stripped of surrounding whitespace and of Unicode
    left-to-right / right-to-left marks (U+200E / U+200F). ``str.strip()``
    does not remove those marks, so without this step an invisible character
    can be left at the start of the stored name.
    """
    manufacturer_header = soup.find('th', string=' Manufacturer ')
    if not manufacturer_header:
        return ''

    value_cell = manufacturer_header.find_next('td')
    if value_cell is None:
        # Header present but no following cell: treat as "not available".
        return ''

    # Strip whitespace, then directional marks, then any whitespace that was
    # sitting between a mark and the actual value.
    return value_cell.text.strip().strip('\u200e\u200f').strip()

In [110]:
def scrape_product_details(url, timeout=30):
    """Fetch one product page and extract its fields into a flat dict.

    Args:
        url: Product page URL to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        dict with the keys 'Product URL', 'Product Name', 'Product Price',
        'Rating', 'Number of reviews', 'Description', 'ASIN',
        'Product Description' and 'Manufacturer'. Fields the page does not
        contain come back as ''.

    Raises:
        requests.exceptions.RequestException: if the request fails or times
            out. The HTTP status code is not checked, so an error page is
            parsed like any other and simply yields empty fields.
    """
    response = requests.get(url, headers=HEADERS, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')

    # 'Description' and 'Product Description' hold the same value; extract it
    # once and reuse it instead of searching the document twice.
    description = get_product_description(soup)

    return {
        'Product URL': url,
        'Product Name': get_product_name(soup),
        'Product Price': get_product_price(soup),
        'Rating': get_product_rating(soup),
        'Number of reviews': get_review_count(soup),
        'Description': description,
        'ASIN': get_asin(soup),
        'Product Description': description,
        'Manufacturer': get_manufacturer(soup),
    }

In [111]:
def scrape_multiple_pages(url, num_pages, timeout=30):
    """Walk search-result pages and scrape every product linked from them.

    Args:
        url: Search-results URL. ``&page=N`` is appended to it, so it must
            already contain a query string.
        num_pages: Number of result pages to visit, starting at page 1.
        timeout: Seconds to wait for each search-results request before
            giving up, so a stalled connection cannot hang the run.

    Returns:
        list of dicts, one per product, as returned by
        ``scrape_product_details``, in the order encountered.

    Raises:
        requests.exceptions.RequestException: if a request fails or times out.
    """
    all_product_details = []

    for page in range(1, num_pages + 1):
        page_url = f'{url}&page={page}'
        print(f"Page {page}: {page_url}")
        response = requests.get(page_url, headers=HEADERS, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')
        products = soup.find_all('div', {'data-component-type': 's-search-result'})

        # The index is the card's position on the page; skipped cards still
        # consume a number so the log lines up with the page layout.
        for index, product in enumerate(products, start=1):
            link = product.find('a', {'class': 'a-link-normal s-no-outline'})
            href = link.get('href') if link else None
            if not href:
                # A result card without the expected anchor or href would
                # otherwise raise and abort the whole run; skip it instead.
                continue

            product_url = 'https://amazon.com' + href
            print(f"Product {index}: {product_url}")
            product_details = scrape_product_details(product_url)
            print(product_details)
            all_product_details.append(product_details)

    return all_product_details

In [112]:
if __name__ == '__main__':
    # Run configuration. num_urls_to_scrape is not read by any code visible
    # in this notebook section; it is kept because it is a notebook-level name.
    num_urls_to_scrape = 200
    num_pages_to_scrape = 6
    base_url = "https://www.amazon.com/s?k=bags&crid=2M096C61O4MLT&qid=1653308124&sprefix=ba%2Caps%2C283&ref=sr_pg_1"

    # Scrape every result page, tabulate the rows, and persist them as CSV
    # without the DataFrame index column.
    scraped_data = scrape_multiple_pages(url=base_url, num_pages=num_pages_to_scrape)
    df = pd.DataFrame(data=scraped_data)
    df.to_csv(path_or_buf='amazon_product_data.csv', index=False)

1: https://www.amazon.com/s?k=bags&crid=2M096C61O4MLT&qid=1653308124&sprefix=ba%2Caps%2C283&ref=sr_pg_1&page=1
1: https://amazon.com/gp/slredirect/picassoRedirect.html/ref=pa_sp_atf_aps_sr_pg1_1?ie=UTF8&adId=A0490932DVMDIPT5S2NT&qualifier=1685105477&id=6823567326512151&widgetName=sp_atf&url=%2FHaiquan-Handles-Shopping-Packaging-Recycled%25EF%25BC%2588Purple%25EF%25BC%2589%2Fdp%2FB09MPM5RZP%2Fref%3Dsr_1_1_sspa%3Fcrid%3D2M096C61O4MLT%26keywords%3Dbags%26qid%3D1685105477%26sprefix%3Dba%252Caps%252C283%26sr%3D8-1-spons%26psc%3D1
{'Product URL': 'https://amazon.com/gp/slredirect/picassoRedirect.html/ref=pa_sp_atf_aps_sr_pg1_1?ie=UTF8&adId=A0490932DVMDIPT5S2NT&qualifier=1685105477&id=6823567326512151&widgetName=sp_atf&url=%2FHaiquan-Handles-Shopping-Packaging-Recycled%25EF%25BC%2588Purple%25EF%25BC%2589%2Fdp%2FB09MPM5RZP%2Fref%3Dsr_1_1_sspa%3Fcrid%3D2M096C61O4MLT%26keywords%3Dbags%26qid%3D1685105477%26sprefix%3Dba%252Caps%252C283%26sr%3D8-1-spons%26psc%3D1', 'Product Name': 'HaiQuan Purple G

In [113]:
# Show the scraped product table as the cell's rich output.
df

Unnamed: 0,Product URL,Product Name,Product Price,Rating,Number of reviews,Description,ASIN,Product Description,Manufacturer
0,https://amazon.com/gp/slredirect/picassoRedire...,HaiQuan Purple Gift Bags Kraft Paper Bags 50Pc...,$17.99,4.5 out of 5 stars,40 ratings,,B09MPM5RZP,,
1,https://amazon.com/TOMNK-Handles-Perfect-Shopp...,TOMNK 90pcs Brown Paper Bags with Handles Asso...,$48.99,4.8 out of 5 stars,"1,807 ratings",,B096RY5NMK,,
2,https://amazon.com/Lululemon-Everywhere-Belt-B...,"Lululemon Everywhere Belt Bag, (LU9B78S)",$36.39,4.7 out of 5 stars,737 ratings,"Lululemon Everywhere Belt Bag, (LU9B78S)",B0B94RMN17,"Lululemon Everywhere Belt Bag, (LU9B78S)",
3,https://amazon.com/Concession-Essentials-Thank...,Concession Essentials Thank You Bags Pack of 3...,$15.59,4.7 out of 5 stars,"3,731 ratings","Thank you bags measure 11.5"" X 6.25"" X 21"" - 0...",B093TLNBSG,"Thank you bags measure 11.5"" X 6.25"" X 21"" - 0...",Concession Essentials
4,https://amazon.com/Puma-Evercat-Runway-Duffel-...,PUMA Evercat Form Factor Duffel Bag,$19.53,4.7 out of 5 stars,"3,442 ratings",On the go with unlimited storage is just what ...,,On the go with unlimited storage is just what ...,
...,...,...,...,...,...,...,...,...,...
385,https://amazon.com/SFXULIX-Capacity-Travel-Cos...,SFXULIX Large Capacity Travel Cosmetic Bag - M...,$23.99,4.2 out of 5 stars,571 ratings,,B0BMWV2QSR,,
386,https://amazon.com/Igloo-16-Can-Compartment-In...,Igloo 16-Can Softsided Insulated Lunch Box Gri...,$29.74,4.5 out of 5 stars,"2,077 ratings",,B09SGR37TX,,‎Igloo
387,https://amazon.com/SUPERBIO-Compostable-Garbag...,SUPERBIO 1.6 Gallon Compostable Handle Tie Gar...,$15.95,4.7 out of 5 stars,399 ratings,,B0B1L4BSJT,,
388,https://amazon.com/LSxia-Chenille-Cosmetic-Wat...,LSxia Chenille Letter Clear Travel Zipper Pouc...,$11.99,4.6 out of 5 stars,76 ratings,SPECIFICATIONS Package Includes: 1 x Clear tra...,B0B75GVTCK,SPECIFICATIONS Package Includes: 1 x Clear tra...,LSxia
