Single Page

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from fake_useragent import UserAgent

def scrape_amazon_product_data(url, headers, timeout=30):
    """Download one search-results page and extract product names and prices.

    Args:
        url: Address of the results page to fetch.
        headers: HTTP headers sent with the request (e.g. a User-Agent).
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        A list of ``[name, price]`` string pairs, where ``"N/A"`` replaces
        whichever single field is missing, or ``None`` when the results
        container is not found in the returned HTML.

    Raises:
        requests.HTTPError: If the server responds with an error status
            (raised by ``response.raise_for_status()``).
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")

    results = soup.find("div", {"class": "s-main-slot s-result-list s-search-results sg-row"})
    if not results:
        return None

    product_data = []
    for item in results.find_all("div", {"class": "sg-col-inner"}):
        name = item.find("span", {"class": "a-size-base-plus"})
        price = item.find("span", {"class": "a-price-whole"})

        # A container with neither a name nor a price is a layout wrapper,
        # not a product; recording it would only add "N/A, N/A" rows and
        # inflate the item count.
        if not name and not price:
            continue

        # Handle partially missing data
        name_text = name.text.strip() if name else "N/A"
        price_text = price.text.strip() if price else "N/A"

        product_data.append([name_text, price_text])

    return product_data

def main():
    """Scrape the category listing page and save the results as a CSV file.

    Builds request headers with a Chrome User-Agent string from
    ``fake_useragent``, calls ``scrape_amazon_product_data`` for the listing
    URL and, when rows come back, prints the row count, writes them to a CSV
    in the working directory and prints the path that was written. When the
    scraper returns nothing, a notice is printed and no file is created.
    """
    url = "https://www.amazon.in/s?rh=n%3A1388921031&fs=true&ref=lp_1388921031_sar"
    # Single source of truth for the output path, so the confirmation message
    # can never disagree with the file that is actually written. The on-disk
    # name is kept unchanged to avoid breaking anything that reads it.
    output_path = 'amazon_products_headphone.csv'

    ua = UserAgent()
    headers = {'User-Agent': ua.chrome}
    product_data = scrape_amazon_product_data(url, headers)

    if not product_data:
        print("No results found on the page.")
        return

    # Create a DataFrame using Pandas
    df = pd.DataFrame(product_data, columns=['Name', 'Price'])

    print(f"Total items found: {len(df)}")

    # Save the DataFrame to a CSV file
    df.to_csv(output_path, index=False, encoding='utf-8')

    print(f"Data has been saved to {output_path}")

# Entry point: run the single-page scrape when this cell/script is executed directly.
if __name__ == "__main__":
    main()


HTTPError: 503 Server Error: Service Unavailable for url: https://www.amazon.in/s?rh=n%3A1388921031&fs=true&ref=lp_1388921031_sar

Multi Page

In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from fake_useragent import UserAgent

def scrape_amazon_product_data(url, headers, num_pages, timeout=30):
    """Download several search-results pages and extract names and prices.

    Pages ``1`` through ``num_pages`` are requested in order by appending
    ``&page=<n>`` to ``url``, so ``url`` is expected to already contain a
    query string.

    Args:
        url: Base address of the results listing.
        headers: HTTP headers sent with every request (e.g. a User-Agent).
        num_pages: Number of consecutive pages to fetch, starting at page 1.
        timeout: Seconds to wait for the server on each request before giving
            up, so that a stalled connection cannot hang the notebook.

    Returns:
        A list of ``[name, price]`` string pairs gathered from all pages,
        where ``"N/A"`` replaces whichever single field is missing. Pages
        without a results container contribute nothing; the list is empty
        when no page yields a product.

    Raises:
        requests.HTTPError: If any page responds with an error status
            (raised by ``response.raise_for_status()``).
    """
    all_product_data = []

    for page in range(1, num_pages + 1):
        current_url = f"{url}&page={page}"
        response = requests.get(current_url, headers=headers, timeout=timeout)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")

        results = soup.find("div", {"class": "s-main-slot s-result-list s-search-results sg-row"})
        if not results:
            # No results container on this page; move on to the next one.
            continue

        for item in results.find_all("div", {"class": "sg-col-inner"}):
            name = item.find("span", {"class": "a-size-base-plus"})
            price = item.find("span", {"class": "a-price-whole"})

            # A container with neither a name nor a price is a layout
            # wrapper, not a product; recording it would only add
            # "N/A, N/A" rows and inflate the item count.
            if not name and not price:
                continue

            # Handle partially missing data
            name_text = name.text.strip() if name else "N/A"
            price_text = price.text.strip() if price else "N/A"

            all_product_data.append([name_text, price_text])

    return all_product_data

def main():
    """Scrape several pages of the category listing and export them to CSV.

    Requests the first ``num_pages`` result pages through
    ``scrape_amazon_product_data`` using a Chrome User-Agent string from
    ``fake_useragent``. When rows come back, prints how many were collected
    and writes them to ``amazon_products_headphones_multi.csv`` in the
    working directory; otherwise prints a notice and writes nothing.
    """
    num_pages = 5  # You can change this to the number of pages you want to scrape
    url = "https://www.amazon.in/s?rh=n%3A1388921031&fs=true&ref=lp_1388921031_sar"
    headers = {'User-Agent': UserAgent().chrome}

    rows = scrape_amazon_product_data(url, headers, num_pages)

    # Nothing scraped: report it and stop before touching the filesystem.
    if not rows:
        print("No results found on the page.")
        return

    # Tabulate the scraped pairs under explicit column names.
    table = pd.DataFrame(rows, columns=['Name', 'Price'])
    print(f"Total items found: {len(table)}")

    # Persist the table without the index column.
    table.to_csv('amazon_products_headphones_multi.csv', index=False, encoding='utf-8')
    print("Data has been saved to amazon_products_headphones_multi.csv")

# Entry point: run the multi-page scrape when this cell/script is executed directly.
if __name__ == "__main__":
    main()


HTTPError: 503 Server Error: Service Unavailable for url: https://www.amazon.in/s?rh=n%3A1388921031&fs=true&ref=lp_1388921031_sar&page=1