In [7]:
import csv
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

In [8]:
# Function to scrape product details from the listing page
def scrape_listing_page(url):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')

    products = []
    # Extract product details from each item on the page
    for item in soup.select('.s-result-item'):
        product_url = item.select_one('.a-link-normal.s-no-outline')['href']
        product_name = item.select_one('.a-size-medium.a-color-base.a-text-normal').text.strip()
        product_price = item.select_one('.a-price .a-offscreen').text.strip()
        rating_element = item.select_one('.a-icon-alt')
        rating = float(rating_element.text.split()[0]) if rating_element else None
        num_reviews_element = item.select_one('.a-size-base')
        num_reviews = int(num_reviews_element.text.replace(',', '').split()[0]) if num_reviews_element else None

        products.append({
            'Product URL': product_url,
            'Product Name': product_name,
            'Product Price': product_price,
            'Rating': rating,
            'Number of Reviews': num_reviews
        })
        print(products)

    return products

In [9]:
# Function to scrape additional product details from the product page
def scrape_product_page(url):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')

    asin = soup.select_one('[data-asin]')['data-asin']
    product_description = soup.select_one('#productTitle').text.strip()
    manufacturer = soup.select_one('#bylineInfo').text.strip()

    return {
        'ASIN': asin,
        'Description': product_description,
        'Product Description': product_description,
        'Manufacturer': manufacturer
    }

In [None]:
def main():
    base_url = "https://www.amazon.in/s?k=bags&crid=2M096C61O4MLT&qid=1653308124&sprefix=ba%2Caps%2C283&ref=sr_pg_{}"

    all_products = []
    total_pages = 20
    max_products = 200
    products_scraped = 0

    for page in range(1, total_pages + 1):
        if products_scraped >= max_products:
            break

        url = base_url.format(page)
        products = scrape_listing_page(url)
        all_products.extend(products)

        # Scrape additional details for each product
        for product in products:
            if products_scraped >= max_products:
                break

            product_url = product['Product URL']
            additional_info = scrape_product_page(product_url)
            product.update(additional_info)
            products_scraped += 1

            # Delay to avoid overwhelming the server
            time.sleep(1)

    # Export data to a CSV file
    with open('amazon_products.csv', 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['Product URL', 'Product Name', 'Product Price', 'Rating', 'Number of Reviews',
                      'ASIN', 'Description', 'Product Description', 'Manufacturer']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(all_products)

# Run the scraper only when this code is executed as the main program,
# not when it is imported as a module.
if __name__ == "__main__":
    main()