In [14]:
import csv
import requests
from bs4 import BeautifulSoup

# Scrape product details from a given URL
def scrape_product_details(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Get the product details
    product_url = url
    product_name = soup.find('span', {'class': 'a-size-medium a-color-base a-text-normal'}).text.strip()
    product_price = soup.find('span', {'class': 'a-offscreen'}).text.strip()
    rating = soup.find('span', {'class': 'a-icon-alt'}).text.strip().split(' ')[0]
    num_reviews = soup.find('span', {'id': 'acrCustomerReviewText'}).text.strip()

    # Return the scraped details as a dictionary
    return {
        'Product URL': product_url,
        'Product Name': product_name,
        'Product Price': product_price,
        'Rating': rating,
        'Number of Reviews': num_reviews
    }

# Scrape product description from a given URL
def scrape_product_description(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Get the product description details
    description = soup.find('div', {'id': 'productDescription'}).text.strip()
    asin = soup.find('th', text='ASIN').find_next('td').text.strip()
    product_description = soup.find('h2', text='Product Description').find_next('div').text.strip()
    manufacturer = soup.find('th', text='Manufacturer').find_next('td').text.strip()

    # Return the scraped details as a dictionary
    return {
        'Description': description,
        'ASIN': asin,
        'Product Description': product_description,
        'Manufacturer': manufacturer
    }

# Scrape multiple product pages
def scrape_product_pages(num_pages):
    all_products = []

    for page in range(1, num_pages + 1):
        url = f'https://www.amazon.in/s?k=bags&crid=2M096C61O4MLT&qid=1653308124&sprefix=ba%2Caps%2C283&ref=sr_pg_{page}'
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')

        # Find all product links on the page
        product_links = soup.find_all('a', {'class': 'a-link-normal a-text-normal'})

        for link in product_links:
            product_url = 'https://www.amazon.in' + link['href']
            product_details = scrape_product_details(product_url)
            product_details.update(scrape_product_description(product_url))
            all_products.append(product_details)

        print(f'Page {page} scraped.')

    return all_products



# Write scraped data to CSV file
def write_to_csv(data, filename):
    if not data:
        print('No data to write.')
        return

    keys = data[0].keys()

    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=keys)
        writer.writeheader()
        writer.writerows(data)

    print(f'Data written to {filename} successfully.')


# Scrape 20 pages of product listings
num_pages = 20
products = scrape_product_pages(num_pages)

# Write data to CSV file
filename = 'amazon_products.csv'
write_to_csv(products, filename)


Page 1 scraped.
Page 2 scraped.
Page 3 scraped.
Page 4 scraped.
Page 5 scraped.
Page 6 scraped.
Page 7 scraped.
Page 8 scraped.
Page 9 scraped.
Page 10 scraped.
Page 11 scraped.
Page 12 scraped.
Page 13 scraped.
Page 14 scraped.
Page 15 scraped.
Page 16 scraped.
Page 17 scraped.
Page 18 scraped.
Page 19 scraped.
Page 20 scraped.
No data to write.
