In [1]:
import requests
from bs4 import BeautifulSoup
import csv
import pandas as pd

url_template = 'https://www.amazon.in/s?k=bags&crid=2M096C61O4MLT&qid=1653308124&sprefix=ba%2Caps%2C283&ref=sr_pg_{}'
product_data = []
header = ['Product URL', 'Product Name', 'Product Price', 'Rating', 'Number of reviews']
for page_num in range(1, 21):
    url = url_template.format(page_num)
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    
    products = soup.find_all('div', {'data-component-type': 's-search-result'})
    
    for product in products:
        product_url = 'https://www.amazon.in' + product.find('a', {'class': 'a-link-normal'})['href']
        product_name = product.find('h2', {'class': 'a-size-mini'}).text.strip()
        product_price = product.find('span', {'class': 'a-price-whole'}).text.strip()
        
        rating_element = product.find('span', {'class': 'a-icon-alt'})
        if rating_element is not None:
            rating = rating_element.text.split()[0]
        else:
            rating = None
        
        num_reviews_element = product.find('span', {'class': 'a-size-base'})
        if num_reviews_element is not None:
            num_reviews = num_reviews_element.text.strip()
        else:
            num_reviews = None
        
        product_data.append({
            'Product URL': product_url,
            'Product Name': product_name,
            'Product Price': product_price,
            'Rating': rating,
            'Number of reviews': num_reviews
        })
        
# scrape additional product details for each product URL
product_details = []
for product in product_data:
    response = requests.get(product['Product URL'])
    soup = BeautifulSoup(response.content, 'html.parser')
    
    product_desc_element = soup.find('div', {'id': 'productDescription'})
    if product_desc_element is not None:
        product_desc = product_desc_element.text.strip()
    else:
        product_desc = None
    
    asin_element = soup.find('th', text='ASIN')
    if asin_element is not None:
        asin = asin_element.find_next('td').text.strip()
    else:
        asin = None
    
    manufacturer_element = soup.find('th', text='Manufacturer')
    if manufacturer_element is not None:
        manufacturer = manufacturer_element.find_next('td').text.strip()
    else:
        manufacturer = None
    
    product_details.append({
        'Product URL': product['Product URL'],
        'Product Name': product['Product Name'],
        'Product Price': product['Product Price'],
        'Rating': product['Rating'],
        'Number of reviews': product['Number of reviews'],
        'Description': product_desc,
        'ASIN': asin,
        'Manufacturer': manufacturer
    })

# write data to csv
with open('product_data.csv', mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=header+['Description', 'ASIN', 'Manufacturer'])
    writer.writeheader()
    for product in product_details:
        writer.writerow(product)


In [2]:
df=pd.read_csv("product_data.csv")

In [4]:
df.head()

Unnamed: 0,Product URL,Product Name,Product Price,Rating,Number of reviews,Description,ASIN,Manufacturer
0,https://www.amazon.in/gp/bestsellers/luggage/2...,American Tourister 32 Ltrs Black Casual Backpa...,1199,4.1,4.1,,,
1,https://www.amazon.in/Skybags-Brat-Black-Casua...,Skybags Brat Black 46 Cms Casual Backpack,659,4.1,4.1,,,
2,https://www.amazon.in/gp/bestsellers/computers...,Wesley Milestone 2.0 Casual Waterproof Laptop ...,565,4.3,4.3,,,
3,https://www.amazon.in/Lavie-Sport-Duffle-Lugga...,Lavie Sport Lino Large Size 63 cms Wheel Duffl...,949,3.9,3.9,,,
4,https://www.amazon.in/ADISA-Laptop-Backpack-Of...,ADISA 15.6 inch Laptop Backpack Office Bag Col...,499,3.8,3.8,,,
