In [11]:
import csv
import requests
from bs4 import BeautifulSoup
import time

def scrape_product_listings(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    product_listings = []
    
    for product in soup.find_all('div', {'data-component-type': 's-search-result'}):
        product_url = 'https://www.amazon.in' + product.find('a', {'class': 'a-link-normal'})['href']
        product_name = product.find('span', {'class': 'a-size-medium'}).text.strip()
        product_price = product.find('span', {'class': 'a-offscreen'}).text.strip()
        product_rating = product.find('span', {'class': 'a-icon-alt'}).text.strip().split()[0]
        num_reviews = product.find('span', {'class': 'a-size-base'}).text.strip()
        
        product_listings.append({
            'Product URL': product_url,
            'Product Name': product_name,
            'Product Price': product_price,
            'Rating': product_rating,
            'Number of Reviews': num_reviews,
        })
    
    return product_listings

def scrape_multiple_pages(base_url, num_pages):
    all_product_listings = []
    for page_num in range(1, num_pages + 1):
        url = f"{base_url}&page={page_num}"
        product_listings = scrape_product_listings(url)
        all_product_listings.extend(product_listings)
        time.sleep(2)  
        
    return all_product_listings


base_url = 'https://www.amazon.in/s?k=bags&crid=2M096C61O4MLT&qid=1653308124&sprefix=ba%2Caps%2C283&ref=sr_pg_'
num_pages_to_scrape = 20

all_product_listings = scrape_multiple_pages(base_url, num_pages_to_scrape)

csv_filename = 'amazon_bags_data.csv'
with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile:
    fieldnames = ['Product URL', 'Product Name', 'Product Price', 'Rating', 'Number of Reviews']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(all_product_listings)

print("Product listings scraped and saved to CSV.")

Product listings scraped and saved to CSV.


In [12]:
import csv
import requests
from bs4 import BeautifulSoup
import time

def scrape_product_details(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    
    asin = soup.find('th', text='ASIN')
    if asin:
        asin = asin.find_next_sibling('td').text.strip()
    else:
        asin = 'N/A'
    
    description_meta = soup.find('meta', {'name': 'description'})
    if description_meta:
        description = description_meta['content'].strip()
    else:
        description = 'N/A'
    
    manufacturer_tag = soup.find('a', {'id': 'bylineInfo'})
    if manufacturer_tag:
        manufacturer = manufacturer_tag.text.strip()
    else:
        manufacturer = 'N/A'
    
    product_description_tag = soup.find('div', {'id': 'productDescription'})
    if product_description_tag:
        product_description = product_description_tag.text.strip()
    else:
        product_description = 'N/A'
    
    return {
        'Product URL': url,
        'ASIN': asin,
        'Description': description,
        'Manufacturer': manufacturer,
        'Product Description': product_description,
    }

def scrape_multiple_products(urls):
    products_info = []
    for url in urls:
        product_info = scrape_product_details(url)
        products_info.append(product_info)
        time.sleep(2)  # Be respectful by adding a delay between requests
            
    return products_info



product_urls = []
with open('amazon_bags_data.csv', 'r', encoding='utf-8') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        product_urls.append(row['Product URL'])

product_details = scrape_multiple_products(product_urls)

csv_filename_part2 = 'amazon_bags_additional_info.csv'
with open(csv_filename_part2, 'w', newline='', encoding='utf-8') as csvfile:
    fieldnames = ['Product URL', 'ASIN', 'Description', 'Manufacturer', 'Product Description']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(product_details)

print("Additional product information scraped and saved to CSV.")

  asin = soup.find('th', text='ASIN')


Additional product information scraped and saved to CSV.
