In [None]:
# Importing necessary libraries
import csv
import random
import time
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [None]:
# Module-level accumulators for the scraped fields; one independent list per column
product_names, prices, ratings, sellers = [], [], [], []

# Browser-like User-Agent header passed along with each HTTP request
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36',
}

In [None]:
# Function to scrape product details from a given url
def scrape_products(url, timeout=10):
    """Scrape one search-results page and record every in-stock product.

    For each result card on the page at ``url`` the product name, rating and
    price are read from the card itself; the card's link is then followed to
    the product page to look up the seller. The four values are appended to
    the module-level lists ``product_names``, ``ratings``, ``prices`` and
    ``sellers``.

    Cards without a name or link, product pages that cannot be fetched, and
    products whose page contains an ``outOfStock`` element are skipped.

    Args:
        url: Address of the search-results page to scrape.
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        None. Results are appended to the module-level lists.

    Raises:
        requests.RequestException: If the search-results page itself cannot
            be fetched.
    """
    # Fetch and parse the search-results page
    response = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.content, "html.parser")
    product_items = soup.find_all('div', {'data-component-type': 's-search-result'})

    for item in product_items:
        name_tag = item.find('span', {'class': 'a-size-base-plus a-color-base a-text-normal'})
        link_tag = item.find('a', {'class': 'a-link-normal s-underline-text s-underline-link-text s-link-style a-text-normal'})

        # find() returns None when the markup differs; skip such cards
        # instead of failing on the attribute access below.
        if name_tag is None or link_tag is None or not link_tag.get('href'):
            continue
        product_name = name_tag.text.strip()

        # Rating and price live on the card; either may be absent
        rating_tag = item.find('span', {'class': 'a-icon-alt'})
        rating = rating_tag.text.strip() if rating_tag is not None else "N/A"

        price_tag = item.find('span', {'class': 'a-price-whole'})
        price = price_tag.text.strip() if price_tag is not None else "N/A"

        # urljoin copes with both site-relative and already-absolute hrefs
        link = urljoin('https://www.amazon.in', link_tag['href'])

        # A single unreachable product page should not abort the whole crawl
        try:
            product_response = requests.get(link, headers=headers, timeout=timeout)
        except requests.RequestException as exc:
            print(f"Skipping {link}: {exc}")
            continue
        soup1 = BeautifulSoup(product_response.content, "html.parser")

        # Out-of-stock products are not recorded
        if soup1.find('div', {'id': 'outOfStock'}) is not None:
            continue

        # Seller name is the text of the first link inside the merchant-info box
        merchant_info = soup1.find('div', {'id': 'merchant-info'})
        seller_link = merchant_info.find('a') if merchant_info is not None else None
        seller_name = seller_link.text.strip() if seller_link is not None else "N/A"

        # Append all four together so the lists stay the same length
        if product_name and rating and price and seller_name:
            product_names.append(product_name)
            ratings.append(rating)
            prices.append(price)
            sellers.append(seller_name)

In [None]:
# Starting point of the crawl: page one of the search results
url = "https://www.amazon.in/s?rh=n%3A6612025031&fs=true&ref=lp_6612025031_sar"

# Collect the products listed on that page into the module-level lists
scrape_products(url=url)

In [None]:
# Fetch the first results page again to read its pagination control
response = requests.get(url, headers=headers, timeout=10)
soup = BeautifulSoup(response.content, "html.parser")

# The total page count is read from the span carrying exactly this class
# string (presumably the last-page marker in the pagination strip; verify
# against the live markup). It is looked up once, and is None when the
# listing has no such span, e.g. a single page of results.
pagination = soup.find('span', {'class': 's-pagination-item s-pagination-disabled'})
last_page_text = pagination.text.strip() if pagination is not None else ""

# Only continue when the span holds a plain number; otherwise there is
# nothing further to scrape and int() would fail.
if last_page_text.isdecimal():
    page_count = int(last_page_text)

    # Page 1 was already scraped; walk the remaining pages
    for page_number in range(2, page_count + 1):
        page_url = url + f"&page={page_number}"
        scrape_products(page_url)

In [None]:
# Keep at most the first 1947 entries of every column
_row_cap = slice(None, 1947)

# Column label -> source list, in the order the columns appear in the output
_column_sources = (
    ('Product Name', product_names),
    ('Price', prices),
    ('Rating', ratings),
    ('Seller Name', sellers),
)

# Assemble the table from the capped lists
data = {label: values[_row_cap] for label, values in _column_sources}
df = pd.DataFrame(data)

# Write the table, including its index column, to disk as CSV
df.to_csv('amazon_products.csv', index=True)