In [65]:
import time
from urllib.parse import quote

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

In [67]:
def init_driver():
    """Build the Chrome WebDriver used by the scrapers.

    The browser is started with the ``--headless`` flag and a fixed desktop
    Chrome user-agent string.

    Returns:
        The ``webdriver.Chrome`` instance created from those options.
    """
    user_agent = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/115.0.0.0 Safari/537.36"
    )
    chrome_options = Options()
    # Flags are applied in the same order as before: headless first, then UA.
    for flag in ("--headless", f"user-agent={user_agent}"):
        chrome_options.add_argument(flag)
    return webdriver.Chrome(options=chrome_options)

In [69]:
def scrape_flipkart(driver, query, max_pages=2, page_load_wait=3):
    """Collect product name, price and rating from Flipkart search result pages.

    Args:
        driver: Browser driver exposing ``get(url)`` and ``page_source``
            (``main`` passes the object returned by ``init_driver``).
        query: Search text. It is percent-encoded before being placed in the
            URL; ``+`` is left untouched so callers that already joined words
            with ``+`` keep working.
        max_pages: Number of result pages to visit, starting at page 1.
        page_load_wait: Seconds to sleep after each navigation before the
            page HTML is read.

    Returns:
        A list of dicts with the keys ``'Source'``, ``'Product Name'``,
        ``'Price'`` and ``'Rating'``. Cards without a name or price element
        are skipped; a missing rating is reported as ``"No rating"``.
    """
    base_url = "https://www.flipkart.com/search?q="
    # Characters such as '&', '#', '=' or spaces would otherwise end or
    # corrupt the ``q`` parameter, so encode everything except '+'.
    encoded_query = quote(query, safe="+")
    products = []
    for page in range(1, max_pages + 1):
        driver.get(f"{base_url}{encoded_query}&page={page}")
        time.sleep(page_load_wait)  # Wait for page to load
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        # Selectors are hard-coded class names taken from the result markup.
        for card in soup.find_all('div', class_='_3pLy-c row'):
            name_tag = card.find('div', class_='_4rR01T')
            price_tag = card.find('div', class_='_30jeq3 _1_WHN1')
            if name_tag is None or price_tag is None:
                # Name and price are mandatory; skip incomplete cards.
                continue
            rating_tag = card.find('div', class_='_3LWZlK')
            rating = rating_tag.text.strip() if rating_tag is not None else "No rating"
            products.append({
                'Source': 'Flipkart',
                'Product Name': name_tag.text.strip(),
                'Price': price_tag.text.strip(),
                'Rating': rating
            })
    return products

In [71]:
def scrape_amazon(driver, query, max_pages=2, page_load_wait=3):
    """Collect product name, price and rating from Amazon.in search result pages.

    Args:
        driver: Browser driver exposing ``get(url)`` and ``page_source``
            (``main`` passes the object returned by ``init_driver``).
        query: Search text. It is percent-encoded before being placed in the
            URL; ``+`` is left untouched so callers that already joined words
            with ``+`` keep working.
        max_pages: Number of result pages to visit, starting at page 1.
        page_load_wait: Seconds to sleep after each navigation before the
            page HTML is read.

    Returns:
        A list of dicts with the keys ``'Source'``, ``'Product Name'``,
        ``'Price'`` and ``'Rating'``. Cards without an ``<h2>`` title are
        skipped. A missing price is reported as ``"Price not available"`` and
        a missing rating as ``"No rating"``.
    """
    base_url = "https://www.amazon.in/s?k="
    # Characters such as '&', '#', '=' or spaces would otherwise end or
    # corrupt the ``k`` parameter, so encode everything except '+'.
    encoded_query = quote(query, safe="+")
    products = []
    for page in range(1, max_pages + 1):
        driver.get(f"{base_url}{encoded_query}&page={page}")
        time.sleep(page_load_wait)  # Wait for page to load
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        for card in soup.find_all('div', {'data-component-type': 's-search-result'}):
            title_tag = card.h2
            if title_tag is None:
                # A result without a title cannot be identified; skip it.
                continue

            whole_tag = card.find('span', 'a-price-whole')
            fraction_tag = card.find('span', 'a-price-fraction')
            if whole_tag is None:
                price = "Price not available"
            elif fraction_tag is None:
                # Previously a price without a fraction span was discarded.
                # Keep the whole part; drop a dangling decimal separator in
                # case the markup includes one in the whole-part text.
                price = whole_tag.text.strip().rstrip('.')
            else:
                price = whole_tag.text + fraction_tag.text

            rating_tag = card.find('span', class_='a-icon-alt')
            # Only the first space-separated token of the rating text is kept.
            rating = rating_tag.text.split(' ')[0] if rating_tag is not None else "No rating"

            products.append({
                'Source': 'Amazon',
                'Product Name': title_tag.text.strip(),
                'Price': price,
                'Rating': rating
            })
    return products



In [None]:
def main():
    """Prompt for a product, scrape Flipkart and Amazon, and save the rows to Excel.

    The search text is read from standard input. Results from both sites are
    concatenated (Flipkart first) into one DataFrame and written to
    ``product_details_<query>.xlsx`` in the current working directory, where
    ``<query>`` is the search text with words joined by ``_``.

    If the input is empty or only whitespace, a message is printed and no
    browser is started. If a scraper raises, the browser is still closed and
    the exception propagates to the caller.
    """
    # split() drops leading/trailing whitespace and collapses inner runs, so
    # "oppo   mobile" becomes "oppo+mobile" rather than "oppo+++mobile".
    query = "+".join(input("Enter the product to search: ").split())
    if not query:
        print("No product entered; nothing to scrape.")
        return

    driver = init_driver()
    try:
        print("Scraping Flipkart...")
        flipkart_data = scrape_flipkart(driver, query)
        print("Scraping Amazon...")
        amazon_data = scrape_amazon(driver, query)
    finally:
        # Close the browser even when scraping fails, so no Chrome process
        # is left running.
        driver.quit()

    # Combine data
    all_products = flipkart_data + amazon_data
    # Save to Excel
    df = pd.DataFrame(all_products)
    filename = f"product_details_{query.replace('+', '_')}.xlsx"
    df.to_excel(filename, index=False)
    print(f"Data saved to {filename}")

# Run the scraper only when this code is executed as the top-level program.
if __name__ == "__main__":
    main()

Enter the product to search:  oppo mobile


Scraping Flipkart...


NameError: name 'all_products' is not defined