In [1]:
import requests
import csv
from bs4 import BeautifulSoup

def scrape_flipkart_products(base_url, start_page, end_page, timeout=30):
    """Collect ranked product links from a range of Flipkart search-result pages.

    Args:
        base_url: Search URL; ``page=<n>`` is appended to it for every page.
        start_page: First page number to fetch (inclusive).
        end_page: Last page number to fetch (inclusive).
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        A list of ``{"Rank": int, "Product URL": str}`` dicts in the order the
        links were found; ranks start at 1 and keep counting across pages.
        If a request fails, the error is printed and the links gathered from
        the pages fetched so far are returned (an empty list when the very
        first page fails).
    """
    from urllib.parse import urljoin

    # Headers to mimic a browser visit.
    # NOTE(review): "br" is advertised in Accept-Encoding; confirm the
    # environment can decode brotli responses, otherwise drop it.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        "Accept-Language": "en-US,en;q=0.9",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Connection": "keep-alive"
    }

    # Start the query string with "?" when the base URL does not have one yet.
    separator = "&" if "?" in base_url else "?"

    all_products = []
    current_rank = 1  # Ranks are 1-based and continue across pages

    # The session keeps cookies between pages; the context manager closes it.
    with requests.Session() as session:
        session.headers.update(headers)
        try:
            for page in range(start_page, end_page + 1):
                url = f"{base_url}{separator}page={page}"
                print(f"Scraping page {page}: {url}")
                # Without a timeout a stalled connection would block forever.
                response = session.get(url, timeout=timeout)
                response.raise_for_status()

                soup = BeautifulSoup(response.content, "html.parser")

                # Each "_75nlfW" div is a container; only its direct child
                # divs are inspected, taking the first link inside each.
                for parent_div in soup.find_all("div", class_="_75nlfW"):
                    for div in parent_div.find_all("div", recursive=False):
                        product_link = div.find("a")
                        if product_link and product_link.has_attr("href"):
                            # urljoin resolves relative hrefs against the site
                            # root and leaves absolute hrefs untouched.
                            product_url = urljoin("https://www.flipkart.com", product_link["href"])
                            all_products.append({"Rank": current_rank, "Product URL": product_url})
                            current_rank += 1
        except requests.exceptions.RequestException as e:
            # Keep what was already collected instead of discarding it.
            print("Error fetching the URL:", e)

    return all_products
    
def _text_or_none(soup, tag, css_class):
    """Return the stripped text of the first matching element, or None if absent."""
    element = soup.find(tag, class_=css_class)
    return element.get_text(strip=True) if element is not None else None


def _to_number(text, cast, default):
    """Convert text such as '₹1,299' or '1,234 ' with ``cast``; return ``default`` on failure."""
    cleaned = text.replace("₹", "").replace(",", "").strip()
    try:
        return cast(cleaned)
    except ValueError:
        return default


def scrape_product_details(product_url, rank, timeout=30):
    """Fetch one product page and extract its headline details.

    Args:
        product_url: Absolute URL of the product page.
        rank: Rank of the product; copied unchanged into the result.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A dict with the keys "Product name", "Brand name", "Price", "Rating",
        "Rating count", "Review count", "Rank" and "URL". Text fields that
        cannot be found are "N/A"; numeric fields that are missing or cannot
        be parsed are 0 (0.0 for the rating). If the request itself fails,
        the error is printed and an empty dict is returned.
    """
    # Headers to mimic a browser visit
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        "Accept-Language": "en-US,en;q=0.9",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Connection": "keep-alive"
    }

    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(product_url, headers=headers, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print("Error fetching the product page:", e)
        return {}

    soup = BeautifulSoup(response.content, "html.parser")

    name_text = _text_or_none(soup, "span", "VU-ZEz")
    product_name = name_text if name_text is not None else "N/A"

    rating = _to_number(_text_or_none(soup, "div", "XQDdHH") or "", float, 0.0)

    # The code expects a summary shaped like "<n> Ratings & <m> Reviews";
    # counts may contain thousands separators, so they are stripped before
    # conversion instead of being fed to int() directly.
    summary = _text_or_none(soup, "span", "Wphh3N") or ""
    if "Ratings" in summary:
        rating_count = _to_number(summary.partition("Ratings")[0], int, 0)
    else:
        rating_count = 0
    if "&" in summary:
        review_part = summary.partition("&")[2].partition("Reviews")[0]
        review_count = _to_number(review_part, int, 0)
    else:
        review_count = 0

    price = _to_number(_text_or_none(soup, "div", "Nx9bqj CxhGGd") or "", int, 0)

    # NOTE(review): the attrs={"def": ""} filter is kept from the original;
    # confirm it matches the intended table cell on real pages.
    brand_cell = soup.find("td", class_="Izz52n col col-9-12", attrs={"def": ""})
    brand_item = brand_cell.find("li", class_="HPETK2") if brand_cell else None
    brand = brand_item.get_text(strip=True) if brand_item else "N/A"

    return {
        "Product name": product_name,
        "Brand name": brand,
        "Price": price,
        "Rating": rating,
        "Rating count": rating_count,
        "Review count": review_count,
        "Rank": rank,
        "URL": product_url
    }

def save_to_csv(products, filename="product_data.csv"):
    """Write a list of product dicts to a UTF-8 CSV file with a header row.

    Args:
        products: Sequence of dicts, one per CSV row.
        filename: Path of the CSV file to create or overwrite.

    The header is the union of the keys of all rows, in first-seen order, so
    a row carrying a key the first row lacks no longer raises ``ValueError``;
    keys missing from a row are written as empty cells. For rows that all
    share the same keys the output is unchanged. An empty ``products`` still
    produces a file containing only an empty header line.
    """
    # dict.fromkeys de-duplicates the keys while preserving their order.
    fieldnames = list(dict.fromkeys(key for product in products for key in product))
    with open(filename, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(products)

# Search-results URL that the page number gets appended to
base_url = "https://www.flipkart.com/search?q=Smart+Lock&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=off&as=off"

# Step 1: gather the ranked product links from result pages 1 through 7
product_data = scrape_flipkart_products(base_url, 1, 7)

# Step 2: open every product page and keep only the non-empty results
all_product_details = []
for product in product_data:
    product_url, rank = product['Product URL'], product['Rank']
    details = scrape_product_details(product_url, rank)
    if not details:
        # An empty dict means the page could not be fetched; skip it
        continue
    all_product_details.append(details)

# Step 3: write everything that was collected to the default CSV file
save_to_csv(all_product_details)
print("Data has been saved to product_data.csv")

Scraping page 1: https://www.flipkart.com/search?q=Smart+Lock&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=off&as=off&page=1
Scraping page 2: https://www.flipkart.com/search?q=Smart+Lock&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=off&as=off&page=2
Scraping page 3: https://www.flipkart.com/search?q=Smart+Lock&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=off&as=off&page=3
Scraping page 4: https://www.flipkart.com/search?q=Smart+Lock&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=off&as=off&page=4
Scraping page 5: https://www.flipkart.com/search?q=Smart+Lock&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=off&as=off&page=5
Scraping page 6: https://www.flipkart.com/search?q=Smart+Lock&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=off&as=off&page=6
Scraping page 7: https://www.flipkart.com/search?q=Smart+Lock&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=off&as=off&page=7
Error 