In [1]:
# Import necessary libraries for web scraping and data handling

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time
import random
import socket

In [2]:
# Define a function to set a socket timeout (prevents hanging during requests)
# Function to set a timeout

def timeout_handler():
    socket.setdefaulttimeout(10)  # Set timeout to 10 seconds

# Call the timeout handler

timeout_handler()

In [3]:
# Set up Chrome options to behave like a regular browser and bypass detection
# Set up Chrome options to mimic real browsing behavior

options = webdriver.ChromeOptions()
options.add_argument("start-maximized")  # Start browser maximized
options.add_argument("disable-infobars")
options.add_argument("--disable-extensions")
options.add_argument("--disable-blink-features=AutomationControlled")  # Disable detection of Selenium
options.add_argument("--disable-gpu")  # Disable GPU acceleration
options.add_argument("--no-sandbox")  # Bypass OS security model
options.add_argument("--window-size=1920,1080")  # Set window size to desktop dimensions
options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36")  # Firefox User-Agent
# options.add_argument("--headless")  # Uncomment to run in headless mode

In [4]:
# Initialize Chrome WebDriver using the ChromeDriverManager
# Set up Selenium WebDriver

service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=options)

In [5]:
# Open the Flipkart website and handle any login/signup pop-up if it appears
# Open the Flipkart website with the desired URL (initial page)

url = 'https://www.flipkart.com/search?q=mobiles+under+50000&as=on&as-show=on&otracker=AS_Query_HistoryAutoSuggest_1_9_na_na_na&otracker1=AS_Query_HistoryAutoSuggest_1_9_na_na_na&as-pos=1&as-type=HISTORY&suggestionId=mobiles+under+50000&requestId=478da6aa-6536-430e-b8d7-d62ea70dd978'
driver.get(url)

# Handle login/signup pop-up if it appears

try:
    close_popup = driver.find_element("xpath", "//button[contains(text(), '✕')]")
    close_popup.click()
    print("Login pop-up closed successfully.")
except:
    print("No login pop-up found.")

# Wait for the page to load fully

time.sleep(5)

No login pop-up found.


In [6]:
# Create lists for the DataFrame

product_name = []
prices = []
description = []
ratings = []

In [7]:
# Open the Flipkart website and handle any login/signup pop-up if it appears
# Loop through pages 2 to 11

for i in range(2, 12):
    
    # Open the URL for the desired page
    
    url = f'https://www.flipkart.com/search?q=mobiles+under+50000&as=on&as-show=on&otracker=AS_Query_HistoryAutoSuggest_1_9_na_na_na&otracker1=AS_Query_HistoryAutoSuggest_1_9_na_na_na&as-pos=1&as-type=HISTORY&suggestionId=mobiles+under+50000&requestId=478da6aa-6536-430e-b8d7-d62ea70dd978&as-searchtext=mobiles+u&page={i}'
    driver.get(url)
    
    # Wait for the page to load
    
    time.sleep(random.uniform(3, 6))  # Random sleep to mimic human behavior
    
    # Get the page source after the full page loads
    
    page_source = driver.page_source

    # Parse the page source with BeautifulSoup
    
    soup = BeautifulSoup(page_source, 'lxml')
    
    box = soup.find('div', class_ = 'DOjaWF gdgoEp')
    
    if box:  # Only proceed if 'box' is found
        names = box.find_all('div', class_='KzDlHZ')
        for i in names:
            name = i.text
            product_name.append(name)

        price = box.find_all('div', class_='Nx9bqj _4b5DiR')
        for i in price:
            name = i.text
            prices.append(name)

        desc = box.find_all('ul', class_='G4BRas')
        for i in desc:
            name = i.text
            description.append(name)

        rating = box.find_all('div', class_='XQDdHH')
        for i in rating:
            name = i.text
            ratings.append(name)
    else:
        print(f"No box found on page {i}")

    # Example of finding a product link or other data (adapt based on your scraping needs)
    
    try:
        next_page = soup.find('a', class_='_9QVEpD').get('href')
        complete_np = 'https://www.flipkart.com' + next_page
        print(f"Next page URL: {complete_np}")
    except AttributeError:
        print(f"No next page found for page {i}")

    # Optional: Add delay to avoid rapid requests
    
    time.sleep(random.uniform(2, 5))  # Wait between page requests to avoid detection

Next page URL: https://www.flipkart.com/search?q=mobiles+under+50000&as=on&as-show=on&otracker=AS_Query_HistoryAutoSuggest_1_9_na_na_na&otracker1=AS_Query_HistoryAutoSuggest_1_9_na_na_na&as-pos=1&as-type=HISTORY&suggestionId=mobiles+under+50000&requestId=478da6aa-6536-430e-b8d7-d62ea70dd978&as-searchtext=mobiles+u&page=1
Next page URL: https://www.flipkart.com/search?q=mobiles+under+50000&as=on&as-show=on&otracker=AS_Query_HistoryAutoSuggest_1_9_na_na_na&otracker1=AS_Query_HistoryAutoSuggest_1_9_na_na_na&as-pos=1&as-type=HISTORY&suggestionId=mobiles+under+50000&requestId=478da6aa-6536-430e-b8d7-d62ea70dd978&as-searchtext=mobiles+u&page=2
Next page URL: https://www.flipkart.com/search?q=mobiles+under+50000&as=on&as-show=on&otracker=AS_Query_HistoryAutoSuggest_1_9_na_na_na&otracker1=AS_Query_HistoryAutoSuggest_1_9_na_na_na&as-pos=1&as-type=HISTORY&suggestionId=mobiles+under+50000&requestId=478da6aa-6536-430e-b8d7-d62ea70dd978&as-searchtext=mobiles+u&page=3
Next page URL: https://www.flip

In [8]:
# Convert the scraped product data into a pandas DataFrame

df = pd.DataFrame({'Product Name': product_name, 'Prices': prices, 'Description': description, 'Ratings': ratings})
df

Unnamed: 0,Product Name,Prices,Description,Ratings
0,"Apple iPhone 13 (Pink, 128 GB)","₹49,900",128 GB ROM15.49 cm (6.1 inch) Super Retina XDR...,4.6
1,"OnePlus 11 5G (Marble Odyssey, 256 GB)","₹49,999",16 GB RAM | 256 GB ROM17.02 cm (6.7 inch) Disp...,4.5
2,"MOTOROLA razr 40 (Vanilla Cream, 256 GB)","₹49,999",8 GB RAM | 256 GB ROM17.53 cm (6.9 inch) Displ...,4
3,"vivo X70 Pro+ (Enigma Black, 256 GB)","₹47,999",12 GB RAM | 256 GB ROM17.22 cm (6.78 inch) WQH...,4.5
4,Samsung Galaxy S21 FE 5G with Snapdragon 888 (...,"₹45,999",8 GB RAM | 128 GB ROM16.26 cm (6.4 inch) Full ...,4.3
...,...,...,...,...
235,"Motorola G85 5G (Urban Grey, 256 GB)","₹18,999",12 GB RAM | 256 GB ROM16.94 cm (6.67 inch) Ful...,4.4
236,"Motorola g64 5G (Mint Green, 256 GB)","₹14,999",12 GB RAM | 256 GB ROM | Expandable Upto 1 TB1...,4.2
237,"REDMI 13c 5G (Startrail Green, 128 GB)","₹9,613",4 GB RAM | 128 GB ROM17.12 cm (6.74 inch) Disp...,4.3
238,"REDMI 13c 5G (Starlight Black, 128 GB)","₹9,452",4 GB RAM | 128 GB ROM17.12 cm (6.74 inch) Disp...,4.3


In [9]:
# Close the browser after scraping

driver.quit()

In [10]:
# Save the DataFrame to a CSV file

df.to_csv('C:/Users/ARYA/Desktop/Flipkart_Scraping.csv')