In [1]:
# importing libraries and packages
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import time
import pandas as pd

In [2]:
# Set Chrome options for the web driver
options = Options()
# options.add_argument("--headless")  
options.add_argument("--disable-gpu")
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')

service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=options)

# URL of the Flipkart mobile search results page
url = 'https://www.flipkart.com/search?q=mobiles+5g'
driver.get(url)
time.sleep(5)

# Initialize empty lists to store scraped data
titles = []
prices = []
categories = []
ratings = []
number_of_reviews = []
page_counter = 0

In [7]:
# Define the maximum number of pages to scrap
max_pages = 10 

while page_counter < max_pages:
    content = driver.page_source
    soup = BeautifulSoup(content, "html.parser")
# Find product containers
    products = soup.find_all("div", class_="cPHDOP col-12-12")

    for product in products:
        # Extract product name
        title = product.find("div", class_="KzDlHZ")
        if title:
            titles.append(title.text)
        else:
            titles.append("Unknown")

        # Extract price
        price = product.find("div", class_="Nx9bqj _4b5DiR")
        if price:
            prices.append(price.text)
        else:
            prices.append('0')

        #  category is set statically
        categories = 'mobiles 5g'
    
        # Extract rating
        rating = product.find("div", class_="XQDdHH")
        if rating:
            ratings.append(rating.text)
        else:
            ratings.append("Unknown")

        # Extract number of reviews
        reviews = product.find("span",class_="Wphh3N")
        if reviews:
            number_of_reviews.append(reviews.text)
        else:
            number_of_reviews.append("0")  # No reviews found

    try:
        # Find the "Next" button using an XPath expression and wait until it is clickable
        next_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, "//a[contains(@class, '_9QVEpD') and span[contains(text(), 'Next')]]"))
        )
        
        print("Next button URL:", next_button.get_attribute("href"))
        
        next_button.click()
        time.sleep(5)  
        
        page_counter += 1   # Increment the page counter after successfully navigating to the next page
    except Exception as e:
        print(f"No more pages to scrape or an error occurred: {e}")
        break  # Exit the loop if no more pages are available or an error occurs


In [8]:
len(titles)

668

In [9]:
# Store the scraped data in a DataFrame
data = {
    'Title': titles,
    'Price': prices,
    'Category': categories,
    'Rating': ratings,
    'Number of Reviews': number_of_reviews,
}
df = pd.DataFrame(data)


In [10]:
df

Unnamed: 0,Title,Price,Category,Rating,Number of Reviews
0,Unknown,0,mobiles 5g,Unknown,0
1,Unknown,0,mobiles 5g,Unknown,0
2,OPPO K13x 5G 6000mAh and 45W SUPERVOOC Charger...,"₹11,999",mobiles 5g,4.5,"13,953 Ratings & 1,243 Reviews"
3,"Nothing Phone (3) (Black, 256 GB)","₹79,999",mobiles 5g,4.4,"1,735 Ratings & 158 Reviews"
4,"vivo T4x 5G (Pronto Purple, 128 GB)","₹14,499",mobiles 5g,4.4,"1,18,524 Ratings & 5,772 Reviews"
...,...,...,...,...,...
663,"Tecno Pop 9 5G (Aurora Cloud, 64 GB)","₹8,699",mobiles 5g,4.1,127 Ratings & 7 Reviews
664,"MOTOROLA Edge 60 Pro (Pantone Walnut, 256 GB)","₹29,999",mobiles 5g,4.3,"10,271 Ratings & 889 Reviews"
665,Unknown,0,mobiles 5g,Unknown,0
666,Unknown,0,mobiles 5g,4.4,0


In [11]:
df.shape

(668, 5)

In [13]:
# Save to CSV file
df.to_csv('flipkart_mobile5g.csv', index=False)

In [14]:
# Close the browser
driver.quit()