In [87]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
from bs4 import BeautifulSoup
import time

In [116]:
# Function to extract product information
def extract_product_info(page_source):
    """Parse a product-listing page into a table with one row per product card.

    Every ``div`` with class ``plp-card-container`` is treated as one product
    card. Within each card the first matching name, price and discount-badge
    element is read; when an element is absent the field is ``None``.

    Parameters
    ----------
    page_source
        HTML markup of the listing page (the caller passes
        ``driver.page_source``).

    Returns
    -------
    pandas.DataFrame
        Columns ``Name``, ``Price`` and ``Discount_applied`` holding the
        whitespace-stripped element text, or ``None`` for missing elements.
    """
    # (output column, tag name, CSS class) for each field read from a card
    field_selectors = (
        ('Name', 'div', 'plp-card-details-name'),
        ('Price', 'span', 'jm-heading-xxs'),
        ('Discount_applied', 'span', 'jm-badge'),
    )

    def _stripped_text(node):
        # A failed lookup yields a falsy result; report that as None
        return node.text.strip() if node else None

    # Ensure this class matches the card container used by the site
    cards = BeautifulSoup(page_source, 'html.parser').find_all('div', class_='plp-card-container')

    # Build the columns as a dict of lists so an empty page still produces
    # a frame with all three named columns
    return pd.DataFrame({
        column: [_stripped_text(card.find(tag, class_=css_class)) for card in cards]
        for column, tag, css_class in field_selectors
    })

In [117]:
# Function to set delivery location pincode
def set_delivery_location(driver, pincode, timeout=10):
    """Set the delivery pincode through the site's "Deliver to" popup.

    Clicks the "Deliver to" button, the "Enter Pin Code" button and the
    pincode text box in turn, then types ``pincode`` into the
    ``rel_pincode`` input and submits its form.

    Each element is awaited with an explicit wait until it is clickable
    instead of being looked up immediately: the popup content is only
    rendered after the preceding click, so an immediate ``find_element``
    can fail on a slow page.

    Parameters
    ----------
    driver
        Selenium WebDriver with the site's home page already loaded.
    pincode
        Pincode to type into the input (passed to ``send_keys``).
    timeout
        Maximum number of seconds to wait for each element. Defaults to 10.

    Returns
    -------
    None
        Success or failure is reported on stdout; any exception raised
        along the way is caught and printed rather than propagated.
    """
    # Absolute XPaths follow the page layout exactly and will need updating
    # if the site's markup changes.
    click_steps = (
        ('the "Deliver to" button',
         '/html/body/header/section[1]/div/section[1]/div[2]/button'),
        ('the "Enter Pin Code" button',
         '/html/body/header/section[1]/div/section[1]/div[2]/div/section/div[1]/div[4]/div[1]/button'),
        ('the pincode text box',
         '/html/body/header/section[1]/div/section[1]/div[2]/div/section/div[2]/form/div/div[1]/div/input'),
    )

    try:
        wait = WebDriverWait(driver, timeout)

        for description, xpath in click_steps:
            # The message names the step so a timeout is not reported blank
            element = wait.until(
                EC.element_to_be_clickable((By.XPATH, xpath)),
                message=f"timed out waiting for {description}",
            )
            element.click()

        pincode_input = wait.until(
            EC.element_to_be_clickable((By.ID, 'rel_pincode')),
            message="timed out waiting for the pincode input",
        )
        pincode_input.clear()
        pincode_input.send_keys(pincode)

        # Submit the form that contains the input
        pincode_input.submit()

        # Give the page time to reload with the new pincode applied
        time.sleep(5)

        print(f"Successfully set delivery location to {pincode}")
    except Exception as e:
        # Best effort: report the failure and let the notebook continue
        print(f"Failed to set delivery location: {e}")

In [118]:
# Start a Chrome session driven by a locally installed ChromeDriver binary
CHROMEDRIVER_PATH = 'C:/Program Files (x86)/chromedriver-win64/chromedriver.exe'  # Update with the path to your ChromeDriver
HOME_URL = 'https://www.jiomart.com'
DELIVERY_PINCODE = '834001'  # Replace with your desired pincode

service = Service(CHROMEDRIVER_PATH)
driver = webdriver.Chrome(service=service)

# Open the home page first, then choose the delivery location on it
driver.get(HOME_URL)
set_delivery_location(driver, DELIVERY_PINCODE)

Successfully set delivery location to 834001


In [119]:
# Map each category slug to the numeric ID that follows it in the listing URL
category = {
    'fruits-vegetables': 219,
    'dairy-bakery': 61,
    'staples': 13,
    'snacks-branded-foods': 10,
    'beverages': 33,
    'personal-care': 91,
    'home-care': 36,
    'home-kitchen': 1687,
    'mom-baby-care': 2551,
    'beauty': 6607
}

# One DataFrame per successfully scraped category; concatenated in a later cell
all_data_frames = []

# The outer try/finally guarantees the browser is closed even if the loop is
# interrupted or an unexpected error escapes, so no Chrome process is leaked.
try:
    for cat_type, cat_id in category.items():
        url = f'https://www.jiomart.com/c/groceries/{cat_type}/{cat_id}'
        print(f"Requesting URL: {url}")  # Debug: Print the URL being requested

        try:
            # Navigation is inside the try so that one failing page only
            # skips its own category instead of aborting the whole run.
            driver.get(url)

            # Wait for the initial products to load
            WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, 'plp-card-container')))

            # Scroll to the bottom repeatedly until the page height stops
            # growing, i.e. no further products were appended.
            last_height = driver.execute_script("return document.body.scrollHeight")
            while True:
                driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
                time.sleep(2)  # Wait for the newly requested products to render
                new_height = driver.execute_script("return document.body.scrollHeight")
                if new_height == last_height:
                    break
                last_height = new_height

            # Extract data after all products are loaded
            final_df = extract_product_info(driver.page_source)
        except Exception as e:
            print(f"Failed to retrieve data from {url}: {e}")
            continue

        final_df['Category'] = cat_type  # Add a column with the category name
        all_data_frames.append(final_df)
finally:
    driver.quit()

Requesting URL: https://www.jiomart.com/c/groceries/fruits-vegetables/219
Requesting URL: https://www.jiomart.com/c/groceries/beauty/6607


In [120]:
# Concatenate all per-category DataFrames into a single DataFrame.
# pd.concat raises an opaque "No objects to concatenate" on an empty list, so
# fail with a message that points at the actual cause instead.
if not all_data_frames:
    raise ValueError(
        "No category data was scraped; check the 'Failed to retrieve data' "
        "messages printed by the scraping cell"
    )
JioMart_Data = pd.concat(all_data_frames, ignore_index=True)

In [121]:
# Show the combined table as the cell's output
JioMart_Data

Unnamed: 0,Name,Price,Discount_applied,Category
0,Onion 5 kg (Pack),₹190.00,,fruits-vegetables
1,Okra 250 g,₹9.00,,fruits-vegetables
2,Onion 1 kg (Pack),₹39.00,,fruits-vegetables
3,Button Mushroom 200 g,₹55.00,,fruits-vegetables
4,Tomato 1 kg,₹50.00,,fruits-vegetables
...,...,...,...,...
1070,X-kicks. 100 ML Ayurvedic Natural Intimate Was...,₹279.00,53% OFF,beauty
1071,LIFREE Extra Absorb M10 Adult Diapers - M (30 ...,"₹1,099.00",29% OFF,beauty
1072,Riyuzone 2 In 1 Hair Straightener Curler Combo...,₹685.00,65% OFF,beauty
1073,Nisha Naturemate Natural Henna Based Hair Colo...,₹188.00,6% OFF,beauty
