In [1]:
import time

In [2]:
from selenium import webdriver
from time import sleep
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager


In [3]:
def scrape_myntra(KEYWORDS, browser):
    """Scrape product details from Myntra for each search keyword.

    For every keyword the Myntra home page is opened, the keyword is submitted
    through the search bar, and the links of the first ten result tiles are
    collected. Each linked product page is then visited and its details are
    extracted.

    Args:
        KEYWORDS: Iterable of search phrases.
        browser: Selenium WebDriver (or compatible object) used for navigation
            and element lookups.

    Returns:
        list[dict]: One dict per scraped product with the keys ``brand``,
        ``name``, ``product_link``, ``image_url``, ``original_price``,
        ``discounted_price``, ``rating``, ``reviews`` and ``available_sizes``.

    Notes:
        A keyword whose search bar cannot be used is skipped with a printed
        message, and a product page that fails is skipped with a printed
        error, so one failure does not discard the results gathered so far.
        Handlers catch ``Exception`` rather than using a bare ``except`` so
        that ``KeyboardInterrupt`` can still stop a long run.
    """

    def first_text(by, value, default):
        """Return the text of the first matching element, or ``default`` if the lookup fails."""
        try:
            return browser.find_element(by, value).text
        except Exception:
            return default

    product_data = []

    for keyword in KEYWORDS:
        browser.get("https://www.myntra.com")
        time.sleep(2)  # fixed pause to let the landing page render

        try:
            input_search = browser.find_element(By.CLASS_NAME, "desktop-searchBar")
            input_search.clear()
            input_search.send_keys(keyword)
            input_search.send_keys(Keys.RETURN)
        except Exception:
            # Skip just this keyword instead of aborting the whole scrape.
            print("Search bar not found")
            continue

        time.sleep(3)  # fixed pause to let the result grid render

        # Collect the links first: navigating away would invalidate the tiles.
        elements = browser.find_elements(By.XPATH, "//li[@class='product-base']")
        product_links = []
        for product in elements[:10]:
            try:
                link = product.find_element(By.TAG_NAME, "a").get_attribute("href")
            except Exception:
                continue
            if link:
                product_links.append(link)

        for link in product_links:
            try:
                browser.get(link)
                time.sleep(2)

                # Brand and name are mandatory: a failure here skips the product.
                brand = browser.find_element(By.CLASS_NAME, "pdp-title").text
                name = browser.find_element(By.CLASS_NAME, "pdp-name").text

                discounted_price = first_text(By.CLASS_NAME, "pdp-price", "N/A")
                # Without a struck-through price, fall back to the displayed price.
                original_price = first_text(By.CLASS_NAME, "pdp-cut-price", discounted_price)

                try:
                    image = browser.find_element(
                        By.XPATH, "//img[@class='image-grid-image']"
                    ).get_attribute("src")
                except Exception:
                    image = "N/A"

                rating = first_text(By.CLASS_NAME, "index-overallRating", "N/A")

                try:
                    review_elements = browser.find_elements(
                        By.XPATH, "//div[@class='user-review-reviewTextWrapper']"
                    )
                    reviews = [elem.text for elem in review_elements]
                except Exception:
                    reviews = []

                available_sizes = []
                for size in browser.find_elements(By.CLASS_NAME, "size-buttons-size-button"):
                    # get_attribute may return None; treat that as "no classes".
                    css_classes = size.get_attribute("class") or ""
                    is_disabled = (
                        "disabled" in css_classes
                        or size.get_attribute("aria-disabled") == "true"
                    )
                    status = "Out of Stock" if is_disabled else "In Stock"
                    available_sizes.append(f"{size.text.strip()} ({status})")

                product_data.append({
                    "brand": brand,
                    "name": name,
                    "product_link": link,
                    "image_url": image,
                    "original_price": original_price,
                    "discounted_price": discounted_price,
                    "rating": rating,
                    "reviews": reviews,
                    "available_sizes": available_sizes
                })

            except Exception as e:
                print("Error scraping product:", e)
                continue

    return product_data


In [4]:
def scrape_nykaa(KEYWORDS, browser):
    """Scrape product details from Nykaa Fashion for each search keyword.

    For every keyword the Nykaa Fashion home page is opened, a "No thanks"
    button is clicked if one is found, the keyword is submitted through the
    search box, and the links of the first ten result tiles are collected.
    Each linked product page is then visited and its details are extracted.

    Args:
        KEYWORDS: Iterable of search phrases.
        browser: Selenium WebDriver (or compatible object) used for navigation
            and element lookups.

    Returns:
        list[dict]: One dict per scraped product with the keys ``brand``,
        ``name``, ``product_link``, ``image_url``, ``original_price``,
        ``discounted_price``, ``rating``, ``reviews`` and ``available_sizes``.

    Notes:
        A keyword whose search box cannot be used is skipped with a printed
        message, and a product page that fails is skipped with a printed
        error. Handlers catch ``Exception`` rather than using a bare
        ``except`` so that ``KeyboardInterrupt`` can still stop a long run.
    """

    def first_text(xpath, default):
        """Return the text of the first element matching ``xpath``, or ``default`` if the lookup fails."""
        try:
            return browser.find_element(By.XPATH, xpath).text
        except Exception:
            return default

    product_data = []

    for keyword in KEYWORDS:
        browser.get("https://www.nykaafashion.com")
        time.sleep(2)  # fixed pause to let the landing page render
        browser.maximize_window()

        # Best-effort: the "No thanks" button is not always present.
        try:
            browser.find_element(By.XPATH, "//button[contains(text(), 'No thanks')]").click()
        except Exception:
            pass

        try:
            input_search = browser.find_element(
                By.XPATH, "//input[@placeholder='Search for products, styles, brands']"
            )
            input_search.clear()
            input_search.send_keys(keyword)
            input_search.send_keys(Keys.RETURN)
            time.sleep(3)  # fixed pause to let the result grid render
        except Exception:
            print("Search bar not found")
            continue

        # Collect the links first: navigating away would invalidate the tiles.
        elements = browser.find_elements(By.XPATH, "//div[@class='css-384pms']")
        product_links = []
        for product in elements[:10]:
            try:
                link = product.find_element(By.TAG_NAME, "a").get_attribute("href")
            except Exception:
                continue
            if link:
                product_links.append(link)

        for link in product_links:
            try:
                browser.get(link)
                time.sleep(2)

                # Brand and name are mandatory: a failure here skips the product.
                brand = browser.find_element(By.XPATH, "//a[@class='css-6mpq2k']").text
                name = browser.find_element(By.XPATH, "//span[@class='css-cmh3n9']").text

                discounted_price = first_text("//span[@class='css-5pw8k6']", "N/A")
                # Without a separate original price, fall back to the displayed price.
                original_price = first_text("//span[@class=' css-1byl9fj']", discounted_price)

                try:
                    image = browser.find_element(
                        By.XPATH, "//img[@class=' css-kwk7lt']"
                    ).get_attribute("src")
                except Exception:
                    image = "N/A"

                rating = first_text("//div[@class='css-xoezkq']", "N/A")

                try:
                    # find_elements (plural) returns a list; the singular form
                    # returns one element, which cannot be iterated.
                    review_elements = browser.find_elements(By.XPATH, "//p[@class='css-183zl1c']")
                    reviews = [elem.text for elem in review_elements]
                except Exception:
                    reviews = []

                available_sizes = []
                for size in browser.find_elements(By.XPATH, "//span[@class='css-la6tof']"):
                    # get_attribute may return None; treat that as "no classes".
                    css_classes = size.get_attribute("class") or ""
                    is_disabled = (
                        "disabled" in css_classes
                        or size.get_attribute("aria-disabled") == "true"
                    )
                    status = "Out of Stock" if is_disabled else "In Stock"
                    available_sizes.append(f"{size.text.strip()} ({status})")

                product_data.append({
                    "brand": brand,
                    "name": name,
                    "product_link": link,
                    "image_url": image,
                    "original_price": original_price,
                    "discounted_price": discounted_price,
                    "rating": rating,
                    "reviews": reviews,
                    "available_sizes": available_sizes
                })

            except Exception as e:
                print("Error scraping product:", e)
                continue

    return product_data

In [5]:
# Search phrases passed to each scraper, in the order they are searched.
KEYWORDS = [
    "white shirt", "black dress", "denim jeans", "summer kurti", "co-ord set",
    "oversized t-shirt", "sneakers", "blue linen pants",
    "pink blazer for women", "yellow maxi dress",
]

In [6]:
# Build the driver Service from whatever ChromeDriverManager().install() returns
# (presumably the path of the downloaded/cached chromedriver binary).
service = Service(ChromeDriverManager().install())
# Start the Chrome session that is passed to both scraper functions.
browser = webdriver.Chrome(service=service)
browser.maximize_window()

In [7]:
# Run both scrapers on the shared browser session. The finally clause makes
# sure the browser is shut down even if a scraper raises, so no Chrome/driver
# process is left running.
try:
    myntra_data = scrape_myntra(KEYWORDS, browser)
    nykaa_data = scrape_nykaa(KEYWORDS, browser)
finally:
    browser.quit()


In [8]:
import json
# Write each site's results to its own pretty-printed UTF-8 JSON file;
# ensure_ascii=False keeps non-ASCII characters readable in the output.
for output_path, records in (
    ('myntra_products_ouput.json', myntra_data),
    ('nykaa_products_ouput.json', nykaa_data),
):
    with open(output_path, 'w', encoding='utf-8') as out_file:
        json.dump(records, out_file, indent=4, ensure_ascii=False)