In [11]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import traceback
import re
import pandas as pd
import numpy as np
from datetime import datetime as dt
import datetime
import time

### Helper Functions

In [12]:
def is_exists_by_xpath(driver,path):
    """Report whether an XPath lookup on ``driver`` succeeds.

    Parameters
    ----------
    driver
        Any object exposing ``find_element_by_xpath`` (the callers in this
        notebook pass a product element so relative ``.//`` paths are
        evaluated under it).
    path : str
        XPath expression handed to ``find_element_by_xpath`` unchanged.

    Returns
    -------
    int
        ``1`` if the lookup returned without raising, ``0`` if it raised.

    Notes
    -----
    Relies on the ``find_element_by_*`` helper methods, like the rest of this
    notebook.
    """
    try:
        driver.find_element_by_xpath(path)
    except Exception:
        # Any lookup failure means "not present". KeyboardInterrupt and
        # SystemExit are not Exception subclasses, so interrupting the
        # notebook is no longer silently swallowed here.
        return 0
    return 1
    
def get_product_details(driver,product,row_dict):
    """Collect listing badges for one product, then scrape its product page.

    The badge flags are read from the ``product`` element on the results
    page. The product link is then opened in a new browser window, scraped
    with ``get_product_details_from_product_page`` and the window is closed
    again.

    Parameters
    ----------
    driver
        The WebDriver controlling the browser.
    product
        The result element for one product; must contain an ``<a>`` whose
        ``href`` is the product page.
    row_dict : dict
        Row being built for this product. It is updated in place with the
        keys ``is_BestSeller``, ``is_BestValue``, ``is_Trending``, ``is_Ad``
        and ``is_FAssured`` (all ``bool``) before the product page is visited.

    Returns
    -------
    dict
        Whatever ``get_product_details_from_product_page`` returns for
        ``row_dict``.

    Raises
    ------
    Exception
        Anything raised while locating the link, opening the window or
        scraping the product page is propagated. The product window is still
        closed and focus returned to the results window first.
    """
    # Output column -> XPath (relative to the product element) whose presence
    # sets the flag.
    badge_xpaths = {
        "is_BestSeller": ".//*[./text()='Bestseller']",
        "is_BestValue": ".//*[./text()='Best Value']",
        "is_Trending": ".//*[./text()='Trending']",
        "is_Ad": ".//*[./text()='Ad']",
        "is_FAssured": ".//img[contains(@src,'fa_8b4b59.png')]",
    }
    for column, xpath in badge_xpaths.items():
        row_dict[column] = bool(is_exists_by_xpath(product, xpath))

    # direct to product page
    product_link = product.find_element_by_tag_name('a').get_attribute('href')

    # Remember where we came from and identify the new window by handle
    # difference instead of assuming it is always at index 1.
    listing_window = driver.current_window_handle
    handles_before = set(driver.window_handles)
    driver.execute_script("window.open('');")
    new_handles = [h for h in driver.window_handles if h not in handles_before]
    driver.switch_to.window(new_handles[0])

    try:
        driver.get(product_link)

        # updates from product page
        row_dict = get_product_details_from_product_page(driver,row_dict)
    finally:
        # Always close the product window and return to the results window,
        # otherwise a failure here leaves the driver focused on the product
        # page for every later lookup.
        driver.close()
        driver.switch_to.window(listing_window)

    return row_dict
    
    
def get_product_details_from_product_page(driver,row_dict):
    """Fill ``row_dict`` from the product page currently loaded in ``driver``.

    Parameters
    ----------
    driver
        WebDriver already showing the product page.
    row_dict : dict
        Row being built; must already contain ``"Rank"`` (used in the
        progress message). Updated in place.

    Returns
    -------
    dict
        The same ``row_dict`` with these keys set:

        - ``Product``: product title text.
        - ``Product_Rating``: ``float``, NaN when missing or not numeric.
        - ``No_of_Ratings_and_Reviews``: ``str`` of the text; a missing value
          is therefore stored as the string ``'nan'``.
        - ``Seller_Name``, ``Seller_Rating``, ``Seller_Since``,
          ``Seller_Product_Quality``, ``Seller_Service_Quality``: NaN for
          whichever could not be read.

    Raises
    ------
    Exception
        The lookups of the description container and the product name are
        not guarded; whatever the driver raises for them propagates.
    KeyError
        If ``row_dict`` has no ``"Rank"`` entry.
    """
    prod_desc = driver.find_element_by_class_name("_29OxBi")

    # product name
    product_name = prod_desc.find_element_by_class_name('_35KyD6').text
    print("Product No", row_dict["Rank"], ":",product_name)

    # product rating; converted inside the guard so non-numeric text becomes
    # NaN instead of raising after all the scraping work is done
    try:
        prod_rating = float(prod_desc.find_element_by_class_name('hGSR34').text)
    except Exception:
        prod_rating = np.nan

    # number of ratings & reviews
    try:
        rat_and_revs = prod_desc.find_element_by_class_name('_38sUEc').text
    except Exception:
        rat_and_revs = np.nan

    # Every seller field starts as "missing", so each one is always bound
    # when the row is written, whichever lookup below fails.
    seller_name = np.nan
    rating = np.nan
    seller_since = np.nan
    seller_prod_quality = np.nan
    seller_service_quality = np.nan

    try:
        seller = driver.find_element_by_id("sellerName")
    except Exception:
        seller = None

    if seller is not None:
        # seller name & rating
        try:
            rating_text = seller.find_element_by_class_name('hGSR34').text
            # Literal removal: the rating text (e.g. "4.2") must not be
            # treated as a regular expression, where "." matches any character.
            seller_name = seller.text.replace(rating_text, '')
            rating = float(rating_text)
        except Exception:
            rating = np.nan

        # seller since
        try:
            seller.click()
            # NOTE(review): the value is discarded; kept only because the
            # original flow issued this call before reading the seller
            # details - confirm whether it is actually needed.
            driver.switch_to.active_element
            seller_since = driver.find_element_by_class_name('_1gltgT').text
        except Exception:
            seller_since = np.nan

        # seller scores: first entry is stored as product quality, second as
        # service quality
        try:
            scores = driver.find_elements_by_class_name('PRNS4f')
            seller_prod_quality = float(scores[0].text)
            seller_service_quality = float(scores[1].text)
        except Exception:
            seller_prod_quality = np.nan
            seller_service_quality = np.nan

    # update dictionary
    row_dict["Product"] = product_name
    row_dict["Product_Rating"] = prod_rating
    row_dict["No_of_Ratings_and_Reviews"] = str(rat_and_revs)
    row_dict["Seller_Name"] = seller_name
    row_dict["Seller_Rating"] = rating
    row_dict["Seller_Since"] = seller_since
    row_dict["Seller_Product_Quality"] = seller_prod_quality
    row_dict["Seller_Service_Quality"] = seller_service_quality

    return row_dict


### Inputs to configure

In [13]:
# Page the browser opens before any search is typed.
URL = "https://www.flipkart.com/"

# Keys every scraped row starts with (the scraping loop builds each row with
# dict.fromkeys(attributes)), and therefore the column order of the output.
attributes = [
    'Query',
    'Rank',
    'Product',
    'Product_Rating',
    'is_BestSeller',
    'is_BestValue',
    'is_Trending',
    'is_Ad',
    'is_FAssured',
    'No_of_Ratings_and_Reviews',
    'Seller_Name',
    'Seller_Rating',
    'Seller_Since',
    'Seller_Product_Quality',
    'Seller_Service_Quality',
]

# Search terms, typed into the site's search box one after another.
queries = [
    'iron box',
    'diapers',
    'power bank',
]

# Result page number at which the scraping loop stops paginating a query.
MAX_PAGE_NUM = 2

# Accumulates one row per scraped product across all queries.
df = pd.DataFrame()

### Main

In [9]:
# Scrape every query in `queries`: run the search, walk the result rows page
# by page, collect one row per product and append them all to `df`.

# A page pass that raises (or finds no product at all) is retried after a
# short pause; after this many failures in a row the query is abandoned
# instead of looping forever.
MAX_CONSECUTIVE_FAILURES = 5
RETRY_PAUSE_SECONDS = 2

driver = webdriver.Chrome()
# The implicit wait is a session-wide setting, so it is applied once here
# rather than on every loop iteration.
driver.implicitly_wait(10)
driver.get(URL)

# Dismiss the popup when it is shown; its absence is not an error.
popup_buttons = driver.find_elements_by_xpath('//button[contains(text(),"✕")]')
if popup_buttons:
    popup_buttons[0].click()

scraped_rows = []  # one dict per product, turned into a DataFrame once at the end
try:
    for query in queries:
        # Search for a product
        search = driver.find_element_by_name("q")
        search.clear()
        search.send_keys(query)
        search.send_keys(Keys.RETURN)
        print("# Query: {0} #".format(query))
        start = time.time()

        done = False
        page_no = 1
        no_of_products_traversed = 0
        consecutive_failures = 0
        while True:
            print("-------Page No: {0}-------".format(page_no))

            navigated = False
            # State at the start of this page pass, so a failed pass can be
            # rolled back before it is retried (no duplicated products).
            page_first_row = len(scraped_rows)
            page_first_rank = no_of_products_traversed

            try:
                main = driver.find_elements_by_xpath("//*[@class='_1HmYoV _35HD7C']")[1]
                rows = main.find_elements_by_css_selector('.bhgxx2.col-12-12')
                for row in rows:
                    products = row.find_elements_by_xpath(".//*[@data-id]")
                    if products:
                        for product in products:
                            row_dict = dict.fromkeys(attributes)

                            # Update dictionary
                            row_dict["Query"] = query
                            row_dict["Rank"] = no_of_products_traversed + 1

                            scraped_rows.append(get_product_details(driver,product,row_dict))
                            no_of_products_traversed = no_of_products_traversed + 1
                        continue

                    # A row without products: stop at the page limit,
                    # otherwise look for the navigation bar in this row.
                    if page_no == MAX_PAGE_NUM:
                        done = True
                        break

                    try:
                        nav_bar = row.find_element_by_tag_name('nav')

                        # Navigate to next page
                        nav_links = nav_bar.find_elements_by_tag_name('a')
                        if len(nav_links) > page_no:
                            next_page_link = nav_links[page_no].get_attribute('href')
                            page_no = page_no+1
                            driver.get(next_page_link)
                            navigated = True
                            break
                        # No further page link: finish after the remaining rows.
                        done = True
                    except Exception:
                        # No navigation bar in this row (or navigation failed).
                        done = True
                        break

                if not done and not navigated:
                    if len(scraped_rows) == page_first_row:
                        # Nothing usable on the page (it may not have rendered
                        # yet): handle it as a failed pass so it is retried.
                        raise RuntimeError("no product rows found on the results page")
                    # Products were read but there is nowhere left to go;
                    # looping again would only re-scrape the same page.
                    done = True

                consecutive_failures = 0
                if done:
                    print("-------Done-------")
                    break

            except Exception as e:
                print(e)
                consecutive_failures += 1
                if consecutive_failures >= MAX_CONSECUTIVE_FAILURES:
                    print("-------Giving up after {0} failed attempts-------".format(consecutive_failures))
                    break
                # Drop the partial results of the failed pass, then retry it.
                del scraped_rows[page_first_row:]
                no_of_products_traversed = page_first_rank
                time.sleep(RETRY_PAUSE_SECONDS)
        end = time.time()

        print('Total time taken for query-{0} is {1}'.format(query,str(datetime.timedelta(seconds = end-start))))
finally:
    # Runs on interruption too, so rows collected so far are not lost.
    if scraped_rows:
        df = pd.concat([df, pd.DataFrame(scraped_rows)], axis = 0)

# Query: iron box #
-------Page No: 1-------
Product No 1 : Bajaj Majesty DX 6 1000 W Dry Iron  (White)
Product No 2 : Orient Electric Fabri Press DIFP10BP 1000 W Dry Iron  (Blue, Black)
Product No 3 : Usha EI 1602 1000 W Dry Iron  (White)
Product No 4 : Bajaj Majesty MX3 1250 W Steam Iron  (Purple, White)
Product No 5 : Nova Plus 1100 w Amaze NI 10 1100 W Dry Iron  (Grey & Turquoise)
Product No 6 : Flipkart SmartBuy Non-Stick 1000W Dry Iron
Product No 7 : Flipkart SmartBuy 1000 W Dry Iron
Product No 8 : Nova Plus 1100 w Amaze NI 20 1100 W Dry Iron  (white & Turquoise)
Product No 9 : Flipkart SmartBuy 1000 W Dry Iron
Product No 10 : Philips GC1905 Steam Iron, 1440 W
Product No 11 : Bajaj DX 2 L/W Dry Iron
Product No 12 : Bajaj DX 7 Light Weight 1000 W Dry Iron  (White)
Product No 13 : Crompton INSTAGLIDE IRON 1000 W Dry Iron  (BLACK AND BROWN)
Product No 14 : Bajaj MX 16 1400 W Steam Iron  (Pink, Black)
Product No 15 : Nova Plus 1100 w Amaze NI 40 1100 W Dry Iron  (White, Pink)
Product

Product No 44 : Huggies Premium Soft Pants, Extra Large size diaper pants, 40 count - XL  (40 Pieces)
Product No 45 : Himalaya Total Care Baby pants - M  (54 Pieces)
Product No 46 : Pampers New Extra Extra Large Size Diapers Pants (16 Count) - XXL  (16 Pieces)
Product No 47 : Pampers Baby-Dry Diapers - L  (60 Pieces)
Product No 48 : Huggies Wonder Pants diapers - L  (32 Pieces)
Product No 49 : Huggies Wonder Pants diapers -Combo pack - XL  (76 Pieces)
Product No 50 : Huggies Premium Soft Pants Monthly pack, Extra Large size diaper pants, 80 count - XL  (80 Pieces)
Product No 51 : Huggies Wonder Pants diapers -Combo pack - S  (120 Pieces)
Product No 52 : Billion Extra Absorb Diaper Pants - M  (56 Pieces)
Product No 53 : Huggies Dry pants diapers - M  (16 Pieces)
Product No 54 : Huggies Premium Soft Pants Sumo Monthly pack Medium size - M  (186 Pieces)
Product No 55 : Huggies Wonder Pants diapers - M  (38 Pieces)
Product No 56 : Pampers Diapers Pants - XXXL  (23 Pieces)
Product No 57 : H

Product No 44 : Flipkart SmartBuy 10000 mAh Power Bank (Fast Charging, 12 W)  (White, Lithium Polymer)
Product No 45 : iBall 10000 mAh Power Bank  (Black, Lithium Polymer)
Product No 46 : Oswaal CBSE Question Bank Class 12 Mathematics Book Chapterwise & Topicwise Includes Objective Types & MCQ's  (English, Undefined, unknown)
Product No 47 : QIPS H.M.INTERNATIONAL HIGH QUALITY METAL COIN BANK - S-237 [BLUE] Coin Bank  (Blue)
Product No 48 : Zeus ATM Piggy Bank Mini ATM Coin Bank Electronic Money Safe Toy Kids Gift with secret code lock Coin Bank  (Multicolor)
Product No 49 : Tuelip Combo of Two Cute Hello Kitty And Doremon Piggy Bank Coin Bank  (Multicolor)
Product No 50 : Flipkart SmartBuy 10000 mAh Power Bank (Fast Charging, 10 W)  (Black, Lithium Polymer)
Product No 51 : Zest 4 Toyz Smart Electronic Robot Style Password Protected Money Bank for Kids with Talking Function Coin Bank  (White)
Product No 52 : kiti kits Fingerprint Sensing Money Saving Bank With Double Protected Password

In [10]:
# Give the accumulated rows a clean 0..n-1 index before writing them out.
df = df.reset_index(drop = True)

# Timestamped file name (day_month_hour-minute-AM/PM). '-' is used between the
# time fields because ':' is not allowed in Windows file names.
timestamp = dt.now().strftime("%d_%b_%H-%M-%p")
df.to_csv('flipkart_data_' + timestamp + '.csv', index = False)

In [164]:
# End the WebDriver session started in the scraping cell.
driver.quit()

In [None]:
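# Product categories / search terms of interest
# (presumably candidates for `queries`; the last three are the ones configured above):
# Furniture
# Sports
# Fitness
# Remote Control Toys
# Air Conditioners
# Washing Machine
# Iron box
# Diapers
# Power bank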