In [149]:
# Environment setup (shell commands run from the notebook):
# install Selenium, then fetch a Chromium driver via apt and copy the
# chromedriver binary into /usr/bin.
!pip install selenium
!apt-get update
!apt install chromium-chromedriver
!cp /usr/lib/chromium-browser/chromedriver /usr/bin
# ipywidgets provides the text box, buttons and progress bar used further down.
!pip install ipywidgets
!jupyter nbextension enable --py widgetsnbextension

import sys
sys.path.insert(0,'/usr/lib/chromium-browser/chromedriver')
import datetime

from google.colab import files
# Fixed: this line previously read `import pandas as import pd`, a SyntaxError
# that prevented the whole cell from running.
import pandas as pd
from tqdm.notebook import tqdm as tqdm

import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from ipywidgets import widgets, IntProgress, interact
from IPython.display import display, clear_output

# Chrome is started headless with the sandbox and /dev/shm usage disabled.
chrome_options = webdriver.ChromeOptions()
for _chrome_flag in ('--headless', '--no-sandbox', '--disable-dev-shm-usage'):
    chrome_options.add_argument(_chrome_flag)
del _chrome_flag  # do not leave the loop variable behind as a notebook global


# Date stamp (YYYY-MM-DD) taken once, when this cell runs.
date_today = datetime.datetime.now().strftime("%Y-%m-%d")

# (low, high) price bounds to query; an empty-string upper bound leaves the
# high-price field blank.
range_list = [
    (0, 150),
    (150, 250),
    (250, 500),
    (500, ""),
]

# Page that is loaded before every search.
page = "https://www.amazon.in/gp/browse.html?node=1380374031&ref_=nav_em_0_2_11_8_sbc_hk_decor"

def look_for_item(search_item,page,driver):
    """Load ``page`` and submit ``search_item`` through its search box.

    Parameters
    ----------
    search_item : str
        Text typed into the element with id ``twotabsearchtextbox``.
    page : str
        URL opened before searching.
    driver : selenium WebDriver
        Browser session used for navigation and typing.
    """
    driver.get(page)
    # find_element(By.ID, ...) replaces find_element_by_id, which newer
    # Selenium 4 releases no longer provide; this form works on Selenium 3 too.
    search_box = driver.find_element(By.ID, "twotabsearchtextbox")
    search_box.send_keys(search_item)
    search_box.send_keys(Keys.ENTER)

def assert_nonprime(driver):
    """Click the first "clear" navigation link on the page, if there is one.

    Looks for elements carrying all three classes ``a-link-normal``,
    ``s-navigation-item`` and ``s-navigation-clear-link``. When none exist the
    function does nothing.
    NOTE(review): presumably this removes an active Prime filter - confirm
    against the live page.
    """
    # The old find_elements_by_class_name helper is gone from newer Selenium 4
    # releases; a CSS selector expresses the same compound-class match.
    clear_links = driver.find_elements(
        By.CSS_SELECTOR,
        ".a-link-normal.s-navigation-item.s-navigation-clear-link",
    )
    if clear_links:
        # Reuse the element already found instead of querying the page again.
        clear_links[0].click()

def set_range(low,high,driver):
    """Fill the low/high price fields and submit the price filter.

    Parameters
    ----------
    low, high : str
        Text sent to the ``low-price`` and ``high-price`` inputs. An empty
        string leaves that bound blank.
    driver : selenium WebDriver
        Browser session showing a results page.
    """
    # find_element(By..., ...) replaces the find_element_by_* helpers that
    # newer Selenium 4 releases no longer provide. Each field is looked up once.
    for field_id, value in (("low-price", low), ("high-price", high)):
        field = driver.find_element(By.ID, field_id)
        field.clear()
        field.send_keys(value)
    # NOTE(review): presumably the price filter's submit control - confirm the
    # id "a-autoid-1" is stable on the live page.
    driver.find_element(By.XPATH, '//*[@id="a-autoid-1"]/span/input').click()

def number_of_results(driver):
    """Return the result count shown on the current page, or 0 if none is found.

    Reads the text of the first element with class ``sg-col-inner`` and takes
    the number immediately before the word "result" or "results"
    (e.g. ``1-48 of over 10,000 results`` -> ``10000``).

    Parameters
    ----------
    driver : selenium WebDriver
        Browser session showing a results page.

    Returns
    -------
    int
        The parsed count, or 0 when no such number is present.
    """
    import re  # local import so the function does not rely on a file-level import

    # find_element(By.CLASS_NAME, ...) replaces find_element_by_class_name,
    # which newer Selenium 4 releases no longer provide.
    text = driver.find_element(By.CLASS_NAME, 'sg-col-inner').text

    # Require the token before "result(s)" to start with a digit. The previous
    # split-based parsing fell through on a singular "1 result for ..." and
    # picked up digits from the search term instead.
    match = re.search(r'(\d[\d,.]*)\s+results?\b', text)
    if match is None:
        return 0
    digits = ''.join(ch for ch in match.group(1) if ch.isdigit())
    return int(digits)

def switch_to_prime(driver):
    """Click the first element that has both ``a-link-normal`` and
    ``s-navigation-item`` classes.

    NOTE(review): presumably this is the Prime filter link in the sidebar -
    confirm it is always the first match on the live page.
    """
    # CSS selector form of the compound class lookup; find_element_by_class_name
    # is no longer provided by newer Selenium 4 releases.
    driver.find_element(By.CSS_SELECTOR, ".a-link-normal.s-navigation-item").click()

def scrape_results(products,range_list=range_list):
    """Scrape total and Prime result counts for every product and price range.

    For each product the function searches the site, records the overall and
    Prime result counts, then repeats both counts for every ``(low, high)``
    entry in ``range_list``. A progress bar is displayed while it runs.

    Parameters
    ----------
    products : sequence of str
        Search terms, one output row per term.
    range_list : sequence of (low, high)
        Price bounds; each value is converted with ``str`` before being typed
        into the price fields.

    Returns
    -------
    pandas.DataFrame
        One row per product with the columns ``Item``,
        ``Number of Results (Total)``, ``Number of Results (Prime)``, one
        ``All Results in Range low-high`` / ``Prime Results in Range low-high``
        pair per range, and ``data_date`` (today as YYYY-MM-DD).
    """
    date_today = datetime.datetime.now().strftime("%Y-%m-%d")
    pbar = IntProgress(min=0, max=len(products))
    display(pbar)

    # The original passed an undefined name `chromepath` here (NameError).
    # Use the notebook's headless options; chromedriver is found on PATH.
    driver = webdriver.Chrome(options=chrome_options)
    records = []
    try:
        # Set the implicit wait once, before the first element lookup.
        driver.implicitly_wait(10)
        driver.get(page)
        for search_item in products:
            look_for_item(search_item,page,driver)
            row = {'Item': search_item}
            row['Number of Results (Total)'] = number_of_results(driver=driver)
            switch_to_prime(driver)
            row['Number of Results (Prime)'] = number_of_results(driver=driver)

            for RANGE in range_list:
                low, high = str(RANGE[0]), str(RANGE[1])
                # Drop any active filter before applying the next price range.
                assert_nonprime(driver)
                set_range(low,high,driver=driver)
                row["All Results in Range "+low+"-"+high] = number_of_results(driver=driver)

                switch_to_prime(driver)
                row["Prime Results in Range "+low+"-"+high] = number_of_results(driver=driver)

            records.append(row)
            pbar.value += 1
    finally:
        # Always release the browser process, even if a lookup fails mid-run.
        driver.quit()

    # Build the frame in one step so the count columns keep an integer dtype.
    data = pd.DataFrame(records)
    data['data_date'] = date_today
    return data

# Text box in which a product name is typed.
item_widget = widgets.Text(description='Type here')

# Button whose click handler adds the typed name to `products`.
add_button = widgets.Button(
    icon='check',
    tooltip='Click me',
    button_style='info',
    description="Add product",
)

# Output area that captures what the click handlers print.
output = widgets.Output()


# Product names collected so far, in the order they were added.
products = []
def on_button_clicked(b):
    """Add the text currently in ``item_widget`` to ``products``.

    Surrounding whitespace is stripped first, so a blank or whitespace-only
    entry is rejected instead of being stored as a product. Messages are
    printed into the ``output`` widget.

    Parameters
    ----------
    b : ipywidgets.Button
        The clicked button (unused).
    """
    with output:
        print("------------")
        product_name = item_widget.value.strip()

        if not product_name:
            print("Not added, type something and click on button again")
        else:
            print(product_name, " - Added")
            products.append(product_name)
# Run on_button_clicked on every "Add product" click.
add_button.on_click(on_button_clicked)

review_button = widgets.Button(
    icon='check',
    tooltip='Click me',
    button_style='primary',
    description="Review Products",
)

# Rebinds `output` to a fresh Output widget. The handlers look the name up
# when they run, so the add and review handlers both print into this one.
output = widgets.Output()

def review_button_clicked(b,products=products):
    """List the distinct products added so far inside the ``output`` widget.

    Duplicates are dropped for display only; the ``products`` list itself is
    not modified. Items are numbered in the order they were first added.

    Parameters
    ----------
    b : ipywidgets.Button
        The clicked button (unused).
    products : list of str
        Names to review; defaults to the notebook-level ``products`` list.
    """
    with output:
        if len(products)>0:
            clear_output()
            # dict.fromkeys removes duplicates while keeping insertion order.
            # set() produced an arbitrary order, so numbering changed between runs.
            unique_products = list(dict.fromkeys(products))
            for p, product in enumerate(unique_products, start=1):
                print("Product number-",p,"-",product)
        else:
            print("No Items added yet")
            
# Run the review handler defined above whenever "Review Products" is clicked.
review_button.on_click(review_button_clicked)

def download_file(data,date_today):
    """Save ``data`` as an Excel workbook and hand it to the browser.

    The workbook is named ``amzn <date_today>.xlsx``, written without the
    index column, and then passed to ``files.download``.

    Parameters
    ----------
    data : object with a ``to_excel`` method (a pandas DataFrame in this notebook)
        Table to export.
    date_today : str
        Date stamp embedded in the file name.
    """
    target_name = 'amzn '.__add__(date_today).__add__('.xlsx') if False else 'amzn ' + date_today + '.xlsx'
    data.to_excel(target_name, index=False)
    files.download(target_name)

# Button that triggers the Excel export, plus the area that shows its messages.
download_button = widgets.Button(
    icon='check',
    tooltip='Click me',
    button_style='success',
    description="Download file",
)
download_output = widgets.Output()
def download_button_clicked(b):
    """Export the notebook-level ``data`` table when the button is clicked.

    ``data`` is looked up among the module globals at click time. If no such
    table exists yet, a message is printed instead of raising ``NameError``.

    Parameters
    ----------
    b : ipywidgets.Button
        The clicked button (unused).
    """
    with download_output:
        # Nothing in this handler creates `data`; guard against it being absent.
        results = globals().get("data")
        if results is None:
            print("Nothing to download yet - run the scrape first")
            return
        download_file(results,date_today)
        print("File downloaded")
        
# Run download_button_clicked on every "Download file" click.
download_button.on_click(download_button_clicked)

confirm_button = widgets.Button(
    icon='check',
    tooltip='Click me',
    button_style='info',
    description="Confirm & Begin",
)

confirm_output = widgets.Output()
# The bar's maximum is taken from len(products) at the moment this line runs.
pbar = IntProgress(max=len(products), min=0)

def review_button_clicked(b):
    """Scrape result counts for every distinct product, then offer a download.

    This is the click handler for ``confirm_button``. For each product it
    records the total and Prime result counts, overall and per price range in
    ``range_list``, advancing ``pbar`` after each product. The finished table
    is stored in the notebook-level ``data`` variable, which is what the
    download handler reads, and the download button is then displayed.

    NOTE(review): this definition reuses the name of the earlier
    "Review Products" handler and replaces it at module level. The earlier
    handler keeps working only because it was registered before this
    definition ran. Consider renaming once all callers are known.

    Parameters
    ----------
    b : ipywidgets.Button
        The clicked button (unused).
    """
    # Publish the results at module level; as a local they were lost when the
    # handler returned and the download handler could never see them.
    global data
    with confirm_output:
        # Scrape each name once, in the order it was first added.
        unique_products = list(dict.fromkeys(products))

        # pbar was created while `products` was still empty (max=0), so the bar
        # could never advance; size it for this run.
        pbar.value = 0
        pbar.max = len(unique_products)

        records = []
        # options= replaces the chrome_options= keyword and positional driver
        # path, which current Selenium releases no longer accept.
        driver = webdriver.Chrome(options=chrome_options)
        try:
            # Set the implicit wait once, before the first element lookup.
            driver.implicitly_wait(10)
            driver.get(page)

            for search_item in unique_products:
                look_for_item(search_item,page,driver)
                row = {'Item': search_item}
                row['Number of Results (Total)'] = number_of_results(driver=driver)
                switch_to_prime(driver)
                row['Number of Results (Prime)'] = number_of_results(driver=driver)

                for RANGE in range_list:
                    low, high = str(RANGE[0]), str(RANGE[1])
                    # Drop any active filter before applying the next price range.
                    assert_nonprime(driver)
                    set_range(low,high,driver=driver)
                    row["All Results in Range "+low+"-"+high] = number_of_results(driver=driver)

                    switch_to_prime(driver)
                    row["Prime Results in Range "+low+"-"+high] = number_of_results(driver=driver)

                records.append(row)
                pbar.value += 1
        finally:
            # Always release the browser process, even if a lookup fails mid-run.
            driver.quit()

        # Build the frame in one step so the count columns keep an integer dtype.
        data = pd.DataFrame(records)
        data['data_date'] = datetime.datetime.now().strftime("%Y-%m-%d")
        display(widgets.VBox([download_button,download_output]))
# Attach the scraping handler defined directly above to "Confirm & Begin".
# At this point the name review_button_clicked refers to that later definition.
confirm_button.on_click(review_button_clicked)

In [150]:
# Step 1: text box and "Add product" button side by side, output area below.
display(widgets.VBox([widgets.HBox([item_widget,add_button]),output]))

VBox(children=(HBox(children=(Text(value='', description='Type here'), Button(button_style='info', description…

In [151]:
# Step 2: "Review Products" button above the same output area used in step 1.
display(widgets.VBox([review_button,output]))

VBox(children=(Button(button_style='primary', description='Review Products', icon='check', style=ButtonStyle()…

In [152]:
# Step 3: "Confirm & Begin" button next to the progress bar, scrape output below.
display(widgets.VBox([widgets.HBox([confirm_button,pbar]),confirm_output]))

VBox(children=(HBox(children=(Button(button_style='info', description='Confirm & Begin', icon='check', style=B…