# Web Scrapping with Selenium

### Content:
1. [Naukri.com Data Analyst Jobs](#1)
2. [Naukri.com Data Scientist Jobs](#2)
3. [Data Scientist in Delhi/NCR with 3-6 LPA Naukri.com](#3)
4. [Glassdoor Data Scientist Jobs in Noida](#4)
5. [Data Scientist in Noida Salary Data from Glassboor](#5)
6. [Flipkart Sunglasses Data](#6)
7. [iPhone 11 Reviews](#7)
8. [Flipkart Snearkers Data](#8)
9. [Myntra Black Shoes](#9)
10. [Amazon i7 and i9 Laptops](#10)

In [1]:
import pandas as pd
import selenium
import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait

#!pip install clipboard
import clipboard

import warnings
warnings.filterwarnings('ignore')

###### 1. Naukri.com Data Analyst Jobs <a name = "1"></a>

In [2]:
driver = webdriver.Chrome('./chromedriver')

In [3]:
driver.get('https://www.naukri.com/')

In [4]:
jobName = 'Data Analyst'
jobSearchLocation = 'Bangalore/Bengaluru'

In [5]:
#Filling the job search bar
jobSearchBar = driver.find_element_by_xpath('//*[@id="qsb-keyword-sugg"]')
jobSearchBar.send_keys(jobName)

In [6]:
#Filling the job location bar
jobLocationBar = driver.find_element_by_xpath('//*[@id="qsb-location-sugg"]')
jobLocationBar.send_keys(jobSearchLocation)

In [7]:
#clicking the search button
jobSearchButton = driver.find_element_by_xpath('//*[@id="root"]/div[3]/div[2]/section/div/form/div[3]/button')
jobSearchButton.click()

In [8]:
#Iterating and getting the first 10 job titles
jobTitle = []

for index in range(1,11):
    title = driver.find_element_by_xpath(f'//*[@id="root"]/div[3]/div[2]/section[2]/div[2]/article[{index}]/div[1]/div[1]/a')
    jobTitle.append(title.text)

jobTitle

['Data Scientist / Data Analyst -Business Analyst',
 'Hiring Data Analysts For E commerce Platform || WFH',
 'Data Analyst',
 'Hiring For Data Analyst/ MIS Reporting Analyst - Bangalore',
 'DA - Urgent Opening For Data Analyst BFSI Domain - Pan India',
 'Data Analyst - Informatica MDM',
 'Assistant Vice President - MIS & Reporting ( Business Data Analyst)',
 'Data Analyst',
 'Data Analyst',
 'Data Analyst']

In [9]:
#Iterating and getting the first 10 job locations
jobLocation = []

for index in range(1,11):
    location = driver.find_element_by_xpath(f'//*[@id="root"]/div[3]/div[2]/section[2]/div[2]/article[{index}]/div[1]/div[1]/ul/li[3]/span')
    jobLocation.append(location.text)

jobLocation

['Mumbai, Hyderabad/Secunderabad, Pune, Gurgaon/Gurugram, Chennai, Bangalore/Bengaluru',
 'Bangalore/Bengaluru',
 'Bangalore/Bengaluru',
 'Bangalore/Bengaluru',
 'Kolkata, Hyderabad/Secunderabad, Pune, Ahmedabad, Chennai, Bangalore/Bengaluru, Delhi / NCR, Mumbai (All Areas)',
 'Bangalore/Bengaluru',
 'Mumbai, Bangalore/Bengaluru',
 'Bangalore/Bengaluru',
 'Bangalore/Bengaluru',
 'Bangalore/Bengaluru']

In [10]:
#Iterating and getting the first 10 hiring firms
jobFirm = []

for index in range(1,11):
    firm = driver.find_element_by_xpath(f'//*[@id="root"]/div[3]/div[2]/section[2]/div[2]/article[{index}]/div[1]/div[1]/div/a[1]')
    jobFirm.append(firm.text)

jobFirm

['Inflexion Analytix Private Limited',
 'Allegis Services India Pvt. Ltd.',
 'Applied Materials',
 'PHARMACEUTICAL RESEARCH ASSOCIATES INDIA Pvt Ltd',
 'Tata Consultancy Services Ltd.',
 'Shell India Markets Private Limited',
 'INTERTRUSTVITEOS CORPORATE AND FUND SERVICES PVT. LTD.',
 'Myntra Designs Pvt. Ltd.',
 'Myntra Designs Pvt. Ltd.',
 'Myntra Designs Pvt. Ltd.']

In [11]:
#Iterating and getting the first 10 job experience requirements
jobExp = []

for index in range(1,11):
    exp = driver.find_element_by_xpath(f'//*[@id="root"]/div[3]/div[2]/section[2]/div[2]/article[{index}]/div[1]/div/ul/li[1]/span')
    jobExp.append(exp.text.split()[0])
    
jobExp

['0-3', '0-5', '7-10', '2-4', '4-9', '6-9', '12-18', '3-6', '3-6', '4-9']

In [12]:
#Makign a job dataframe with all the scrapped information
dataAnalystJobs = pd.DataFrame()

dataAnalystJobs['Title'] = jobTitle
dataAnalystJobs['Location'] = jobLocation
dataAnalystJobs['Company Name'] = jobFirm
dataAnalystJobs['Experience in Years'] = jobExp

dataAnalystJobs

Unnamed: 0,Title,Location,Company Name,Experience in Years
0,Data Scientist / Data Analyst -Business Analyst,"Mumbai, Hyderabad/Secunderabad, Pune, Gurgaon/...",Inflexion Analytix Private Limited,0-3
1,Hiring Data Analysts For E commerce Platform |...,Bangalore/Bengaluru,Allegis Services India Pvt. Ltd.,0-5
2,Data Analyst,Bangalore/Bengaluru,Applied Materials,7-10
3,Hiring For Data Analyst/ MIS Reporting Analyst...,Bangalore/Bengaluru,PHARMACEUTICAL RESEARCH ASSOCIATES INDIA Pvt Ltd,2-4
4,DA - Urgent Opening For Data Analyst BFSI Doma...,"Kolkata, Hyderabad/Secunderabad, Pune, Ahmedab...",Tata Consultancy Services Ltd.,4-9
5,Data Analyst - Informatica MDM,Bangalore/Bengaluru,Shell India Markets Private Limited,6-9
6,Assistant Vice President - MIS & Reporting ( B...,"Mumbai, Bangalore/Bengaluru",INTERTRUSTVITEOS CORPORATE AND FUND SERVICES P...,12-18
7,Data Analyst,Bangalore/Bengaluru,Myntra Designs Pvt. Ltd.,3-6
8,Data Analyst,Bangalore/Bengaluru,Myntra Designs Pvt. Ltd.,3-6
9,Data Analyst,Bangalore/Bengaluru,Myntra Designs Pvt. Ltd.,4-9


In [13]:
#Closing the browers
driver.close()

###### 2. Naukri.com Data Scientist Jobs <a name = "2"></a>

In [14]:
driver = webdriver.Chrome('./chromedriver')
driver.get('https://www.naukri.com/')

In [15]:
jobName = 'Data Scientist'
jobSearchLocation = 'Bangalore/Bengaluru'

In [16]:
#filling the job title bar
jobSearchBar = driver.find_element_by_xpath('//*[@id="qsb-keyword-sugg"]')
jobSearchBar.send_keys(jobName)

In [17]:
#filling the job location bar
jobLocationBar = driver.find_element_by_xpath('//*[@id="qsb-location-sugg"]')
jobLocationBar.send_keys(jobSearchLocation)

In [18]:
#Clicking on search
jobSearchButton = driver.find_element_by_xpath('//*[@id="root"]/div[3]/div[2]/section/div/form/div[3]/button')
jobSearchButton.click()

In [19]:
#inspecting the first job listing to study the structure
jobInfoTile = driver.find_element_by_xpath(f'//*[@id="root"]/div[3]/div[2]/section[2]/div[2]/article[1]/div[1]/div').text.split('\n')
jobInfoTile

['Data Scientist / Data Analyst -Business Analyst',
 'Inflexion Analytix Private Limited',
 '0-3 Yrs',
 '3,50,000 - 4,50,000 PA.',
 'Mumbai, Hyderabad/Secunderabad, Pune, Gurgaon/Gurugram, Chennai, Bangalore/Bengaluru']

In [20]:
#Getting job link for full job description
jobDescLink = driver.find_element_by_xpath(f'//*[@id="root"]/div[3]/div[2]/section[2]/div[2]/article[1]/div[1]/div[1]/a').get_attribute('href')
jobDescLink

'https://www.naukri.com/job-listings-data-scientist-data-analyst-business-analyst-inflexion-analytix-private-limited-mumbai-hyderabad-secunderabad-pune-gurgaon-gurugram-chennai-bangalore-bengaluru-0-to-3-years-100521000368?src=jobsearchDesk&sid=16212332571492194&xp=1&px=1'

In [21]:
jobInfoTile[-1]

'Mumbai, Hyderabad/Secunderabad, Pune, Gurgaon/Gurugram, Chennai, Bangalore/Bengaluru'

In [64]:
def jobInfo(tileNumber):
    """This function will return all job related information from naukri.com
    It will visit each job listing to get the full job description. However, some companies have their own
    custom page within naukri.com and these custom pages do not follow a predictable HTML structure. Therefore
    whenever such a custom page is encounted, it adds Full Job Description not available in the jobDesc variable"""
    
    jobInfoTile = driver.find_element_by_xpath(f'//*[@id="root"]/div[3]/div[2]/section[2]/div[2]/article[{tileNumber}]/div[1]/div').text.split('\n')
    jobTitle = jobInfoTile[0]
    jobFirm = jobInfoTile[1]
    jobLocation = jobInfoTile[-1]
    
    jobDescLink = driver.find_element_by_xpath(f'//*[@id="root"]/div[3]/div[2]/section[2]/div[2]/article[{tileNumber}]/div[1]/div[1]/a').get_attribute('href')
    
    try:
        driver.get(jobDescLink)
        jobDesc = driver.find_element_by_xpath('//*[@id="root"]/main/div[2]/div[2]/section[2]').text.split('\n')[1::]
        jobDesc = ''.join(jobDesc)
        driver.back()
        
    except:
        jobDesc = 'Full Job decription not available'
        driver.back()
        
    finally:
        return jobTitle, jobFirm, jobLocation, jobDesc
        

In [23]:
#Getting jo info for the first 10 job listing

jobTitle = []
jobFirm = []
jobLocation = []
jobDesc = []

for index in range(1,11):
    print(f'Getting Job No. {index}')
    time.sleep(2)
    try:
        t ,f, l, d = jobInfo(index)
    
    except:
        print('Except block executed')
        continue
    
    else:
        time.sleep(3)
        jobTitle.append(t)
        jobFirm.append(f)
        jobLocation.append(l)
        jobDesc.append(d)
    print(f'\tSuccessfully collected Job {index} Info')

Getting Job No. 1
	Successfully collected Job 1 Info
Getting Job No. 2
	Successfully collected Job 2 Info
Getting Job No. 3
	Successfully collected Job 3 Info
Getting Job No. 4
	Successfully collected Job 4 Info
Getting Job No. 5
	Successfully collected Job 5 Info
Getting Job No. 6
	Successfully collected Job 6 Info
Getting Job No. 7
	Successfully collected Job 7 Info
Getting Job No. 8
	Successfully collected Job 8 Info
Getting Job No. 9
	Successfully collected Job 9 Info
Getting Job No. 10
	Successfully collected Job 10 Info


In [24]:
#Making a job dataframe
jobDf = pd.DataFrame()

jobDf['Title'] = jobTitle
jobDf['Firm'] = jobFirm
jobDf['Location'] = jobLocation
jobDf['Description'] = jobDesc

jobDf

Unnamed: 0,Title,Firm,Location,Description
0,Data Scientist / Data Analyst -Business Analyst,Inflexion Analytix Private Limited,"Mumbai, Hyderabad/Secunderabad, Pune, Gurgaon/...",Job Role : Data Scientist/Data Analyst /Busine...
1,"Senior Data Scientist, Modeling",Nielsen,"Kolkata, Gurgaon/Gurugram, Bangalore/Bengaluru...","We wont say we can predict the future, but our..."
2,Data Scientist - IBM Garage,IBM India Pvt. Limited,"Noida, Hyderabad/Secunderabad, Bangalore/Benga...",Full Job decription not available
3,Data Scientist,IBM India Pvt. Limited,"Noida, Hyderabad/Secunderabad, Bangalore/Benga...",Full Job decription not available
4,Senior Data Scientist - Credit risk,Scienaptic Systems,Bangalore/Bengaluru,Responsibilities and duties Focus on developin...
5,Big Data - Data Scientist,Xoriant Solutions Pvt Ltd,"Kochi/Cochin, Indore, Hyderabad/Secunderabad, ...",Full Job decription not available
6,Big Data - Data Scientist,Xoriant Solutions Pvt Ltd,"Kochi/Cochin, Indore, Hyderabad/Secunderabad, ...",Full Job decription not available
7,Lead Data Scientist,Intel Technology India Pvt Ltd,Bangalore/Bengaluru,We are seeking an outstanding Lead Data Scient...
8,Senior Data Scientist - Chatbot & NLP,Gojek Tech,Bangalore/Bengaluru,"What You Will DoWork with Data Scientists, Mac..."
9,Senior Data Scientist - Chatbot & NLP,GO-JEK India,Bangalore/Bengaluru,"What You Will DoWork with Data Scientists, M..."


In [25]:
driver.close()

###### 3. Data Scientist in Delhi/NCR with 3-6 LPA Naukri.com <a name = "3"></a>

In [39]:
driver = webdriver.Chrome('./chromedriver')
driver.get('https://www.naukri.com/')

In [40]:
jobName = 'Data Scientist'
jobSearchLocation = 'Delhi/NCR'

In [41]:
#Filling the job search bar
jobSearchBar = driver.find_element_by_xpath('//*[@id="qsb-keyword-sugg"]')
jobSearchBar.send_keys(jobName)

In [42]:
#Filling the job location bar
jobLocationBar = driver.find_element_by_xpath('//*[@id="qsb-location-sugg"]')
jobLocationBar.send_keys(jobSearchLocation)

In [43]:
#clicking the search button
jobSearchButton = driver.find_element_by_xpath('//*[@id="root"]/div[3]/div[2]/section/div/form/div[3]/button')
jobSearchButton.click()

In [44]:
#applying location filter to show job listings for Delhi/NCR
time.sleep(2)
driver.find_element_by_xpath('//*[@id="root"]/div[3]/div[2]/section[1]/div[2]/div[2]/div[2]/div[1]/label/p').click()

In [46]:
#applying location filter to show job listings for 3-6 lahks salary
time.sleep(2)
driver.find_element_by_xpath('//*[@id="root"]/div[3]/div[2]/section[1]/div[2]/div[3]/div[2]/div[2]/label/p').click()

In [47]:
#Getting the first 10 job title
jobTitle = []

for index in range(1,11):
    title = driver.find_element_by_xpath(f'//*[@id="root"]/div[3]/div[2]/section[2]/div[2]/article[{index}]/div[1]/div[1]/a')
    jobTitle.append(title.text)

jobTitle

['Data Scientist / Data Analyst -Business Analyst',
 'Data Scientist - High growth VC backed Influencer Marketplace',
 'Excellent opportunity For Data Scientist',
 'Data Scientist',
 'DATA Scientist – Gurgaon (Exp 3-6 years)',
 'DATA Scientist – Gurgaon (Exp 3-6 years)',
 'Data Scientist - Noida',
 "Data Scientist - Noida/ B'lore",
 'Data Scientist',
 'Data Scientist']

In [48]:
#Getting the first 10 hiring firms
jobFirm = []

for index in range(1,11):
    firm = driver.find_element_by_xpath(f'//*[@id="root"]/div[3]/div[2]/section[2]/div[2]/article[{index}]/div[1]/div[1]/div/a[1]')
    jobFirm.append(firm.text)

jobFirm

['Inflexion Analytix Private Limited',
 'Ravgins International Pvt. Ltd.',
 'NEC CORPORATION INDIA PRIVATE LIMITED',
 'Mobikwik',
 'CRESCENDO GLOBAL LEADERSHIP HIRING INDIA PRIVATE L IMITED',
 'CRESCENDO GLOBAL LEADERSHIP HIRING INDIA PRIVATE L IMITED',
 'Optum Global Solutions (India) Private Limited',
 'NEC CORPORATION INDIA PRIVATE LIMITED',
 'Cloudstrats Technologies Private Limited',
 'IBM India Pvt. Limited']

In [49]:
#Getting the first 10 job experience requirement

jobExp = []

for index in range(1,11):
    exp = driver.find_element_by_xpath(f'//*[@id="root"]/div[3]/div[2]/section[2]/div[2]/article[{index}]/div[1]/div/ul/li[1]/span')
    jobExp.append(exp.text.split()[0])
    
jobExp

['0-3', '3-5', '3-7', '3-5', '3-6', '3-6', '3-5', '3-8', '5-8', '4-9']

In [50]:
#Getting the first 10 job location

jobLocation = []

for index in range(1,11):
    location = driver.find_element_by_xpath(f'//*[@id="root"]/div[3]/div[2]/section[2]/div[2]/article[{index}]/div[1]/div[1]/ul/li[3]/span')
    jobLocation.append(location.text)

jobLocation

['Mumbai, Hyderabad/Secunderabad, Pune, Gurgaon/Gurugram, Chennai, Bangalore/Bengaluru',
 'Bangalore/Bengaluru, Delhi / NCR, Mumbai (All Areas)',
 'Noida, Bangalore/Bengaluru',
 'New Delhi, Gurgaon/Gurugram, Delhi / NCR',
 'Gurgaon/Gurugram, Delhi / NCR',
 'Gurgaon/Gurugram, Delhi / NCR',
 'Noida',
 'Noida, Bangalore/Bengaluru',
 'Bangalore/Bengaluru, Delhi / NCR, Mumbai (All Areas)',
 'Noida, Hyderabad/Secunderabad, Bangalore/Bengaluru']

In [51]:
#Making a job dataframe

analystJobsDelhiNCR = pd.DataFrame()

analystJobsDelhiNCR['Title'] = jobTitle
analystJobsDelhiNCR['Location'] = jobLocation
analystJobsDelhiNCR['Company Name'] = jobFirm
analystJobsDelhiNCR['Experience in Years'] = jobExp

analystJobsDelhiNCR

Unnamed: 0,Title,Location,Company Name,Experience in Years
0,Data Scientist / Data Analyst -Business Analyst,"Mumbai, Hyderabad/Secunderabad, Pune, Gurgaon/...",Inflexion Analytix Private Limited,0-3
1,Data Scientist - High growth VC backed Influen...,"Bangalore/Bengaluru, Delhi / NCR, Mumbai (All ...",Ravgins International Pvt. Ltd.,3-5
2,Excellent opportunity For Data Scientist,"Noida, Bangalore/Bengaluru",NEC CORPORATION INDIA PRIVATE LIMITED,3-7
3,Data Scientist,"New Delhi, Gurgaon/Gurugram, Delhi / NCR",Mobikwik,3-5
4,DATA Scientist – Gurgaon (Exp 3-6 years),"Gurgaon/Gurugram, Delhi / NCR",CRESCENDO GLOBAL LEADERSHIP HIRING INDIA PRIVA...,3-6
5,DATA Scientist – Gurgaon (Exp 3-6 years),"Gurgaon/Gurugram, Delhi / NCR",CRESCENDO GLOBAL LEADERSHIP HIRING INDIA PRIVA...,3-6
6,Data Scientist - Noida,Noida,Optum Global Solutions (India) Private Limited,3-5
7,Data Scientist - Noida/ B'lore,"Noida, Bangalore/Bengaluru",NEC CORPORATION INDIA PRIVATE LIMITED,3-8
8,Data Scientist,"Bangalore/Bengaluru, Delhi / NCR, Mumbai (All ...",Cloudstrats Technologies Private Limited,5-8
9,Data Scientist,"Noida, Hyderabad/Secunderabad, Bangalore/Benga...",IBM India Pvt. Limited,4-9


In [52]:
driver.close()

###### 4. Glassdoor Data Scientist Jobs in Noida <a name = "4"></a>

In [53]:
#Specifying the job search parameters
jobName = 'Data Scientist'
jobSearchLocation = 'Noida'

Note: Glassdoor requires users to sign up or be logged-in in order to perform job search. Attempts were made to automate the sign-up using a disposable email Id from <em><a href = 'https://www.temp-mail.org/'>this</a></em> website. However, it proved to be very difficult as the sign-up process requires a lot of inputs which needs to be selected from a dropdown menu and the drop-down menus are populated using some AJAX call so there is no way to find the menu items beforehand. 

An attempt to automatically sign-in to glassdoor using 'Sign-in with Google' was also tried. But Google doesn't recognise the Chrome browser when it is being controled using Selenium and blocks any sign-in attempts. 

The only way to bypass sign-in into a Glassdoor account and make job search on the website is to search on Google the phrase 'glassdoor' foloowed by job title and location. Example: 'Glassdoor data scientist noida'. The first Google search result will direct us to the make we require to scrape. This is exactly what is being done below.

In [54]:
#Making a Selenium Driver object for Google and opening the Google Homepage
driverGoogle = webdriver.Chrome('./chromedriver.exe')
driverGoogle.get(f'https://www.google.com/')
#Populating the Google Search Bar with our search term and clicking the search button
driverGoogle.find_element_by_xpath('/html/body/div[1]/div[3]/form/div[1]/div[1]/div[1]/div/div[2]/input').send_keys(f'glassdoor {jobName} {jobSearchLocation}')
driverGoogle.find_element_by_xpath('/html/body/div[1]/div[3]/form/div[1]/div[1]/div[3]/center/input[1]').click()

#Getting the link to Glassdoor from Google Search Results
glassdoorLink = driverGoogle.find_element_by_xpath('//*[@id="rso"]/div[1]/div/div/div[1]/a').get_attribute('href')
#Closing the Google window
driverGoogle.close()

In [55]:
#Making a Selenium Driver object for Glassdoor and opening the Glassdoor page using the link obtained from Google
driverGlassdoor = webdriver.Chrome('./chromedriver')
driverGlassdoor.get(glassdoorLink)

In [58]:
#Getting the first job listing fomr Glassdoor and inspecting it structure so that we can use for loop later to 
#scrape all the other job listings
jobListTile = driverGlassdoor.find_element_by_xpath('//*[@id="MainCol"]/div[1]/ul/li[1]')

s = jobListTile.text.split('\n') #//*[@id="MainCol"]/div[1]/ul/li[2]

s

['3.8',
 'Biz2Credit Inc',
 'Data Scientist',
 'Noida',
 'Easy Apply',
 '30d+',
 'Continuously evaluate alternative data sources and structures to document and improve the efficacy of our customer conversion models and processes.…']

In [59]:
#Making empty list for firm, ratings and no. of days ago the job was posted
#Note: The listings with missing Company Ratings are skipped
rating = []
firm = []
jobPosted = []
      
for i in range(1,20):
    jobListTile = driverGlassdoor.find_element_by_xpath(f'//*[@id="MainCol"]/div[1]/ul/li[{i}]').text.split('\n')
    if len(jobListTile[0]) > 3:
        continue
    else:
        rating.append(jobListTile[0])
        firm.append(jobListTile[1])
        jobPosted.append(jobListTile[-2])

In [60]:
#Keeping only the first 10 job details
rating = rating[0:10]
firm = firm[0:10]
jobPosted = jobPosted[0:10]

In [61]:
#making a dataframe with al job details and displaying them
glassdoorDSNoida = pd.DataFrame()

glassdoorDSNoida['Firm Name'] = firm
glassdoorDSNoida['Rating'] = rating
glassdoorDSNoida['Posted (hours/days ago)'] = jobPosted

glassdoorDSNoida

Unnamed: 0,Firm Name,Rating,Posted (hours/days ago)
0,Biz2Credit Inc,3.8,30d+
1,Ericsson,4.1,11d
2,Techlive,5.0,Noida
3,CRMNEXT,3.6,Noida
4,CRMNEXT,3.6,Noida
5,WishFin,3.8,Noida
6,Adobe,4.4,Noida
7,UnitedHealth Group,3.6,Gurgaon
8,dunnhumby,4.1,3d
9,Nitesh Kumar,3.8,7d


In [62]:
driverGlassdoor.close()

######  5. Data Scientist in Noida Salary Data from Glassboor <a name = "5"></a>

In [63]:
driverGlassdoor = webdriver.Chrome('./chromedriver')
driverGlassdoor.get('https://www.glassdoor.co.in/Salaries/index.htm')

In [64]:
#filling the job title bar
driverGlassdoor.find_element_by_xpath('//*[@id="KeywordSearch"]').send_keys('Data Scientist')

In [65]:
#clearing any existing entry and filling the job location bar
driverGlassdoor.find_element_by_xpath('//*[@id="LocationSearch"]').clear()
driverGlassdoor.find_element_by_xpath('//*[@id="LocationSearch"]').send_keys('Noida')

In [66]:
#Clicking the search button
driverGlassdoor.find_element_by_xpath('//*[@id="HeroSearchButton"]').click()

In [67]:
#Getting th first listing from Glassdoor and inspecting it structure so that we can use for loop later to 
#scrape all the other job listings
time.sleep(2) #Will throw error if executed before the page loads. This line ensures we wait for 2 seconds
s = driverGlassdoor.find_element_by_css_selector('#SalariesByCompany > div.module.mb-0 > div:nth-child(4) > div')

In [68]:
#studying the structure of the first listing
s.text.split('\n')

['Data Scientist',
 'Tata Consultancy Services',
 '16 salaries',
 'See 128 salaries from all locations',
 '₹ 6,11,228',
 '/yr',
 '₹343K',
 '₹1,095K']

In [69]:
#Using for loop to get all salary related information from all listing 

firm = []
salaryNum = []
avgSalary = []
minSalary = []
maxSalary = []

for index in range(4, 20): #listing states from 4th child. So the loop starts from 4
    jobListTile = driverGlassdoor.find_element_by_css_selector(f'#SalariesByCompany > div.module.mb-0 > div:nth-child({index}) > div').text.split('\n')
    try:
        print(f'interation {index-3}')
        if len(jobListTile[2])> 11:
            pass
        else:
            firm.append(jobListTile[1])
            salaryNum.append(jobListTile[2])
            avgSalary.append(jobListTile[4])
            minSalary.append(jobListTile[-2])
            maxSalary.append(jobListTile[-1])
    except IndexError:
        continue
    

interation 1
interation 2
interation 3
interation 4
interation 5
interation 6
interation 7
interation 8
interation 9
interation 10
interation 11
interation 12
interation 13
interation 14
interation 15
interation 16


In [70]:
#Making a dataframe of the scraped data and keeping only the first 10 salary data
dsNoidaSalaryData = pd.DataFrame()

dsNoidaSalaryData['Firm'] = firm[0:10]
dsNoidaSalaryData['No. of Salary'] = salaryNum[0:10]
dsNoidaSalaryData['Average Salary'] = avgSalary[0:10]
dsNoidaSalaryData['Minimum Salary'] = minSalary[0:10]
dsNoidaSalaryData['Maximum Salary'] = maxSalary[0:10]

dsNoidaSalaryData

Unnamed: 0,Firm,No. of Salary,Average Salary,Minimum Salary,Maximum Salary
0,Tata Consultancy Services,16 salaries,"₹ 6,11,228",₹343K,"₹1,095K"
1,Accenture,14 salaries,"₹ 11,46,533",₹577K,"₹2,213K"
2,IBM,14 salaries,"₹ 8,97,795",₹586K,"₹2,730K"
3,Ericsson-Worldwide,14 salaries,"₹ 7,38,057",₹355K,"₹1,613K"
4,Delhivery,14 salaries,"₹ 12,39,781",₹450K,"₹11,622K"
5,UnitedHealth Group,11 salaries,"₹ 13,36,142","₹1,069K","₹1,520K"
6,Valiance Solutions,9 salaries,"₹ 8,15,192",₹502K,"₹1,465K"
7,EXL Service,8 salaries,"₹ 11,44,243",₹575K,"₹1,520K"
8,Innovaccer,8 salaries,"₹ 12,07,110",₹620K,"₹1,695K"
9,Cognizant Technology Solutions,6 salaries,"₹ 10,07,410",₹792K,"₹1,263K"


In [71]:
driverGlassdoor.close()

###### 6. Flipkart Sunglasses Data <a name = "6"></a>

In [72]:
driverFlipkart = webdriver.Chrome('./chromedriver.exe')
driverFlipkart.get('https://www.flipkart.com')

In [73]:
#Filling the search bar and clicking search
searchTerm = 'sunglasses'

driverFlipkart.find_element_by_xpath('/html/body/div[2]/div/div/button').click() #Dismissing flipkart login/offer pop-up
driverFlipkart.find_element_by_xpath('//*[@id="container"]/div/div[1]/div[1]/div[2]/div[2]/form/div/div/input').send_keys(searchTerm)
driverFlipkart.find_element_by_xpath('//*[@id="container"]/div/div[1]/div[1]/div[2]/div[2]/form/div/button').click()

In [76]:
#Taking the first element and inspecting the structure to see what combination of commands produces the desired output
#Then we will use these combination 
time.sleep(2)
listings = driverFlipkart.find_elements_by_class_name('_2B099V')

In [77]:
a = listings[2].text.split('\n')
a

['Fastrack',
 'UV Protection Wayfarer Sunglasses (Free Size)',
 '₹758₹89915% off',
 'Or Pay ₹720 + ',
 ' 38',
 'Buy 3 items, save extra 10%']

In [78]:
a[0]

'Fastrack'

In [79]:
a[1]

'UV Protection Wayfarer Sunglasses (Free Size)'

In [80]:
a[2].split('₹')[1]

'758'

In [81]:
a[2].split('₹')[2].split()[-2][-3:-1]

'15'

In [82]:
def sunglassScrapper():
    '''This function scrapes all the listings in Flipkarts page and then goes to the next page by clicking the Next button'''
    brand = []
    proDesc = []
    price = []
    discount = []
    sunglassListing = driverFlipkart.find_elements_by_class_name('_2B099V')
    for index in range(len(sunglassListing)):
        sunglassInfo = sunglassListing[index].text.split('\n')
        brand.append(sunglassInfo[0])
        proDesc.append(sunglassInfo[1])
        price.append(sunglassInfo[2].split('₹')[1])
        discount.append(sunglassInfo[2].split('₹')[2].split()[-2][-3:-1])
    
    #Clicking the next button and waiting for 2 seconds
    try:
        #This block is for page 2 and subsiquent pages where there is 'Previous' and 'Next' buttons both of which
        #have the class '_1LKTO3'
        driverFlipkart.find_elements_by_class_name('_1LKTO3')[1].click()
    except:
        #This exception block is for the first page only where there is only 'Next' button
        driverFlipkart.find_element_by_class_name('_1LKTO3').click()
        
    time.sleep(2)
        
    return brand, proDesc, price, discount

In [83]:
#Using the function above in a for loop to scrap the data from first 3 pages

time.sleep(2)
sunglassBrand = []
sunglassDesc = []
sunglassPrice = []
sunglassDiscount = []

for index in range(0,3):
    b, d, p, dis = sunglassScrapper()
    sunglassBrand.extend(b)
    sunglassDesc.extend(d)
    sunglassPrice.extend(p)
    sunglassDiscount.extend(dis)
    print(f'Product Page {index + 1} scrapped')

Product Page 1 scrapped
Product Page 2 scrapped
Product Page 3 scrapped


In [84]:
#making a dataframe and keeping the first 100 sunglass information

sunglassInfo = pd.DataFrame()

sunglassInfo['Brand'] = sunglassBrand[0:100]
sunglassInfo['Description'] = sunglassDesc[0:100]
sunglassInfo['Price'] = sunglassPrice[0:100]
sunglassInfo['Discount %'] = sunglassDiscount[0:100]

sunglassInfo

Unnamed: 0,Brand,Description,Price,Discount %
0,AISLIN,"UV Protection, Gradient Round, Shield Sunglass...",395,77
1,ROYAL SON,UV Protection Retro Square Sunglasses (49),664,66
2,Fastrack,UV Protection Wayfarer Sunglasses (Free Size),758,15
3,Fastrack,UV Protection Rectangular Sunglasses (Free Size),630,21
4,PHENOMENAL,UV Protection Retro Square Sunglasses (Free Size),399,80
...,...,...,...,...
95,AISLIN,"UV Protection, Gradient Cat-eye Sunglasses (58)",511,66
96,ROZZETTA CRAFT,"UV Protection, Gradient Rectangular Sunglasses...",449,77
97,Silver Kartz,UV Protection Aviator Sunglasses (62),269,77
98,ROZZETTA CRAFT,"UV Protection, Gradient Round Sunglasses (Free...",449,87


In [85]:
driverFlipkart.close()

###### 7. iPhone 11 Reviews <a name = "7"></a>

In [86]:
iPhoneDriver = webdriver.Chrome('./chromedriver.exe')
iPhoneDriver.get('https://www.flipkart.com/apple-iphone-11-black-64-gb-includes-earpods-power-adapter/p/itm0f37c2240b217?pid=MOBFKCTSVZAXUHGR&lid=LSTMOBFKCTSVZAXUHGREPBFGI&marketplace')

In [87]:
#Clicking the all reviews button to go to full review page
iPhoneDriver.find_element_by_xpath('//*[@id="container"]/div/div[3]/div[1]/div[2]/div[9]/div/div/div[5]/div/a/div').click()

In [88]:
#Checking the first review to study its structure
reviews = iPhoneDriver.find_elements_by_class_name('_2wzgFH')
a =reviews[0].text.split('\n')

In [89]:
a

['5',
 'Brilliant',
 'The Best Phone for the Money',
 '',
 'The iPhone 11 offers superb cameras, a more durable design and excellent battery life for an affordable price.',
 '',
 'Compelling ultra-wide camera',
 'New Night mode is excellent',
 'Long battery life',
 'Ankit',
 'Certified Buyer, Gurgaon',
 'Sep, 2019',
 '6362927']

In [90]:
' '.join(a[2:9:]).replace('  ', ' ')

'The Best Phone for the Money The iPhone 11 offers superb cameras, a more durable design and excellent battery life for an affordable price. Compelling ultra-wide camera New Night mode is excellent Long battery life'

In [91]:
len(a)-4

9

In [94]:
def iPhoneReviewScrapper():
    '''This function scrapes all the iPhone reviews from Flipkarts and then goes to the next page by clicking the Next button'''
    rating = []
    summary = []
    review = []
    
    reviewList = iPhoneDriver.find_elements_by_class_name('_2wzgFH')
    for index in range(len(reviewList)):
        reviewItems = reviewList[index].text.split('\n')
        rating.append(reviewItems[0])
        summary.append(reviewItems[1])
        review.append(' '.join(reviewItems[2:(len(reviewItems)-4)]).replace('  ', ' '))
    
    #Clicking the next button and waiting for 2 seconds
    try:
        #This block is for page 2 and subsiquent pages where there is 'Previous' and 'Next' buttons both of which
        #have the class '_1LKTO3'
        iPhoneDriver.find_elements_by_class_name('_1LKTO3')[1].click()
    except:
        #This exception block is for the first page only where there is only 'Next' button
        iPhoneDriver.find_element_by_class_name('_1LKTO3').click()
    
    time.sleep(2)
        
    return rating, summary, review

In [95]:
#Scrapping iPhone reviews for the first 10 pages

time.sleep(2)

iPhoneRating = []
iPhoneSummary = []
iPhoneReview = []

for index in range(0, 10):
    r, s, re = iPhoneReviewScrapper()
    iPhoneRating.extend(r)
    iPhoneSummary.extend(s)
    iPhoneReview.extend(re)
    print(f'Review Page {index+1} scrapped')

Review Page 1 scrapped
Review Page 2 scrapped
Review Page 3 scrapped
Review Page 4 scrapped
Review Page 5 scrapped
Review Page 6 scrapped
Review Page 7 scrapped
Review Page 8 scrapped
Review Page 9 scrapped
Review Page 10 scrapped


In [96]:
#Making a dataframe of all reviews

iPhoneReviewsDf = pd.DataFrame()

iPhoneReviewsDf['Review Title'] = iPhoneSummary
iPhoneReviewsDf['Rating'] = iPhoneRating
iPhoneReviewsDf['Full Review'] = iPhoneReview

iPhoneReviewsDf

Unnamed: 0,Review Title,Rating,Full Review
0,Brilliant,5,The Best Phone for the Money The iPhone 11 off...
1,Perfect product!,5,Amazing phone with great cameras and better ba...
2,Great product,5,Amazing Powerful and Durable Gadget. I’m am ve...
3,Worth every penny,5,Previously I was using one plus 3t it was a gr...
4,Good choice,4,So far it’s been an AMAZING experience coming ...
...,...,...,...
95,Perfect product!,5,Best and amazing product.....phone looks so pr...
96,Must buy!,5,I rate this product 5* as it has got amazing u...
97,Fabulous!,5,I purchased the iPhone 11 a month back. I must...
98,Fabulous!,5,Product is nice at the deviled time the delive...


In [97]:
iPhoneDriver.close()

###### 8. Flipkart Sneakers Data <a name = "8"></a>

In [98]:
sneakerDriver = webdriver.Chrome('./chromedriver.exe')
sneakerDriver.get('https://www.flipkart.com')

In [99]:
#Filling the search bar and clicking search

searchTerm = 'snearkers'

sneakerDriver.find_element_by_xpath('/html/body/div[2]/div/div/button').click() #dismissing flipkart offer/login pop-up
sneakerDriver.find_element_by_xpath('//*[@id="container"]/div/div[1]/div[1]/div[2]/div[2]/form/div/div/input').send_keys(searchTerm)
sneakerDriver.find_element_by_xpath('//*[@id="container"]/div/div[1]/div[1]/div[2]/div[2]/form/div/button').click()

In [100]:
#Taking the first element and inspecting the structure to see what combination of commands produces the desired output
#Then we will use these combination 
time.sleep(2)
listings = sneakerDriver.find_elements_by_class_name('_2B099V')

In [101]:
a = listings[0].text.split('\n')
a

['Robbie jones',
 'Casual Sneakers Shoes For Men Sneakers For Men',
 '₹399₹99960% off',
 'Or Pay ₹379 + ',
 ' 20']

In [102]:
a[0]

'Robbie jones'

In [103]:
a[1]

'Casual Sneakers Shoes For Men Sneakers For Men'

In [104]:
a[2].split('₹')[1]

'399'

In [105]:
a[2].split('₹')[2].split()[-2][-3:-1]

'60'

In [106]:
def sneakerScrapper():
    '''This function scrapes all the listings in Flipkarts page and then goes to the next page by clicking the Next button'''
    brand = []
    proDesc = []
    price = []
    discount = []
    time.sleep(2)
    sneakerListing = sneakerDriver.find_elements_by_class_name('_2B099V')
    for index in range(len(sneakerListing)):
        try:
            sneakerInfo = sneakerListing[index].text.split('\n')
            brand.append(sneakerInfo[0])
            proDesc.append(sneakerInfo[1])
            price.append(sneakerInfo[2].split('₹')[1])
            discount.append(sneakerInfo[2].split('₹')[2].split()[-2][-3:-1])
        except IndexError:
            continue
    
    #Clicking the next button and waiting for 2 seconds
    try:
        #This block is for page 2 and subsiquent pages where there is 'Previous' and 'Next' buttons both of which
        #have the class '_1LKTO3'
        sneakerDriver.find_elements_by_class_name('_1LKTO3')[1].click()
    except:
        #This exception block is for the first page only where there is only 'Next' button
        sneakerDriver.find_element_by_class_name('_1LKTO3').click()
        
    time.sleep(2)
        
    return brand, proDesc, price, discount

In [107]:
#Scrapping sneakers data from the first 3 pages
time.sleep(2)

sneakerBrand = []
sneakerDesc = []
sneakerPrice = []
sneakerDiscount = []

for index in range(0,3):
    b, d, p, dis = sneakerScrapper()
    sneakerBrand.extend(b)
    sneakerDesc.extend(d)
    sneakerPrice.extend(p)
    sneakerDiscount.extend(dis)
    print(f'Product Page {index + 1} scrapped')

Product Page 1 scrapped
Product Page 2 scrapped
Product Page 3 scrapped


In [108]:
#Making a dataframe with sneaker data and keeping the first 100 sneaker info

sneakerInfo = pd.DataFrame()

sneakerInfo['Brand'] = sneakerBrand[0:100]
sneakerInfo['Description'] = sneakerDesc[0:100]
sneakerInfo['Price'] = sneakerPrice[0:100]
sneakerInfo['Discount %'] = sneakerDiscount[0:100]

sneakerInfo

Unnamed: 0,Brand,Description,Price,Discount %
0,Robbie jones,Casual Sneakers Shoes For Men Sneakers For Men,399,60
1,aadi,Sneakers For Men,298,70
2,HOTSTYLE,Sneakers Sneakers For Men,283,43
3,World Wear Footwear,New Latest Affordable Range of Combo Pack of 2...,498,75
4,Numenzo,Sneakers For Men,398,60
...,...,...,...,...
95,Camfoot,Combo Pack of 2 Latest Collection Stylish Casu...,328,52
96,Adies,Sneakers For Men,375,50
97,Robbie jones,Sneakers For Men,474,69
98,Longwalk,Men Boxer Sneakers For Men,249,76


In [109]:
sneakerDriver.close()

###### 9. Myntra Black Shoes <a name = "9"></a>

In [127]:
myntraDriver = webdriver.Chrome('./chromedriver.exe')
myntraDriver.get('https://www.myntra.com/shoes')

In [128]:
#applying the colours filter
myntraDriver.find_element_by_xpath('//*[@id="mountRoot"]/div/div[1]/main/div[3]/div[1]/section/div/div[6]/ul/li[1]').click()

time.sleep(2)

In [129]:
#applying the price filter
myntraDriver.find_element_by_xpath('//*[@id="mountRoot"]/div/div[1]/main/div[3]/div[1]/section/div/div[5]/ul/li[2]/label').click()

In [137]:
#Getting page 1 shoe informations
time.sleep(2)
page1shoes = myntraDriver.find_elements_by_class_name('product-productMetaInfo')
len(page1shoes)

50

In [138]:
a = page1shoes[0].text.split('\n')
a

['Skechers', 'Men VIPER COMPETITOR Training', 'Rs. 6999', 'Only Few Left!']

In [139]:
a[2].split('Rs.')[1].replace(' ', '')

'6999'

In [140]:
#making three empty list and adding the brand, description and price information there

brand = []
shortDesc = []
price = []

for index in range(len(page1shoes)):
    print(f'Getting Product {index+1}')
    shoeInfo = page1shoes[index].text.split('\n')
    brand.append(shoeInfo[0])
    shortDesc.append(shoeInfo[1])
    price.append(shoeInfo[2].split('Rs.')[1].replace(' ', ''))
    print(f'Done for Product {index+1}')

Getting Product 1
Done for Product 1
Getting Product 2
Done for Product 2
Getting Product 3
Done for Product 3
Getting Product 4
Done for Product 4
Getting Product 5
Done for Product 5
Getting Product 6
Done for Product 6
Getting Product 7
Done for Product 7
Getting Product 8
Done for Product 8
Getting Product 9
Done for Product 9
Getting Product 10
Done for Product 10
Getting Product 11
Done for Product 11
Getting Product 12
Done for Product 12
Getting Product 13
Done for Product 13
Getting Product 14
Done for Product 14
Getting Product 15
Done for Product 15
Getting Product 16
Done for Product 16
Getting Product 17
Done for Product 17
Getting Product 18
Done for Product 18
Getting Product 19
Done for Product 19
Getting Product 20
Done for Product 20
Getting Product 21
Done for Product 21
Getting Product 22
Done for Product 22
Getting Product 23
Done for Product 23
Getting Product 24
Done for Product 24
Getting Product 25
Done for Product 25
Getting Product 26
Done for Product 26
Gett

In [141]:
page1shoes[0].text

'Skechers\nMen VIPER COMPETITOR Training\nRs. 6999\nOnly Few Left!'

In [142]:
myntraDriver.find_element_by_xpath('//*[@id="desktopSearchResults"]/div[2]/section/div[2]/ul/li[12]/a').click()

In [143]:
#Getting page 2 shoe informations and adding the brand, description and price information to ther existing list of brand, descriptipn  and price
page2shoes = myntraDriver.find_elements_by_class_name('product-productMetaInfo')

time.sleep(2)

for index in range(len(page2shoes)):
    shoeInfo = page2shoes[index].text.split('\n')
    brand.append(shoeInfo[0])
    shortDesc.append(shoeInfo[1])
    price.append(shoeInfo[2].split('Rs.')[1].replace(' ', ''))

In [144]:
myntraDriver.find_element_by_xpath('//*[@id="desktopSearchResults"]/div[2]/section/div[2]/ul/li[13]/a').click()

In [145]:
#Getting page 3 shoe informations and adding the brand, description and price information to ther existing list of brand, descriptipn  and price

page3shoes = myntraDriver.find_element_by_class_name('product-productMetaInfo').text.split('\n')

brand.append(page3shoes[0])
shortDesc.append(page3shoes[1])
price.append(page3shoes[2].split('Rs.')[1].replace(' ', ''))

In [146]:
#making a dataframe with all shoes

myntraSneakersDf = pd.DataFrame()

myntraSneakersDf['Brand'] = brand
myntraSneakersDf['Price'] = price
myntraSneakersDf['Description'] = shortDesc

myntraSneakersDf

Unnamed: 0,Brand,Price,Description
0,Skechers,6999,Men VIPER COMPETITOR Training
1,Nike,12995,Men KD13 EP Basketball Shoes
2,PUMA Motorsport,7999,Unisex Mercedes Running Shoes
3,Nike,10846,Men React Infinity Running
4,Nike,7721,Women AIR ZOOM Running Shoes
...,...,...,...
96,Kenneth Cole,9513,Women Solid Flat Boots
97,ADIDAS,12749,Men Ultraboost DNA CC_1 Shoes
98,UNDER ARMOUR,7999,Men Charged Impulse Running
99,Ruosh,8990,Men Textured Derbys


In [147]:
myntraDriver.close()

###### 10. Amazon Laptops <a name = "10"></a>

Note: Amazon has a very dynamic website and it changes with each session. Sometimes all the products dont have the same information, sometimes they do not follow the usual patter which causes the code to crash. In such a case, please re-run the code after some time. Also, sometimes the i9 filter is not shown. In this case, close the session and re-run the code until the filter shows up.

In [2]:
driverAmazon = webdriver.Chrome('./chromedriver.exe')
driverAmazon.get('https://www.amazon.in')

In [3]:
#filling the search bar
driverAmazon.find_element_by_xpath('//*[@id="twotabsearchtextbox"]').send_keys('Laptop')

In [4]:
#clicking the search button
driverAmazon.find_element_by_xpath('//*[@id="nav-search-submit-button"]').click()

In [5]:
#applying i7 filter
time.sleep(3)
driverAmazon.find_element_by_xpath('//*[@id="p_n_feature_thirteen_browse-bin/12598163031"]/span').click()

In [19]:
#checking the first listing to study the structure
i7 = driverAmazon.find_element_by_xpath('//*[@id="search"]/div[1]/div/div[1]/div/span[3]/div[2]/div[4]/div/span/div/div/div[2]/div[2]/div').text.split('\n')
i7

['HP 14 Thin & Light 14" (35.56cms) FHD Laptop (11th Gen Intel i7-1165G7/8GB/512GB SSD/Windows 10/MS Office 2019/Alexa Built-in/Pale Gold/1.47 kg), 14s-dr2007TU',
 '4']

In [15]:
i7[0]

'Lenovo IdeaPad 3 Laptop, 15.6" HD Touch Screen, Intel Core i7-1065G7, 4GB Soldered DDR4 + 4GB SO-DIMM DDR4, 256GB SSD M.2 2242 PCIe 3.0x2, Win 10 - Platinum Grey'

In [16]:
i7[2]

'₹99,138'

In [17]:
rating = driverAmazon.find_element_by_xpath('//*[@id="search"]/div[1]/div/div[1]/div/span[3]/div[2]/div[2]/div/span/div/div/div[2]/div[2]/div/div[2]/div/span[1]/span/a/i[1]/span').get_attribute('innerHTML')
rating

'5.0 out of 5 stars'

In [34]:
#Getting all the laptop listing
#This code ignores listings which do not have all the necessary information

time.sleep(3)

title = []
price = []
rating = []

for index in range(2,30):
    try:
        print(f'Getting Product {index-1}')
        i7 = driverAmazon.find_element_by_xpath(f'//*[@id="search"]/div[1]/div/div[1]/div/span[3]/div[2]/div[{index}]/div/span/div').text.split('\n')
        i7Rating = driverAmazon.find_element_by_xpath(f'//*[@id="search"]/div[1]/div/div[1]/div/span[3]/div[2]/div[{index}]/div/span/div/div/div[2]/div[2]/div/div[2]/div/span[1]/span/a/i[1]/span').get_attribute('innerHTML')
        title.append(i7[0])
        price.append(i7[2])
        rating.append(i7Rating[0:3])
    except IndexError:
        title.pop()
        continue
    
    except NoSuchElementException:
        continue
    

Getting Product 1
Getting Product 2
Getting Product 3
Getting Product 4
Getting Product 5
Getting Product 6
Getting Product 7
Getting Product 8
Getting Product 9
Getting Product 10
Getting Product 11
Getting Product 12
Getting Product 13
Getting Product 14
Getting Product 15
Getting Product 16
Getting Product 17
Getting Product 18
Getting Product 19
Getting Product 20
Getting Product 21
Getting Product 22
Getting Product 23
Getting Product 24
Getting Product 25
Getting Product 26
Getting Product 27
Getting Product 28


In [35]:
len(title)

9

In [36]:
len(rating)

9

In [37]:
len(price)

9

In [38]:
#making a dataframe with i7 laptop information

i7laptopDf = pd.DataFrame()

i7laptopDf['Name'] = title
i7laptopDf['Price'] = price
i7laptopDf['Rating'] = rating

i7laptopDf

Unnamed: 0,Name,Price,Rating
0,"Asus ROG Zephyrus S Ultra Slim Gaming Laptop, ...","₹3,43,099",5.0
1,"Lenovo IdeaPad 3 Laptop, 15.6"" HD Touch Screen...","₹99,138",5.0
2,(Renewed) Dell Latitude E7240 12.5-inch Laptop...,"₹39,799",3.9
3,"2019 HP 17.3"" HD+ Touchscreen Laptop Computer","₹2,01,524",4.5
4,(Renewed) Lenovo Intel 4th Gen Core i7-4980HQ ...,"₹46,290",3.0
5,Lenovo Ideapad 720s Intel Core i7 8th Gen 13.3...,"₹89,990",3.1
6,CUK VivoBook K571 by ASUS 15 Inch Gaming Lapto...,"₹1,44,643",5.0
7,"Dell G3 3500 Gaming15.6"" (39.62cms) FHD Laptop...","₹82,490",4.1
8,Lenovo IdeaPad Gaming 3 10th Gen Intel Core i7...,"₹84,200",4.3


In [42]:
#closing the browers
driverAmazon.close()

In [49]:
#Starting a new session and seraching for laptop
time.sleep(2)
driverAmazon = webdriver.Chrome('./chromedriver.exe')
driverAmazon.get('https://www.amazon.in')

In [50]:
driverAmazon.find_element_by_xpath('//*[@id="twotabsearchtextbox"]').send_keys('Laptop')

In [51]:
driverAmazon.find_element_by_xpath('//*[@id="nav-search-submit-button"]').click()

In [53]:
#Applying the i9 filter
time.sleep(3)
driverAmazon.find_element_by_xpath('//*[@id="p_n_feature_thirteen_browse-bin/16757432031"]/span/a/span').click()

In [54]:
#Getting all i9 laptop information
time.sleep(3)

i9title = []
i9price = []
i9rating = []

for index in range(2,30):
    try:
        print(f'Getting Product {index-1}')
        i9 = driverAmazon.find_element_by_xpath(f'//*[@id="search"]/div[1]/div/div[1]/div/span[3]/div[2]/div[{index}]/div/span/div').text.split('\n')
        i9Rating = driverAmazon.find_element_by_xpath(f'//*[@id="search"]/div[1]/div/div[1]/div/span[3]/div[2]/div[{index}]/div/span/div/div/div[2]/div[2]/div/div[2]/div/span[1]/span/a/i[1]/span').get_attribute('innerHTML')
        i9title.append(i9[0])
        i9price.append(i9[2])
        i9rating.append(i9Rating[0:3])
    except IndexError:
        i9title.pop()
        continue
    
    except NoSuchElementException:
        continue
    

Getting Product 1
Getting Product 2
Getting Product 3
Getting Product 4
Getting Product 5
Getting Product 6
Getting Product 7
Getting Product 8
Getting Product 9
Getting Product 10
Getting Product 11
Getting Product 12
Getting Product 13
Getting Product 14
Getting Product 15
Getting Product 16
Getting Product 17
Getting Product 18
Getting Product 19
Getting Product 20
Getting Product 21
Getting Product 22
Getting Product 23
Getting Product 24
Getting Product 25
Getting Product 26
Getting Product 27
Getting Product 28


In [57]:
i9price

['₹2,15,990', '₹2,14,990', '₹2,65,788', '₹2,69,900']

In [58]:
i9title

['Apple MacBook Pro (16-inch, 16GB RAM, 1TB Storage, 2.3GHz 9th Gen Intel Core i9) - Space Grey',
 'ASUS ROG Strix Scar 15 (2020), 15.6" FHD 300Hz/3ms, Intel Core i9-10980HK 10th Gen, RTX 2070 Super GDDR6 8GB Graphics, Gaming Laptop (32GB/2TB RAID 0 SSD/Windows 10/Black/2.35 Kg), G532LWS-HF091T',
 'Lenovo Legion 7i 10thGen Intel i9 15.6" FHD Gaming Laptop(16GB/1TB SSD/Win10/MSO2019/144 Hz/RTX 2080 8GB GDDR6)81YU006HIN+Legion H300 Stereo Gaming Headset+Lenovo Ideapad M100 RGB Gaming Mouse 3200DPI',
 'HP Z4 Workstation, Intel Core i9-7900X, RAM 32GB (4x8GB), 2TB Hard Disk, GFX NVIDIA Quadro P2000 5GB (4) DP, Windows 10 Pro, DVDRW, 3 Yrs Onsite Warranty by HP']

In [59]:
i9rating

['4.2', '1.0', '3.7', '5.0']

In [61]:
#making a dataframe with i9 laptop information

i9laptopDf = pd.DataFrame()

i9laptopDf['Name'] = i9title
i9laptopDf['Price'] = i9price
i9laptopDf['Rating'] = i9rating

i9laptopDf

Unnamed: 0,Name,Price,Rating
0,"Apple MacBook Pro (16-inch, 16GB RAM, 1TB Stor...","₹2,15,990",4.2
1,"ASUS ROG Strix Scar 15 (2020), 15.6"" FHD 300Hz...","₹2,14,990",1.0
2,"Lenovo Legion 7i 10thGen Intel i9 15.6"" FHD Ga...","₹2,65,788",3.7
3,"HP Z4 Workstation, Intel Core i9-7900X, RAM 32...","₹2,69,900",5.0


In [62]:
#Merging the i7 and i9 information in to one dataframe

laptopDf = i7laptopDf.append(i9laptopDf, ignore_index=True)
laptopDf

Unnamed: 0,Name,Price,Rating
0,"Asus ROG Zephyrus S Ultra Slim Gaming Laptop, ...","₹3,43,099",5.0
1,"Lenovo IdeaPad 3 Laptop, 15.6"" HD Touch Screen...","₹99,138",5.0
2,(Renewed) Dell Latitude E7240 12.5-inch Laptop...,"₹39,799",3.9
3,"2019 HP 17.3"" HD+ Touchscreen Laptop Computer","₹2,01,524",4.5
4,(Renewed) Lenovo Intel 4th Gen Core i7-4980HQ ...,"₹46,290",3.0
5,Lenovo Ideapad 720s Intel Core i7 8th Gen 13.3...,"₹89,990",3.1
6,CUK VivoBook K571 by ASUS 15 Inch Gaming Lapto...,"₹1,44,643",5.0
7,"Dell G3 3500 Gaming15.6"" (39.62cms) FHD Laptop...","₹82,490",4.1
8,Lenovo IdeaPad Gaming 3 10th Gen Intel Core i7...,"₹84,200",4.3
9,"Apple MacBook Pro (16-inch, 16GB RAM, 1TB Stor...","₹2,15,990",4.2


In [63]:
driverAmazon.close()