## Scraping job details from the LinkedIn job portal

In [1]:
# importing modules
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

In [2]:
# Set up the Firefox web driver.
# NOTE: the flags previously passed here (--window-size, --no-sandbox, --disable-gpu,
# --disable-software-rasterizer, --disable-notifications, --verbose) are Chromium
# switches that Firefox does not act on, so the Firefox equivalents are used instead.
firefox_options = webdriver.FirefoxOptions()

# Firefox takes the initial window size as separate width / height arguments
firefox_options.add_argument("--width=1920")
firefox_options.add_argument("--height=1080")

# Web notifications are controlled by a Firefox preference, not a command-line flag
firefox_options.set_preference("dom.webnotifications.enabled", False)

driver = webdriver.Firefox(options=firefox_options)

# Open the LinkedIn landing page; the search flow starts from here
driver.get("https://www.linkedin.com/")

In [3]:
# Navigate to the job-search page by clicking the jobs link in the home-page nav bar.
# Wait until the link is clickable rather than assuming the page has finished
# rendering; WebDriverWait raises TimeoutException if that takes longer than 10 s.
job_icon = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.XPATH, "/html/body/nav/ul/li[4]/a"))
)
job_icon.click()

In [4]:
# Locate the controls used to search jobs by title and by a particular location.
# The page is reached through a click, so wait (up to 10 s per control) for each
# element to be present in the DOM instead of looking it up immediately.
search_form_wait = WebDriverWait(driver, 10)

job_title = search_form_wait.until(
    EC.presence_of_element_located((By.XPATH, "//input[@id='job-search-bar-keywords']"))
)
job_location = search_form_wait.until(
    EC.presence_of_element_located((By.XPATH, "//input[@id='job-search-bar-location']"))
)
search_button = search_form_wait.until(
    EC.presence_of_element_located(
        (By.XPATH, "/html/body/div[1]/header/nav/section/section[2]/form/button")
    )
)

In [5]:
# Fill in the search form and submit it (the order of these steps matters)
job_title.send_keys('Data Analyst')

# NOTE(review): despite its name, this button lives in the form's second <section>;
# it presumably clears the pre-filled location field, not the job title - confirm.
clear_existing_job_title = driver.find_element(By.XPATH, "/html/body/div[1]/header/nav/section/section[2]/form/section[2]/button")
clear_existing_job_title.click()

# Short fixed pause before typing the location
# (presumably lets the field finish clearing - TODO confirm / replace with an explicit wait)
time.sleep(1)
job_location.send_keys('India')

# Submit the search
search_button.click()

In [16]:
# Set pandas display options so every column and the full URL text are shown
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

# Collect all job posts from the current results page.
# Wait (up to 10 s) until at least one result card is present: a plain
# find_elements call returns an empty list if the results have not rendered yet,
# which would silently yield an empty DataFrame. Raises TimeoutException otherwise.
job_post = WebDriverWait(driver, 10).until(
    EC.presence_of_all_elements_located(
        (By.XPATH, "//ul[@class='jobs-search__results-list']//li")
    )
)

# One dictionary per job card; the DataFrame is built from the list in one go
data_list = []

for single_post in job_post:
    j_title = single_post.find_element(By.CLASS_NAME, "base-search-card__title").text
    j_company = single_post.find_element(By.CLASS_NAME, "base-search-card__subtitle").text
    j_location = single_post.find_element(By.CLASS_NAME, "job-search-card__location").text
    # The first anchor inside the card carries the link to the job posting
    j_url = single_post.find_element(By.TAG_NAME, 'a').get_attribute('href')

    data_list.append({'Title':j_title, 'Company':j_company, 'Location':j_location, 'URL':j_url})

# Create the DataFrame from the list of dictionaries (one row per job card)
df = pd.DataFrame(data_list)

In [17]:
# Number of job posts scraped (rows in the frame)
df.shape[0]

25

In [18]:
# Inspect the column labels of the scraped frame
df.columns

Index(['Title', 'Company', 'Location', 'URL'], dtype='object')

In [20]:
# Show rows that are exact duplicates of an earlier row (empty result = none found)
df.loc[df.duplicated()]

Unnamed: 0,Title,Company,Location,URL


In [21]:
# Preview up to five random rows; the size is clamped because DataFrame.sample
# raises ValueError when asked for more rows than the frame contains
df.sample(n=min(5, len(df)))

Unnamed: 0,Title,Company,Location,URL
21,Data Analyst,Acme Services,"Mumbai, Maharashtra, India",https://in.linkedin.com/jobs/view/data-analyst-at-acme-services-3724222511?refId=QF1dT2XBzw%2BEWz18Bf1cRg%3D%3D&trackingId=iDMKoKM7qY8%2FBnHY6RANiQ%3D%3D&position=22&pageNum=0&trk=public_jobs_jserp-result_search-card
19,"BI and Data Analyst (Tableau, SQL)",Workday,"Pune, Maharashtra, India",https://in.linkedin.com/jobs/view/bi-and-data-analyst-tableau-sql-at-workday-3744707740?refId=QF1dT2XBzw%2BEWz18Bf1cRg%3D%3D&trackingId=XCCozveHPdza1Ywl3nVMVw%3D%3D&position=20&pageNum=0&trk=public_jobs_jserp-result_search-card
18,Data Analyst - Pune,ConveGenius,"Pune, Maharashtra, India",https://in.linkedin.com/jobs/view/data-analyst-pune-at-convegenius-3741759955?refId=QF1dT2XBzw%2BEWz18Bf1cRg%3D%3D&trackingId=325WyNv4SeuMy3sqyQ%2FN0Q%3D%3D&position=19&pageNum=0&trk=public_jobs_jserp-result_search-card
15,Data Analyst – Service Now,Generis Tek Inc,"Hyderabad, Telangana, India",https://in.linkedin.com/jobs/view/data-analyst-%E2%80%93-service-now-at-generis-tek-inc-3720204536?refId=QF1dT2XBzw%2BEWz18Bf1cRg%3D%3D&trackingId=QnFRmuHvZ%2BqTE6l16at7GA%3D%3D&position=16&pageNum=0&trk=public_jobs_jserp-result_search-card
17,Data Analyst – Supply Chain- Power BI,"Dubai Jobs, Gulf Jobs, Jobs in Dubai, Qatar, Kuwait - Boyen Haddin & The Giant HR Consultant",Pune/Pimpri-Chinchwad Area,https://in.linkedin.com/jobs/view/data-analyst-%E2%80%93-supply-chain-power-bi-at-dubai-jobs-gulf-jobs-jobs-in-dubai-qatar-kuwait-boyen-haddin-the-giant-hr-consultant-3734512087?refId=QF1dT2XBzw%2BEWz18Bf1cRg%3D%3D&trackingId=WlqnbyT2Ws4zZLTwku%2FKOQ%3D%3D&position=18&pageNum=0&trk=public_jobs_jserp-result_search-card


In [None]:
# Persist the scraped job posts to an Excel workbook, leaving out the row index
df.to_excel(excel_writer='LinkedIn_Job_Post.xlsx', index=False)

In [22]:
# Closing the web driver
driver.quit()

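### Rough work

Exploratory cells used while working out the selectors. Their execution counts show they were run before the scraping loop, and they need a live browser session, so they will fail under "Run All" because the driver has already been closed above.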

In [None]:
# XPath for the job-result list items, assembled from its two parts
ul_li = (
    "//ul[@class='jobs-search__results-list']"  # the results <ul>
    "//li"                                      # every <li> beneath it
)

In [6]:
# Grab every <li> under the results list to experiment with
ar = driver.find_elements(By.XPATH, "//ul[@class='jobs-search__results-list']//li")

In [7]:
# How many list items were matched
len(ar)

25

In [9]:
# Take the first matched item as a sample for trying out selectors
s_ar = ar[0]

In [10]:
# Text of the first <h3> inside the sample item
s_ar.find_element(By.TAG_NAME, "h3").text

'DATA ANALYST (SQL,Python, Aws)'

In [11]:
# Same lookup by class name instead of tag name
s_ar.find_element(By.CLASS_NAME, "base-search-card__title").text

'DATA ANALYST (SQL,Python, Aws)'

In [12]:
# Text of the card's subtitle element
s_ar.find_element(By.CLASS_NAME, "base-search-card__subtitle").text

'Coforge'

In [14]:
# Text of the card's location element
s_ar.find_element(By.CLASS_NAME, "job-search-card__location").text

'Bengaluru, Karnataka, India'

In [15]:
# href of the first anchor inside the sample item
s_ar.find_element(By.TAG_NAME, 'a').get_attribute('href')

'https://in.linkedin.com/jobs/view/data-analyst-sql-python-aws-at-coforge-3743390893?refId=QF1dT2XBzw%2BEWz18Bf1cRg%3D%3D&trackingId=NGk1K1dGUqruTLxWW%2By%2F1g%3D%3D&position=1&pageNum=0&trk=public_jobs_jserp-result_search-card'