### Importing Packages

In [51]:
import time

import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver import ActionChains
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

### Defining Functions

In [52]:
def open_in_new_tab(driver , element):
    """Ctrl-click ``element`` and switch the driver to the last window handle.

    Parameters
    ----------
    driver : selenium WebDriver
        Browser session that owns ``element``.
    element : selenium WebElement
        Element to scroll to and Ctrl-click.

    Notes
    -----
    The scroll is part of the same action chain as the click. An
    ``ActionChains`` object only queues actions; nothing is sent to the
    browser until ``perform()`` is called, so a separate chain that is never
    performed has no effect.
    """
    (
        ActionChains(driver)
        .scroll_to_element(element)
        .key_down(Keys.CONTROL)
        .click(element)
        .key_up(Keys.CONTROL)
        .perform()
    )
    # The code relies on the newly opened tab being the last handle in the list.
    driver.switch_to.window(driver.window_handles[-1])

def get_page_source(driver , element , max_attempts = 5):
    """Open ``element`` in a new tab and return that tab's HTML source.

    The tab is considered fully loaded when the job "top card" container is
    present. If it is missing, the tab is closed and the element is opened
    again, up to ``max_attempts`` times in total.

    Parameters
    ----------
    driver : selenium WebDriver
        Browser session currently focused on the results window.
    element : selenium WebElement
        Job entry to open.
    max_attempts : int, optional
        Maximum number of times the element is opened (at least one attempt
        is always made). Defaults to 5.

    Returns
    -------
    str
        Page source of the opened tab. If the top card never appeared, the
        source captured on the last attempt is returned as a best effort so
        that the caller still receives a string to parse.
    """
    top_card_xpath = '//div[@class = "top-card-layout__entity-info flex-grow flex-shrink-0 basis-0 babybear:flex-none babybear:w-full babybear:flex-none babybear:w-full"]'

    # Remember the results window so we always return to it, and so we never
    # close it by mistake when the click did not open a new tab.
    results_window = driver.current_window_handle
    page = None

    for _ in range(max(1 , max_attempts)):
        open_in_new_tab(driver , element)
        try:
            time.sleep(2)
            page = driver.page_source
            driver.find_element(By.XPATH , top_card_xpath)
            return page
        except NoSuchElementException:
            # Top card not rendered (yet); close the tab below and try again.
            continue
        finally:
            if driver.current_window_handle != results_window:
                driver.close()
            driver.switch_to.window(results_window)

    print(f"Top card not found after {max(1 , max_attempts)} attempts; returning the page as loaded.")
    return page

def get_pages(name , number , driver_path = r'C:\Program Files (x86)\chromedriver.exe' , max_stalled_scrolls = 3):
    """Search LinkedIn's guest job search for a location and collect job pages.

    Parameters
    ----------
    name : str
        Text typed into the location field of the job search bar.
    number : int
        Number of job postings to collect.
    driver_path : str, optional
        Path to the chromedriver executable. Defaults to the path that was
        previously hard-coded in this function.
    max_stalled_scrolls : int, optional
        Number of consecutive scrolls that may load no additional results
        before scrolling stops. Prevents an endless loop when fewer than
        ``number`` jobs are available. Defaults to 3.

    Returns
    -------
    list of str
        HTML source of each opened job page, at most ``number`` entries.
    """
    jobs_xpath = './/section[@class = "two-pane-serp-page__results-list"]//ul[@class = "jobs-search__results-list"]//li'

    chrome_options = Options()
    chrome_options.add_argument("--incognito")    # Open in incognito mode

    service = Service(driver_path)
    driver = webdriver.Chrome(service=service, options=chrome_options)

    # Everything after the browser starts runs under try/finally so the
    # browser process is shut down even when a lookup or page load fails.
    try:
        driver.get('https://www.linkedin.com/jobs/search?trk=guest_homepage-basic_guest_nav_menu_jobs&position=1&pageNum=0')

        time.sleep(2)

        search = driver.find_element(By.XPATH , '//input[@id = "job-search-bar-location"]')
        search.clear()
        search.send_keys(name)
        search.send_keys(Keys.RETURN)

        time.sleep(2)

        jobs = driver.find_elements(By.XPATH , jobs_xpath)

        # Scroll to the bottom to trigger loading of more results, but give up
        # once several scrolls in a row bring nothing new.
        stalled_scrolls = 0
        while len(jobs) < number and stalled_scrolls < max_stalled_scrolls:
            driver.find_element(By.TAG_NAME , 'body').send_keys(Keys.END)
            time.sleep(3)
            loaded = driver.find_elements(By.XPATH , jobs_xpath)
            stalled_scrolls = 0 if len(loaded) > len(jobs) else stalled_scrolls + 1
            jobs = loaded

        if len(jobs) < number:
            print(f'Only {len(jobs)} of the requested {number} jobs could be loaded.')

        pages = [get_page_source(driver , job) for job in jobs[:number]]

        print(len(pages))
        return pages
    finally:
        driver.quit()

### Getting Page Sources

In [53]:
# Ask which location to search and how many postings to collect.
# Surrounding whitespace is dropped because the value is typed into the search
# box and later used in the output file name.
country_name = input('Enter Country Name : ').strip()
number = int(input('How many jobs you want to get extracted : '))

# A zero or negative count would make the later slice `jobs[:number]` pick the
# wrong items, so reject it up front.
if number < 1:
    raise ValueError(f'Number of jobs must be a positive integer, got {number}.')

print(f'Searching for first {number} jobs in {country_name}......')

Searching for first 5 jobs in India......


In [54]:
# Launch the browser, run the search and keep the HTML of every opened job page.
pages = get_pages(name=country_name, number=number)

5


In [55]:
# One independent, initially empty list per output column.
job_names, locations, seniority_levels, employment_types, job_functions, Industries = (
    [] for _ in range(6)
)

### Scraping Data

In [56]:
# Class attribute values used to locate each piece of data on a job page.
INTRO_CLASS = 'top-card-layout__entity-info flex-grow flex-shrink-0 basis-0 babybear:flex-none babybear:w-full babybear:flex-none babybear:w-full'
TITLE_CLASS = 'top-card-layout__title font-sans text-lg papabear:text-xl font-bold leading-open text-color-text mb-0 topcard__title'
LOCATION_CLASS = 'topcard__flavor topcard__flavor--bullet'
CRITERIA_LIST_CLASS = 'description__job-criteria-list'
CRITERIA_TEXT_CLASS = 'description__job-criteria-text description__job-criteria-text--criteria'


def _text_or_nan(parent , tag , class_name , label):
    """Return the stripped text of the first matching child, or NaN.

    ``find`` returns ``None`` (it does not raise) when nothing matches, so the
    missing case is checked explicitly. A message naming ``label`` is printed
    whenever NaN is returned.
    """
    node = parent.find(tag , class_ = class_name) if parent is not None else None
    if node is None:
        print(f"Can't find {label}...")
        return np.nan
    return node.text.strip()


# Order matches the order in which the criteria are read from the page.
criteria_columns = (seniority_levels , employment_types , job_functions , Industries)

# Empty the columns first so re-running this cell does not append duplicate rows.
for column in (job_names , locations , *criteria_columns):
    column.clear()

for page in pages:

    soup = BeautifulSoup(page ,'html.parser')

    intro = soup.find('div' , class_ = INTRO_CLASS)
    if intro is None:
        print("Can't find intro...")

    job_names.append(_text_or_nan(intro , 'h1' , TITLE_CLASS , 'job_name'))
    locations.append(_text_or_nan(intro , 'span' , LOCATION_CLASS , 'location'))

    criteria_list = soup.find('ul' , class_ = CRITERIA_LIST_CLASS)
    if criteria_list is None:
        details = []
    else:
        details = criteria_list.find_all('span' , class_ = CRITERIA_TEXT_CLASS)

    if len(details) < len(criteria_columns):
        print("Can't find details...")

    # Append exactly one value per column for every page. Missing criteria are
    # padded with NaN so all columns keep the same length; appending some
    # values and then NaN to every column would misalign the rows.
    for position , column in enumerate(criteria_columns):
        if position < len(details):
            column.append(details[position].text.strip())
        else:
            column.append(np.nan)


### Creating DataFrame and Saving into File

In [57]:
# Map each output column name to the list collected for it.
data = dict(
    Job_Name=job_names,
    Location=locations,
    Seniority_Level=seniority_levels,
    Employment_Type=employment_types,
    Job_Function=job_functions,
    Industry=Industries,
)

# Build the table, then number the rows from 1 instead of 0.
df = pd.DataFrame(data)
df.index = pd.RangeIndex(1, len(df) + 1)

In [59]:
# The location is free-text user input. Replace path separators and characters
# that are not allowed in Windows file names so the input cannot break or
# redirect the output path. Ordinary names are left unchanged.
safe_country_name = country_name.translate(
    str.maketrans({char: '_' for char in '\\/:*?"<>|'})
)
df.to_csv(f'first_{number}_Jobs_in_{safe_country_name}.csv')