In [4]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from parsel import Selector
from openpyxl.worksheet.table import Table
from selenium.common.exceptions import NoSuchElementException
import random 
import pandas as pd
import time
import openpyxl
import os

In [5]:
class LinkedInJob(webdriver.Chrome):
    """Chrome WebDriver with helper steps for scraping LinkedIn recommended jobs.

    Each public method performs one step of the flow used by the notebook:
    open LinkedIn, sign in, navigate to the recommended-jobs collection and
    export the details of each listed job to an Excel workbook.

    NOTE(review): every locator below is tied to LinkedIn's markup at the
    time of writing; they will need updating whenever the site changes.
    """

    # Title link of each job card in the results list.
    JOB_CARD_XPATH = '//a[@class = "disabled ember-view job-card-container__link job-card-list__title"]'

    # Output column name -> XPath of the element holding that value.
    # The paths start with '//', so they are resolved against the whole
    # document, not relative to a job card.
    JOB_DETAIL_XPATHS = {
        'Job Title': '//h2[@class="t-24 t-bold jobs-unified-top-card__job-title"]',
        'Location': '//span[@class="jobs-unified-top-card__bullet"]',
        'Type': '//li[@class="jobs-unified-top-card__job-insight"]',
        'WorkPlace Type': '//span[@class="jobs-unified-top-card__workplace-type"]',
    }

    def __init__(self, driver_path = r"C:\Users\Mohsin\Documents\Selenium"):
        """Add ``driver_path`` to ``PATH`` and start a Chrome session.

        Args:
            driver_path: Directory to add to the ``PATH`` environment
                variable (presumably where chromedriver lives).
        """
        self.driver_path = driver_path
        current_path = os.environ.get('PATH', '')
        # Join with os.pathsep so the new directory does not get glued onto
        # the last existing entry, and skip it when already present so that
        # re-running the cell does not keep growing PATH.
        if driver_path not in current_path.split(os.pathsep):
            os.environ['PATH'] = (
                f"{current_path}{os.pathsep}{driver_path}" if current_path else driver_path
            )
        super().__init__()

    def home_page(self):
        """Navigate to the LinkedIn feed URL."""
        self.get('https://www.linkedin.com/feed/')

    def sign_in_page(self):
        """Click the element with class ``main__sign-in-link``."""
        self.find_element(By.CLASS_NAME, "main__sign-in-link").click()

    def sign_in(self, username, password):
        """Fill in the login form and submit it.

        Args:
            username: Text typed into the element with id ``username``.
            password: Text typed into the element with id ``password``.
        """
        self.find_element(By.ID, "username").send_keys(username)
        self.find_element(By.ID, "password").send_keys(password)
        self.find_element(By.CLASS_NAME, "btn__primary--large").click()

    def minimize_box(self):
        """Wait up to 20 s for the ``chevron-down`` control and click it.

        NOTE(review): presumably this collapses the messaging overlay so it
        does not cover other controls; confirm against the live page.
        """
        chevron = WebDriverWait(self, 20).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, '[type="chevron-down"]'))
        )
        chevron.click()

    def job_bar(self):
        """Click the element matching ``[type="job"]``."""
        self.find_element(By.CSS_SELECTOR, '[type="job"]').click()

    def show_all_jobs(self):
        """Wait up to 10 s for the recommended-jobs link and click it."""
        link = WebDriverWait(self, 10).until(
            EC.element_to_be_clickable(
                (By.CSS_SELECTOR, 'a[href="https://www.linkedin.com/jobs/collections/recommended"]')
            )
        )
        link.click()

    def extract_job_details(self, pages=3, page_size=24, output_path='LinkedIn Job.xlsx'):
        """Scrape recommended jobs page by page and export them to Excel.

        For every page the job cards are clicked one at a time and the first
        text line of each detail element is recorded. The workbook is
        rewritten after every page, so the rows collected so far are already
        on disk if a later page fails.

        Args:
            pages: Number of result pages to visit.
            page_size: Offset step used for the ``start`` query parameter.
            output_path: Path of the ``.xlsx`` file to write.
        """
        job_data = []
        for page in range(pages):
            # Randomised pause before each navigation.
            time.sleep(random.uniform(8.5, 10.9))
            start = page * page_size
            self.get(f'https://www.linkedin.com/jobs/collections/recommended/?currentJobId=&start={start}')

            jobs = self.find_elements(By.XPATH, self.JOB_CARD_XPATH)
            print(f"Number of jobs found on page {page + 1}: {len(jobs)}")

            for job in jobs:
                # Reading this property scrolls the card into view so that
                # the click below can reach it.
                job.location_once_scrolled_into_view
                job.click()
                time.sleep(random.uniform(4.6, 6.9))

                job_data.append({
                    column: self._first_text_line(xpath)
                    for column, xpath in self.JOB_DETAIL_XPATHS.items()
                })
                print('[*] Saving')

            print(f"Total number of jobs extracted: {len(job_data)}")
            self._save_jobs_to_excel(job_data, output_path)

    def _first_text_line(self, xpath):
        """Return the first text line of the element at ``xpath``, or ``''`` if absent."""
        try:
            text = self.find_element(By.XPATH, xpath).text
        except NoSuchElementException:
            return ''
        return text.split('\n')[0]

    def _save_jobs_to_excel(self, job_data, output_path):
        """Write ``job_data`` to ``output_path`` with sized columns and a table."""
        # Passing the column list keeps the header row even when nothing
        # was scraped.
        columns = list(self.JOB_DETAIL_XPATHS)
        pd.DataFrame(job_data, columns=columns).to_excel(output_path, index = False)

        workbook = openpyxl.load_workbook(output_path)
        try:
            worksheet = workbook.active
            worksheet.title = "Job Data"

            for column_cells in worksheet.columns:
                # Measure the string form of every value; empty cells count
                # as zero width instead of raising and being skipped.
                max_length = max(
                    (len(str(cell.value)) for cell in column_cells if cell.value is not None),
                    default=0,
                )
                adjusted_width = (max_length + 2) * 1.2
                worksheet.column_dimensions[column_cells[0].column_letter].width = adjusted_width

            # Only add the table when there is at least one data row; a
            # header-only range does not make a usable table.
            if job_data:
                table = Table(displayName = "JobTable", ref=f"A1:D{len(job_data) + 1}")
                worksheet.add_table(table)

            workbook.save(output_path)
        finally:
            workbook.close()


In [6]:
# Run the full scraping flow: open LinkedIn, sign in, go to the
# recommended-jobs collection and export the listings to Excel.
inst = LinkedInJob()
try:
    inst.home_page()
    inst.sign_in_page()
    # Credentials come from the environment so real values never have to be
    # saved in the notebook; the placeholders are used when they are unset.
    inst.sign_in(
        username = os.environ.get("LINKEDIN_USERNAME", "your_username"),
        password = os.environ.get("LINKEDIN_PASSWORD", "your_password"),
    )
    inst.minimize_box()
    inst.job_bar()
    inst.show_all_jobs()
    inst.extract_job_details()
finally:
    # Always end the browser session, even when a step above fails.
    inst.quit()

Number of jobs found on page 1: 7
[*] Saving
[*] Saving
[*] Saving
[*] Saving
[*] Saving
[*] Saving
[*] Saving
Total number of jobs extracted: 7
Number of jobs found on page 2: 7
[*] Saving
[*] Saving
[*] Saving
[*] Saving
[*] Saving
[*] Saving
[*] Saving
Total number of jobs extracted: 14
Number of jobs found on page 3: 7
[*] Saving
[*] Saving
[*] Saving
[*] Saving
[*] Saving
[*] Saving
[*] Saving
Total number of jobs extracted: 21
