In [8]:
# Basic Selenium imports
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys

# For explicit waits
from selenium.webdriver.support import expected_conditions as EC

# Other useful libraries
import time  # For sleep/delays
import random  # For random delays if needed
import pandas as pd  # If you need to save data to CSV
from dotenv import load_dotenv
import os

In [10]:
def _create_headless_chrome():
    """Build and return a Chrome WebDriver configured to run headless."""
    chrome_options = Options()
    for flag in (
        "--headless",               # Run Chrome in headless mode
        "--disable-gpu",            # Disable GPU for compatibility
        "--window-size=1080,5000",  # Set a default window size
        "--no-sandbox",             # Recommended for Linux environments
        "--disable-dev-shm-usage",  # Overcome resource limitations on some systems
    ):
        chrome_options.add_argument(flag)
    return webdriver.Chrome(options=chrome_options)


def _linkedin_login(driver):
    """Fill in and submit the LinkedIn login form with credentials from the environment."""
    driver.get("https://www.linkedin.com/login")

    username = driver.find_element(By.ID, "username")
    password = driver.find_element(By.ID, "password")

    # Credentials are read from LINKEDIN_USERNAME / LINKEDIN_PASSWORD.
    username.send_keys(os.getenv("LINKEDIN_USERNAME"))
    password.send_keys(os.getenv("LINKEDIN_PASSWORD"))

    # Submit login form, then give the post-login redirect time to finish.
    password.send_keys(Keys.RETURN)
    time.sleep(5)


def _scrape_job_posting(driver, job_id):
    """Open the guest job-posting page for ``job_id`` and return its fields as one tuple."""
    url = f"https://www.linkedin.com/jobs-guest/jobs/api/jobPosting/{job_id}"
    driver.get(url)

    time.sleep(5)  # Wait for page to load
    print(url)

    # Report the document state (e.g. loading, interactive, complete).
    page_state = driver.execute_script('return document.readyState;')
    print(page_state)

    # Every field keeps this placeholder unless its element is found.
    fields = dict.fromkeys(
        (
            "title", "company", "location", "post_time", "applicants_number",
            "seniority_level", "employment_type", "job_function", "industries",
        ),
        "Not specified",
    )

    # Fields located directly by CSS class name: (field key, class name).
    top_card_fields = (
        ("title", "top-card-layout__title"),
        ("company", "topcard__flavor"),
        ("location", "topcard__flavor--bullet"),
        ("post_time", "posted-time-ago__text"),
        ("applicants_number", "num-applicants__caption"),
    )
    for key, class_name in top_card_fields:
        try:
            fields[key] = driver.find_element(By.CLASS_NAME, class_name).text
            print(fields[key])
        except Exception as e:
            print(f"Error: {str(e)}")

    # Fields located via the <span> following an <h3> heading:
    # (field key, heading text, label printed on success, message printed on failure).
    criteria_fields = (
        ("seniority_level", "Seniority level", "Seniority Level", "Seniority level not found"),
        ("employment_type", "Employment type", "Employment Type", "Employment type not found"),
        ("job_function", "Job function", "job function", "job function not found"),
        ("industries", "Industries", "industries", "industries not found"),
    )
    for key, heading, label, missing_message in criteria_fields:
        xpath = f"//h3[contains(text(), '{heading}')]/following-sibling::span"
        try:
            fields[key] = driver.find_element(By.XPATH, xpath).text.strip()
            print(f"{label}: {fields[key]}")
        except Exception:  # not a bare except: let KeyboardInterrupt/SystemExit through
            print(missing_message)

    try:
        # The description is not part of the returned tuple; it is stored in a txt file.
        job_description = driver.find_element(
            By.CLASS_NAME, "show-more-less-html__markup"
        ).text
        filename = f'job_description_{job_id}.txt'
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(job_description)
    except Exception as e:
        print(f"Error: {str(e)}")

    return (
        job_id,
        fields["title"],
        fields["company"],
        fields["location"],
        fields["seniority_level"],
        fields["employment_type"],
        fields["post_time"],
        fields["applicants_number"],
        fields["job_function"],
        fields["industries"],
    )


def _quit_driver(driver):
    """Close the browser session; a failure while closing is reported, not raised."""
    try:
        driver.quit()
    except Exception as e:
        print(f"Error: {str(e)}")


def get_job_details(job_id):
    """Log in to LinkedIn and scrape the details of a single job posting.

    Loads environment variables with ``load_dotenv()``, starts a headless
    Chrome session, logs in with ``LINKEDIN_USERNAME`` / ``LINKEDIN_PASSWORD``
    and then reads the guest job-posting page for ``job_id``. The job
    description text, when found, is written to ``job_description_{job_id}.txt``
    (relative path). Progress and per-field errors are printed.

    Args:
        job_id: Identifier interpolated into the job-posting URL and the
            description filename.

    Returns:
        On success, a list holding one tuple:
        ``(job_id, title, company, location, seniority_level, employment_type,
        post_time, applicants_number, job_function, industries)``.
        A field that could not be located is ``"Not specified"``.
        If loading or scraping the job page raises, the error is printed and
        the tuple ``(None, None)`` is returned.

    Raises:
        Any exception raised while creating the driver or logging in is
        propagated to the caller.
    """
    load_dotenv()

    # Initialize the Chrome WebDriver with headless mode
    driver = _create_headless_chrome()

    # try/finally guarantees the browser is closed on every exit path
    # (login failure, scrape failure, interruption), not only on success.
    try:
        _linkedin_login(driver)

        try:
            job_record = _scrape_job_posting(driver, job_id)
        except Exception as e:
            print(f"Error: {str(e)}")
            return None, None

        print(f"Job ID: {job_id}")
        return [job_record]
    finally:
        _quit_driver(driver)
         

In [11]:
if __name__ == "__main__":
    # Test with specific job ID
    job_id = "4073313173"
    print(f"Getting details for job {job_id}...")
    job_details = get_job_details(job_id)
    print(job_details)

    # get_job_details() writes this file only when the description element
    # was found, so the file may be missing after a failed scrape.
    filename = f'job_description_{job_id}.txt'
    try:
        with open(filename, 'r', encoding='utf-8') as f:
            # Split the content into lines
            lines = f.readlines()
    except FileNotFoundError:
        print(f"No saved job description found: {filename}")
        lines = []

    # Display first 10 lines (or all if less than 10)
    for i, line in enumerate(lines[:10], start=1):
        print(f"Line {i}: {line.strip()}")

Getting details for job 4073313173...
https://www.linkedin.com/jobs-guest/jobs/api/jobPosting/4073313173
complete
Data Engineer
Suncorp Group
Brisbane City, Queensland, Australia
1 week ago
27 applicants
Seniority Level: Not Applicable
Employment Type: Full-time
job function: Information Technology
industries: Financial Services and Insurance
Job ID: 4073313173
[('4073313173', 'Data Engineer', 'Suncorp Group', 'Brisbane City, Queensland, Australia', 'Not Applicable', 'Full-time', '1 week ago', '27 applicants', 'Information Technology', 'Financial Services and Insurance')]
Line 1: $85,000 - $95,000 (super included) + bonus
Line 2: Melbourne, Sydney or Brisbane
Line 3: 
Line 4: We’re never just satisfied with how things are – because we know how things could be. And it’s our expert Technology team who forge ahead every day to make those ‘what ifs’ a reality.
Line 5: 
Line 6: Welcome to a place where you can chase real progress and drive real change. And that includes your own career. Bei