In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time


def scroll_to_end(driver, pause_time=2):
    # Get initial scroll height
    last_height = driver.execute_script("return document.body.scrollHeight")

    while True:
        # Scroll down to the bottom
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        # Wait to load the page
        time.sleep(pause_time)

        # Calculate new scroll height after scrolling
        new_height = driver.execute_script("return document.body.scrollHeight")

        # Check if the scroll height has changed
        if new_height == last_height:
            break  # If heights are the same, exit the loop

        last_height = new_height


def scrape_linkedin_jobs(job_title, location, max_retries=3):
    # Set up Chrome options for headless mode
    chrome_options = Options()
    chrome_options.add_argument("--disable-blink-features=AutomationControlled")
    chrome_options.add_argument("--disable-infobars")  # Disable the "Chrome is being controlled by automated test software" message
    chrome_options.add_argument("--disable-extensions")  # Disable Chrome extensions that may flag the session
    chrome_options.add_argument("--no-sandbox")  # Use this for running on systems without graphical environment (like cloud)

    # Set up the driver (make sure chromedriver is in your PATH)
    driver = webdriver.Chrome(options=chrome_options)

    # Navigate to LinkedIn jobs search page
    linkedin_url = "https://www.linkedin.com/jobs/search/"
    driver.get(linkedin_url)
    time.sleep(2)  # Wait for the page to load

    try:
        driver.find_element(By.CLASS_NAME,'contextual-sign-in-modal__modal-dismiss-icon').click()
    except:
        print('Login dismiss not found')

    # Input the job title
    search_title_box = driver.find_element(By.XPATH, '//input[@aria-label="Search job titles or companies"]')
    search_title_box.send_keys(job_title)
    
    # Input the location
    search_location_box = driver.find_element(By.XPATH, '//input[@aria-label="Location"]')
    search_location_box.clear()  # Clear the default location
    search_location_box.send_keys(location)
    
    # Click the search button
    search_button = driver.find_element(By.CSS_SELECTOR, '#jobs-search-panel > form > button > icon > svg')
    search_button.click()
    
    time.sleep(1)  # Wait for search results to load

    for i in range(8):
        scroll_to_end(driver)

    if  len(driver.find_elements(By.CLASS_NAME, 'infinite-scroller__show-more-button'))>0:
        for i in range(10):
            search_more = driver.find_element(By.CLASS_NAME, 'infinite-scroller__show-more-button')
            search_more.click()
            time.sleep(2)

    # Scrape the job listings
    job_listings = []
    job_elements = driver.find_elements(By.CLASS_NAME, 'jobs-search__results-list')
    job_elements = job_elements[0].find_elements(By.TAG_NAME, 'li')

    for job_element in job_elements:
        try:
            job_title = job_element.find_element(By.CLASS_NAME, 'base-search-card__info').text
            company_name = job_element.find_element(By.CLASS_NAME, 'base-search-card__subtitle').text
            location = job_element.find_element(By.CLASS_NAME, 'job-search-card__location').text
            job_link = job_element.find_element(By.TAG_NAME, 'a').get_attribute('href')
            
            description = None
            for retry in range(max_retries):
                try:
                    link = job_element.find_element(By.CLASS_NAME, 'base-card__full-link')
                    link.click()
                    time.sleep(3)

                    description = driver.find_element(By.CLASS_NAME, 'show-more-less-html__markup').text
                    break  # Exit retry loop if successful
                except Exception as e:
                    print(f"Attempt {retry+1} failed to get description: {e}")
                    if retry == max_retries - 1:
                        print("Max retries reached. Skipping this job.")
                    time.sleep(2)  # Optional: add delay before retry

            if description is None:
                description = '-'  # Use '-' if description could not be retrieved
            
            job_listings.append({
                'Job Title': job_title,
                'Company': company_name,
                'Location': location,
                'Link': job_link,
                'Description': description
            })
        except Exception as e:
            print(f"Error while scraping job: {e}")
            continue

    # Close the driver
    driver.quit()

    return job_listings




In [2]:
# Example usage
if __name__ == "__main__":
    job_title = "Data Engineer"
    location = "Montreal, Quebec, Canada"
    
    jobs = scrape_linkedin_jobs(job_title, location)
    
    for i, job in enumerate(jobs, start=1):
        print(f"Job {i}:")
        print(f"Title: {job['Job Title']}")
        print(f"Company: {job['Company']}")
        print(f"Location: {job['Location']}")
        print(f"Link: {job['Link']}")
        print(f"Description: {job['Description']}")
        print("-" * 20)


Login dismiss not found
Attempt 1 failed to get description: Message: element click intercepted: Element <a class="base-card__full-link absolute top-0 right-0 bottom-0 left-0 p-0 z-[2]" href="https://ca.linkedin.com/jobs/view/data-engineer-part-time-at-daro-4011014318?position=1&amp;pageNum=0&amp;refId=vPPe0kxmjHf8d2hZBatq%2BA%3D%3D&amp;trackingId=1OZniSppsiL2aC%2FZKBbYwQ%3D%3D&amp;trk=public_jobs_jserp-result_search-card" data-tracking-control-name="public_jobs_jserp-result_search-card" data-tracking-client-ingraph="" data-tracking-will-navigate="">...</a> is not clickable at point (447, 66). Other element would receive the click: <div class="artdeco-global-alert__content">...</div>
  (Session info: chrome=128.0.6613.138)
Stacktrace:
	GetHandleVerifier [0x00007FF7CD239412+29090]
	(No symbol) [0x00007FF7CD1AE239]
	(No symbol) [0x00007FF7CD06B1DA]
	(No symbol) [0x00007FF7CD0C6C1E]
	(No symbol) [0x00007FF7CD0C4692]
	(No symbol) [0x00007FF7CD0C1BAB]
	(No symbol) [0x00007FF7CD0C0DC5]
	(No 

In [23]:
import pandas as pd
jobs_df = pd.DataFrame(jobs)
jobs_df

Unnamed: 0,Job Title,Company,Location,Link,Description
0,"Data Engineer, Part Time\nDARO\nMontreal, Queb...",DARO,"Montreal, Quebec, Canada",https://ca.linkedin.com/jobs/view/data-enginee...,-
1,"Data Engineer, Quantitative Hedge Fund\nNascen...",Nascent,"Montreal, Quebec, Canada",https://ca.linkedin.com/jobs/view/data-enginee...,"About Nascent…\n\nFounded in 2020, Nascent exi..."
2,"Data Engineer\nFLiiP\nBrossard, Quebec, Canada...",FLiiP,"Brossard, Quebec, Canada",https://ca.linkedin.com/jobs/view/data-enginee...,FLiiP is a fast-growing company offering a fle...
3,Junior Software Engineer (Remote)\nPolicyMe\nG...,PolicyMe,Greater Montreal Metropolitan Area,https://ca.linkedin.com/jobs/view/junior-softw...,Overview of the role: Join us at PolicyMe! We'...
4,Data Processing Specialist - Python Developer\...,FortNine,"Montreal, Quebec, Canada",https://ca.linkedin.com/jobs/view/data-process...,Hello! We're FortNine. We are one of North Ame...
5,"Data Engineer\nTek Tron IT\nMontreal, Quebec, ...",Tek Tron IT,"Montreal, Quebec, Canada",https://ca.linkedin.com/jobs/view/data-enginee...,Data Engineer with strong experience in DBT SQ...
6,"Data Developer\nUbisoft\nMontreal, Quebec, Can...",Ubisoft,"Montreal, Quebec, Canada",https://ca.linkedin.com/jobs/view/data-develop...,Company Description\n\nJobs for Humanity is co...
7,Python Big Data Developer - FinTech - $200k CA...,Hunter Bond,"Montreal, Quebec, Canada",https://ca.linkedin.com/jobs/view/python-big-d...,Job Title: Python Big Data Developer\nFirm: El...
8,Data Science Engineer - remote\nHour Consultin...,Hour Consulting,"Montreal, Quebec, Canada",https://ca.linkedin.com/jobs/view/data-science...,Our client is an established growing part of t...
9,Database Engineer- Canada\nZortech Solutions\n...,Zortech Solutions,"Montreal, Quebec, Canada",https://ca.linkedin.com/jobs/view/database-eng...,Role: Database Engineer\n\nLocation: Montreal-...


In [24]:
jobs_df.to_csv('out.csv')

In [25]:
import os
import PyPDF2
import pandas as pd
from openai import OpenAI
import time

# Initialize the OpenAI client
client = OpenAI(api_key='XXXXXXX')  # Use your API key

# Function to extract text from PDF
def extract_text_from_pdf(pdf_path):
    try:
        with open(pdf_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            text = ''
            for page_num in range(len(reader.pages)):
                page = reader.pages[page_num]
                text += page.extract_text()
            return text
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return None

# Function to analyze a single job description with the CV using the new OpenAI API
def get_job_analysis(cv_text, job_title, company, location, description):
    try:
        # Construct the prompt
        prompt = f"""
        I have the following CV:
        {cv_text}
        
        Now analyze the following job description for the role of '{job_title}' at '{company}' located in '{location}'.
        Please provide the following:
        1. A match score based on my qualifications.
        2. Key skills or qualifications I meet or don't meet.
        3. Suggestions on how to improve my chances of getting the job.

        Job Description:
        {description}
        """
        
        # Call the OpenAI chat completion using the latest client syntax
        response = client.chat.completions.create(
            model='gpt-4o-mini',
            messages=[
                {"role": "system", "content": "You are a helpful assistant that analyzes job descriptions and compares them to CVs."},
                {"role": "user", "content": prompt}
            ]
        )
        
        # Extract the response text
        return response.choices[0].message.content.strip()

    except Exception as e:
        print(f"Error analyzing job: {e}")
        return None

# Function to analyze multiple jobs and return a DataFrame
def analyze_jobs(jobs_df, cv_text):
    analysis_results = []
    
    for index, job in jobs_df.iterrows():
        job_title = job['Job Title']
        company = job['Company']
        location = job['Location']
        description = job['Description']
        
        # Get analysis from the GPT model
        analysis = get_job_analysis(cv_text, job_title, company, location, description)
        
        analysis_results.append({
            'Job Title': job_title,
            'Company': company,
            'Location': location,
            'Analysis': analysis
        })
        
        # To avoid rate limits, add a delay between API calls
        time.sleep(2)  # Adjust sleep time if necessary

    return pd.DataFrame(analysis_results)

# Main function to extract the CV and analyze job listings
def main(cv_pdf_path, jobs_df):
    # Step 1: Extract CV text from PDF
    cv_text = extract_text_from_pdf(cv_pdf_path)

    if cv_text:
        # Step 2: Analyze the jobs with the extracted CV
        job_analysis_df = analyze_jobs(jobs_df, cv_text)
        return job_analysis_df
    else:
        print("Error: Unable to extract text from the provided CV PDF.")
        return None

# Example usage
if __name__ == "__main__":

    # Path to the CV PDF file
    cv_pdf_path = './CV_En_BABAK.pdf'

    # Run the job analysis
    job_analysis_df = main(cv_pdf_path, jobs_df)

    # Display the DataFrame
    if job_analysis_df is not None:
        print(job_analysis_df)


                                            Job Title  \
0   Data Engineer, Part Time\nDARO\nMontreal, Queb...   
1   Data Engineer, Quantitative Hedge Fund\nNascen...   
2   Data Engineer\nFLiiP\nBrossard, Quebec, Canada...   
3   Junior Software Engineer (Remote)\nPolicyMe\nG...   
4   Data Processing Specialist - Python Developer\...   
5   Data Engineer\nTek Tron IT\nMontreal, Quebec, ...   
6   Data Developer\nUbisoft\nMontreal, Quebec, Can...   
7   Python Big Data Developer - FinTech - $200k CA...   
8   Data Science Engineer - remote\nHour Consultin...   
9   Database Engineer- Canada\nZortech Solutions\n...   
10  Data Engineer\nNascent\nMontreal, Quebec, Cana...   
11  Data Engineer\nIntelcom | Dragonfly\nMontreal,...   
12  Data Engineer\nVaco\nMontreal, Quebec, Canada\...   
13  Data Engineer\nNomic Bio\nMontreal, Quebec, Ca...   
14  Database Engineer\nZortech Solutions\nMontreal...   
15  Data WareHouse Engineer-Canada\nZortech Soluti...   
16  Data WareHouse Engineer- Ca

In [28]:
job_analysis_df.to_csv('test.csv')