In [1]:
# Import the necessary Libraries
from time import sleep
from typing import Optional, Dict, Tuple

import pandas as pd
from bs4 import BeautifulSoup
from rich import print
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
# Add settings and configurations for the Chrome session.
# Set HEADLESS to True to pass '--headless' to Chrome; the default (False)
# keeps the browser window visible.
HEADLESS = False

chrome_options = webdriver.ChromeOptions()
if HEADLESS:
    chrome_options.add_argument('--headless')
# NOTE(review): these two flags are presumably here for container/CI runs
# (sandbox and /dev/shm limits) — confirm they are still needed locally.
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')

In [3]:
# Start Chrome in three explicit steps: obtain the driver via webdriver_manager,
# wrap the result in a Service, then launch the browser with `chrome_options`.
# NOTE(review): no browser.quit() is visible in this notebook — confirm the
# session is closed somewhere once scraping is done.
driver_path = ChromeDriverManager().install()
chrome_service = ChromeService(driver_path)
browser = webdriver.Chrome(service=chrome_service, options=chrome_options)

In [4]:
# Load the ng.indeed.com landing page in the controlled browser.
website = 'https://ng.indeed.com/'
browser.get(website)

In [5]:
# Maximize the browser window.
# NOTE(review): presumably so the full desktop layout is rendered before the
# search controls are looked up — confirm.
browser.maximize_window()

In [6]:
# Locate the search form controls: the "what" text box (by element id) and the
# primary search button (by class name). Either lookup raises if nothing matches.
input_search = browser.find_element(
    by=By.ID,
    value='text-input-what',
)
search_button = browser.find_element(
    by=By.CLASS_NAME,
    value='yosegi-InlineWhatWhere-primaryButton',
)

In [7]:
# Empty the search box, type the query, then submit the search.
input_search.clear()
input_search.send_keys('Dentist Jobs')
# The click is dispatched through JavaScript instead of WebElement.click().
# NOTE(review): presumably to avoid the click being intercepted by an overlay — confirm.
browser.execute_script("arguments[0].click();", search_button)

In [8]:
# Wait for the search results to render before snapshotting the page.
# The JavaScript click that submits the search does not wait for the results,
# so reading page_source straight away can capture a page with no job cards
# (this is what happens under Restart & Run All).
RESULTS_TIMEOUT_SECONDS = 15
try:
    WebDriverWait(browser, RESULTS_TIMEOUT_SECONDS).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'job_seen_beacon'))
    )
except TimeoutException:
    # Best effort: still parse whatever is on screen, but make the gap visible.
    print(f'No job cards appeared within {RESULTS_TIMEOUT_SECONDS}s; parsing the page as-is.')

# Get the HTML source code of the page once the job cards are present
html = browser.page_source

# Parse the HTML using BeautifulSoup
soup = BeautifulSoup(html, 'html.parser')

In [18]:
# Collect the `data-jk` value of the first anchor in every job card.
# find() returns None for a card without an <a>; those cards are skipped
# instead of raising AttributeError on `.get`.
link = [
    anchor.get('data-jk')
    for anchor in (card.find('a') for card in soup.find_all('div', class_='job_seen_beacon'))
    if anchor is not None
]

In [23]:
# Remember the URL the browser is currently on; get_data() reads this
# notebook-level variable when it builds each job's link.
current_url = browser.current_url

In [22]:
def modify_job_link_id(original_url, new_id):
    """
    Return ``original_url`` with its ``vjk`` query parameter set to ``new_id``.

    Any existing ``vjk`` parameter is removed wherever it appears in the query
    string, and a single ``vjk=<new_id>`` is appended at the end. All other
    parameters are kept verbatim and in their original order, and a
    ``#fragment`` (if present) stays after the query.

    Args:
        original_url: The URL to rewrite. It may or may not already contain a
            ``vjk`` parameter, or any query string at all.
        new_id: The id to place in the ``vjk`` parameter. It is inserted as-is;
            no URL-encoding is applied.

    Returns:
        The rewritten URL whose query string ends with ``vjk=<new_id>``.
    """
    # Split off the fragment first so it is not mistaken for part of the query.
    url_without_fragment, fragment_sep, fragment = original_url.partition('#')
    base, _, query = url_without_fragment.partition('?')

    # Keep every parameter except an existing vjk. Empty chunks (from a trailing
    # or doubled '&', or from a URL with no query) are dropped so no stray
    # separators end up in the result.
    kept_params = [
        param
        for param in query.split('&')
        if param and param != 'vjk' and not param.startswith('vjk=')
    ]
    kept_params.append('vjk=' + new_id)

    return base + '?' + '&'.join(kept_params) + fragment_sep + fragment

In [19]:
# Show the collected job ids (`print` here is rich.print, imported in the first cell).
print(link)

In [28]:
def _find_text(node, tag, class_name):
    """Return the stripped text of the first `tag` with class `class_name` under `node`, or '' if absent."""
    match = node.find(tag, class_=class_name)
    return match.text.strip() if match is not None else ''


def get_data(job_listing, base_url: Optional[str] = None) -> Tuple[str, ...]:
    """
    Extract one job record from a single job card.

    Every field is treated as optional: when the element a field is read from
    is missing, that field is returned as an empty string, so one incomplete
    card cannot abort the whole scrape.

    Args:
        job_listing: A parsed job card (the notebook passes the
            ``div.job_seen_beacon`` elements found by BeautifulSoup).
        base_url: URL handed to ``modify_job_link_id`` to build the job link.
            When omitted, the notebook-level ``current_url`` is used.

    Returns:
        A tuple ``(title, company, location, salary, job_type, date_posted,
        summary, link)`` of strings.
    """
    # NOTE(review): the css-* class names below look auto-generated and may
    # change when the site is redeployed — confirm against the live markup.

    # The first anchor in the card carries both the title and the job id.
    anchor = job_listing.find('a')

    # Extract job title (text of the first <span> inside that anchor)
    title_span = anchor.find('span') if anchor is not None else None
    title = title_span.text.strip() if title_span is not None else ''

    # Extract the remaining text fields; each falls back to '' when absent
    company = _find_text(job_listing, 'span', 'css-92r8pb eu4oa1w0')
    location = _find_text(job_listing, 'div', 'css-1p0sjhy eu4oa1w0')
    salary = _find_text(job_listing, 'div', 'metadata salary-snippet-container css-5zy3wz eu4oa1w0')
    job_type = _find_text(job_listing, 'div', 'metadata css-5zy3wz eu4oa1w0')

    # NOTE(review): the saved output shows values such as
    # 'PostedPosted 14 days ago', which suggests this span also contains a
    # nested label whose text gets concatenated — confirm before cleaning it.
    date_posted = _find_text(job_listing, 'span', 'css-qvloho eu4oa1w0')

    # Extract job summary
    summary = _find_text(job_listing, 'div', 'css-9446fg eu4oa1w0')

    # Extract Job Link: only built when the card actually exposes a job id
    link_id = anchor.get('data-jk') if anchor is not None else None
    if link_id:
        link = modify_job_link_id(current_url if base_url is None else base_url, link_id)
    else:
        link = ''

    # Return a tuple containing all the extracted information
    return (title, company, location, salary, job_type, date_posted, summary, link)

In [31]:
# Apply get_data to every job card on the parsed results page, in page order.
job_listings = list(map(get_data, soup.find_all('div', class_='job_seen_beacon')))


In [32]:
# Print every scraped record tuple (via rich.print) for a quick visual check.
print(job_listings)

In [33]:
# Convert list of records into a DataFrame; the column order mirrors the tuple
# returned by get_data().
df = pd.DataFrame(
    job_listings,
    columns=['Title', 'Company', 'Location', 'Salary', 'Job Type', 'Date Posted', 'Summary', 'Job Link'],
)

# Save DataFrame to a CSV file. The path is named once so the confirmation
# message always reports the file that was actually written.
csv_path = 'job_data.csv'
df.to_csv(csv_path, index=False)

print(f"Data saved to {csv_path}")

In [34]:
# Read the saved CSV back from disk and preview its first rows.
# Note: this rebinds `df` to the frame loaded from the file.
df = pd.read_csv('job_data.csv')
df.head()

Unnamed: 0,Title,Company,Location,Salary,Job Type,Date Posted,Summary,Job Link
0,Dentist,One and Only Health Clinic,Lagos,"₦300,000 a month",Full-time,EmployerActive 6 days ago,As a Dentist at our newly opening dental clini...,https://ng.indeed.com/jobs?q=Dentist+Jobs&l=&f...
1,Dental Assistant,StreSERT Services Limited (Third Party...,Lagos,,Full-time,EmployerActive 2 days ago,Anticipate and assist dentist’s needs.\nRespon...,https://ng.indeed.com/jobs?q=Dentist+Jobs&l=&f...
2,Primary Healthcare Providers,Germiny,Lagos,,Full-time,EmployerActive 5 days ago,* Simplified technology: Connect with patients...,https://ng.indeed.com/jobs?q=Dentist+Jobs&l=&f...
3,COMMUNITY PHARMACIST,Work In Nigeria Recruitment Agency,Lagos,"₦200,000 a month",Full-time,PostedPosted 14 days ago,Compounds and dispense medications as prescrib...,https://ng.indeed.com/jobs?q=Dentist+Jobs&l=&f...
4,Dentist,StreSERT Services Limited (Third Party...,Lagos,,Full-time,EmployerActive 2 days ago,The dentist will be responsible for providing ...,https://ng.indeed.com/jobs?q=Dentist+Jobs&l=&f...
