# In [None]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options as ChromeOptions
from time import sleep
import csv

# Chromedriver location used when the caller does not supply one.
_DEFAULT_CHROMEDRIVER_PATH = '/Users/kevin/Downloads/chromedriver-win64/chromedriver-win64/chromedriver'

# Upper bound on the number of job cards read from one results page.
_CARDS_PER_PAGE = 20


def _element_text(parent, xpath):
    """Return the stripped text of the first element matching *xpath*.

    *parent* is any object exposing Selenium's ``find_element`` (the driver
    or a WebElement). Whatever ``find_element`` raises when nothing matches
    is propagated to the caller.
    """
    return parent.find_element(By.XPATH, xpath).text.strip()


def _optional_element_text(parent, xpath):
    """Return the text as `_element_text` does, or ``""`` if that fails.

    Used for the fields the scraper treats as optional (address, minimum
    experience, company write-up). ``Exception`` is caught instead of a bare
    ``except`` so that Ctrl-C (KeyboardInterrupt) still stops the scrape.
    """
    try:
        return _element_text(parent, xpath)
    except Exception:
        return ""


def _scrape_job_details(driver):
    """Read the currently open job-details page and return one CSV row.

    The returned list is ordered like the ``columns`` list in `scrape_jobs`.
    The ``salary`` entry is itself a list: the salary text split on
    ``' to '``. Raises if any non-optional element cannot be found.
    """
    info = driver.find_element(By.XPATH, "//div[@data-cy='JobDetails__job-info']")

    title = _element_text(info, ".//h1[@id='job_title']")
    company = _element_text(info, ".//p[@data-cy='company-hire-info__company']")
    job_post_id = _element_text(info, ".//span[@data-cy='jobinfo__jobpostid--span']")
    address = _optional_element_text(info, ".//p[@id='address']/a")
    employment_type = _element_text(info, ".//p[@id='employment_type']")
    seniority = _element_text(info, ".//p[@id='seniority']")
    min_experience = _optional_element_text(info, ".//p[@id='min_experience']")
    job_category = _element_text(info, ".//p[@id='job-categories']")
    salary = _element_text(info, ".//div[@class='lh-solid']").split(' to ')
    salary_type = _element_text(info, ".//span[@data-cy='salary-type']")
    num_of_applications = _element_text(info, ".//span[@id='num_of_applications']")
    last_posted_date = _element_text(info, ".//span[@id='last_posted_date']")
    expiry_date = _element_text(info, ".//span[@id='expiry_date']")

    # This XPath starts with '//' (no leading '.'), so it is evaluated
    # against the whole document even though it is issued from `info`.
    description = _element_text(info, "//div[@id='job_description']")

    company_info = _optional_element_text(
        driver, "//div[@class='company-info']//div[@data-cy='companyinfo-writeup']"
    )

    return [title, company, job_post_id, address, employment_type, seniority,
            min_experience, job_category, salary, salary_type,
            num_of_applications, last_posted_date, expiry_date, description,
            company_info]


def _scrape_listing_page(driver, page, rows):
    """Scrape the job cards of the results page currently loaded in *driver*.

    Each successfully scraped job is appended to *rows*. *page* is only used
    in the progress message.

    Returns False when not even the first job card could be located (the
    caller uses this to stop paginating), True otherwise. As soon as a card
    or its details page fails, the error is printed and the rest of this
    page is abandoned.
    """
    for i in range(_CARDS_PER_PAGE):
        try:
            card_elem = driver.find_element(
                By.XPATH, f"(//a[@data-testid='job-card-link'])[{i + 1}]"
            )
        except Exception as e:
            print(f"Error: {e}")
            # No card at position 1 means the page listed no jobs at all.
            return i > 0

        try:
            # Enter the job details view and give it time to render.
            card_elem.click()
            sleep(8)

            # Progress indicator
            print(f"Scraping page {page} - Job {i + 1}")

            rows.append(_scrape_job_details(driver))

            # Return to the results list for the next card.
            driver.back()
            sleep(3)
        except Exception as e:
            print(f"Error: {e}")
            return True

    return True


def scrape_jobs(search_term, *, max_pages=9,
                chromedriver_path=_DEFAULT_CHROMEDRIVER_PATH):
    """Scrape MyCareersFuture search results and save them to a CSV file.

    Opens headless Chrome, walks the result pages for *search_term* (sorted
    by newest posting date), opens every job card, and writes the collected
    rows to ``'<search_term>_job.csv'`` in the current working directory.

    Args:
        search_term: Text to search for. It is URL-encoded before being
            placed in the query string and used verbatim in the output
            file name.
        max_pages: Highest page number to request; pages ``1..max_pages``
            are visited. Pagination stops early at the first page that
            shows no job card.
        chromedriver_path: Path to the chromedriver executable.

    Raises:
        AssertionError: If a loaded page's title does not contain
            ``'MyCareersFuture'``.
    """
    # Local imports keep this block self-contained; both come from packages
    # the file already depends on (stdlib and selenium).
    from urllib.parse import quote_plus
    from selenium.webdriver.chrome.service import Service

    columns = [
        'title',
        'company',
        'job_post_id',
        'address',
        'employment_type',
        'seniority',
        'min_experience',
        'job_category',
        'salary',
        'salary_type',
        'num_of_applications',
        'last_posted_date',
        'expiry_date',
        'description',
        'company_info',
    ]
    rows = []

    # Set up Chrome WebDriver. The options object must be handed to the
    # driver, otherwise '--headless' has no effect.
    chrome_options = ChromeOptions()
    chrome_options.add_argument('--headless')
    driver = webdriver.Chrome(
        service=Service(executable_path=chromedriver_path),
        options=chrome_options,
    )

    try:
        encoded_term = quote_plus(str(search_term))

        for page in range(1, max_pages + 1):
            url = (
                'https://www.mycareersfuture.gov.sg/search'
                f'?search={encoded_term}&sortBy=new_posting_date&page={page}'
            )
            driver.get(url)
            # Explicit raise instead of `assert`, which is removed under -O.
            if 'MyCareersFuture' not in driver.title:
                raise AssertionError(
                    f"Unexpected page title {driver.title!r} for {url}"
                )
            sleep(6)

            # A page without any job card means the results are exhausted.
            if not _scrape_listing_page(driver, page, rows):
                break

        jobs = pd.DataFrame(rows, columns=columns)
        jobs.to_csv(f'{search_term}_job.csv', index=False)
    finally:
        # Always close the browser, even when scraping aborts with an error.
        driver.quit()

# Read the list of job search terms; the CSV must provide a 'search' column.
search_terms_df = pd.read_csv('cf-search.csv')

# Scrape each search term in turn. Empty 'search' cells are read by pandas as
# NaN; they are skipped here because they would otherwise be scraped as the
# literal text "nan".
for search_term in search_terms_df['search'].dropna():
    print(f"Scraping jobs for '{search_term}'")
    scrape_jobs(search_term)
    print(f"Finished scraping jobs for '{search_term}'")