In [1]:
import time
from urllib.parse import urljoin

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.chrome.options import Options  # Import the Options class
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

In [2]:
# Build the Chrome options shared by the notebook-level driver:
# headless (no visible browser window), sandbox disabled, /dev/shm usage disabled.
chrome_options = Options()
for chrome_flag in (
    "--headless",
    "--no-sandbox",
    "--disable-dev-shm-usage",
):
    chrome_options.add_argument(chrome_flag)

In [3]:
# Start a notebook-level Chrome session from a local ChromeDriver binary.
# Replace the path below with the location of ChromeDriver on your machine.
# NOTE(review): scrape_job_details and scrape_bayt_job_links each launch their own
# driver, so this session looks unused in the visible cells and is never quit here —
# confirm whether it is still needed.
chromedriver_path = "C:/Users/Ahmad/Downloads/chromedriver-win64/chromedriver-win64/chromedriver.exe"
service = Service(chromedriver_path)
driver = webdriver.Chrome(service=service, options=chrome_options)

In [4]:
def _text_or_na(soup, tag, css_class):
    """Return the stripped text of the first matching element, or "N/A" if none exists."""
    element = soup.find(tag, class_=css_class)
    return element.text.strip() if element is not None else "N/A"


def scrape_job_details(
    job_links,
    chromedriver_path="C:/Users/Ahmad/Downloads/chromedriver-win64/chromedriver-win64/chromedriver.exe",
    page_load_delay=2,
):
    """Visit each job posting URL and extract its main fields.

    A headless Chrome session is started for the duration of the call and is
    always shut down afterwards, even if the loop is interrupted.

    Args:
        job_links: Sequence of job posting URLs to visit.
        chromedriver_path: Filesystem path of the ChromeDriver executable.
        page_load_delay: Seconds to sleep after each navigation so the page
            can finish rendering before its HTML is parsed.

    Returns:
        A list of dicts with the keys 'Title', 'Company', 'Location',
        'Description', 'Skills' and 'Link'. A field whose element is not found
        on the page is recorded as "N/A". Links that raise an error are
        reported on stdout and skipped.
    """
    job_details = []

    # Headless Chrome: no browser window is opened while scraping.
    chrome_options = Options()
    for flag in ("--headless", "--no-sandbox", "--disable-dev-shm-usage"):
        chrome_options.add_argument(flag)

    service = Service(chromedriver_path)
    driver = webdriver.Chrome(service=service, options=chrome_options)

    # try/finally guarantees the browser process is released even when the
    # (potentially very long) loop is aborted, e.g. by a kernel interrupt.
    try:
        for counter, link in enumerate(job_links, start=1):
            try:
                print(f"Fetching job details from {link}...")
                driver.get(link)

                # Fixed pause rather than an explicit wait: pages missing some
                # fields are still recorded (with "N/A") instead of timing out.
                time.sleep(page_load_delay)

                soup = BeautifulSoup(driver.page_source, 'html.parser')

                title = _text_or_na(soup, 'h1', 'h3 t-break')
                job_details.append({
                    'Title': title,
                    'Company': _text_or_na(soup, 'a', 'is-black t-bold'),
                    'Location': _text_or_na(soup, 'span', 't-mute'),
                    'Description': _text_or_na(soup, 'div', 'card-content p20t is-spaced'),
                    'Skills': _text_or_na(
                        soup, 'div', 'card-content is-spaced t-break print-break-before p20t'
                    ),
                    'Link': link,
                })
                print(f"Details fetched for {title}.")
                print(f"{counter}/{len(job_links)} jobs fetched.")
            except TimeoutException:
                print(f"Timeout while waiting for job details at {link}.")
            except WebDriverException as e:
                print(f"WebDriverException encountered while fetching details from {link}: {e}")
                # Back off briefly before moving on; the failed link is not retried.
                time.sleep(5)
            except Exception as e:
                print(f"An unexpected error occurred while fetching details from {link}: {e}")
    finally:
        driver.quit()

    return job_details

In [5]:
def scrape_bayt_job_links(
    num_pages,
    chromedriver_path="C:/Users/Ahmad/Downloads/chromedriver-win64/chromedriver-win64/chromedriver.exe",
):
    """Collect job posting URLs from the first `num_pages` Bayt search result pages.

    A Chrome session (with a visible, maximized window) is started for the
    duration of the call and is always shut down afterwards.

    Args:
        num_pages: Number of result pages to request, starting at page 1.
        chromedriver_path: Filesystem path of the ChromeDriver executable.

    Returns:
        A list of absolute job posting URLs in the order they were found.
        Scraping stops at the first page that raises an error; links gathered
        up to that point are still returned.
    """
    site_root = "https://www.bayt.com"
    # Search URL for the job listing; the page number is appended per request.
    base_url = "https://www.bayt.com/en/international/jobs/?options%5Bsort%5D%5B%5D=l&_gl=1*1vt8eyl*_up*MQ..*_ga*MTkxNjQ3MDAzLjE3Mjg5ODgzMDE.*_ga_1NKPLGNKKD*MTcyODk4ODMwMS4xLjAuMTcyODk4ODMwMS4wLjAuMA.."
    job_links = []

    chrome_options = Options()
    chrome_options.add_argument("--start-maximized")  # Start with the window maximized
    # Add "--headless" here to run without opening a browser window.
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")

    service = Service(chromedriver_path)
    driver = webdriver.Chrome(service=service, options=chrome_options)

    # try/finally guarantees the browser process is released even when the
    # loop is aborted by something the inner handler does not catch
    # (e.g. a kernel interrupt).
    try:
        for page in range(num_pages):
            page_number = page + 1
            try:
                print(f"Fetching page {page_number}...")
                driver.get(f"{base_url}&page={page_number}")

                # Block (up to 20 s) until at least one job card is in the DOM.
                WebDriverWait(driver, 20).until(
                    EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'li[data-js-job]'))
                )

                soup = BeautifulSoup(driver.page_source, 'html.parser')
                job_cards = soup.find_all('li', attrs={'data-js-job': True})

                links_before = len(job_links)
                for card in job_cards:
                    link_tag = card.find('a', href=True)  # first anchor carrying an href
                    if link_tag is not None:
                        # urljoin resolves relative hrefs against the site root and
                        # leaves already-absolute hrefs intact.
                        job_links.append(urljoin(site_root, link_tag['href']))

                # Report links actually collected, not the number of cards seen.
                print(f"Found {len(job_links) - links_before} job links on page {page_number}.")

            except Exception as e:
                print(f"An error occurred while fetching page {page_number}: {e}")
                break  # Stop paging on the first failure
    finally:
        driver.quit()

    return job_links

In [6]:
num_pages = 125  # Set the number of pages to scrape
job_links = scrape_bayt_job_links(num_pages)
# Summarise the result instead of printing every URL, which would flood the
# cell output with thousands of lines.
print(f"Collected {len(job_links)} job links.")
print(job_links[:5])

Fetching page 1...
Found 20 job links on page 1.
Fetching page 2...
Found 20 job links on page 2.
Fetching page 3...
Found 20 job links on page 3.
Fetching page 4...
Found 20 job links on page 4.
Fetching page 5...
Found 20 job links on page 5.
Fetching page 6...
Found 20 job links on page 6.
Fetching page 7...
Found 20 job links on page 7.
Fetching page 8...
Found 20 job links on page 8.
Fetching page 9...
Found 20 job links on page 9.
Fetching page 10...
Found 20 job links on page 10.
Fetching page 11...
Found 20 job links on page 11.
Fetching page 12...
Found 20 job links on page 12.
Fetching page 13...
Found 20 job links on page 13.
Fetching page 14...
Found 20 job links on page 14.
Fetching page 15...
Found 20 job links on page 15.
Fetching page 16...
Found 20 job links on page 16.
Fetching page 17...
Found 20 job links on page 17.
Fetching page 18...
Found 20 job links on page 18.
Fetching page 19...
Found 20 job links on page 19.
Fetching page 20...
Found 20 job links on page 20

In [7]:
# Visit every collected link and gather the posting fields into a list of dicts.
job_details = scrape_job_details(job_links)

Fetching job details from https://www.bayt.com/en/uae/jobs/quality-controller-5202386/...
Details fetched for Quality Controller.
1/2500 jobs fetched.
Fetching job details from https://www.bayt.com/en/uae/jobs/operations-executive-5202385/...
Details fetched for Operations Executive.
2/2500 jobs fetched.
Fetching job details from https://www.bayt.com/en/uae/jobs/data-scientist-5202384/...
Details fetched for Data Scientist.
3/2500 jobs fetched.
Fetching job details from https://www.bayt.com/en/egypt/jobs/client-relations-manager-5202383/...
Details fetched for Client Relations Manager.
4/2500 jobs fetched.
Fetching job details from https://www.bayt.com/en/uae/jobs/walk-in-drive-mechanical-technician-20th-oct-24-5202382/...
Details fetched for Walk In Drive | Mechanical Technician - 20th Oct' 24.
5/2500 jobs fetched.
Fetching job details from https://www.bayt.com/en/uae/jobs/operations-manager-material-management-5202381/...
Details fetched for Operations Manager - Material Management.


In [11]:
def _flatten_scraped_text(raw_text, header):
    """Remove `header` from the text, turn newlines and non-breaking spaces into spaces, and trim."""
    without_header = raw_text.replace(header, '')
    single_line = without_header.replace('\n', ' ').replace('\xa0', ' ')
    return single_line.strip()


# Normalise the free-text fields of every scraped posting in place.
# NOTE(review): the header is removed wherever it occurs in the text, not only
# at the start — confirm that is intended.
for job in job_details:
    job['Skills'] = _flatten_scraped_text(job['Skills'], 'Skills\n')
    job['Description'] = _flatten_scraped_text(job['Description'], 'Job Description\n\n')

In [12]:
# Build a DataFrame from the cleaned job details: one row per posting,
# one column per dict key (Title, Company, Location, Description, Skills, Link).
df_jobs = pd.DataFrame(job_details)

In [13]:
# Show the job postings DataFrame as this cell's output.
df_jobs

Unnamed: 0,Title,Company,Location,Description,Skills,Link
0,Quality Controller,Al Futtaim Group,"Dubai, UAE",Overview of the role:To carry out the inspecti...,Required Skills to be successful:Knowledge of ...,https://www.bayt.com/en/uae/jobs/quality-contr...
1,Operations Executive,Al Futtaim Group,"Dubai, UAE",Overview of the role:The Operations Executive ...,Required Skills to be successful:Minimum Exper...,https://www.bayt.com/en/uae/jobs/operations-ex...
2,Data Scientist,Al Futtaim Group,"Dubai, UAE",Overview of the role:Acceleration of automotiv...,Required Skills to be successful:Python Progra...,https://www.bayt.com/en/uae/jobs/data-scientis...
3,Client Relations Manager,Al Futtaim Group,"Cairo, Egypt",Overview of the role:Ensure enhanced Customer ...,Required Skills to be successful:Minimum of 6 ...,https://www.bayt.com/en/egypt/jobs/client-rela...
4,Walk In Drive | Mechanical Technician - 20th O...,Cars24,"Dubai, UAE","Job Overview:As a Mechanical Technician, Warra...",Proven experience as an automotive or mechanic...,https://www.bayt.com/en/uae/jobs/walk-in-drive...
...,...,...,...,...,...,...
2495,Graphic Designer,Alfardan Group,"Doha, Qatar",Job Summary:We are looking for a creative Grap...,Skills:1. Proficiency in graphic design softwa...,https://www.bayt.com/en/qatar/jobs/graphic-des...
2496,Sales Supervisor,Global Management Solutions (GMS),"Altaif, Saudi Arabia",Supervise and lead the sales team to achieve t...,Communication Skills Management skillsDeal clo...,https://www.bayt.com/en/saudi-arabia/jobs/sale...
2497,CRM Manager,Clover Services,"Dubai, UAE","At Clover Services, Ltd., we excel in connecti...",CRM managementAnalytical skillsEye-for-detailB...,https://www.bayt.com/en/uae/jobs/crm-manager-5...
2498,Landscape Draftsman,RTC1 Recruitment Services,"Dubai, UAE",Position Title: Landscape Draftsma...,Qualifications:Open to Asian nationals with to...,https://www.bayt.com/en/uae/jobs/landscape-dra...


In [18]:
# Spot-check the cleaned Skills text of the posting labelled 250.
df_jobs.loc[250, 'Skills']

'1-2 years of experience in sales, preferably in the field of building materials A degree in civil engineering is considered advantageous Basic knowledge of AutoCAD software and the ability to read construction drawings Native-level Arabic skills and a good level of proficiency in English required Willingness to travel within Saudi Arabia and to our headquarters in Italy for training purposes Possession of a valid Saudi driving license (please attach a copy to your application) Motivated individual capable of working autonomously Saudi national'

In [15]:
# Name of the CSV file the job postings are written to
# (a relative path, so it resolves against the current working directory).
filename = 'job_postings.csv'

In [16]:
# Persist the postings table as CSV, leaving out the index column, then confirm.
df_jobs.to_csv(path_or_buf=filename, index=False)
print(f"Job details saved to {filename}")

Job details saved to job_postings.csv
