In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, ElementClickInterceptedException
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

In [2]:
def initialize_driver(url="https://qaranjobs.com/"):
    """
    Start a Chrome WebDriver, open ``url`` and maximize the window.

    Args:
        url: Page to open once the browser has started. Defaults to the
            Qaran Jobs home page.

    Returns:
        The Chrome WebDriver instance, positioned on ``url``.

    Raises:
        Exception: Whatever the driver raises while starting, navigating or
            maximizing. If the browser had already started, it is quit
            before the error is re-raised so that no Chrome process is left
            running.
    """
    driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))
    try:
        driver.get(url)
        driver.maximize_window()
    except Exception:
        # The caller never receives the driver on failure, so it could not
        # quit it; clean up here before propagating the error.
        driver.quit()
        raise
    print("Driver initialized and navigated to:", url)
    return driver

In [3]:
def load_all_jobs(driver):
    """
    Click the "Load More Listings" button repeatedly until it stops being clickable.

    Each round waits up to 10 seconds for the button, scrolls it into view
    and clicks it. If the click is intercepted, the page is nudged upwards and
    the click retried a few times. Every successful click, whether on the
    first try or on a retry, is followed by the same log line and pause so the
    next batch of listings has time to load before the button is looked up
    again.

    Args:
        driver: WebDriver positioned on the listings page.

    Returns:
        None. The function stops when the button can no longer be found
        (``TimeoutException``) or when it cannot be clicked after all retries.
    """
    print("Loading all job listings...")
    scroll_pause_time = 1  # Seconds to wait after scrolling / clicking
    max_scroll_attempts = 3  # Retries allowed after an intercepted click

    while True:
        try:
            # Locate the "Load More Listings" button
            load_more_btn = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, "//a[contains(@class, 'load_more_jobs')]"))
            )

            # Scroll to make the button visible and clickable
            driver.execute_script("arguments[0].scrollIntoView(true);", load_more_btn)
            time.sleep(scroll_pause_time)  # Wait for any animations or rendering

            clicked = False
            try:
                load_more_btn.click()
                clicked = True
            except ElementClickInterceptedException:
                print("Element click intercepted. Scrolling to retry...")
                for _ in range(max_scroll_attempts):
                    driver.execute_script("window.scrollBy(0, -50);")  # Scroll slightly up
                    time.sleep(scroll_pause_time)
                    try:
                        load_more_btn.click()
                        clicked = True
                        break  # Exit the retry loop if successful
                    except ElementClickInterceptedException:
                        continue

            if not clicked:
                print("Could not click the button after multiple retries. Skipping.")
                break

            # Shared by the direct and the retried click: without this pause a
            # retried click would be followed immediately by another lookup
            # and click while the previous batch is still loading.
            print("Clicked 'Load More Listings' button.")
            time.sleep(scroll_pause_time)  # Pause for the next batch of jobs to load

        except TimeoutException:
            print("No more jobs to load.")
            break

In [4]:
def _text_or_unknown(tag):
    """Return the stripped text of a parsed tag, or "Unknown" when the tag is missing."""
    return tag.text.strip() if tag is not None else "Unknown"


def _parse_posted_date(raw_text):
    """
    Return the part of a "Posted on <date>" label that follows the word "on".

    The label is split into words rather than on the substring "on", so
    words that merely contain "on" (for example "Monday") do not cut the date
    short. When the label has no standalone "on", or nothing follows it, the
    stripped label is returned unchanged instead of raising.
    """
    words = raw_text.split()
    if "on" in words:
        remainder = words[words.index("on") + 1:]
        if remainder:
            return " ".join(remainder)
    return raw_text.strip()


def extract_job_data(driver):
    """
    Extract job listings and their details from the fully loaded page.

    The current page source is parsed once. For every ``li.job_listing``
    inside ``ul.job_listings`` a dict with the keys ``title``, ``company``,
    ``location``, ``posted``, ``url`` and ``description`` is built. Missing
    elements yield "Unknown" rather than dropping the listing. The description
    is fetched by ``extract_job_description`` when a URL is present.

    Args:
        driver: WebDriver positioned on the listings page.

    Returns:
        A list of job dicts; empty when the listings container is not found.
    """
    print("Extracting job data...")
    soup = BeautifulSoup(driver.page_source, "html.parser")
    job_listings = soup.find("ul", class_="job_listings")
    jobs = []

    if job_listings is None:
        return jobs

    for job in job_listings.find_all("li", class_="job_listing"):
        try:
            job_data = {}

            # The position text has the form "<title> – <rest>"; keep the part
            # before the first en dash.
            position_div = job.find("div", class_="position")
            if position_div is not None:
                job_data["title"] = position_div.text.split("–")[0].strip()
            else:
                job_data["title"] = "Unknown"

            job_data["company"] = _text_or_unknown(job.find("div", class_="company"))
            job_data["location"] = _text_or_unknown(job.find("div", class_="location"))

            date_item = job.find("li", class_="date")
            job_data["posted"] = _parse_posted_date(date_item.text) if date_item is not None else "Unknown"

            link = job.find("a", href=True)
            job_data["url"] = link["href"] if link is not None else "Unknown"

            # Only open the detail page when there is a URL to open.
            if link is not None:
                job_data["description"] = extract_job_description(driver, job_data["url"])
            else:
                job_data["description"] = "No description available."

            jobs.append(job_data)
            print(f"Scraped job: {job_data['title']}")
        except Exception as e:
            # Best effort: one malformed listing must not abort the whole scrape.
            print(f"Error extracting job: {e}")
            continue

    return jobs

In [5]:
def extract_job_description(driver, url):
    """
    Open ``url`` in a temporary browser tab and return the job description text.

    The tab that was active on entry is remembered. Whether or not the
    extraction succeeds, the temporary tab is closed and focus returns to that
    tab, so failures do not accumulate open tabs or leave the driver on the
    wrong window.

    Args:
        driver: WebDriver whose current tab should stay untouched.
        url: Address of the job detail page.

    Returns:
        The stripped text of ``div.job_description``;
        "No description available." when that element is absent; or
        "Error extracting description." when anything goes wrong.
    """
    main_handle = None
    tab_opened = False
    try:
        main_handle = driver.current_window_handle
        driver.execute_script("window.open('');")  # Open a new tab
        tab_opened = True
        driver.switch_to.window(driver.window_handles[-1])  # Switch to the new tab
        driver.get(url)
        time.sleep(1)  # Give the detail page a moment to render

        soup = BeautifulSoup(driver.page_source, "html.parser")
        description_div = soup.find("div", class_="job_description")
        return description_div.text.strip() if description_div else "No description available."
    except Exception as e:
        print(f"Error extracting description from {url}: {e}")
        return "Error extracting description."
    finally:
        if tab_opened:
            try:
                # Close only the temporary tab; if the switch never happened
                # the main tab is still current and must stay open.
                if driver.current_window_handle != main_handle:
                    driver.close()
                driver.switch_to.window(main_handle)
            except Exception as cleanup_error:
                print(f"Error restoring main tab after {url}: {cleanup_error}")

In [6]:
def save_to_csv(data, filename="qaran_jobs.csv"):
    """
    Write the scraped job records to ``filename`` as a UTF-8 CSV file.

    Args:
        data: Records accepted by ``pd.DataFrame`` (here, a list of job dicts).
        filename: Destination path of the CSV file.

    The DataFrame index is not written to the file.
    """
    record_count = len(data)
    print(f"Saving {record_count} jobs to {filename}...")

    jobs_frame = pd.DataFrame(data)
    jobs_frame.to_csv(filename, encoding="utf-8", index=False)

    print("Data saved successfully.")

if __name__ == "__main__":
    # Run the full scrape: open the site, expand every listing, collect the
    # details and write them to disk.
    driver = initialize_driver()
    try:
        time.sleep(3)  # Allow the initial page to load

        load_all_jobs(driver)  # Load all job listings
        jobs_data = extract_job_data(driver)  # Extract job data

        save_to_csv(jobs_data, "qaran_jobs.csv")  # Save to CSV
    finally:
        # Always shut the browser down, even if a step above raised, so no
        # Chrome process is left behind.
        driver.quit()
    print("Scraping completed!")

Driver initialized and navigated to: https://qaranjobs.com/
Loading all job listings...
Element click intercepted. Scrolling to retry...
Clicked 'Load More Listings' button.
Clicked 'Load More Listings' button.
Clicked 'Load More Listings' button.
Clicked 'Load More Listings' button.
Clicked 'Load More Listings' button.
Clicked 'Load More Listings' button.
Clicked 'Load More Listings' button.
Clicked 'Load More Listings' button.
Clicked 'Load More Listings' button.
Clicked 'Load More Listings' button.
Clicked 'Load More Listings' button.
Clicked 'Load More Listings' button.
Clicked 'Load More Listings' button.
Clicked 'Load More Listings' button.
Clicked 'Load More Listings' button.
Clicked 'Load More Listings' button.
Clicked 'Load More Listings' button.
Clicked 'Load More Listings' button.
Clicked 'Load More Listings' button.
Clicked 'Load More Listings' button.
Clicked 'Load More Listings' button.
Clicked 'Load More Listings' button.
Clicked 'Load More Listings' button.
Clicked 'Loa

In [7]:
# Read 'qaran_jobs.csv' (the file name passed to save_to_csv by the scraping
# cell) back into a DataFrame for inspection.
df = pd.read_csv('qaran_jobs.csv')

In [12]:
# Preview the first rows of the loaded listings.
# NOTE(review): the recorded output shows an 'Unnamed: 0' column, which a file
# written with index=False would not produce, and this cell's execution count
# is out of sequence — the output may come from an older CSV; re-run to confirm.
df.head()

Unnamed: 0,title,company,location,posted,url,description
0,REQUEST FOR PROPOSAL: EXTERNAL AUDIT FIRMS,Dhaxalreeb,"Mogadishu, Somalia","January 14, 2025",https://qaranjobs.com/job/request-for-proposal...,REQUEST FOR PROPOSAL: EXTERNAL AUDIT FIRMS – M...
1,Admin and Finance Assistant (Accountant),Shaqodoon Organization,"Mogadishu, Somalia","January 13, 2025",https://qaranjobs.com/job/admin-and-finance-as...,Admin and Finance Assistant (Accountant) – Mog...
2,Coaching Supervisors,AVSI,Somalia,"January 11, 2025",https://qaranjobs.com/job/coaching-supervisors...,Coaching Supervisors – 3 National Positions (D...
3,Pre-qualified enumerators,Islamic Relief,Somalia,"January 11, 2025",https://qaranjobs.com/job/pre-qualified-enumer...,"Pre-qualified enumerators – Mogadishu, Jowhar,..."
4,Fursad Jagooyin Banaan: Xubinimo Guddiga Qaran...,"Wasaaradda Waxbarashada, Hiddaha Iyo Tacliinta...",Somalia,"January 11, 2025",https://qaranjobs.com/job/fursad-jagooyin-bana...,Fursad Jagooyin Banaan: Xubinimo Guddiga Qaran...
