#  Wuzzuf Scraping Engineering Jobs Assignment

# Note:
-> To save time, this code scrapes only the first 7 pages of results, even if the search for a job title returns more pages than that.

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import time, json, sys, csv

# Start a Chrome session (driver binary resolved by webdriver_manager) and
# open the Wuzzuf Egypt job listings.
chrome_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=chrome_service)
driver.get("https://wuzzuf.net/jobs/egypt")
wait = WebDriverWait(driver, 10)  # explicit waits time out after 10 seconds

# Submit the search query through the search input (name="q").
search_locator = (By.NAME, "q")
query_field = wait.until(EC.element_to_be_clickable(search_locator))
time.sleep(2)  # fixed pause before interacting with the input
query_field.click()
query_field.clear()
# "Agricultural Engineer" is a small, fast search (49 results across 4 pages
# when this was run); a broader query such as "software engineering" can be
# substituted here.
query_field.send_keys("Agricultural Engineer")
query_field.send_keys(Keys.RETURN)
time.sleep(5)  # fixed pause while the results page loads

# Scraper state shared with the page loop below.
all_jobs_data, scraped_urls = [], set()  # collected rows / links already seen (prevents duplicates)
current_page_number = 1
MAX_PAGES = 7  # upper bound on the number of result pages to scrape

NOT_LISTED = "Not listed"  # placeholder stored when an optional field is absent


def _optional_text(parent, by, selector, default=NOT_LISTED):
    """Return the stripped text of the first match under *parent*, or *default*.

    Optional card fields are best-effort: any lookup failure yields *default*.
    ``except Exception`` (rather than a bare ``except``) lets
    KeyboardInterrupt and SystemExit propagate so the run can be aborted.
    """
    try:
        return parent.find_element(by, selector).text.strip()
    except Exception:
        return default


def _fetch_skills(browser, url, settle_seconds=2):
    """Open *url* in a new tab, read its skill tags, then return to the caller's tab.

    Returns the skill texts joined by ", ", or NOT_LISTED when the skills
    container or its tags cannot be read. The tab opened here is closed and
    focus is restored to the original window even if reading fails.
    """
    results_handle = browser.current_window_handle
    known_handles = set(browser.window_handles)
    browser.execute_script("window.open(arguments[0]);", url)

    # Identify the tab we just opened instead of assuming it sits at index 1.
    opened = [h for h in browser.window_handles if h not in known_handles]
    if not opened:
        return NOT_LISTED

    try:
        browser.switch_to.window(opened[0])
        time.sleep(settle_seconds)  # fixed pause while the detail page loads
        try:
            container = browser.find_element(By.CSS_SELECTOR, "div.css-qe7mba")
            tags = container.find_elements(By.CSS_SELECTOR, "span.css-1vi25m1")
            return ", ".join(tag.text.strip() for tag in tags) if tags else NOT_LISTED
        except Exception:
            return NOT_LISTED
    finally:
        # Never close the results window itself; only the tab opened above.
        if browser.current_window_handle != results_handle:
            browser.close()
        browser.switch_to.window(results_handle)


try:
    while True:
        print(f"\n--- Scraping Page {current_page_number} ---")
        try:
            # Scroll to bottom to load lazy-loaded jobs
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)

            # Get all job cards
            job_cards = wait.until(
                EC.presence_of_all_elements_located((By.CSS_SELECTOR, "div.css-ghe2tq.e1v1l3u10"))
            )

            # Extract job details
            for job in job_cards:
                try:
                    application_link = job.find_element(By.CSS_SELECTOR, "h2.css-193uk2c a").get_attribute("href")
                    if application_link in scraped_urls:
                        continue  # already recorded on an earlier card/page
                    scraped_urls.add(application_link)

                    # Required fields: a failure here skips the whole card.
                    title = job.find_element(By.CSS_SELECTOR, "h2.css-193uk2c").text.strip()
                    company = job.find_element(By.CSS_SELECTOR, "a.css-ipsyv7").text.strip()
                    location = job.find_element(By.CSS_SELECTOR, "span.css-16x61xq").text.strip()

                    # Optional fields: fall back to "Not listed".
                    exp_level = _optional_text(
                        job, By.XPATH, ".//a[contains(@class,'css-o171kl')]/following-sibling::span"
                    )
                    date = _optional_text(job, By.CSS_SELECTOR, "div.css-1jldrig")
                    job_type = _optional_text(job, By.CSS_SELECTOR, "span.css-uc9rga")

                    # Skills only appear on the job detail page, opened in a temporary tab.
                    skills = _fetch_skills(driver, application_link)

                    # Adds a dictionary for each job to all_jobs_data.
                    all_jobs_data.append({
                        "Job Title": title,
                        "Company": company,
                        "Location": location,
                        "Experience Level": exp_level,
                        "Date": date,
                        "Skills": skills,
                        "Job Type": job_type,
                        "Application Link": application_link,
                    })
                    time.sleep(1)

                except Exception as e:
                    # One bad card must not abort the page; report it and move on.
                    print(f"Error extracting job details: {e}", file=sys.stderr)

            # Pagination
            # Stops if maximum pages reached.
            if current_page_number >= MAX_PAGES:
                print(f"\nReached the maximum page limit of {MAX_PAGES}. Stopping...")
                break

            try:
                # Get all page buttons
                page_buttons = driver.find_elements(By.CSS_SELECTOR, "li.css-2t2q4i button")
                # Index (within page_buttons) of the currently active page button
                current_page = None

                # Identify current active page
                for idx, btn in enumerate(page_buttons):
                    # get_attribute may return None; normalise so `in` cannot raise TypeError.
                    btn_class = btn.get_attribute("class") or ""
                    btn_html = btn.get_attribute("outerHTML") or ""
                    if "css-9ohdr1" in btn_class or "aria-current" in btn_html:
                        current_page = idx
                        break

                # If the active button was found and another button follows it, click that one
                if current_page is not None and current_page + 1 < len(page_buttons):
                    driver.execute_script("arguments[0].click();", page_buttons[current_page + 1])
                    current_page_number += 1
                    time.sleep(5)
                else:
                    print("No more pages found.")
                    break  # Last page reached

            except Exception as e:
                print(f"Error in pagination: {e}", file=sys.stderr)
                break

        except Exception as e:
            print(f"Error loading job cards: {e}", file=sys.stderr)
            break
finally:
    # Always release the browser and chromedriver process, even on Ctrl+C or
    # an unexpected error; nothing after this point uses the driver.
    driver.quit()

# Save results to JSON (an empty run still yields a valid "[]" document)
with open("wuzzuf_jobs_JSON.json", "w", encoding="utf-8") as f:
    json.dump(all_jobs_data, f, ensure_ascii=False, indent=4)

# Save results to CSV
# Column names come from the first scraped row. When nothing was scraped,
# all_jobs_data[0] would raise IndexError, so fall back to the known columns
# and write a header-only file instead of crashing after the JSON was written.
csv_columns = list(all_jobs_data[0]) if all_jobs_data else [
    "Job Title",
    "Company",
    "Location",
    "Experience Level",
    "Date",
    "Skills",
    "Job Type",
    "Application Link",
]
with open("wuzzuf_jobs_CSV.csv", "w", encoding="utf-8", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=csv_columns)
    writer.writeheader()
    writer.writerows(all_jobs_data)

if not all_jobs_data:
    print("Warning: no jobs were scraped; the output files contain no records.", file=sys.stderr)

print(f"\n✅ Scraping completed. {len(all_jobs_data)} jobs saved to JSON and CSV.")



--- Scraping Page 1 ---

--- Scraping Page 2 ---

--- Scraping Page 3 ---

--- Scraping Page 4 ---
No more pages found.

✅ Scraping completed. 49 jobs saved to JSON and CSV.
