# Web Scraping 

This Python script is designed to automatically collect job listings from the HelloJob.az website. 
After comparing the available tools, we decided to use Selenium to open and navigate web pages in the background (headless mode), 
making the process smooth and invisible to the user. 
The script visits the first 10 pages of job listings and extracts links to individual job posts. 
Then, it opens each job post one by one and gathers key details like the job title, company name, location, 
salary, work schedule, and even the company logo if available. 
Finally, it prints all the collected job data and saves it in a clean, structured format: a CSV file. 

In [6]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import time
import os

# Build the Chrome option set: each entry below is passed to the browser
# as a command-line switch ('--headless' keeps the window hidden).
options = Options()
for chrome_switch in ('--headless', '--no-sandbox', '--disable-dev-shm-usage'):
    options.add_argument(chrome_switch)

# Start the browser session; the Service wraps whatever
# ChromeDriverManager().install() returns (presumably the chromedriver path).
webdriver_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(options=options, service=webdriver_service)

# Base URL for job listings
base_url = "https://www.hellojob.az/vakansiyalar"
all_job_links = []  # unique job URLs, kept in the order they were discovered
max_pages = 10  # Stop at page 10

# Step 1: Collect job listing links
# Root used to turn the hrefs found on listing pages into absolute URLs.
site_root = "https://www.hellojob.az"
# Mirror of all_job_links used only for constant-time duplicate checks;
# the list itself is kept because later steps rely on discovery order.
seen_job_links = set()

for page in range(1, max_pages + 1):
    url = f"{base_url}?page={page}"
    try:
        driver.get(url)
        time.sleep(2)  # fixed pause before reading the page source

        soup = BeautifulSoup(driver.page_source, 'html.parser')
        found_links_on_page = 0
        for job in soup.find_all('div', class_='vacancies__item'):
            # Only anchors that actually carry an href are considered.
            link_tag = job.find('a', class_='vacancies__body', href=True)
            if link_tag is None:
                continue

            full_link = urljoin(site_root, link_tag['href'])
            if full_link in seen_job_links:
                continue  # already collected on this or an earlier page

            seen_job_links.add(full_link)
            all_job_links.append(full_link)
            found_links_on_page += 1
        print(f"Scraped page {page}, found {found_links_on_page} new job links.")

    except Exception as e:
        # Any failure on a listing page ends link collection; the links
        # gathered so far are still processed by the following steps.
        print(f"An error occurred while scraping page {page}: {e}")
        break

print("\nCollected Job Links:")
for link in all_job_links:
    print(link)

# Step 2: Scrape detailed job info from each job page
job_details = []

# Maps the label text shown in the job page's details list to the output
# column that the corresponding value fills. Labels not listed are ignored.
detail_label_to_field = {
    "Şəhər": 'Location',
    "İş rejimi": 'Job Time',
    "Kateqoriya": 'Category',
    "Maaş": 'Salary',
    "Yerləşmə tarixi": 'Post Date',
    "Bitmə tarixi": 'Deadline',
}


def _first_text(node, tag_name, css_class=None):
    """Return the stripped text of the first matching descendant, or None.

    node is any parsed element exposing ``find``; css_class, when given, is
    passed through as the ``class_`` filter. A missing element yields None
    instead of raising, so callers can treat every field as optional.
    """
    # The class filter is only passed when requested: handing class_=None to
    # find() is not the same as omitting it.
    if css_class is None:
        match = node.find(tag_name)
    else:
        match = node.find(tag_name, class_=css_class)
    return match.text.strip() if match is not None else None


for link in all_job_links:
    try:
        driver.get(link)
        time.sleep(2)  # fixed pause before reading the page source

        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # One record per job; every field defaults to None when the page
        # does not provide it. Key order defines the CSV column order.
        record = {
            'Job Title': _first_text(soup, 'h1', 'section-title'),
            'Company Name': _first_text(soup, 'a', 'vacancies__category text-black'),
            'Location': None,
            'Job Time': None,
            'Category': None,
            'Salary': None,
            'Post Date': None,
            'Deadline': None,
            'Job URL': link,
            'Company Logo URL': None,
        }

        # Company logo: taken from the first <img> inside the logo container.
        logo_container = soup.find('div', class_='vacancies__logo')
        if logo_container is not None:
            img_tag = logo_container.find('img')
            if img_tag is not None:
                record['Company Logo URL'] = img_tag.get('src')

        # Extract details from the details list (label in <span>, value in <p>)
        details_list = soup.find('ul', class_='company__item__details')
        if details_list is not None:
            for item in details_list.find_all('li'):
                label = _first_text(item, 'span')
                value = _first_text(item, 'p')
                if label is None or value is None:
                    continue  # incomplete list entry, nothing to record
                field = detail_label_to_field.get(label)
                if field is not None:
                    record[field] = value

        job_details.append(record)
        job_title = record['Job Title']
        print(f"Scraped details for: {job_title}")

    except Exception as e:
        # A failing job page is reported and skipped; the loop continues.
        print(f"An error occurred while scraping details from {link}: {e}")

# Shut down the browser session now that scraping is finished
driver.quit()

# Dump every collected record, one dict per line, under a heading
print("\n--- Job Details ---")
if job_details:
    print("\n".join(str(record) for record in job_details))


Scraped page 1, found 30 new job links.
Scraped page 2, found 30 new job links.
Scraped page 3, found 30 new job links.
Scraped page 4, found 30 new job links.
Scraped page 5, found 30 new job links.
Scraped page 6, found 30 new job links.
Scraped page 7, found 30 new job links.
Scraped page 8, found 30 new job links.
Scraped page 9, found 30 new job links.
Scraped page 10, found 18 new job links.

Collected Job Links:
https://www.hellojob.az/vakansiya/kredit-mutexessisi
https://www.hellojob.az/vakansiya/magaza-saticisi-905042
https://www.hellojob.az/vakansiya/kuryer
https://www.hellojob.az/vakansiya/satis-uzre-agent-904974
https://www.hellojob.az/vakansiya/sosial-media-marketinq-smm-mutexessisixanim
https://www.hellojob.az/vakansiya/online-satis-meneceri-960501
https://www.hellojob.az/vakansiya/ingilis-dili-muellimi-959418
https://www.hellojob.az/vakansiya/satis-mutexessisi-959636
https://www.hellojob.az/vakansiya/satis-meslehetcisi-957264
https://www.hellojob.az/vakansiya/muhendis
ht

Scraped details for: Kredit mütəxəssisi
Scraped details for: Mağaza satıcısı
Scraped details for: Kuryer
Scraped details for: Satış üzrə agent
Scraped details for: SMM mütəxəssisi (xanım)
Scraped details for: Online satış meneceri
Scraped details for: İngilis dili müəllimi
Scraped details for: Satış mütəxəssisi
Scraped details for: Satış məsləhətçisi
Scraped details for: Mühəndis
Scraped details for: Ümumi İngilis dili müəllimi (Sumqayıt Ofisi)
Scraped details for: Anbardar
Scraped details for: Satınalma meneceri
Scraped details for: Satış Meneceri
Scraped details for: TikTok Canlı Yayım üzrə Satış Meneceri
Scraped details for: Əczaçı
Scraped details for: Regyon Üzrə Satış təmsilçisi
Scraped details for: Direktor assistenti
Scraped details for: Müqavilələr və bank əməliyyatlarına hüquqi dəstək şöbəsi üzrə hüquqşünas
Scraped details for: Şəbəkə və əməliyyatların təhlükəsizliyi arxitekturası üzrə baş mütəxəssis
Scraped details for: Əməliyyat risklərinin idarə edilməsi şöbəsinin aparıcı m

Scraped details for: Ofisiant
Scraped details for: Sosial Media Marketinq (SMM) mütəxəssisi
Scraped details for: Call-center operator
Scraped details for: Daxili nəzarət üzrə mütəxəssis
Scraped details for: Diş texniki
Scraped details for: Mal qəbulçusu
Scraped details for: Qeydiyyatçı
Scraped details for: Avropada Təhsil üzrə mütəxəssis
Scraped details for: Rus dili müəllimi
Scraped details for: Müştəri xidmətləri üzrə mütəxəssis
Scraped details for: Senior Product Owner
Scraped details for: Lizinq mütəxəssisi
Scraped details for: Sushimen
Scraped details for: Ofisiant
Scraped details for: Təmizlikçi
Scraped details for: Qızıl girovlu kreditlər üzrə Mütəxəssis
Scraped details for: Qeydiyyatçı
Scraped details for: Avtomobil Brend Menecer
Scraped details for: Qeydiyyatçı
Scraped details for: Satış mütəxəssisi
Scraped details for: Mühasib
Scraped details for: Satış təmsilçisi
Scraped details for: Satış üzrə mütəxəssis
Scraped details for: Logistika üzrə mütəxəssis
Scraped details for: 1C

In [7]:
import csv
import os

# Step 3: Persist the scraped records as a CSV file in the target folder
output_directory = r"C:\Users\HP\OneDrive - ADA University\Documents\ADA University\!!Spring2025\Data & Information Engineering 20576"
csv_file_path = os.path.join(output_directory, 'hellojob_vacancies_final.csv')

# Column names come from the first record's keys; no columns if nothing was scraped
if job_details:
    csv_columns = job_details[0].keys()
else:
    csv_columns = []

try:
    with open(csv_file_path, 'w', newline='', encoding='utf-8') as out_file:
        csv_writer = csv.DictWriter(out_file, fieldnames=csv_columns)
        csv_writer.writeheader()
        csv_writer.writerows(job_details)
except Exception as err:
    # Report the problem instead of raising, so the notebook cell still completes
    print(f"An error occurred while saving to CSV: {err}")
else:
    print(f"\nData saved to {csv_file_path}")


Data saved to C:\Users\HP\OneDrive - ADA University\Documents\ADA University\!!Spring2025\Data & Information Engineering 20576\hellojob_vacancies_final.csv
