SEEK JOB POSTINGS IN SYDNEY AREA

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

In [3]:
# Lists to store the data
Posting_date = []
Link = []

# Loop through pages
for page in range(1, 25):
    # Construct the URL for the current page
    url = f'https://www.seek.com.au/data-analyst-jobs/in-All-Sydney-NSW?page={page}&sortmode=ListedDate'
    
    # Send GET request to the page
    response = requests.get(url)
    
    # If the page is fetched successfully (status code 200), parse the HTML content
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        
        # Find all job listings on the page
        job_listings = soup.find_all('article', {'aria-label': True})
        
        # Loop through each job listing and extract the job listing date
        for job in job_listings:
            try:
                # Extract the job listing date
                job_date = job.find('span', {'data-automation': "jobListingDate"}).text.strip()
                Posting_date.append(job_date)
                print(f"Job Listing Date: {job_date}")

                # Extract the job link
                link_url = job.find('a', href=True)['href']  # Get the href attribute of the anchor tag
                full_link = f"https://www.seek.com.au{link_url}"  # Form the full link by appending base URL
                Link.append(full_link)
                print(f"Job Link: {full_link}")
            
            except Exception as e:
                print(f"Error extracting data for a job: {e}")
    else:
        print(f"Failed to fetch page {page}. Status code: {response.status_code}")

# Optionally, print the first few collected dates and links
print(f"Collected {len(Posting_date)} job listings.")


Job Listing Date: 20m ago
Job Link: https://www.seek.com.au/job/80881587?type=standout&ref=search-standalone
Error extracting data for a job: 'NoneType' object has no attribute 'text'
Job Listing Date: 38m ago
Job Link: https://www.seek.com.au/job/80881017?type=standout&ref=search-standalone
Job Listing Date: 41m ago
Job Link: https://www.seek.com.au/job/80881004?type=standout&ref=search-standalone
Job Listing Date: 42m ago
Job Link: https://www.seek.com.au/job/80880997?type=standout&ref=search-standalone
Job Listing Date: 42m ago
Job Link: https://www.seek.com.au/job/80880978?type=standout&ref=search-standalone
Job Listing Date: 55m ago
Job Link: https://www.seek.com.au/job/80880653?type=standard&ref=search-standalone
Job Listing Date: 1h ago
Job Link: https://www.seek.com.au/job/80880017?type=standout&ref=search-standalone
Job Listing Date: 1h ago
Job Link: https://www.seek.com.au/job/80879911?type=standout&ref=search-standalone
Job Listing Date: 1h ago
Job Link: https://www.seek.com

In [4]:
len(Link)

522

In [5]:
# Function to extract job details from a single page
def extract_job_details(page_number):
    url = f'https://www.seek.com.au/data-analyst-jobs/in-All-Sydney-NSW?page={page_number}&sortmode=ListedDate'
    
    # Send GET request to the page
    response = requests.get(url)
    
    # If the page is fetched successfully (status code 200), parse the HTML content
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        
        # Find all job listing articles (each with the 'aria-label' attribute)
        job_listings = soup.find_all('article', {'aria-label': True})
        
        job_data = []
        
        # Loop through each job listing and extract the relevant information
        for article in job_listings:
            try:
                # Extract the job title (from aria-label)
                job_title = article.get('aria-label')

                # Extract the company name
                company_name = article.find('a', {'data-automation': 'jobCompany'}).text.strip()

                # Extract the job type (Full time, etc.)
                job_type = article.find('p', text=lambda text: text and "job" in text).text.strip().replace('This is a ', '')

                # Extract the job location
                job_location = article.find('a', {'data-automation': 'jobLocation'}).text.strip()

                # Extract the job highlights (located in <li> elements)
                job_highlights = [highlight.get_text(strip=True) for highlight in article.find_all('li')]

                # Extract the job short description
                job_description = article.find('span', {'data-automation': 'jobShortDescription'}).text.strip()

                # Extract the salary (if available)
                salary = article.find('span', {'data-automation':'jobSalary'})
                salary = salary.get_text(strip=True) if salary else 'Not Provided'

                # Extract the job classification (if available)
                classification = article.find('span', string=lambda text: text and text.startswith('subClassification:'))
                classification = classification.text.replace('subClassification: ', '').replace('&amp;', '&') if classification else 'Not Provided'

                # Extract the job listing date
                job_date = article.find('span', {'data-automation': "jobListingDate"}).text.strip()

                # Extract the job link
                link_url = article.find('a', href=True)['href']  # Get the href attribute of the anchor tag
                full_link = f"https://www.seek.com.au{link_url}"  # Form the full link by appending base URL

                # Append the job details to the list
                job_data.append({
                    'Job Title': job_title,
                    'Company Name': company_name,
                    'Classification': classification,
                    'Job Type': job_type,
                    'Location': job_location,
                    'Job Highlights': job_highlights,
                    'Job Description': job_description,
                    'Salary': salary,
                    'Posting Date': job_date,
                    'Reference': full_link
                })
            except Exception as e:
                print(f"Error extracting data for a job: {e}")
        
        return job_data
    else:
        print(f"Failed to fetch page {page_number}. Status code: {response.status_code}")
        return []

# Initialize an empty list to store all the job data
all_jobs = []

# Loop through pages and collect job details
for page in range(1, 25):  
    print(f"Scraping page {page}...")
    job_details = extract_job_details(page)
    all_jobs.extend(job_details)
    time.sleep(1)  # Adding a delay to avoid overwhelming the server

# Create a pandas DataFrame from the collected job data
df = pd.DataFrame(all_jobs)

# Display the first few rows of the DataFrame
print("\nDataFrame created with job listings:")
print(df.head())

Scraping page 1...


  job_type = article.find('p', text=lambda text: text and "job" in text).text.strip().replace('This is a ', '')


Error extracting data for a job: 'NoneType' object has no attribute 'text'
Scraping page 2...
Scraping page 3...
Error extracting data for a job: 'NoneType' object has no attribute 'text'
Scraping page 4...
Scraping page 5...
Scraping page 6...
Scraping page 7...
Scraping page 8...
Scraping page 9...
Scraping page 10...
Error extracting data for a job: 'NoneType' object has no attribute 'text'
Error extracting data for a job: 'NoneType' object has no attribute 'text'
Scraping page 11...
Scraping page 12...
Scraping page 13...
Scraping page 14...
Error extracting data for a job: 'NoneType' object has no attribute 'text'
Scraping page 15...
Scraping page 16...
Scraping page 17...
Scraping page 18...
Scraping page 19...
Scraping page 20...
Scraping page 21...
Scraping page 22...
Scraping page 23...
Scraping page 24...
Error extracting data for a job: 'NoneType' object has no attribute 'text'
Error extracting data for a job: 'NoneType' object has no attribute 'text'

DataFrame created with

In [6]:
df.to_csv('Job_Listing_Dec2024.csv', index = False)

In [7]:
df.head()

Unnamed: 0,Job Title,Company Name,Classification,Job Type,Location,Job Highlights,Job Description,Salary,Posting Date,Reference
0,Observability Business Analyst,Talenza,Business/Systems Analysts,Contract/Temp job,Macquarie Park,"[6 Month Contract, $950 per day including Supe...",Exciting Business Analyst opportunity to kick ...,$900 - $950 p.d. + Including Super,12m ago,https://www.seek.com.au/job/80881587?type=stan...
1,Human Resources Business Partner,MindChamps Early Learning & PreSchool,Consulting & Generalist HR,Full time job,Parramatta,[Join a movement transforming education and in...,"Join MindChamps to uplift global education, an...",Not Provided,30m ago,https://www.seek.com.au/job/80881017?type=stan...
2,"Office Manager (Data, Technology, Leader)",EXPHYS,Office Management,Full time job,Woolooware,[Commence January 2025 | Lead the Head Office ...,Exciting office leadership role in a vibrant &...,"$85,000 – $105,000 per year",33m ago,https://www.seek.com.au/job/80881004?type=stan...
3,Training and Consulting Services Administratio...,Full Stop Australia,Administrative Assistants,Full time job,Balmain,[],"Do you thrive on a culture of diversity, inclu...",Salary plus opt in salary packaging.,34m ago,https://www.seek.com.au/job/80880997?type=stan...
4,Senior HIM and Clinical Coding Lead,Beamtree Pty Ltd,Management,Full time job,North Sydney,[],The Senior HIM and Clinical Coding Lead will c...,Not Provided,35m ago,https://www.seek.com.au/job/80880978?type=stan...
