# Web Scraping (Jobstreet)
## Done by: Yann
### Source: https://www.jobstreet.com.sg/data-analyst-jobs/in-Singapore?pg=1

##

### Import Libraries

In [12]:
import csv
import requests
from bs4 import BeautifulSoup

##

### Extract Raw HTML

In [2]:
# Fetch only the first results page of the Jobstreet search.
url = 'https://www.jobstreet.com.sg/data-analyst-jobs/in-Singapore'

# requests applies no timeout by default; without one a stalled connection
# would hang the kernel indefinitely.
response = requests.get(url, timeout=30)

# Fail loudly on an HTTP error instead of silently parsing an error page
# into zero job listings.
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

# Each job card is located by its full class string.
# NOTE(review): these class names look auto-generated and may change when the
# site is redeployed -- confirm against the live page if this returns 0.
job_listings = soup.find_all('div', class_='z1s6m00 _1hbhsw67i _1hbhsw66e _1hbhsw69q _1hbhsw68m _1hbhsw6n _1hbhsw65a _1hbhsw6ga _1hbhsw6fy')

# Sanity check: how many postings were found on this page.
len(job_listings)



##

### Scrape Information


In [14]:
# Class strings used to locate the fields inside one job card.
SALARY_CLASS = 'z1s6m00 _1hbhsw64y y44q7i0 y44q7i3 y44q7i21 y44q7ih'
SELLING_POINT_CLASS = 'z1s6m00 _1hbhsw64y y44q7i0 y44q7i1 y44q7i21 _1d0g9qk4 y44q7i7'
NO_SELLING_POINTS = 'No selling points available'


def _text_or(element, fallback):
    """Return the stripped text of a parsed element, or `fallback` if it is None.

    `find()` / `find_next_sibling()` return None when nothing matches, so
    calling `.text` on the result directly would raise AttributeError.
    """
    return element.text.strip() if element is not None else fallback


data_rows = []

for job in job_listings:

    # Get title and company name (None-safe: a card may lack either element)
    title = _text_or(job.find('span', class_='z1s6m00'), 'No Title Available')
    company = _text_or(
        job.find('a', class_='_6xa4xb0 z1s6m00 z1s6m0f rqoqz4'),
        'No Company Available',
    )

    # Get salary info: the salary is the *next sibling* span carrying the same
    # class as the first matching span. Look it up once and reuse the result.
    salary_anchor = job.find('span', class_=SALARY_CLASS)
    salary_element = (
        salary_anchor.find_next_sibling('span', class_=SALARY_CLASS)
        if salary_anchor is not None
        else None
    )
    salary = _text_or(salary_element, 'No Salary Displayed')

    # Get description: join all selling-point spans, if the container and at
    # least one point exist.
    selling_points = job.find('div', class_='z1s6m00 _1hbhsw6ba _1hbhsw64y')
    points = (
        selling_points.find_all('span', class_=SELLING_POINT_CLASS)
        if selling_points is not None
        else []
    )
    if points:
        selling_points_text = ', '.join(point.get_text() for point in points)
    else:
        selling_points_text = NO_SELLING_POINTS

    # Get date posted: keep only the date part of the <time datetime="..."> value.
    exact_date = job.find('time', class_='z1s6m00 _1hbhsw64y')
    datetime_attr = exact_date.get('datetime') if exact_date is not None else None
    date_posted = datetime_attr.split('T')[0] if datetime_attr else 'No Date Available'
    how_long_ago = _text_or(
        job.find('span', class_='z1s6m00 _1hbhsw64y y44q7i0 y44q7i1 y44q7i22 y44q7ih'),
        'No Date Available',
    )

    data_rows.append([title, company, salary, selling_points_text, date_posted, how_long_ago])

    print(f"Title: {title}")
    print(f"Company: {company}")
    print(f"Salary: {salary}")
    print(f"Description:\n{selling_points_text}")
    print(f"Date Posted: {date_posted}")
    print(f"Days elapsed: {how_long_ago}")
    print("\n")

# Write the CSV once, after every listing has been collected. Doing this inside
# the loop truncated and rewrote the file on every iteration, and produced no
# file at all when there were no listings.
with open('job_listings.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['Title', 'Company', 'Salary', 'Description', 'Date Posted', 'Days Elapsed'])
    writer.writerows(data_rows)

Title: Business Analyst
Company: Rockwell Automation Singapore
Salary: No Salary Displayed
Description:
No selling points available
Date Posted: 2023-05-16
Days elapsed: 5d ago


Title: Data Analyst
Company: TÜV SÜD PSB Pte Ltd
Salary: SGD 4.5K - 8,000 monthly
Description:
Be the first to evaluate and optimise new technologies, Deliver safe, secure & sustainable solutions, Be the expert our customer trust
Date Posted: 2023-05-12
Days elapsed: 8d ago


Title: Data Scientist (Partner Company)
Company: SGInnovate
Salary: SGD 5K - 7,000 monthly
Description:
No selling points available
Date Posted: 2023-05-17
Days elapsed: 4d ago


Title: Business Analyst
Company: Cargo Community Network Pte Ltd
Salary: SGD 4.1K - 6,240 monthly
Description:
Hybrid, AWS and Variable Bonus, Flexible Benefits, Positive learning environment
Date Posted: 2023-05-16
Days elapsed: 5d ago


Title: Healthcare Business Analyst
Company: DCH AURIGA SINGAPORE
Salary: No Salary Displayed
Description:
5 days work week, Co