# Jobstreet Job Scraper

In [1]:
import csv
from datetime import datetime
from urllib.parse import quote_plus, urljoin

import requests
from bs4 import BeautifulSoup

In [2]:
def get_url(position, location):
    """Generate a search URL from a position and a location.

    Both values are URL-encoded before being placed in the query string, so
    spaces and reserved characters such as ``&`` or ``#`` cannot break the
    ``q``/``l`` parameters.

    Args:
        position: Search keywords for the job (converted with ``str``).
        location: Place to search in (converted with ``str``).

    Returns:
        The search URL as a string.
    """
    # NOTE(review): the template points at Indeed, while the rest of this
    # notebook requests jobstreet.com.my pages -- confirm which site is intended.
    template = 'https://malaysia.indeed.com/jobs?q={}&l={}'
    return template.format(quote_plus(str(position)), quote_plus(str(location)))

In [3]:
# Search-results page for computer-science internships; this is the page
# fetched with requests.get further down.
test2 = "https://www.jobstreet.com.my/internship-for-computer-science-students-jobs"
# Company reviews page; defined here but not requested in the cells shown.
test3 = "https://www.jobstreet.com.my/companies/petronas-168551397678682/reviews"

### Extract raw HTML

In [4]:
# Fetch the search-results page. Without a timeout, requests waits forever
# on an unresponsive server and the cell never finishes; 30 seconds bounds
# both the connect and the read phase.
response = requests.get(test2, timeout=30)

In [5]:
# Display the Response object; its repr shows the HTTP status code.
response

<Response [200]>

In [6]:
# Display the textual reason phrase that accompanies the status code.
response.reason

'OK'

In [7]:
# Parse the raw response bytes with Python's built-in HTML parser.
soup = BeautifulSoup(
    response.content,
    'html.parser',
)

In [8]:
# Collect every <div> whose class attribute is exactly this string.
# NOTE(review): these look like generated class names and may change when the
# site is redeployed -- confirm the selector still matches before relying on it.
cards = soup.find_all('div', class_='y735df0 _1akoxc50 _1akoxc56')

In [9]:
# Number of <div> elements matched by the card selector.
len(cards)

37

### Prototype the model with a single record 

### Job Title

In [10]:
# Print the title of every card, read from its 'jobTitle' anchor.
for card in cards:
    job_title_tag = card.find('a', attrs={'data-automation': 'jobTitle'})
    if not job_title_tag:
        # No title anchor inside this card.
        print("Job title not found.")
        continue
    job_title = job_title_tag.text.strip()
    print("Job Title:", job_title)


Job Title: Internship for Computer/IT Students
Job Title: Internship for IT Students
Job Title: Internship - Business Administration
Job Title: Intern, IT - Digitalization Intern
Job Title: Internship - Business Consultant
Job Title: Internship Program – IT  (Selangor)
Job Title: Internship Program – IT  (Selangor)
Job Title: Information Technology (IT) Internship (Ipoh, Perak)
Job Title: Internship Software Engineer (6 Months)
Job Title: Digital Technology Intern
Job Title: Internship for Computer / IT Students
Job Title: Scientist for Computer Science
Job Title: Software Support Consultant (Full Time & Internship)
Job Title: Internship for IT Students
Job Title: Internship for Various Department
Job Title: Internship for Various Department
Job Title: IT Intern
Job Title: Internship for IT Students (Java, Mobile Developer) - RM2,000 for Top Students
Job Title: Internship Program – Finance Shared Service Centre (Johor Bahru):
Job Title: Internship Program – Finance Shared Service Centr

### Job URL 

In [11]:
# Print an absolute URL for every card, built from its job-title link.
for card in cards:
    # Target the title anchor explicitly (the same 'jobTitle' selector the
    # Job Title cell uses) rather than whichever <a href> comes first in the card.
    job_title_tag = card.find('a',
                              attrs={'data-automation': 'jobTitle'},
                              href=True)
    if job_title_tag:
        # urljoin handles relative and already-absolute hrefs alike; plain
        # string concatenation would corrupt the latter.
        job_url = urljoin('https://www.jobstreet.com.my', job_title_tag['href'])
        print("Job URL:", job_url)
    else:
        print("Job URL not found.")


Job URL: https://www.jobstreet.com.my/job/75270814?type=standout&ref=search-standalone&origin=cardTitle
Job URL: https://www.jobstreet.com.my/job/75133242?type=standout&ref=search-standalone&origin=cardTitle
Job URL: https://www.jobstreet.com.my/job/75292101?type=standard&ref=search-standalone&origin=cardTitle
Job URL: https://www.jobstreet.com.my/job/75196143?type=standout&ref=search-standalone&origin=cardTitle
Job URL: https://www.jobstreet.com.my/job/75123268?type=standout&ref=search-standalone&origin=cardTitle
Job URL: https://www.jobstreet.com.my/job/75185033?type=standard&ref=search-standalone&origin=cardTitle
Job URL: https://www.jobstreet.com.my/job/75185033?type=standard&ref=search-standalone&origin=cardTitle
Job URL: https://www.jobstreet.com.my/job/75081291?type=standout&ref=search-standalone&origin=cardTitle
Job URL: https://www.jobstreet.com.my/job/75215829?type=standout&ref=search-standalone&origin=cardTitle
Job URL: https://www.jobstreet.com.my/job/75149813?type=standout

### Company

In [17]:
# Print the company name of every card, read from its 'jobCompany' anchor.
for card in cards:
    job_company = card.find('a', attrs={'data-automation': 'jobCompany'})
    if job_company:
        company = job_company.text.strip()
        # Drop only a leading "at " label. A blanket replace("at ", "") also
        # removes "at " inside names, e.g. "Great Eastern" -> "GreEastern".
        if company.startswith("at "):
            company = company[len("at "):]
        print("Company:", company)
    else:
        print("Company not found.")


Company: Rocketsview Management Sdn Bhd
Company: Tony Ng & Associates Sdn Bhd
Company: QARMAKROME PRODUCTIONS SDN. BHD.
Company: KLK OLEO
Company: Hitachi eBworx Sdn. Bhd.
Company: Arkema Coating Resins Malaysia Sdn. Bhd.
Company: Arkema Coating Resins Malaysia Sdn. Bhd.
Company: KUALA LUMPUR KEPONG BERHAD (KLK)
Company: Exact Asia Development Centre Sdn Bhd
Company: The Alice Smith School
Company: FootfallCam
Company: ANHSIN TECHNOLOGY SDN BHD
Company: KEMM ADVISORY SDN. BHD.
Company: Zen Computer Systems Sdn Bhd
Company: SUZUKI MALAYSIA SDN. BHD.
Company: SUZUKI MALAYSIA SDN. BHD.
Company: Safran Landing Systems Malaysia Sdn Bhd
Company: iFAST Capital Sdn Bhd
Company: Arkema Coating Resins Malaysia Sdn. Bhd.
Company: Arkema Coating Resins Malaysia Sdn. Bhd.
Company: Roche Services & Solutions Operations APAC
Company: ABC COOKING STUDIO MALAYSIA SDN. BHD.
Company: iFAST Capital Sdn Bhd
Company: Maestro Swiss Industries Sdn. Bhd.
Company: Ninja Logistics (Thailand) Limited.
Company: MY

### Location

In [13]:
# Print the location of every card, read from its 'jobLocation' anchor.
for card in cards:
    job_loc = card.find('a', attrs={'data-automation': 'jobLocation'})
    if not job_loc:
        # No location anchor inside this card.
        print("Location not found.")
        continue
    location = job_loc.text.strip()
    print("Location:", location)

Location: Kuala Lumpur
Location: Penang
Location: Selangor
Location: Petaling Jaya
Location: Petaling Jaya
Location: Johor Bahru
Location not found.
Location: Ipoh
Location: Kuala Lumpur
Location: Kajang/Bangi/Serdang
Location: Kuala Lumpur
Location: Kuala Lumpur
Location: George Town
Location: Cyberjaya
Location: Glenmarie
Location not found.
Location: Others
Location: Kuala Lumpur
Location: Johor Bahru
Location not found.
Location: Petaling Jaya
Location: Kuala Lumpur
Location: Kuala Lumpur
Location: Butterworth
Location: Subang Jaya
Location: Ipoh
Location: Bukit Jelutong
Location: Miri
Location not found.
Location: Ulu Tiram
Location: Kuala Lumpur
Location: Bukit Jelutong
Location: Penang Island
Location: Bandar Baru Sentul
Location: Teluk Panglima Garang
Location: Kuala Lumpur
Location not found.


### Salary

In [19]:
# Print the salary text of every card, read from its 'jobSalary' span.
for card in cards:
    job_sal = card.find('span', attrs={'data-automation': 'jobSalary'})
    if not job_sal:
        # Cards without a salary span are reported as undisclosed.
        print("Salary undisclosed.")
        continue
    salary = job_sal.text.strip()
    print("Salary:", salary)

Salary: MYR 800 - 1,000
Salary: RM 800 – RM 1,200 per month
Salary: RM 800 – RM 1,000 per month
Salary undisclosed.
Salary undisclosed.
Salary undisclosed.
Salary undisclosed.
Salary: RM 1,500 per month
Salary undisclosed.
Salary undisclosed.
Salary: RM 800 – RM 1,200 per month
Salary: RM 4,500 – RM 6,500 per month
Salary: MYR 2,500 - 3,500
Salary undisclosed.
Salary: RM 400 per month
Salary undisclosed.
Salary undisclosed.
Salary: RM 1,500 – RM 2,000 per month
Salary undisclosed.
Salary undisclosed.
Salary undisclosed.
Salary: RM 500 – RM 700 per month
Salary: RM 1,500 – RM 2,000 per month
Salary undisclosed.
Salary: RM 800 – RM 1,000 per month
Salary undisclosed.
Salary undisclosed.
Salary undisclosed.
Salary undisclosed.
Salary: RM 650 – RM 950 per month
Salary: RM 1,000 – RM 1,100 per month
Salary: MYR 400 - 600
Salary: RM 800 – RM 1,000 per month
Salary: RM 4,200 – RM 6,200 per month
Salary undisclosed.
Salary: RM 600 – RM 800 per month
Salary undisclosed.


### Summary

In [24]:
# Print the text of each card's 'jobListingDate' span under a "Summary" label.
# NOTE(review): this is the same selector the Date Posted cell uses, so the
# values printed here are posting ages, not job summaries -- confirm which
# attribute holds the summary text and update the selector.
for card in cards:
    job_sum = card.find('span', attrs={"data-automation": 'jobListingDate'})
    if not job_sum:
        print("Summary not found.")
        continue
    summary = job_sum.text.strip()
    print("Summary:", summary)

Summary: 1d ago
Summary: 5d ago
Summary: 7h ago
Summary: 3d ago
Summary: 5d ago
Summary: 3d ago
Summary not found.
Summary: 8d ago
Summary: 2d ago
Summary: 4d ago
Summary: 8d ago
Summary: 17d ago
Summary: 15d ago
Summary: 12d ago
Summary: 2d ago
Summary not found.
Summary: 11d ago
Summary: 12d ago
Summary: 2d ago
Summary not found.
Summary: 4d ago
Summary: 12d ago
Summary: 12d ago
Summary: 12d ago
Summary: 23d ago
Summary: 5d ago
Summary: 15d ago
Summary: 5d ago
Summary not found.
Summary: 12d ago
Summary: 16d ago
Summary: 17d ago
Summary: 18d ago
Summary: 11d ago
Summary: 11d ago
Summary: 11d ago
Summary not found.


### Date Posted

In [22]:
# Print how long ago every card was listed, read from its 'jobListingDate' span.
for card in cards:
    job_sum = card.find('span', attrs={'data-automation': 'jobListingDate'})
    if not job_sum:
        # No listing-date span inside this card.
        print("Date Posted not found.")
        continue
    summary = job_sum.text.strip()
    print("Date Posted:", summary)

Date Posted: 1d ago
Date Posted: 5d ago
Date Posted: 7h ago
Date Posted: 3d ago
Date Posted: 5d ago
Date Posted: 3d ago
Date Posted not found.
Date Posted: 8d ago
Date Posted: 2d ago
Date Posted: 4d ago
Date Posted: 8d ago
Date Posted: 17d ago
Date Posted: 15d ago
Date Posted: 12d ago
Date Posted: 2d ago
Date Posted not found.
Date Posted: 11d ago
Date Posted: 12d ago
Date Posted: 2d ago
Date Posted not found.
Date Posted: 4d ago
Date Posted: 12d ago
Date Posted: 12d ago
Date Posted: 12d ago
Date Posted: 23d ago
Date Posted: 5d ago
Date Posted: 15d ago
Date Posted: 5d ago
Date Posted not found.
Date Posted: 12d ago
Date Posted: 16d ago
Date Posted: 17d ago
Date Posted: 18d ago
Date Posted: 11d ago
Date Posted: 11d ago
Date Posted: 11d ago
Date Posted not found.
