In [74]:
import requests
from bs4 import BeautifulSoup
import time

# to make a GET request
def make_request(url):
    try:
        response=requests.get(url)
        response.raise_for_status()
        return response
    except requests.exceptions.RequestsException as e:
        print(f"Failed to make a request.Error as :{e}")
        return None

make_request(url="https://realpython.github.io/fake-jobs/")




<Response [200]>

In [65]:
#to parse html using bs4
def parse_html(response):
    if response:
        try:
            soup=BeautifulSoup(response.content,'html.parser')
            return soup
        except Exception as e:
            print(f"Failed to parse HTML. Error as:{e}")
            return None
    else:
        return None  
                  
        
response=make_request(url="https://realpython.github.io/fake-jobs/")
parse_html(response)

<!DOCTYPE html>

<html>
<head>
<meta charset="utf-8"/>
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<title>Fake Python</title>
<link href="https://cdn.jsdelivr.net/npm/bulma@0.9.2/css/bulma.min.css" rel="stylesheet"/>
</head>
<body>
<section class="section">
<div class="container mb-5">
<h1 class="title is-1">
        Fake Python
      </h1>
<p class="subtitle is-3">
        Fake Jobs for Your Web Scraping Journey
      </p>
</div>
<div class="container">
<div class="columns is-multiline" id="ResultsContainer">
<div class="column is-half">
<div class="card">
<div class="card-content">
<div class="media">
<div class="media-left">
<figure class="image is-48x48">
<img alt="Real Python Logo" src="https://files.realpython.com/media/real-python-logo-thumbnail.7f0db70c2ed2.jpg?__no_cf_polish=1"/>
</figure>
</div>
<div class="media-content">
<h2 class="title is-5">Senior Python Developer</h2>
<h3 class="subtitle is-6 company">Payne, Roberts and Davis</h3>
</div>
</div>

In [83]:
#to fetch job title,company,location and date of Posting
def get_html_content(url):
    try:
        response = requests.get(url)
        response.raise_for_status()
        return response.text
    except requests.exceptions.RequestException as e:
        print(f"Failed to make a request. Error: {e}")
        return None

url = "https://realpython.github.io/fake-jobs/"
html_content = get_html_content(url)
if html_content:
    soup = BeautifulSoup(html_content, 'html.parser')
    
def fetch_job_info(soup):
    job_info_list = []
    job_elements = soup.find_all('div', class_="card-content")
    for job_element in job_elements:
        title_element = job_element.find("h2", class_="title is-5")
        company_element = job_element.find("h3", class_="subtitle is-6 company")
        location_element = job_element.find("p", class_="location")
        date_of_post = job_element.find("p", class_="is-small has-text-grey")

        job_title = title_element.text.strip() if title_element else ""
        company_name = company_element.text.strip() if company_element else ""
        job_location = location_element.text.strip() if location_element else ""
        posting_date = date_of_post.text.strip() if date_of_post else ""
        job_info = {
            "Job Title": job_title,
            "Company Name": company_name,
            "Location": job_location,
            "Date of Posting": posting_date
        }

        job_info_list.append(job_info)

    return job_info_list

job_info_list = fetch_job_info(soup)
for job_info in job_info_list:
    print(job_info)
    print()

{'Job Title': 'Senior Python Developer', 'Company Name': 'Payne, Roberts and Davis', 'Location': 'Stewartbury, AA', 'Date of Posting': '2021-04-08'}

{'Job Title': 'Energy engineer', 'Company Name': 'Vasquez-Davidson', 'Location': 'Christopherville, AA', 'Date of Posting': '2021-04-08'}

{'Job Title': 'Legal executive', 'Company Name': 'Jackson, Chambers and Levy', 'Location': 'Port Ericaburgh, AA', 'Date of Posting': '2021-04-08'}

{'Job Title': 'Fitness centre manager', 'Company Name': 'Savage-Bradley', 'Location': 'East Seanview, AP', 'Date of Posting': '2021-04-08'}

{'Job Title': 'Product manager', 'Company Name': 'Ramirez Inc', 'Location': 'North Jamieview, AP', 'Date of Posting': '2021-04-08'}

{'Job Title': 'Medical technical officer', 'Company Name': 'Rogers-Yates', 'Location': 'Davidville, AP', 'Date of Posting': '2021-04-08'}

{'Job Title': 'Physiological scientist', 'Company Name': 'Kramer-Klein', 'Location': 'South Christopher, AE', 'Date of Posting': '2021-04-08'}

{'Job 

In [68]:
# to extract application job_links with  links to  description 
def extract_links_to_descriptions(job_footer):
    link_elements = job_footer.find_all("a")
    links_to_descriptions = []

    for link in link_elements:
        link_url = link.get("href")
        link_description = link.text.strip()
        links_to_descriptions.append({
            'Link': link_url,
            'Description': link_description
        })

    return links_to_descriptions

def main():
    base_url = 'https://realpython.github.io'
    url = base_url + '/fake-jobs/'

    start_time = time.time()

    # Make the HTTP request
    response = make_request(url)

    # Parse HTML content
    soup = parse_html(response)

    # Find all job offer divs
    job_offer_divs = soup.find_all('div', class_='column is-half')

    for job_div in job_offer_divs:
        # Fetch job application links for each job offer
        job_footer = job_div.find('footer', class_='card-footer')

        if job_footer:
            links_to_descriptions = extract_links_to_descriptions(job_footer)

            if links_to_descriptions:
                print("\nApplication Links for the Job:")
                for link_info in links_to_descriptions:
                    print("Link:", link_info['Link'])
                    print("Description:", link_info['Description'])
            else:
                print("\nNo application links found for the job.")
        else:
            print("\nNo job footer found.")

    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"\nTotal time taken: {elapsed_time:.2f} seconds")

if __name__ == "__main__":
    main()



Application Links for the Job:
Link: https://www.realpython.com
Description: Learn
Link: https://realpython.github.io/fake-jobs/jobs/senior-python-developer-0.html
Description: Apply

Application Links for the Job:
Link: https://www.realpython.com
Description: Learn
Link: https://realpython.github.io/fake-jobs/jobs/energy-engineer-1.html
Description: Apply

Application Links for the Job:
Link: https://www.realpython.com
Description: Learn
Link: https://realpython.github.io/fake-jobs/jobs/legal-executive-2.html
Description: Apply

Application Links for the Job:
Link: https://www.realpython.com
Description: Learn
Link: https://realpython.github.io/fake-jobs/jobs/fitness-centre-manager-3.html
Description: Apply

Application Links for the Job:
Link: https://www.realpython.com
Description: Learn
Link: https://realpython.github.io/fake-jobs/jobs/product-manager-4.html
Description: Apply

Application Links for the Job:
Link: https://www.realpython.com
Description: Learn
Link: https://realpyt

In [76]:
# to extract description from the application link
def get_job_description(html_content):
    try:
        soup = BeautifulSoup(html_content, 'html.parser')
        description_element = soup.find('div', class_='content')
        if description_element:
            return description_element.get_text(strip=True)
        else:
            print("Description element not found on the page.")
            return None

    except Exception as e:
        print(f"Failed to retrieve job description. Error: {e}")
        return None

job_page_url = 'https://realpython.github.io/fake-jobs/jobs/energy-engineer-1.html'
html_content = requests.get(job_page_url).text

description = get_job_description(html_content)

if description:
    print("Job Description:")
    print(description)
else:
    print("Failed to retrieve job description.")

Job Description:
Party prevent live. Quickly candidate change although. Together type music hospital. Every speech support time operation wear often.Location:Christopherville, AAPosted:2021-04-08


In [8]:
import requests
from bs4 import BeautifulSoup

def get_html_content(url):
    try:
        response = requests.get(url)
        response.raise_for_status()
        return response.text
    except requests.exceptions.RequestException as e:
        print(f"Failed to make a request. Error: {e}")
        return None

url = "https://realpython.github.io/fake-jobs/"
html_content = get_html_content(url)

if html_content:
    soup = BeautifulSoup(html_content, 'html.parser')

def find_specific_jobs():
    python_jobs = soup.find_all("h2", string=lambda text: "officer" in text.lower())
    python_job_parent_elements = [h2_element.parent.parent.parent for h2_element in python_jobs]
    
    job_list = []

    for job_elements in python_job_parent_elements:
        title_element = job_elements.find("h2", class_="title is-5")
        company_element = job_elements.find("h3", class_="subtitle is-6 company")
        location_element = job_elements.find("p", class_="location")
        date_of_post = job_elements.find("p", class_="is-small has-text-grey")

        job_details = {
            "Job Title": title_element.text.strip(),
            "Company Name": company_element.text.strip(),
            "Location": location_element.text.strip(),
            "Date of Posting": date_of_post.text.strip(),
        }

        job_list.append(job_details)

    return job_list
jobs = find_specific_jobs()

# Print the job details stored in dictionaries
for job in jobs:
    print("Job Title:", job["Job Title"])
    print("Company Name:", job["Company Name"])
    print("Location:", job["Location"])
    print("Date of Posting:", job["Date of Posting"])
    print()

Job Title: Medical technical officer
Company Name: Rogers-Yates
Location: Davidville, AP
Date of Posting: 2021-04-08

Job Title: Waste management officer
Company Name: Jones, Williams and Villa
Location: Scotttown, AP
Date of Posting: 2021-04-08

Job Title: Historic buildings inspector/conservation officer
Company Name: Smith LLC
Location: North Brandonville, AP
Date of Posting: 2021-04-08

Job Title: Immigration officer
Company Name: Walker-Simpson
Location: Christopherport, AP
Date of Posting: 2021-04-08

Job Title: Trade union research officer
Company Name: Aguilar, Rivera and Quinn
Location: New Jimmyton, AE
Date of Posting: 2021-04-08

Job Title: Chief Strategy Officer
Company Name: Kramer-Edwards
Location: Williambury, AA
Date of Posting: 2021-04-08

Job Title: Arts development officer
Company Name: Camacho-Sanchez
Location: Philipland, AP
Date of Posting: 2021-04-08

Job Title: Museum/gallery exhibitions officer
Company Name: Nguyen, Yoder and Petty
Location: Lake Abigail, AE
Da